]> git.proxmox.com Git - ceph.git/blame - ceph/src/crush/CrushCompiler.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / crush / CrushCompiler.cc
CommitLineData
7c673cae
FG
1
2#include "CrushCompiler.h"
3
4#if defined(_AIX)
5#define EBADE ECORRUPT
6#endif
7
8#ifndef EBADE
9#define EBADE EFTYPE
10#endif
11
12#include <iostream>
13#include <stack>
14#include <functional>
15#include <string>
16#include <stdexcept>
17#include <map>
18#include <cctype>
19
20#include <typeinfo>
21#include "common/errno.h"
22#include <boost/algorithm/string.hpp>
23
24// -------------
25
26static void print_type_name(ostream& out, int t, CrushWrapper &crush)
27{
28 const char *name = crush.get_type_name(t);
29 if (name)
30 out << name;
31 else if (t == 0)
32 out << "device";
33 else
34 out << "type" << t;
35}
36
37static void print_item_name(ostream& out, int t, CrushWrapper &crush)
38{
39 const char *name = crush.get_item_name(t);
40 if (name)
41 out << name;
42 else if (t >= 0)
43 out << "device" << t;
44 else
45 out << "bucket" << (-1-t);
46}
47
48static void print_bucket_class_ids(ostream& out, int t, CrushWrapper &crush)
49{
50 if (crush.class_bucket.count(t) == 0)
51 return;
52 auto &class_to_id = crush.class_bucket[t];
53 for (auto &i : class_to_id) {
54 int c = i.first;
55 int cid = i.second;
56 const char* class_name = crush.get_class_name(c);
57 assert(class_name);
58 out << "\tid " << cid << " class " << class_name << "\t\t# do not change unnecessarily\n";
59 }
60}
61
62static void print_item_class(ostream& out, int t, CrushWrapper &crush)
63{
64 const char *c = crush.get_item_class(t);
65 if (c)
66 out << " class " << c;
67}
68
69static void print_class(ostream& out, int t, CrushWrapper &crush)
70{
71 const char *c = crush.get_class_name(t);
72 if (c)
73 out << " class " << c;
74 else
75 out << " # unexpected class " << t;
76}
77
78static void print_rule_name(ostream& out, int t, CrushWrapper &crush)
79{
80 const char *name = crush.get_rule_name(t);
81 if (name)
82 out << name;
83 else
84 out << "rule" << t;
85}
86
87static void print_fixedpoint(ostream& out, int i)
88{
89 char s[20];
90 snprintf(s, sizeof(s), "%.3f", (float)i / (float)0x10000);
91 out << s;
92}
93
94int CrushCompiler::decompile_bucket_impl(int i, ostream &out)
95{
96 const char *name = crush.get_item_name(i);
97 if (name && !crush.is_valid_crush_name(name))
98 return 0;
99 int type = crush.get_bucket_type(i);
100 print_type_name(out, type, crush);
101 out << " ";
102 print_item_name(out, i, crush);
103 out << " {\n";
104 out << "\tid " << i << "\t\t# do not change unnecessarily\n";
105 print_bucket_class_ids(out, i, crush);
106
107 out << "\t# weight ";
108 print_fixedpoint(out, crush.get_bucket_weight(i));
109 out << "\n";
110
111 int n = crush.get_bucket_size(i);
112
113 int alg = crush.get_bucket_alg(i);
114 out << "\talg " << crush_bucket_alg_name(alg);
115
116 // notate based on alg type
117 bool dopos = false;
118 switch (alg) {
119 case CRUSH_BUCKET_UNIFORM:
120 out << "\t# do not change bucket size (" << n << ") unnecessarily";
121 dopos = true;
122 break;
123 case CRUSH_BUCKET_LIST:
124 out << "\t# add new items at the end; do not change order unnecessarily";
125 break;
126 case CRUSH_BUCKET_TREE:
127 out << "\t# do not change pos for existing items unnecessarily";
128 dopos = true;
129 break;
130 }
131 out << "\n";
132
133 int hash = crush.get_bucket_hash(i);
134 out << "\thash " << hash << "\t# " << crush_hash_name(hash) << "\n";
135
136 for (int j=0; j<n; j++) {
137 int item = crush.get_bucket_item(i, j);
138 int w = crush.get_bucket_item_weight(i, j);
139 out << "\titem ";
140 print_item_name(out, item, crush);
141 out << " weight ";
142 print_fixedpoint(out, w);
143 if (dopos)
144 out << " pos " << j;
145
146 out << "\n";
147 }
148 out << "}\n";
149 return 0;
150}
151
152/* Basically, we just descend recursively into all of the buckets,
153 * executing a depth-first traversal of the graph. Since the buckets form a
154 * directed acyclic graph, this should work just fine. The graph isn't
155 * necessarily a tree, so we have to keep track of what buckets we already
156 * outputted. We don't want to output anything twice. We also keep track of
157 * what buckets are in progress so that we can detect cycles. These can
158 * arise through user error.
159 */
160int CrushCompiler::decompile_bucket(int cur,
161 std::map<int, dcb_state_t>& dcb_states,
162 ostream &out)
163{
164 if ((cur == 0) || (!crush.bucket_exists(cur)))
165 return 0;
166
167 std::map<int, dcb_state_t>::iterator c = dcb_states.find(cur);
168 if (c == dcb_states.end()) {
169 // Mark this bucket as "in progress."
170 std::map<int, dcb_state_t>::value_type val(cur, DCB_STATE_IN_PROGRESS);
171 std::pair <std::map<int, dcb_state_t>::iterator, bool> rval
172 (dcb_states.insert(val));
173 assert(rval.second);
174 c = rval.first;
175 }
176 else if (c->second == DCB_STATE_DONE) {
177 // We already did this bucket.
178 return 0;
179 }
180 else if (c->second == DCB_STATE_IN_PROGRESS) {
181 err << "decompile_crush_bucket: logic error: tried to decompile "
182 "a bucket that is already being decompiled" << std::endl;
183 return -EBADE;
184 }
185 else {
186 err << "decompile_crush_bucket: logic error: illegal bucket state! "
187 << c->second << std::endl;
188 return -EBADE;
189 }
190
191 int bsize = crush.get_bucket_size(cur);
192 for (int i = 0; i < bsize; ++i) {
193 int item = crush.get_bucket_item(cur, i);
194 std::map<int, dcb_state_t>::iterator d = dcb_states.find(item);
195 if (d == dcb_states.end()) {
196 int ret = decompile_bucket(item, dcb_states, out);
197 if (ret)
198 return ret;
199 }
200 else if (d->second == DCB_STATE_IN_PROGRESS) {
201 err << "decompile_crush_bucket: error: while trying to output bucket "
202 << cur << ", we found out that it contains one of the buckets that "
203 << "contain it. This is not allowed. The buckets must form a "
204 << "directed acyclic graph." << std::endl;
205 return -EINVAL;
206 }
207 else if (d->second != DCB_STATE_DONE) {
208 err << "decompile_crush_bucket: logic error: illegal bucket state "
209 << d->second << std::endl;
210 return -EBADE;
211 }
212 }
213 decompile_bucket_impl(cur, out);
214 c->second = DCB_STATE_DONE;
215 return 0;
216}
217
218int CrushCompiler::decompile_weight_set_weights(crush_weight_set weight_set,
219 ostream &out)
220{
221 out << " [ ";
222 for (__u32 i = 0; i < weight_set.size; i++) {
223 print_fixedpoint(out, weight_set.weights[i]);
224 out << " ";
225 }
226 out << "]\n";
227 return 0;
228}
229
230int CrushCompiler::decompile_weight_set(crush_weight_set *weight_set,
231 __u32 size,
232 ostream &out)
233{
234 out << " weight_set [\n";
235 for (__u32 i = 0; i < size; i++) {
236 int r = decompile_weight_set_weights(weight_set[i], out);
237 if (r < 0)
238 return r;
239 }
240 out << " ]\n";
241 return 0;
242}
243
244int CrushCompiler::decompile_ids(int *ids,
245 __u32 size,
246 ostream &out)
247{
248 out << " ids [ ";
249 for (__u32 i = 0; i < size; i++)
250 out << ids[i] << " ";
251 out << "]\n";
252 return 0;
253}
254
255int CrushCompiler::decompile_choose_arg(crush_choose_arg *arg,
256 int bucket_id,
257 ostream &out)
258{
259 int r;
260 out << " {\n";
261 out << " bucket_id " << bucket_id << "\n";
262 if (arg->weight_set_size > 0) {
263 r = decompile_weight_set(arg->weight_set, arg->weight_set_size, out);
264 if (r < 0)
265 return r;
266 }
267 if (arg->ids_size > 0) {
268 r = decompile_ids(arg->ids, arg->ids_size, out);
269 if (r < 0)
270 return r;
271 }
272 out << " }\n";
273 return 0;
274}
275
276int CrushCompiler::decompile_choose_arg_map(crush_choose_arg_map arg_map,
277 ostream &out)
278{
279 for (__u32 i = 0; i < arg_map.size; i++) {
280 if ((arg_map.args[i].ids_size == 0) &&
281 (arg_map.args[i].weight_set_size == 0))
282 continue;
283 int r = decompile_choose_arg(&arg_map.args[i], -1-i, out);
284 if (r < 0)
285 return r;
286 }
287 return 0;
288}
289
290int CrushCompiler::decompile_choose_args(const std::pair<const long unsigned int, crush_choose_arg_map> &i,
291 ostream &out)
292{
293 out << "choose_args " << i.first << " {\n";
294 int r = decompile_choose_arg_map(i.second, out);
295 if (r < 0)
296 return r;
297 out << "}\n";
298 return 0;
299}
300
301int CrushCompiler::decompile(ostream &out)
302{
303 crush.cleanup_classes();
304
305 out << "# begin crush map\n";
306
307 // only dump tunables if they differ from the defaults
308 if (crush.get_choose_local_tries() != 2)
309 out << "tunable choose_local_tries " << crush.get_choose_local_tries() << "\n";
310 if (crush.get_choose_local_fallback_tries() != 5)
311 out << "tunable choose_local_fallback_tries " << crush.get_choose_local_fallback_tries() << "\n";
312 if (crush.get_choose_total_tries() != 19)
313 out << "tunable choose_total_tries " << crush.get_choose_total_tries() << "\n";
314 if (crush.get_chooseleaf_descend_once() != 0)
315 out << "tunable chooseleaf_descend_once " << crush.get_chooseleaf_descend_once() << "\n";
316 if (crush.get_chooseleaf_vary_r() != 0)
317 out << "tunable chooseleaf_vary_r " << crush.get_chooseleaf_vary_r() << "\n";
318 if (crush.get_chooseleaf_stable() != 0)
319 out << "tunable chooseleaf_stable " << crush.get_chooseleaf_stable() << "\n";
320 if (crush.get_straw_calc_version() != 0)
321 out << "tunable straw_calc_version " << crush.get_straw_calc_version() << "\n";
322 if (crush.get_allowed_bucket_algs() != CRUSH_LEGACY_ALLOWED_BUCKET_ALGS)
323 out << "tunable allowed_bucket_algs " << crush.get_allowed_bucket_algs()
324 << "\n";
325
326 out << "\n# devices\n";
327 for (int i=0; i<crush.get_max_devices(); i++) {
328 out << "device " << i << " ";
329 print_item_name(out, i, crush);
330 print_item_class(out, i, crush);
331 out << "\n";
332 }
333
334 out << "\n# types\n";
335 int n = crush.get_num_type_names();
336 for (int i=0; n; i++) {
337 const char *name = crush.get_type_name(i);
338 if (!name) {
339 if (i == 0) out << "type 0 osd\n";
340 continue;
341 }
342 n--;
343 out << "type " << i << " " << name << "\n";
344 }
345
346 out << "\n# buckets\n";
347 std::map<int, dcb_state_t> dcb_states;
348 for (int bucket = -1; bucket > -1-crush.get_max_buckets(); --bucket) {
349 int ret = decompile_bucket(bucket, dcb_states, out);
350 if (ret)
351 return ret;
352 }
353
354 out << "\n# rules\n";
355 for (int i=0; i<crush.get_max_rules(); i++) {
356 if (!crush.rule_exists(i))
357 continue;
358 out << "rule ";
359 if (crush.get_rule_name(i))
360 print_rule_name(out, i, crush);
361 out << " {\n";
362 out << "\truleset " << crush.get_rule_mask_ruleset(i) << "\n";
363
364 switch (crush.get_rule_mask_type(i)) {
365 case CEPH_PG_TYPE_REPLICATED:
366 out << "\ttype replicated\n";
367 break;
368 case CEPH_PG_TYPE_ERASURE:
369 out << "\ttype erasure\n";
370 break;
371 default:
372 out << "\ttype " << crush.get_rule_mask_type(i) << "\n";
373 }
374
375 out << "\tmin_size " << crush.get_rule_mask_min_size(i) << "\n";
376 out << "\tmax_size " << crush.get_rule_mask_max_size(i) << "\n";
377
378 for (int j=0; j<crush.get_rule_len(i); j++) {
379 switch (crush.get_rule_op(i, j)) {
380 case CRUSH_RULE_NOOP:
381 out << "\tstep noop\n";
382 break;
383 case CRUSH_RULE_TAKE:
384 out << "\tstep take ";
385 {
386 int step_item = crush.get_rule_arg1(i, j);
387 int original_item;
388 int c;
389 int res = crush.split_id_class(step_item, &original_item, &c);
390 if (res < 0)
391 return res;
392 if (c >= 0)
393 step_item = original_item;
394 print_item_name(out, step_item, crush);
395 if (c >= 0)
396 print_class(out, c, crush);
397 }
398 out << "\n";
399 break;
400 case CRUSH_RULE_EMIT:
401 out << "\tstep emit\n";
402 break;
403 case CRUSH_RULE_SET_CHOOSE_TRIES:
404 out << "\tstep set_choose_tries " << crush.get_rule_arg1(i, j)
405 << "\n";
406 break;
407 case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
408 out << "\tstep set_choose_local_tries " << crush.get_rule_arg1(i, j)
409 << "\n";
410 break;
411 case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
412 out << "\tstep set_choose_local_fallback_tries " << crush.get_rule_arg1(i, j)
413 << "\n";
414 break;
415 case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
416 out << "\tstep set_chooseleaf_tries " << crush.get_rule_arg1(i, j)
417 << "\n";
418 break;
419 case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
420 out << "\tstep set_chooseleaf_vary_r " << crush.get_rule_arg1(i, j)
421 << "\n";
422 break;
423 case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
424 out << "\tstep set_chooseleaf_stable " << crush.get_rule_arg1(i, j)
425 << "\n";
426 break;
427 case CRUSH_RULE_CHOOSE_FIRSTN:
428 out << "\tstep choose firstn "
429 << crush.get_rule_arg1(i, j)
430 << " type ";
431 print_type_name(out, crush.get_rule_arg2(i, j), crush);
432 out << "\n";
433 break;
434 case CRUSH_RULE_CHOOSE_INDEP:
435 out << "\tstep choose indep "
436 << crush.get_rule_arg1(i, j)
437 << " type ";
438 print_type_name(out, crush.get_rule_arg2(i, j), crush);
439 out << "\n";
440 break;
441 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
442 out << "\tstep chooseleaf firstn "
443 << crush.get_rule_arg1(i, j)
444 << " type ";
445 print_type_name(out, crush.get_rule_arg2(i, j), crush);
446 out << "\n";
447 break;
448 case CRUSH_RULE_CHOOSELEAF_INDEP:
449 out << "\tstep chooseleaf indep "
450 << crush.get_rule_arg1(i, j)
451 << " type ";
452 print_type_name(out, crush.get_rule_arg2(i, j), crush);
453 out << "\n";
454 break;
455 }
456 }
457 out << "}\n";
458 }
459 if (crush.choose_args.size() > 0) {
460 out << "\n# choose_args\n";
461 for (auto i : crush.choose_args) {
462 int ret = decompile_choose_args(i, out);
463 if (ret)
464 return ret;
465 }
466 }
467 out << "\n# end crush map" << std::endl;
468 return 0;
469}
470
471
472// ================================================================
473
474string CrushCompiler::string_node(node_t &node)
475{
476 return boost::trim_copy(string(node.value.begin(), node.value.end()));
477}
478
479int CrushCompiler::int_node(node_t &node)
480{
481 string str = string_node(node);
482 return strtol(str.c_str(), 0, 10);
483}
484
485float CrushCompiler::float_node(node_t &node)
486{
487 string s = string_node(node);
488 return strtof(s.c_str(), 0);
489}
490
491int CrushCompiler::parse_device(iter_t const& i)
492{
493 int id = int_node(i->children[1]);
494
495 string name = string_node(i->children[2]);
496 crush.set_item_name(id, name.c_str());
497 if (item_id.count(name)) {
498 err << "item " << name << " defined twice" << std::endl;
499 return -1;
500 }
501 item_id[name] = id;
502 id_item[id] = name;
503
504 if (verbose) err << "device " << id << " '" << name << "'";
505
506 if (i->children.size() > 3) {
507 string c = string_node(i->children[4]);
508 crush.set_item_class(id, c);
509 if (verbose) err << " class" << " '" << c << "'" << std::endl;
510 } else {
511 if (verbose) err << std::endl;
512 }
513 return 0;
514}
515
516int CrushCompiler::parse_tunable(iter_t const& i)
517{
518 string name = string_node(i->children[1]);
519 int val = int_node(i->children[2]);
520
521 if (name == "choose_local_tries")
522 crush.set_choose_local_tries(val);
523 else if (name == "choose_local_fallback_tries")
524 crush.set_choose_local_fallback_tries(val);
525 else if (name == "choose_total_tries")
526 crush.set_choose_total_tries(val);
527 else if (name == "chooseleaf_descend_once")
528 crush.set_chooseleaf_descend_once(val);
529 else if (name == "chooseleaf_vary_r")
530 crush.set_chooseleaf_vary_r(val);
531 else if (name == "chooseleaf_stable")
532 crush.set_chooseleaf_stable(val);
533 else if (name == "straw_calc_version")
534 crush.set_straw_calc_version(val);
535 else if (name == "allowed_bucket_algs")
536 crush.set_allowed_bucket_algs(val);
537 else {
538 err << "tunable " << name << " not recognized" << std::endl;
539 return -1;
540 }
541
542 /*
543
544 current crop of tunables are all now "safe". re-enable this when we
545 add new ones that are ... new.
546
547 if (!unsafe_tunables) {
548 err << "tunables are NOT FULLY IMPLEMENTED; enable with --enable-unsafe-tunables to enable this feature" << std::endl;
549 return -1;
550 }
551 */
552
553 if (verbose) err << "tunable " << name << " " << val << std::endl;
554 return 0;
555}
556
557int CrushCompiler::parse_bucket_type(iter_t const& i)
558{
559 int id = int_node(i->children[1]);
560 string name = string_node(i->children[2]);
561 if (verbose) err << "type " << id << " '" << name << "'" << std::endl;
562 type_id[name] = id;
563 crush.set_type_name(id, name.c_str());
564 return 0;
565}
566
567int CrushCompiler::parse_bucket(iter_t const& i)
568{
569 string tname = string_node(i->children[0]);
570 if (!type_id.count(tname)) {
571 err << "bucket type '" << tname << "' is not defined" << std::endl;
572 return -1;
573 }
574 int type = type_id[tname];
575
576 string name = string_node(i->children[1]);
577 if (item_id.count(name)) {
578 err << "bucket or device '" << name << "' is already defined" << std::endl;
579 return -1;
580 }
581
582 int id = 0; // none, yet!
583 int alg = -1;
584 int hash = 0;
585 set<int> used_items;
586 int size = 0;
587 map<int32_t, int32_t> class_id;
588
589 for (unsigned p=3; p<i->children.size()-1; p++) {
590 iter_t sub = i->children.begin() + p;
591 string tag = string_node(sub->children[0]);
592 //err << "tag " << tag << std::endl;
593 if (tag == "id") {
594 int maybe_id = int_node(sub->children[1]);
595 if (verbose) err << "bucket " << name << " id " << maybe_id;
596 if (sub->children.size() > 2) {
597 string class_name = string_node(sub->children[3]);
598 if (!crush.class_exists(class_name)) {
599 err << " unknown device class '" << class_name << "'" << std::endl;
600 return -EINVAL;
601 }
602 int cid = crush.get_class_id(class_name);
603 if (class_id.count(cid) != 0) {
604 err << "duplicate device class " << class_name << " for bucket " << name << std::endl;
605 return -ERANGE;
606 }
607 class_id[cid] = maybe_id;
608 if (verbose) err << " class" << " '" << class_name << "'" << std::endl;
609 } else {
610 id = maybe_id;
611 if (verbose) err << std::endl;
612 }
613 } else if (tag == "alg") {
614 string a = string_node(sub->children[1]);
615 if (a == "uniform")
616 alg = CRUSH_BUCKET_UNIFORM;
617 else if (a == "list")
618 alg = CRUSH_BUCKET_LIST;
619 else if (a == "tree")
620 alg = CRUSH_BUCKET_TREE;
621 else if (a == "straw")
622 alg = CRUSH_BUCKET_STRAW;
623 else if (a == "straw2")
624 alg = CRUSH_BUCKET_STRAW2;
625 else {
626 err << "unknown bucket alg '" << a << "'" << std::endl << std::endl;
627 return -EINVAL;
628 }
629 }
630 else if (tag == "hash") {
631 string a = string_node(sub->children[1]);
632 if (a == "rjenkins1")
633 hash = CRUSH_HASH_RJENKINS1;
634 else
635 hash = atoi(a.c_str());
636 }
637 else if (tag == "item") {
638 // first, just determine which item pos's are already used
639 size++;
640 for (unsigned q = 2; q < sub->children.size(); q++) {
641 string tag = string_node(sub->children[q++]);
642 if (tag == "pos") {
643 int pos = int_node(sub->children[q]);
644 if (used_items.count(pos)) {
645 err << "item '" << string_node(sub->children[1]) << "' in bucket '" << name << "' has explicit pos " << pos << ", which is occupied" << std::endl;
646 return -1;
647 }
648 used_items.insert(pos);
649 }
650 }
651 }
652 else ceph_abort();
653 }
654
655 // now do the items.
656 if (!used_items.empty())
657 size = MAX(size, *used_items.rbegin());
658 vector<int> items(size);
659 vector<int> weights(size);
660
661 int curpos = 0;
662 unsigned bucketweight = 0;
663 bool have_uniform_weight = false;
664 unsigned uniform_weight = 0;
665 for (unsigned p=3; p<i->children.size()-1; p++) {
666 iter_t sub = i->children.begin() + p;
667 string tag = string_node(sub->children[0]);
668 if (tag == "item") {
669
670 string iname = string_node(sub->children[1]);
671 if (!item_id.count(iname)) {
672 err << "item '" << iname << "' in bucket '" << name << "' is not defined" << std::endl;
673 return -1;
674 }
675 int itemid = item_id[iname];
676
677 unsigned weight = 0x10000;
678 if (item_weight.count(itemid))
679 weight = item_weight[itemid];
680
681 int pos = -1;
682 for (unsigned q = 2; q < sub->children.size(); q++) {
683 string tag = string_node(sub->children[q++]);
684 if (tag == "weight") {
685 weight = float_node(sub->children[q]) * (float)0x10000;
686 if (weight > CRUSH_MAX_DEVICE_WEIGHT && itemid >= 0) {
687 err << "device weight limited to " << CRUSH_MAX_DEVICE_WEIGHT / 0x10000 << std::endl;
688 return -ERANGE;
689 }
690 else if (weight > CRUSH_MAX_BUCKET_WEIGHT && itemid < 0) {
691 err << "bucket weight limited to " << CRUSH_MAX_BUCKET_WEIGHT / 0x10000
692 << " to prevent overflow" << std::endl;
693 return -ERANGE;
694 }
695 }
696 else if (tag == "pos")
697 pos = int_node(sub->children[q]);
698 else
699 ceph_abort();
700
701 }
702 if (alg == CRUSH_BUCKET_UNIFORM) {
703 if (!have_uniform_weight) {
704 have_uniform_weight = true;
705 uniform_weight = weight;
706 } else {
707 if (uniform_weight != weight) {
708 err << "item '" << iname << "' in uniform bucket '" << name << "' has weight " << weight
709 << " but previous item(s) have weight " << (float)uniform_weight/(float)0x10000
710 << "; uniform bucket items must all have identical weights." << std::endl;
711 return -1;
712 }
713 }
714 }
715
716 if (pos >= size) {
717 err << "item '" << iname << "' in bucket '" << name << "' has pos " << pos << " >= size " << size << std::endl;
718 return -1;
719 }
720 if (pos < 0) {
721 while (used_items.count(curpos)) curpos++;
722 pos = curpos++;
723 }
724 //err << " item " << iname << " (" << itemid << ") pos " << pos << " weight " << weight << std::endl;
725 items[pos] = itemid;
726 weights[pos] = weight;
727
728 if (crush_addition_is_unsafe(bucketweight, weight)) {
729 err << "oh no! our bucket weights are overflowing all over the place, better lower the item weights" << std::endl;
730 return -ERANGE;
731 }
732
733 bucketweight += weight;
734 }
735 }
736
737 if (id == 0) {
738 for (id=-1; id_item.count(id); id--) ;
739 //err << "assigned id " << id << std::endl;
740 }
741
742 for (auto &i : class_id)
743 crush.class_bucket[id][i.first] = i.second;
744
745 if (verbose) err << "bucket " << name << " (" << id << ") " << size << " items and weight "
746 << (float)bucketweight / (float)0x10000 << std::endl;
747 id_item[id] = name;
748 item_id[name] = id;
749 item_weight[id] = bucketweight;
750
751 assert(id != 0);
752 int r = crush.add_bucket(id, alg, hash, type, size, &items[0], &weights[0], NULL);
753 if (r < 0) {
754 if (r == -EEXIST)
755 err << "Duplicate bucket id " << id << std::endl;
756 else
757 err << "add_bucket failed " << cpp_strerror(r) << std::endl;
758 return r;
759 }
760 r = crush.set_item_name(id, name.c_str());
761 return r;
762}
763
764int CrushCompiler::parse_rule(iter_t const& i)
765{
766 crush.populate_classes();
767
768 int start; // rule name is optional!
769
770 string rname = string_node(i->children[1]);
771 if (rname != "{") {
772 if (rule_id.count(rname)) {
773 err << "rule name '" << rname << "' already defined\n" << std::endl;
774 return -1;
775 }
776 start = 4;
777 } else {
778 rname = string();
779 start = 3;
780 }
781
782 int ruleset = int_node(i->children[start]);
783
784 string tname = string_node(i->children[start+2]);
785 int type;
786 if (tname == "replicated")
787 type = CEPH_PG_TYPE_REPLICATED;
788 else if (tname == "erasure")
789 type = CEPH_PG_TYPE_ERASURE;
790 else
791 ceph_abort();
792
793 int minsize = int_node(i->children[start+4]);
794 int maxsize = int_node(i->children[start+6]);
795
796 int steps = i->children.size() - start - 8;
797 //err << "num steps " << steps << std::endl;
798
799 int ruleno = crush.add_rule(steps, ruleset, type, minsize, maxsize, -1);
800 if (rname.length()) {
801 crush.set_rule_name(ruleno, rname.c_str());
802 rule_id[rname] = ruleno;
803 }
804
805 int step = 0;
806 for (iter_t p = i->children.begin() + start + 7; step < steps; p++) {
807 iter_t s = p->children.begin() + 1;
808 int stepid = s->value.id().to_long();
809 switch (stepid) {
810 case crush_grammar::_step_take:
811 {
812 string item = string_node(s->children[1]);
813 if (!item_id.count(item)) {
814 err << "in rule '" << rname << "' item '" << item << "' not defined" << std::endl;
815 return -1;
816 }
817 int id = item_id[item];
818 int c = -1;
819 string class_name;
820 if (s->children.size() > 2) {
821 class_name = string_node(s->children[3]);
822 c = crush.get_class_id(class_name);
823 if (c < 0)
824 return c;
825 if (crush.class_bucket.count(id) == 0) {
826 err << "in rule '" << rname << "' step take " << item
827 << " has no class information" << std::endl;
828 return -EINVAL;
829 }
830 if (crush.class_bucket[id].count(c) == 0) {
831 err << "in rule '" << rname << "' step take " << item
832 << " no matching bucket for class " << class_name << std::endl;
833 return -EINVAL;
834 }
835 id = crush.class_bucket[id][c];
836 }
837 if (verbose) {
838 err << "rule " << rname << " take " << item;
839 if (c < 0)
840 err << std::endl;
841 else
842 err << " remapped to " << crush.get_item_name(id) << std::endl;
843 }
844
845 crush.set_rule_step_take(ruleno, step++, id);
846 }
847 break;
848
849 case crush_grammar::_step_set_choose_tries:
850 {
851 int val = int_node(s->children[1]);
852 crush.set_rule_step_set_choose_tries(ruleno, step++, val);
853 }
854 break;
855
856 case crush_grammar::_step_set_choose_local_tries:
857 {
858 int val = int_node(s->children[1]);
859 crush.set_rule_step_set_choose_local_tries(ruleno, step++, val);
860 }
861 break;
862
863 case crush_grammar::_step_set_choose_local_fallback_tries:
864 {
865 int val = int_node(s->children[1]);
866 crush.set_rule_step_set_choose_local_fallback_tries(ruleno, step++, val);
867 }
868 break;
869
870 case crush_grammar::_step_set_chooseleaf_tries:
871 {
872 int val = int_node(s->children[1]);
873 crush.set_rule_step_set_chooseleaf_tries(ruleno, step++, val);
874 }
875 break;
876
877 case crush_grammar::_step_set_chooseleaf_vary_r:
878 {
879 int val = int_node(s->children[1]);
880 crush.set_rule_step_set_chooseleaf_vary_r(ruleno, step++, val);
881 }
882 break;
883
884 case crush_grammar::_step_set_chooseleaf_stable:
885 {
886 int val = int_node(s->children[1]);
887 crush.set_rule_step_set_chooseleaf_stable(ruleno, step++, val);
888 }
889 break;
890
891 case crush_grammar::_step_choose:
892 case crush_grammar::_step_chooseleaf:
893 {
894 string type = string_node(s->children[4]);
895 if (!type_id.count(type)) {
896 err << "in rule '" << rname << "' type '" << type << "' not defined" << std::endl;
897 return -1;
898 }
899 string choose = string_node(s->children[0]);
900 string mode = string_node(s->children[1]);
901 if (choose == "choose") {
902 if (mode == "firstn")
903 crush.set_rule_step_choose_firstn(ruleno, step++, int_node(s->children[2]), type_id[type]);
904 else if (mode == "indep")
905 crush.set_rule_step_choose_indep(ruleno, step++, int_node(s->children[2]), type_id[type]);
906 else ceph_abort();
907 } else if (choose == "chooseleaf") {
908 if (mode == "firstn")
909 crush.set_rule_step_choose_leaf_firstn(ruleno, step++, int_node(s->children[2]), type_id[type]);
910 else if (mode == "indep")
911 crush.set_rule_step_choose_leaf_indep(ruleno, step++, int_node(s->children[2]), type_id[type]);
912 else ceph_abort();
913 } else ceph_abort();
914 }
915 break;
916
917 case crush_grammar::_step_emit:
918 crush.set_rule_step_emit(ruleno, step++);
919 break;
920
921 default:
922 err << "bad crush step " << stepid << std::endl;
923 return -1;
924 }
925 }
926 assert(step == steps);
927 return 0;
928}
929
930int CrushCompiler::parse_weight_set_weights(iter_t const& i, int bucket_id, crush_weight_set *weight_set)
931{
932 // -2 for the enclosing [ ]
933 __u32 size = i->children.size() - 2;
934 __u32 bucket_size = crush.get_bucket_size(bucket_id);
935 if (size != bucket_size) {
936 err << bucket_id << " needs exactly " << bucket_size
937 << " weights but got " << size << std::endl;
938 return -1;
939 }
940 weight_set->size = size;
941 weight_set->weights = (__u32 *)calloc(weight_set->size, sizeof(__u32));
942 __u32 pos = 0;
943 for (iter_t p = i->children.begin() + 1; p != i->children.end(); p++, pos++)
944 if (pos < size)
945 weight_set->weights[pos] = float_node(*p) * (float)0x10000;
946 return 0;
947}
948
949int CrushCompiler::parse_weight_set(iter_t const& i, int bucket_id, crush_choose_arg *arg)
950{
951 // -3 stands for the leading "weight_set" keyword and the enclosing [ ]
952 arg->weight_set_size = i->children.size() - 3;
953 arg->weight_set = (crush_weight_set *)calloc(arg->weight_set_size, sizeof(crush_weight_set));
954 __u32 pos = 0;
955 for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
956 int r = 0;
957 switch((int)p->value.id().to_long()) {
958 case crush_grammar::_weight_set_weights:
959 if (pos < arg->weight_set_size) {
960 r = parse_weight_set_weights(p, bucket_id, &arg->weight_set[pos]);
961 pos++;
962 } else {
963 err << "invalid weight_set syntax" << std::endl;
964 r = -1;
965 }
966 }
967 if (r < 0)
968 return r;
969 }
970 return 0;
971}
972
973int CrushCompiler::parse_choose_arg_ids(iter_t const& i, int bucket_id, crush_choose_arg *arg)
974{
975 // -3 for the leading "ids" keyword and the enclosing [ ]
976 __u32 size = i->children.size() - 3;
977 __u32 bucket_size = crush.get_bucket_size(bucket_id);
978 if (size != bucket_size) {
979 err << bucket_id << " needs exactly " << bucket_size
980 << " ids but got " << size << std::endl;
981 return -1;
982 }
983 arg->ids_size = size;
984 arg->ids = (int *)calloc(arg->ids_size, sizeof(int));
985 __u32 pos = 0;
986 for (iter_t p = i->children.begin() + 2; pos < size; p++, pos++)
987 arg->ids[pos] = int_node(*p);
988 return 0;
989}
990
991int CrushCompiler::parse_choose_arg(iter_t const& i, crush_choose_arg *args)
992{
993 int bucket_id = int_node(i->children[2]);
994 if (-1-bucket_id < 0 || -1-bucket_id >= crush.get_max_buckets()) {
995 err << bucket_id << " is out of range" << std::endl;
996 return -1;
997 }
998 if (!crush.bucket_exists(bucket_id)) {
999 err << bucket_id << " does not exist" << std::endl;
1000 return -1;
1001 }
1002 crush_choose_arg *arg = &args[-1-bucket_id];
1003 for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
1004 int r = 0;
1005 switch((int)p->value.id().to_long()) {
1006 case crush_grammar::_weight_set:
1007 r = parse_weight_set(p, bucket_id, arg);
1008 break;
1009 case crush_grammar::_choose_arg_ids:
1010 r = parse_choose_arg_ids(p, bucket_id, arg);
1011 break;
1012 }
1013 if (r < 0)
1014 return r;
1015 }
1016 return 0;
1017}
1018
1019int CrushCompiler::parse_choose_args(iter_t const& i)
1020{
1021 int choose_arg_index = int_node(i->children[1]);
1022 if (crush.choose_args.find(choose_arg_index) != crush.choose_args.end()) {
1023 err << choose_arg_index << " duplicated" << std::endl;
1024 return -1;
1025 }
1026 crush_choose_arg_map arg_map;
1027 arg_map.size = crush.get_max_buckets();
1028 arg_map.args = (crush_choose_arg *)calloc(arg_map.size, sizeof(crush_choose_arg));
1029 for (iter_t p = i->children.begin() + 2; p != i->children.end(); p++) {
1030 int r = 0;
1031 switch((int)p->value.id().to_long()) {
1032 case crush_grammar::_choose_arg:
1033 r = parse_choose_arg(p, arg_map.args);
1034 break;
1035 }
1036 if (r < 0) {
1037 crush.destroy_choose_args(arg_map);
1038 return r;
1039 }
1040 }
1041 crush.choose_args[choose_arg_index] = arg_map;
1042 return 0;
1043}
1044
1045void CrushCompiler::find_used_bucket_ids(iter_t const& i)
1046{
1047 for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
1048 if ((int)p->value.id().to_long() == crush_grammar::_bucket) {
1049 iter_t firstline = p->children.begin() + 3;
1050 string tag = string_node(firstline->children[0]);
1051 if (tag == "id") {
1052 int id = int_node(firstline->children[1]);
1053 //err << "saw bucket id " << id << std::endl;
1054 id_item[id] = string();
1055 }
1056 }
1057 }
1058}
1059
1060int CrushCompiler::parse_crush(iter_t const& i)
1061{
1062 find_used_bucket_ids(i);
1063
1064 for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
1065 int r = 0;
1066 switch (p->value.id().to_long()) {
1067 case crush_grammar::_tunable:
1068 r = parse_tunable(p);
1069 break;
1070 case crush_grammar::_device:
1071 r = parse_device(p);
1072 break;
1073 case crush_grammar::_bucket_type:
1074 r = parse_bucket_type(p);
1075 break;
1076 case crush_grammar::_bucket:
1077 r = parse_bucket(p);
1078 break;
1079 case crush_grammar::_crushrule:
1080 r = parse_rule(p);
1081 break;
1082 case crush_grammar::_choose_args:
1083 r = parse_choose_args(p);
1084 break;
1085 default:
1086 ceph_abort();
1087 }
1088 if (r < 0) {
1089 return r;
1090 }
1091 }
1092
1093 //err << "max_devices " << crush.get_max_devices() << std::endl;
1094 crush.cleanup_classes();
1095 crush.finalize();
1096
1097 return 0;
1098}
1099
1100// squash runs of whitespace to one space, excepting newlines
1101string CrushCompiler::consolidate_whitespace(string in)
1102{
1103 string out;
1104
1105 bool white = false;
1106 for (unsigned p=0; p<in.length(); p++) {
1107 if (isspace(in[p]) && in[p] != '\n') {
1108 if (white)
1109 continue;
1110 white = true;
1111 } else {
1112 if (white) {
1113 if (out.length()) out += " ";
1114 white = false;
1115 }
1116 out += in[p];
1117 }
1118 }
1119 if (verbose > 3)
1120 err << " \"" << in << "\" -> \"" << out << "\"" << std::endl;
1121 return out;
1122}
1123
1124void CrushCompiler::dump(iter_t const& i, int ind)
1125{
1126 err << "dump";
1127 for (int j=0; j<ind; j++)
1128 cout << "\t";
1129 long id = i->value.id().to_long();
1130 err << id << "\t";
1131 err << "'" << string(i->value.begin(), i->value.end())
1132 << "' " << i->children.size() << " children" << std::endl;
1133 for (unsigned int j = 0; j < i->children.size(); j++)
1134 dump(i->children.begin() + j, ind+1);
1135}
1136
1137/**
1138* This function fix the problem like below
1139* rack using_foo { item foo }
1140* host foo { ... }
1141*
1142* if an item being used by a bucket is defined after that bucket.
1143* CRUSH compiler will create a map by which we can
1144* not identify that item when selecting in that bucket.
1145**/
1146int CrushCompiler::adjust_bucket_item_place(iter_t const &i)
1147{
1148 map<string,set<string> > bucket_items;
1149 map<string,iter_t> bucket_itrer;
1150 vector<string> buckets;
1151 for (iter_t p = i->children.begin(); p != i->children.end(); ++p) {
1152 if ((int)p->value.id().to_long() == crush_grammar::_bucket) {
1153 string name = string_node(p->children[1]);
1154 buckets.push_back(name);
1155 bucket_itrer[name] = p;
1156 //skip non-bucket-item children in the bucket's parse tree
1157 for (unsigned q=3; q < p->children.size()-1; ++q) {
1158 iter_t sub = p->children.begin() + q;
1159 if ((int)sub->value.id().to_long()
1160 == crush_grammar::_bucket_item) {
1161 string iname = string_node(sub->children[1]);
1162 bucket_items[name].insert(iname);
1163 }
1164 }
1165 }
1166 }
1167
1168 //adjust the bucket
1169 for (unsigned i=0; i < buckets.size(); ++i) {
1170 for (unsigned j=i+1; j < buckets.size(); ++j) {
1171 if (bucket_items[buckets[i]].count(buckets[j])) {
1172 if (bucket_items[buckets[j]].count(buckets[i])) {
1173 err << "bucket '" << buckets[i] << "' and bucket '"
1174 << buckets[j] << "' are included each other" << std::endl;
1175 return -1;
1176 } else {
1177 std::iter_swap(bucket_itrer[buckets[i]], bucket_itrer[buckets[j]]);
1178 }
1179 }
1180 }
1181 }
1182
1183 return 0;
1184}
1185
1186int CrushCompiler::compile(istream& in, const char *infn)
1187{
1188 if (!infn)
1189 infn = "<input>";
1190
1191 // always start with legacy tunables, so that the compiled result of
1192 // a given crush file is fixed for all time.
1193 crush.set_tunables_legacy();
1194
1195 string big;
1196 string str;
1197 int line = 1;
1198 map<int,int> line_pos; // pos -> line
1199 map<int,string> line_val;
1200 while (getline(in, str)) {
1201 // remove newline
1202 int l = str.length();
1203 if (l && str[l - 1] == '\n')
1204 str.erase(l-1, 1);
1205
1206 line_val[line] = str;
1207
1208 // strip comment
1209 int n = str.find("#");
1210 if (n >= 0)
1211 str.erase(n, str.length()-n);
1212
1213 if (verbose>1) err << line << ": " << str << std::endl;
1214
1215 // work around spirit crankiness by removing extraneous
1216 // whitespace. there is probably a more elegant solution, but
1217 // this only broke with the latest spirit (with the switchover to
1218 // "classic"), i don't want to spend too much time figuring it
1219 // out.
1220 string stripped = consolidate_whitespace(str);
1221 if (stripped.length() && big.length() && big[big.length()-1] != ' ') big += " ";
1222
1223 line_pos[big.length()] = line;
1224 line++;
1225 big += stripped;
1226 }
1227
1228 if (verbose > 2) err << "whole file is: \"" << big << "\"" << std::endl;
1229
1230 crush_grammar crushg;
1231 const char *start = big.c_str();
1232 //tree_parse_info<const char *> info = ast_parse(start, crushg, space_p);
1233 tree_parse_info<> info = ast_parse(start, crushg, space_p);
1234
1235 // parse error?
1236 if (!info.full) {
1237 int cpos = info.stop - start;
1238 //out << "cpos " << cpos << std::endl;
1239 //out << " linemap " << line_pos << std::endl;
1240 assert(!line_pos.empty());
1241 map<int,int>::iterator p = line_pos.upper_bound(cpos);
1242 if (p != line_pos.begin())
1243 --p;
1244 int line = p->second;
1245 int pos = cpos - p->first;
1246 err << infn << ":" << line //<< ":" << (pos+1)
1247 << " error: parse error at '" << line_val[line].substr(pos) << "'" << std::endl;
1248 return -1;
1249 }
1250
1251 int r = adjust_bucket_item_place(info.trees.begin());
1252 if (r < 0) {
1253 return r;
1254 }
1255 //out << "parsing succeeded\n";
1256 //dump(info.trees.begin());
1257 return parse_crush(info.trees.begin());
1258}