]> git.proxmox.com Git - ceph.git/blob - ceph/src/crush/builder.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / crush / builder.c
1 #include <string.h>
2 #include <limits.h>
3 #include <math.h>
4 #include <stdlib.h>
5 #include <stdio.h>
6 #include <assert.h>
7 #include <errno.h>
8
9 #include "include/int_types.h"
10
11 #include "builder.h"
12 #include "hash.h"
13
14 #define dprintk(args...) /* printf(args) */
15
16 #define BUG_ON(x) assert(!(x))
17
18 struct crush_map *crush_create()
19 {
20 struct crush_map *m;
21 m = malloc(sizeof(*m));
22 if (!m)
23 return NULL;
24 memset(m, 0, sizeof(*m));
25
26 set_optimal_crush_map(m);
27 return m;
28 }
29
30 /*
31 * finalize should be called _after_ all buckets are added to the map.
32 */
33 void crush_finalize(struct crush_map *map)
34 {
35 int b;
36 __u32 i;
37
38 /* Calculate the needed working space while we do other
39 finalization tasks. */
40 map->working_size = sizeof(struct crush_work);
41 /* Space for the array of pointers to per-bucket workspace */
42 map->working_size += map->max_buckets *
43 sizeof(struct crush_work_bucket *);
44
45 /* calc max_devices */
46 map->max_devices = 0;
47 for (b=0; b<map->max_buckets; b++) {
48 if (map->buckets[b] == 0)
49 continue;
50 for (i=0; i<map->buckets[b]->size; i++)
51 if (map->buckets[b]->items[i] >= map->max_devices)
52 map->max_devices = map->buckets[b]->items[i] + 1;
53
54 switch (map->buckets[b]->alg) {
55 default:
56 /* The base case, permutation variables and
57 the pointer to the permutation array. */
58 map->working_size += sizeof(struct crush_work_bucket);
59 break;
60 }
61 /* Every bucket has a permutation array. */
62 map->working_size += map->buckets[b]->size * sizeof(__u32);
63 }
64 }
65
66
67
68 /** rules **/
69
70 int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno)
71 {
72 __u32 r;
73
74 if (ruleno < 0) {
75 for (r=0; r < map->max_rules; r++)
76 if (map->rules[r] == 0)
77 break;
78 assert(r < CRUSH_MAX_RULES);
79 }
80 else
81 r = ruleno;
82
83 if (r >= map->max_rules) {
84 /* expand array */
85 int oldsize;
86 void *_realloc = NULL;
87 if (map->max_rules +1 > CRUSH_MAX_RULES)
88 return -ENOSPC;
89 oldsize = map->max_rules;
90 map->max_rules = r+1;
91 if ((_realloc = realloc(map->rules, map->max_rules * sizeof(map->rules[0]))) == NULL) {
92 return -ENOMEM;
93 } else {
94 map->rules = _realloc;
95 }
96 memset(map->rules + oldsize, 0, (map->max_rules-oldsize) * sizeof(map->rules[0]));
97 }
98
99 /* add it */
100 map->rules[r] = rule;
101 return r;
102 }
103
104 struct crush_rule *crush_make_rule(int len, int ruleset, int type, int minsize, int maxsize)
105 {
106 struct crush_rule *rule;
107 rule = malloc(crush_rule_size(len));
108 if (!rule)
109 return NULL;
110 rule->len = len;
111 rule->mask.ruleset = ruleset;
112 rule->mask.type = type;
113 rule->mask.min_size = minsize;
114 rule->mask.max_size = maxsize;
115 return rule;
116 }
117
118 /*
119 * be careful; this doesn't verify that the buffer you allocated is big enough!
120 */
121 void crush_rule_set_step(struct crush_rule *rule, int n, int op, int arg1, int arg2)
122 {
123 assert((__u32)n < rule->len);
124 rule->steps[n].op = op;
125 rule->steps[n].arg1 = arg1;
126 rule->steps[n].arg2 = arg2;
127 }
128
129
130 /** buckets **/
131 int crush_get_next_bucket_id(struct crush_map *map)
132 {
133 int pos;
134 for (pos=0; pos < map->max_buckets; pos++)
135 if (map->buckets[pos] == 0)
136 break;
137 return -1 - pos;
138 }
139
140
141 int crush_add_bucket(struct crush_map *map,
142 int id,
143 struct crush_bucket *bucket,
144 int *idout)
145 {
146 int pos;
147
148 /* find a bucket id */
149 if (id == 0)
150 id = crush_get_next_bucket_id(map);
151 pos = -1 - id;
152
153 while (pos >= map->max_buckets) {
154 /* expand array */
155 int oldsize = map->max_buckets;
156 if (map->max_buckets)
157 map->max_buckets *= 2;
158 else
159 map->max_buckets = 8;
160 void *_realloc = NULL;
161 if ((_realloc = realloc(map->buckets, map->max_buckets * sizeof(map->buckets[0]))) == NULL) {
162 return -ENOMEM;
163 } else {
164 map->buckets = _realloc;
165 }
166 memset(map->buckets + oldsize, 0, (map->max_buckets-oldsize) * sizeof(map->buckets[0]));
167 }
168
169 if (map->buckets[pos] != 0) {
170 return -EEXIST;
171 }
172
173 /* add it */
174 bucket->id = id;
175 map->buckets[pos] = bucket;
176
177 if (idout) *idout = id;
178 return 0;
179 }
180
181 int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket)
182 {
183 int pos = -1 - bucket->id;
184 assert(pos < map->max_buckets);
185 map->buckets[pos] = NULL;
186 crush_destroy_bucket(bucket);
187 return 0;
188 }
189
190
191 /* uniform bucket */
192
193 struct crush_bucket_uniform *
194 crush_make_uniform_bucket(int hash, int type, int size,
195 int *items,
196 int item_weight)
197 {
198 int i;
199 struct crush_bucket_uniform *bucket;
200
201 bucket = malloc(sizeof(*bucket));
202 if (!bucket)
203 return NULL;
204 memset(bucket, 0, sizeof(*bucket));
205 bucket->h.alg = CRUSH_BUCKET_UNIFORM;
206 bucket->h.hash = hash;
207 bucket->h.type = type;
208 bucket->h.size = size;
209
210 if (crush_multiplication_is_unsafe(size, item_weight))
211 goto err;
212
213 bucket->h.weight = size * item_weight;
214 bucket->item_weight = item_weight;
215 bucket->h.items = malloc(sizeof(__s32)*size);
216
217 if (!bucket->h.items)
218 goto err;
219
220 for (i=0; i<size; i++)
221 bucket->h.items[i] = items[i];
222
223 return bucket;
224 err:
225 free(bucket->h.items);
226 free(bucket);
227 return NULL;
228 }
229
230
231 /* list bucket */
232
233 struct crush_bucket_list*
234 crush_make_list_bucket(int hash, int type, int size,
235 int *items,
236 int *weights)
237 {
238 int i;
239 int w;
240 struct crush_bucket_list *bucket;
241
242 bucket = malloc(sizeof(*bucket));
243 if (!bucket)
244 return NULL;
245 memset(bucket, 0, sizeof(*bucket));
246 bucket->h.alg = CRUSH_BUCKET_LIST;
247 bucket->h.hash = hash;
248 bucket->h.type = type;
249 bucket->h.size = size;
250
251 bucket->h.items = malloc(sizeof(__s32)*size);
252 if (!bucket->h.items)
253 goto err;
254
255
256 bucket->item_weights = malloc(sizeof(__u32)*size);
257 if (!bucket->item_weights)
258 goto err;
259 bucket->sum_weights = malloc(sizeof(__u32)*size);
260 if (!bucket->sum_weights)
261 goto err;
262 w = 0;
263 for (i=0; i<size; i++) {
264 bucket->h.items[i] = items[i];
265 bucket->item_weights[i] = weights[i];
266
267 if (crush_addition_is_unsafe(w, weights[i]))
268 goto err;
269
270 w += weights[i];
271 bucket->sum_weights[i] = w;
272 /*dprintk("pos %d item %d weight %d sum %d\n",
273 i, items[i], weights[i], bucket->sum_weights[i]);*/
274 }
275
276 bucket->h.weight = w;
277
278 return bucket;
279 err:
280 free(bucket->sum_weights);
281 free(bucket->item_weights);
282 free(bucket->h.items);
283 free(bucket);
284 return NULL;
285 }
286
287
288 /* tree bucket */
289
290 static int height(int n) {
291 int h = 0;
292 while ((n & 1) == 0) {
293 h++;
294 n = n >> 1;
295 }
296 return h;
297 }
298 static int on_right(int n, int h) {
299 return n & (1 << (h+1));
300 }
301 static int parent(int n)
302 {
303 int h = height(n);
304 if (on_right(n, h))
305 return n - (1<<h);
306 else
307 return n + (1<<h);
308 }
309
310 static int calc_depth(int size)
311 {
312 if (size == 0) {
313 return 0;
314 }
315
316 int depth = 1;
317 int t = size - 1;
318 while (t) {
319 t = t >> 1;
320 depth++;
321 }
322 return depth;
323 }
324
325 struct crush_bucket_tree*
326 crush_make_tree_bucket(int hash, int type, int size,
327 int *items, /* in leaf order */
328 int *weights)
329 {
330 struct crush_bucket_tree *bucket;
331 int depth;
332 int node;
333 int i, j;
334
335 bucket = malloc(sizeof(*bucket));
336 if (!bucket)
337 return NULL;
338 memset(bucket, 0, sizeof(*bucket));
339 bucket->h.alg = CRUSH_BUCKET_TREE;
340 bucket->h.hash = hash;
341 bucket->h.type = type;
342 bucket->h.size = size;
343
344 if (size == 0) {
345 bucket->h.items = NULL;
346 bucket->h.weight = 0;
347 bucket->node_weights = NULL;
348 bucket->num_nodes = 0;
349 /* printf("size 0 depth 0 nodes 0\n"); */
350 return bucket;
351 }
352
353 bucket->h.items = malloc(sizeof(__s32)*size);
354 if (!bucket->h.items)
355 goto err;
356
357 /* calc tree depth */
358 depth = calc_depth(size);
359 bucket->num_nodes = 1 << depth;
360 dprintk("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes);
361
362 bucket->node_weights = malloc(sizeof(__u32)*bucket->num_nodes);
363 if (!bucket->node_weights)
364 goto err;
365
366 memset(bucket->h.items, 0, sizeof(__s32)*bucket->h.size);
367 memset(bucket->node_weights, 0, sizeof(__u32)*bucket->num_nodes);
368
369 for (i=0; i<size; i++) {
370 bucket->h.items[i] = items[i];
371 node = crush_calc_tree_node(i);
372 dprintk("item %d node %d weight %d\n", i, node, weights[i]);
373 bucket->node_weights[node] = weights[i];
374
375 if (crush_addition_is_unsafe(bucket->h.weight, weights[i]))
376 goto err;
377
378 bucket->h.weight += weights[i];
379 for (j=1; j<depth; j++) {
380 node = parent(node);
381
382 if (crush_addition_is_unsafe(bucket->node_weights[node], weights[i]))
383 goto err;
384
385 bucket->node_weights[node] += weights[i];
386 dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
387 }
388 }
389 BUG_ON(bucket->node_weights[bucket->num_nodes/2] != bucket->h.weight);
390
391 return bucket;
392 err:
393 free(bucket->node_weights);
394 free(bucket->h.items);
395 free(bucket);
396 return NULL;
397 }
398
399
400
401 /* straw bucket */
402
403 /*
404 * this code was written 8 years ago. i have a vague recollection of
405 * drawing boxes underneath bars of different lengths, where the bar
406 * length represented the probability/weight, and that there was some
407 * trial and error involved in arriving at this implementation.
408 * however, reading the code now after all this time, the intuition
409 * that motivated is lost on me. lame. my only excuse is that I now
410 * know that the approach is fundamentally flawed and am not
411 * particularly motivated to reconstruct the flawed reasoning.
412 *
413 * as best as i can remember, the idea is: sort the weights, and start
414 * with the smallest. arbitrarily scale it at 1.0 (16-bit fixed
415 * point). look at the next larger weight, and calculate the scaling
416 * factor for that straw based on the relative difference in weight so
417 * far. what's not clear to me now is why we are looking at wnext
418 * (the delta to the next bigger weight) for all remaining weights,
419 * and slicing things horizontally instead of considering just the
420 * next item or set of items. or why pow() is used the way it is.
421 *
422 * note that the original version 1 of this function made special
423 * accomodation for the case where straw lengths were identical. this
424 * is also flawed in a non-obvious way; version 2 drops the special
425 * handling and appears to work just as well.
426 *
427 * moral of the story: if you do something clever, write down why it
428 * works.
429 */
430 int crush_calc_straw(struct crush_map *map, struct crush_bucket_straw *bucket)
431 {
432 int *reverse;
433 int i, j, k;
434 double straw, wbelow, lastw, wnext, pbelow;
435 int numleft;
436 int size = bucket->h.size;
437 __u32 *weights = bucket->item_weights;
438
439 /* reverse sort by weight (simple insertion sort) */
440 reverse = malloc(sizeof(int) * size);
441 if (!reverse)
442 return -ENOMEM;
443 if (size)
444 reverse[0] = 0;
445 for (i=1; i<size; i++) {
446 for (j=0; j<i; j++) {
447 if (weights[i] < weights[reverse[j]]) {
448 /* insert here */
449 for (k=i; k>j; k--)
450 reverse[k] = reverse[k-1];
451 reverse[j] = i;
452 break;
453 }
454 }
455 if (j == i)
456 reverse[i] = i;
457 }
458
459 numleft = size;
460 straw = 1.0;
461 wbelow = 0;
462 lastw = 0;
463
464 i=0;
465 while (i < size) {
466 if (map->straw_calc_version == 0) {
467 /* zero weight items get 0 length straws! */
468 if (weights[reverse[i]] == 0) {
469 bucket->straws[reverse[i]] = 0;
470 i++;
471 continue;
472 }
473
474 /* set this item's straw */
475 bucket->straws[reverse[i]] = straw * 0x10000;
476 dprintk("item %d at %d weight %d straw %d (%lf)\n",
477 bucket->h.items[reverse[i]],
478 reverse[i], weights[reverse[i]],
479 bucket->straws[reverse[i]], straw);
480 i++;
481 if (i == size)
482 break;
483
484 /* same weight as previous? */
485 if (weights[reverse[i]] == weights[reverse[i-1]]) {
486 dprintk("same as previous\n");
487 continue;
488 }
489
490 /* adjust straw for next guy */
491 wbelow += ((double)weights[reverse[i-1]] - lastw) *
492 numleft;
493 for (j=i; j<size; j++)
494 if (weights[reverse[j]] == weights[reverse[i]])
495 numleft--;
496 else
497 break;
498 wnext = numleft * (weights[reverse[i]] -
499 weights[reverse[i-1]]);
500 pbelow = wbelow / (wbelow + wnext);
501 dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n",
502 wbelow, wnext, pbelow, numleft);
503
504 straw *= pow((double)1.0 / pbelow, (double)1.0 /
505 (double)numleft);
506
507 lastw = weights[reverse[i-1]];
508 } else if (map->straw_calc_version >= 1) {
509 /* zero weight items get 0 length straws! */
510 if (weights[reverse[i]] == 0) {
511 bucket->straws[reverse[i]] = 0;
512 i++;
513 numleft--;
514 continue;
515 }
516
517 /* set this item's straw */
518 bucket->straws[reverse[i]] = straw * 0x10000;
519 dprintk("item %d at %d weight %d straw %d (%lf)\n",
520 bucket->h.items[reverse[i]],
521 reverse[i], weights[reverse[i]],
522 bucket->straws[reverse[i]], straw);
523 i++;
524 if (i == size)
525 break;
526
527 /* adjust straw for next guy */
528 wbelow += ((double)weights[reverse[i-1]] - lastw) *
529 numleft;
530 numleft--;
531 wnext = numleft * (weights[reverse[i]] -
532 weights[reverse[i-1]]);
533 pbelow = wbelow / (wbelow + wnext);
534 dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n",
535 wbelow, wnext, pbelow, numleft);
536
537 straw *= pow((double)1.0 / pbelow, (double)1.0 /
538 (double)numleft);
539
540 lastw = weights[reverse[i-1]];
541 }
542 }
543
544 free(reverse);
545 return 0;
546 }
547
548 struct crush_bucket_straw *
549 crush_make_straw_bucket(struct crush_map *map,
550 int hash,
551 int type,
552 int size,
553 int *items,
554 int *weights)
555 {
556 struct crush_bucket_straw *bucket;
557 int i;
558
559 bucket = malloc(sizeof(*bucket));
560 if (!bucket)
561 return NULL;
562 memset(bucket, 0, sizeof(*bucket));
563 bucket->h.alg = CRUSH_BUCKET_STRAW;
564 bucket->h.hash = hash;
565 bucket->h.type = type;
566 bucket->h.size = size;
567
568 bucket->h.items = malloc(sizeof(__s32)*size);
569 if (!bucket->h.items)
570 goto err;
571 bucket->item_weights = malloc(sizeof(__u32)*size);
572 if (!bucket->item_weights)
573 goto err;
574 bucket->straws = malloc(sizeof(__u32)*size);
575 if (!bucket->straws)
576 goto err;
577
578 bucket->h.weight = 0;
579 for (i=0; i<size; i++) {
580 bucket->h.items[i] = items[i];
581 bucket->h.weight += weights[i];
582 bucket->item_weights[i] = weights[i];
583 }
584
585 if (crush_calc_straw(map, bucket) < 0)
586 goto err;
587
588 return bucket;
589 err:
590 free(bucket->straws);
591 free(bucket->item_weights);
592 free(bucket->h.items);
593 free(bucket);
594 return NULL;
595 }
596
597 struct crush_bucket_straw2 *
598 crush_make_straw2_bucket(struct crush_map *map,
599 int hash,
600 int type,
601 int size,
602 int *items,
603 int *weights)
604 {
605 struct crush_bucket_straw2 *bucket;
606 int i;
607
608 bucket = malloc(sizeof(*bucket));
609 if (!bucket)
610 return NULL;
611 memset(bucket, 0, sizeof(*bucket));
612 bucket->h.alg = CRUSH_BUCKET_STRAW2;
613 bucket->h.hash = hash;
614 bucket->h.type = type;
615 bucket->h.size = size;
616
617 bucket->h.items = malloc(sizeof(__s32)*size);
618 if (!bucket->h.items)
619 goto err;
620 bucket->item_weights = malloc(sizeof(__u32)*size);
621 if (!bucket->item_weights)
622 goto err;
623
624 bucket->h.weight = 0;
625 for (i=0; i<size; i++) {
626 bucket->h.items[i] = items[i];
627 bucket->h.weight += weights[i];
628 bucket->item_weights[i] = weights[i];
629 }
630
631 return bucket;
632 err:
633 free(bucket->item_weights);
634 free(bucket->h.items);
635 free(bucket);
636 return NULL;
637 }
638
639
640
641 struct crush_bucket*
642 crush_make_bucket(struct crush_map *map,
643 int alg, int hash, int type, int size,
644 int *items,
645 int *weights)
646 {
647 int item_weight;
648
649 switch (alg) {
650 case CRUSH_BUCKET_UNIFORM:
651 if (size && weights)
652 item_weight = weights[0];
653 else
654 item_weight = 0;
655 return (struct crush_bucket *)crush_make_uniform_bucket(hash, type, size, items, item_weight);
656
657 case CRUSH_BUCKET_LIST:
658 return (struct crush_bucket *)crush_make_list_bucket(hash, type, size, items, weights);
659
660 case CRUSH_BUCKET_TREE:
661 return (struct crush_bucket *)crush_make_tree_bucket(hash, type, size, items, weights);
662
663 case CRUSH_BUCKET_STRAW:
664 return (struct crush_bucket *)crush_make_straw_bucket(map, hash, type, size, items, weights);
665 case CRUSH_BUCKET_STRAW2:
666 return (struct crush_bucket *)crush_make_straw2_bucket(map, hash, type, size, items, weights);
667 }
668 return 0;
669 }
670
671
672 /************************************************/
673
674 int crush_add_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item, int weight)
675 {
676 int newsize = bucket->h.size + 1;
677 void *_realloc = NULL;
678
679 /* In such situation 'CRUSH_BUCKET_UNIFORM', the weight
680 provided for the item should be the same as
681 bucket->item_weight defined with 'crush_make_bucket'. This
682 assumption is enforced by the return value which is always
683 0. */
684 if (bucket->item_weight != weight) {
685 return -EINVAL;
686 }
687
688 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
689 return -ENOMEM;
690 } else {
691 bucket->h.items = _realloc;
692 }
693
694 bucket->h.items[newsize-1] = item;
695
696 if (crush_addition_is_unsafe(bucket->h.weight, weight))
697 return -ERANGE;
698
699 bucket->h.weight += weight;
700 bucket->h.size++;
701
702 return 0;
703 }
704
705 int crush_add_list_bucket_item(struct crush_bucket_list *bucket, int item, int weight)
706 {
707 int newsize = bucket->h.size + 1;
708 void *_realloc = NULL;
709
710 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
711 return -ENOMEM;
712 } else {
713 bucket->h.items = _realloc;
714 }
715 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
716 return -ENOMEM;
717 } else {
718 bucket->item_weights = _realloc;
719 }
720 if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) {
721 return -ENOMEM;
722 } else {
723 bucket->sum_weights = _realloc;
724 }
725
726 bucket->h.items[newsize-1] = item;
727 bucket->item_weights[newsize-1] = weight;
728 if (newsize > 1) {
729
730 if (crush_addition_is_unsafe(bucket->sum_weights[newsize-2], weight))
731 return -ERANGE;
732
733 bucket->sum_weights[newsize-1] = bucket->sum_weights[newsize-2] + weight;
734 }
735
736 else {
737 bucket->sum_weights[newsize-1] = weight;
738 }
739
740 bucket->h.weight += weight;
741 bucket->h.size++;
742 return 0;
743 }
744
745 int crush_add_tree_bucket_item(struct crush_bucket_tree *bucket, int item, int weight)
746 {
747 int newsize = bucket->h.size + 1;
748 int depth = calc_depth(newsize);;
749 int node;
750 int j;
751 void *_realloc = NULL;
752
753 bucket->num_nodes = 1 << depth;
754
755 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
756 return -ENOMEM;
757 } else {
758 bucket->h.items = _realloc;
759 }
760 if ((_realloc = realloc(bucket->node_weights, sizeof(__u32)*bucket->num_nodes)) == NULL) {
761 return -ENOMEM;
762 } else {
763 bucket->node_weights = _realloc;
764 }
765
766 node = crush_calc_tree_node(newsize-1);
767 bucket->node_weights[node] = weight;
768
769 /* if the depth increase, we need to initialize the new root node's weight before add bucket item */
770 int root = bucket->num_nodes/2;
771 if (depth >= 2 && (node - 1) == root) {
772 /* if the new item is the first node in right sub tree, so
773 * the root node initial weight is left sub tree's weight
774 */
775 bucket->node_weights[root] = bucket->node_weights[root/2];
776 }
777
778 for (j=1; j<depth; j++) {
779 node = parent(node);
780
781 if (crush_addition_is_unsafe(bucket->node_weights[node], weight))
782 return -ERANGE;
783
784 bucket->node_weights[node] += weight;
785 dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
786 }
787
788
789 if (crush_addition_is_unsafe(bucket->h.weight, weight))
790 return -ERANGE;
791
792 bucket->h.items[newsize-1] = item;
793 bucket->h.weight += weight;
794 bucket->h.size++;
795
796 return 0;
797 }
798
799 int crush_add_straw_bucket_item(struct crush_map *map,
800 struct crush_bucket_straw *bucket,
801 int item, int weight)
802 {
803 int newsize = bucket->h.size + 1;
804
805 void *_realloc = NULL;
806
807 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
808 return -ENOMEM;
809 } else {
810 bucket->h.items = _realloc;
811 }
812 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
813 return -ENOMEM;
814 } else {
815 bucket->item_weights = _realloc;
816 }
817 if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) {
818 return -ENOMEM;
819 } else {
820 bucket->straws = _realloc;
821 }
822
823 bucket->h.items[newsize-1] = item;
824 bucket->item_weights[newsize-1] = weight;
825
826 if (crush_addition_is_unsafe(bucket->h.weight, weight))
827 return -ERANGE;
828
829 bucket->h.weight += weight;
830 bucket->h.size++;
831
832 return crush_calc_straw(map, bucket);
833 }
834
835 int crush_add_straw2_bucket_item(struct crush_map *map,
836 struct crush_bucket_straw2 *bucket,
837 int item, int weight)
838 {
839 int newsize = bucket->h.size + 1;
840
841 void *_realloc = NULL;
842
843 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
844 return -ENOMEM;
845 } else {
846 bucket->h.items = _realloc;
847 }
848 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
849 return -ENOMEM;
850 } else {
851 bucket->item_weights = _realloc;
852 }
853
854 bucket->h.items[newsize-1] = item;
855 bucket->item_weights[newsize-1] = weight;
856
857 if (crush_addition_is_unsafe(bucket->h.weight, weight))
858 return -ERANGE;
859
860 bucket->h.weight += weight;
861 bucket->h.size++;
862
863 return 0;
864 }
865
866 int crush_bucket_add_item(struct crush_map *map,
867 struct crush_bucket *b, int item, int weight)
868 {
869 switch (b->alg) {
870 case CRUSH_BUCKET_UNIFORM:
871 return crush_add_uniform_bucket_item((struct crush_bucket_uniform *)b, item, weight);
872 case CRUSH_BUCKET_LIST:
873 return crush_add_list_bucket_item((struct crush_bucket_list *)b, item, weight);
874 case CRUSH_BUCKET_TREE:
875 return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight);
876 case CRUSH_BUCKET_STRAW:
877 return crush_add_straw_bucket_item(map, (struct crush_bucket_straw *)b, item, weight);
878 case CRUSH_BUCKET_STRAW2:
879 return crush_add_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item, weight);
880 default:
881 return -1;
882 }
883 }
884
885 /************************************************/
886
887 int crush_remove_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item)
888 {
889 unsigned i, j;
890 int newsize;
891 void *_realloc = NULL;
892
893 for (i = 0; i < bucket->h.size; i++)
894 if (bucket->h.items[i] == item)
895 break;
896 if (i == bucket->h.size)
897 return -ENOENT;
898
899 for (j = i; j < bucket->h.size; j++)
900 bucket->h.items[j] = bucket->h.items[j+1];
901 newsize = --bucket->h.size;
902 if (bucket->item_weight < bucket->h.weight)
903 bucket->h.weight -= bucket->item_weight;
904 else
905 bucket->h.weight = 0;
906
907 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
908 return -ENOMEM;
909 } else {
910 bucket->h.items = _realloc;
911 }
912 return 0;
913 }
914
915 int crush_remove_list_bucket_item(struct crush_bucket_list *bucket, int item)
916 {
917 unsigned i, j;
918 int newsize;
919 unsigned weight;
920
921 for (i = 0; i < bucket->h.size; i++)
922 if (bucket->h.items[i] == item)
923 break;
924 if (i == bucket->h.size)
925 return -ENOENT;
926
927 weight = bucket->item_weights[i];
928 for (j = i; j < bucket->h.size; j++) {
929 bucket->h.items[j] = bucket->h.items[j+1];
930 bucket->item_weights[j] = bucket->item_weights[j+1];
931 bucket->sum_weights[j] = bucket->sum_weights[j+1] - weight;
932 }
933 if (weight < bucket->h.weight)
934 bucket->h.weight -= weight;
935 else
936 bucket->h.weight = 0;
937 newsize = --bucket->h.size;
938
939 void *_realloc = NULL;
940
941 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
942 return -ENOMEM;
943 } else {
944 bucket->h.items = _realloc;
945 }
946 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
947 return -ENOMEM;
948 } else {
949 bucket->item_weights = _realloc;
950 }
951 if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) {
952 return -ENOMEM;
953 } else {
954 bucket->sum_weights = _realloc;
955 }
956 return 0;
957 }
958
959 int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item)
960 {
961 unsigned i;
962 unsigned newsize;
963
964 for (i = 0; i < bucket->h.size; i++) {
965 int node;
966 unsigned weight;
967 int j;
968 int depth = calc_depth(bucket->h.size);
969
970 if (bucket->h.items[i] != item)
971 continue;
972
973 bucket->h.items[i] = 0;
974 node = crush_calc_tree_node(i);
975 weight = bucket->node_weights[node];
976 bucket->node_weights[node] = 0;
977
978 for (j = 1; j < depth; j++) {
979 node = parent(node);
980 bucket->node_weights[node] -= weight;
981 dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
982 }
983 if (weight < bucket->h.weight)
984 bucket->h.weight -= weight;
985 else
986 bucket->h.weight = 0;
987 break;
988 }
989 if (i == bucket->h.size)
990 return -ENOENT;
991
992 newsize = bucket->h.size;
993 while (newsize > 0) {
994 int node = crush_calc_tree_node(newsize - 1);
995 if (bucket->node_weights[node])
996 break;
997 --newsize;
998 }
999
1000 if (newsize != bucket->h.size) {
1001 int olddepth, newdepth;
1002
1003 void *_realloc = NULL;
1004
1005 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
1006 return -ENOMEM;
1007 } else {
1008 bucket->h.items = _realloc;
1009 }
1010
1011 olddepth = calc_depth(bucket->h.size);
1012 newdepth = calc_depth(newsize);
1013 if (olddepth != newdepth) {
1014 bucket->num_nodes = 1 << newdepth;
1015 if ((_realloc = realloc(bucket->node_weights,
1016 sizeof(__u32)*bucket->num_nodes)) == NULL) {
1017 return -ENOMEM;
1018 } else {
1019 bucket->node_weights = _realloc;
1020 }
1021 }
1022
1023 bucket->h.size = newsize;
1024 }
1025 return 0;
1026 }
1027
1028 int crush_remove_straw_bucket_item(struct crush_map *map,
1029 struct crush_bucket_straw *bucket, int item)
1030 {
1031 int newsize = bucket->h.size - 1;
1032 unsigned i, j;
1033
1034 for (i = 0; i < bucket->h.size; i++) {
1035 if (bucket->h.items[i] == item) {
1036 bucket->h.size--;
1037 if (bucket->item_weights[i] < bucket->h.weight)
1038 bucket->h.weight -= bucket->item_weights[i];
1039 else
1040 bucket->h.weight = 0;
1041 for (j = i; j < bucket->h.size; j++) {
1042 bucket->h.items[j] = bucket->h.items[j+1];
1043 bucket->item_weights[j] = bucket->item_weights[j+1];
1044 }
1045 break;
1046 }
1047 }
1048 if (i == bucket->h.size)
1049 return -ENOENT;
1050
1051 void *_realloc = NULL;
1052
1053 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
1054 return -ENOMEM;
1055 } else {
1056 bucket->h.items = _realloc;
1057 }
1058 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
1059 return -ENOMEM;
1060 } else {
1061 bucket->item_weights = _realloc;
1062 }
1063 if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) {
1064 return -ENOMEM;
1065 } else {
1066 bucket->straws = _realloc;
1067 }
1068
1069 return crush_calc_straw(map, bucket);
1070 }
1071
1072 int crush_remove_straw2_bucket_item(struct crush_map *map,
1073 struct crush_bucket_straw2 *bucket, int item)
1074 {
1075 int newsize = bucket->h.size - 1;
1076 unsigned i, j;
1077
1078 for (i = 0; i < bucket->h.size; i++) {
1079 if (bucket->h.items[i] == item) {
1080 bucket->h.size--;
1081 if (bucket->item_weights[i] < bucket->h.weight)
1082 bucket->h.weight -= bucket->item_weights[i];
1083 else
1084 bucket->h.weight = 0;
1085 for (j = i; j < bucket->h.size; j++) {
1086 bucket->h.items[j] = bucket->h.items[j+1];
1087 bucket->item_weights[j] = bucket->item_weights[j+1];
1088 }
1089 break;
1090 }
1091 }
1092 if (i == bucket->h.size)
1093 return -ENOENT;
1094
1095 void *_realloc = NULL;
1096
1097 if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
1098 return -ENOMEM;
1099 } else {
1100 bucket->h.items = _realloc;
1101 }
1102 if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) {
1103 return -ENOMEM;
1104 } else {
1105 bucket->item_weights = _realloc;
1106 }
1107
1108 return 0;
1109 }
1110
1111 int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item)
1112 {
1113 switch (b->alg) {
1114 case CRUSH_BUCKET_UNIFORM:
1115 return crush_remove_uniform_bucket_item((struct crush_bucket_uniform *)b, item);
1116 case CRUSH_BUCKET_LIST:
1117 return crush_remove_list_bucket_item((struct crush_bucket_list *)b, item);
1118 case CRUSH_BUCKET_TREE:
1119 return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item);
1120 case CRUSH_BUCKET_STRAW:
1121 return crush_remove_straw_bucket_item(map, (struct crush_bucket_straw *)b, item);
1122 case CRUSH_BUCKET_STRAW2:
1123 return crush_remove_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item);
1124 default:
1125 return -1;
1126 }
1127 }
1128
1129
1130 /************************************************/
1131
1132 int crush_adjust_uniform_bucket_item_weight(struct crush_bucket_uniform *bucket, int item, int weight)
1133 {
1134 int diff = (weight - bucket->item_weight) * bucket->h.size;
1135
1136 bucket->item_weight = weight;
1137 bucket->h.weight = bucket->item_weight * bucket->h.size;
1138
1139 return diff;
1140 }
1141
1142 int crush_adjust_list_bucket_item_weight(struct crush_bucket_list *bucket, int item, int weight)
1143 {
1144 int diff;
1145 unsigned i, j;
1146
1147 for (i = 0; i < bucket->h.size; i++) {
1148 if (bucket->h.items[i] == item)
1149 break;
1150 }
1151 if (i == bucket->h.size)
1152 return 0;
1153
1154 diff = weight - bucket->item_weights[i];
1155 bucket->item_weights[i] = weight;
1156 bucket->h.weight += diff;
1157
1158 for (j = i; j < bucket->h.size; j++)
1159 bucket->sum_weights[j] += diff;
1160
1161 return diff;
1162 }
1163
1164 int crush_adjust_tree_bucket_item_weight(struct crush_bucket_tree *bucket, int item, int weight)
1165 {
1166 int diff;
1167 int node;
1168 unsigned i, j;
1169 unsigned depth = calc_depth(bucket->h.size);
1170
1171 for (i = 0; i < bucket->h.size; i++) {
1172 if (bucket->h.items[i] == item)
1173 break;
1174 }
1175 if (i == bucket->h.size)
1176 return 0;
1177
1178 node = crush_calc_tree_node(i);
1179 diff = weight - bucket->node_weights[node];
1180 bucket->node_weights[node] = weight;
1181 bucket->h.weight += diff;
1182
1183 for (j=1; j<depth; j++) {
1184 node = parent(node);
1185 bucket->node_weights[node] += diff;
1186 }
1187
1188 return diff;
1189 }
1190
1191 int crush_adjust_straw_bucket_item_weight(struct crush_map *map,
1192 struct crush_bucket_straw *bucket,
1193 int item, int weight)
1194 {
1195 unsigned idx;
1196 int diff;
1197 int r;
1198
1199 for (idx = 0; idx < bucket->h.size; idx++)
1200 if (bucket->h.items[idx] == item)
1201 break;
1202 if (idx == bucket->h.size)
1203 return 0;
1204
1205 diff = weight - bucket->item_weights[idx];
1206 bucket->item_weights[idx] = weight;
1207 bucket->h.weight += diff;
1208
1209 r = crush_calc_straw(map, bucket);
1210 if (r < 0)
1211 return r;
1212
1213 return diff;
1214 }
1215
1216 int crush_adjust_straw2_bucket_item_weight(struct crush_map *map,
1217 struct crush_bucket_straw2 *bucket,
1218 int item, int weight)
1219 {
1220 unsigned idx;
1221 int diff;
1222
1223 for (idx = 0; idx < bucket->h.size; idx++)
1224 if (bucket->h.items[idx] == item)
1225 break;
1226 if (idx == bucket->h.size)
1227 return 0;
1228
1229 diff = weight - bucket->item_weights[idx];
1230 bucket->item_weights[idx] = weight;
1231 bucket->h.weight += diff;
1232
1233 return diff;
1234 }
1235
1236 int crush_bucket_adjust_item_weight(struct crush_map *map,
1237 struct crush_bucket *b,
1238 int item, int weight)
1239 {
1240 switch (b->alg) {
1241 case CRUSH_BUCKET_UNIFORM:
1242 return crush_adjust_uniform_bucket_item_weight((struct crush_bucket_uniform *)b,
1243 item, weight);
1244 case CRUSH_BUCKET_LIST:
1245 return crush_adjust_list_bucket_item_weight((struct crush_bucket_list *)b,
1246 item, weight);
1247 case CRUSH_BUCKET_TREE:
1248 return crush_adjust_tree_bucket_item_weight((struct crush_bucket_tree *)b,
1249 item, weight);
1250 case CRUSH_BUCKET_STRAW:
1251 return crush_adjust_straw_bucket_item_weight(map,
1252 (struct crush_bucket_straw *)b,
1253 item, weight);
1254 case CRUSH_BUCKET_STRAW2:
1255 return crush_adjust_straw2_bucket_item_weight(map,
1256 (struct crush_bucket_straw2 *)b,
1257 item, weight);
1258 default:
1259 return -1;
1260 }
1261 }
1262
1263 /************************************************/
1264
1265 static int crush_reweight_uniform_bucket(struct crush_map *map, struct crush_bucket_uniform *bucket)
1266 {
1267 unsigned i;
1268 unsigned sum = 0, n = 0, leaves = 0;
1269
1270 for (i = 0; i < bucket->h.size; i++) {
1271 int id = bucket->h.items[i];
1272 if (id < 0) {
1273 struct crush_bucket *c = map->buckets[-1-id];
1274 crush_reweight_bucket(map, c);
1275
1276 if (crush_addition_is_unsafe(sum, c->weight))
1277 return -ERANGE;
1278
1279 sum += c->weight;
1280 n++;
1281 } else {
1282 leaves++;
1283 }
1284 }
1285
1286 if (n > leaves)
1287 bucket->item_weight = sum / n; // more bucket children than leaves, average!
1288 bucket->h.weight = bucket->item_weight * bucket->h.size;
1289
1290 return 0;
1291 }
1292
1293 static int crush_reweight_list_bucket(struct crush_map *map, struct crush_bucket_list *bucket)
1294 {
1295 unsigned i;
1296
1297 bucket->h.weight = 0;
1298 for (i = 0; i < bucket->h.size; i++) {
1299 int id = bucket->h.items[i];
1300 if (id < 0) {
1301 struct crush_bucket *c = map->buckets[-1-id];
1302 crush_reweight_bucket(map, c);
1303 bucket->item_weights[i] = c->weight;
1304 }
1305
1306 if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i]))
1307 return -ERANGE;
1308
1309 bucket->h.weight += bucket->item_weights[i];
1310 }
1311
1312 return 0;
1313 }
1314
1315 static int crush_reweight_tree_bucket(struct crush_map *map, struct crush_bucket_tree *bucket)
1316 {
1317 unsigned i;
1318
1319 bucket->h.weight = 0;
1320 for (i = 0; i < bucket->h.size; i++) {
1321 int node = crush_calc_tree_node(i);
1322 int id = bucket->h.items[i];
1323 if (id < 0) {
1324 struct crush_bucket *c = map->buckets[-1-id];
1325 crush_reweight_bucket(map, c);
1326 bucket->node_weights[node] = c->weight;
1327 }
1328
1329 if (crush_addition_is_unsafe(bucket->h.weight, bucket->node_weights[node]))
1330 return -ERANGE;
1331
1332 bucket->h.weight += bucket->node_weights[node];
1333
1334
1335 }
1336
1337 return 0;
1338 }
1339
1340 static int crush_reweight_straw_bucket(struct crush_map *map, struct crush_bucket_straw *bucket)
1341 {
1342 unsigned i;
1343
1344 bucket->h.weight = 0;
1345 for (i = 0; i < bucket->h.size; i++) {
1346 int id = bucket->h.items[i];
1347 if (id < 0) {
1348 struct crush_bucket *c = map->buckets[-1-id];
1349 crush_reweight_bucket(map, c);
1350 bucket->item_weights[i] = c->weight;
1351 }
1352
1353 if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i]))
1354 return -ERANGE;
1355
1356 bucket->h.weight += bucket->item_weights[i];
1357 }
1358 crush_calc_straw(map, bucket);
1359
1360 return 0;
1361 }
1362
1363 static int crush_reweight_straw2_bucket(struct crush_map *map, struct crush_bucket_straw2 *bucket)
1364 {
1365 unsigned i;
1366
1367 bucket->h.weight = 0;
1368 for (i = 0; i < bucket->h.size; i++) {
1369 int id = bucket->h.items[i];
1370 if (id < 0) {
1371 struct crush_bucket *c = map->buckets[-1-id];
1372 crush_reweight_bucket(map, c);
1373 bucket->item_weights[i] = c->weight;
1374 }
1375
1376 if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i]))
1377 return -ERANGE;
1378
1379 bucket->h.weight += bucket->item_weights[i];
1380 }
1381
1382 return 0;
1383 }
1384
1385 int crush_reweight_bucket(struct crush_map *map, struct crush_bucket *b)
1386 {
1387 switch (b->alg) {
1388 case CRUSH_BUCKET_UNIFORM:
1389 return crush_reweight_uniform_bucket(map, (struct crush_bucket_uniform *)b);
1390 case CRUSH_BUCKET_LIST:
1391 return crush_reweight_list_bucket(map, (struct crush_bucket_list *)b);
1392 case CRUSH_BUCKET_TREE:
1393 return crush_reweight_tree_bucket(map, (struct crush_bucket_tree *)b);
1394 case CRUSH_BUCKET_STRAW:
1395 return crush_reweight_straw_bucket(map, (struct crush_bucket_straw *)b);
1396 case CRUSH_BUCKET_STRAW2:
1397 return crush_reweight_straw2_bucket(map, (struct crush_bucket_straw2 *)b);
1398 default:
1399 return -1;
1400 }
1401 }
1402
1403 struct crush_choose_arg *crush_make_choose_args(struct crush_map *map, int num_positions)
1404 {
1405 int b;
1406 int sum_bucket_size = 0;
1407 int bucket_count = 0;
1408 for (b = 0; b < map->max_buckets; b++) {
1409 if (map->buckets[b] == 0)
1410 continue;
1411 sum_bucket_size += map->buckets[b]->size;
1412 bucket_count++;
1413 }
1414 dprintk("sum_bucket_size %d max_buckets %d bucket_count %d\n",
1415 sum_bucket_size, map->max_buckets, bucket_count);
1416 int size = (sizeof(struct crush_choose_arg) * map->max_buckets +
1417 sizeof(struct crush_weight_set) * bucket_count * num_positions +
1418 sizeof(__u32) * sum_bucket_size * num_positions + // weights
1419 sizeof(__u32) * sum_bucket_size); // ids
1420 char *space = malloc(size);
1421 struct crush_choose_arg *arg = (struct crush_choose_arg *)space;
1422 struct crush_weight_set *weight_set = (struct crush_weight_set *)(arg + map->max_buckets);
1423 __u32 *weights = (__u32 *)(weight_set + bucket_count * num_positions);
1424 char *weight_set_ends = (char*)weights;
1425 int *ids = (int *)(weights + sum_bucket_size * num_positions);
1426 char *weights_end = (char *)ids;
1427 char *ids_end = (char *)(ids + sum_bucket_size);
1428 BUG_ON(space + size != ids_end);
1429 for (b = 0; b < map->max_buckets; b++) {
1430 if (map->buckets[b] == 0) {
1431 memset(&arg[b], '\0', sizeof(struct crush_choose_arg));
1432 continue;
1433 }
1434 struct crush_bucket_straw2 *bucket = (struct crush_bucket_straw2 *)map->buckets[b];
1435
1436 int position;
1437 for (position = 0; position < num_positions; position++) {
1438 memcpy(weights, bucket->item_weights, sizeof(__u32) * bucket->h.size);
1439 weight_set[position].weights = weights;
1440 weight_set[position].size = bucket->h.size;
1441 dprintk("moving weight %d bytes forward\n", (int)((weights + bucket->h.size) - weights));
1442 weights += bucket->h.size;
1443 }
1444 arg[b].weight_set = weight_set;
1445 arg[b].weight_set_size = num_positions;
1446 weight_set += position;
1447
1448 memcpy(ids, bucket->h.items, sizeof(int) * bucket->h.size);
1449 arg[b].ids = ids;
1450 arg[b].ids_size = bucket->h.size;
1451 ids += bucket->h.size;
1452 }
1453 BUG_ON((char*)weight_set_ends != (char*)weight_set);
1454 BUG_ON((char*)weights_end != (char*)weights);
1455 BUG_ON((char*)ids != (char*)ids_end);
1456 return arg;
1457 }
1458
1459 void crush_destroy_choose_args(struct crush_choose_arg *args)
1460 {
1461 free(args);
1462 }
1463
1464 /***************************/
1465
1466 /* methods to check for safe arithmetic operations */
1467
1468 int crush_addition_is_unsafe(__u32 a, __u32 b)
1469 {
1470 if ((((__u32)(-1)) - b) < a)
1471 return 1;
1472 else
1473 return 0;
1474 }
1475
1476 int crush_multiplication_is_unsafe(__u32 a, __u32 b)
1477 {
1478 /* prevent division by zero */
1479 if (!a)
1480 return 0;
1481 if (!b)
1482 return 1;
1483 if ((((__u32)(-1)) / b) < a)
1484 return 1;
1485 else
1486 return 0;
1487 }
1488
1489 /***************************/
1490
1491 /* methods to configure crush_map */
1492
1493 void set_legacy_crush_map(struct crush_map *map) {
1494 /* initialize legacy tunable values */
1495 map->choose_local_tries = 2;
1496 map->choose_local_fallback_tries = 5;
1497 map->choose_total_tries = 19;
1498 map->chooseleaf_descend_once = 0;
1499 map->chooseleaf_vary_r = 0;
1500 map->chooseleaf_stable = 0;
1501 map->straw_calc_version = 0;
1502
1503 // by default, use legacy types, and also exclude tree,
1504 // since it was buggy.
1505 map->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS;
1506 }
1507
1508 void set_optimal_crush_map(struct crush_map *map) {
1509 map->choose_local_tries = 0;
1510 map->choose_local_fallback_tries = 0;
1511 map->choose_total_tries = 50;
1512 map->chooseleaf_descend_once = 1;
1513 map->chooseleaf_vary_r = 1;
1514 map->chooseleaf_stable = 1;
1515 map->allowed_bucket_algs = (
1516 (1 << CRUSH_BUCKET_UNIFORM) |
1517 (1 << CRUSH_BUCKET_LIST) |
1518 (1 << CRUSH_BUCKET_STRAW) |
1519 (1 << CRUSH_BUCKET_STRAW2));
1520 }