9 #include "include/int_types.h"
14 #define dprintk(args...) /* printf(args) */
16 #define BUG_ON(x) assert(!(x))
18 struct crush_map
*crush_create()
21 m
= malloc(sizeof(*m
));
24 memset(m
, 0, sizeof(*m
));
26 set_optimal_crush_map(m
);
31 * finalize should be called _after_ all buckets are added to the map.
33 void crush_finalize(struct crush_map
*map
)
38 /* Calculate the needed working space while we do other
39 finalization tasks. */
40 map
->working_size
= sizeof(struct crush_work
);
41 /* Space for the array of pointers to per-bucket workspace */
42 map
->working_size
+= map
->max_buckets
*
43 sizeof(struct crush_work_bucket
*);
45 /* calc max_devices */
47 for (b
=0; b
<map
->max_buckets
; b
++) {
48 if (map
->buckets
[b
] == 0)
50 for (i
=0; i
<map
->buckets
[b
]->size
; i
++)
51 if (map
->buckets
[b
]->items
[i
] >= map
->max_devices
)
52 map
->max_devices
= map
->buckets
[b
]->items
[i
] + 1;
54 switch (map
->buckets
[b
]->alg
) {
56 /* The base case, permutation variables and
57 the pointer to the permutation array. */
58 map
->working_size
+= sizeof(struct crush_work_bucket
);
61 /* Every bucket has a permutation array. */
62 map
->working_size
+= map
->buckets
[b
]->size
* sizeof(__u32
);
70 int crush_add_rule(struct crush_map
*map
, struct crush_rule
*rule
, int ruleno
)
75 for (r
=0; r
< map
->max_rules
; r
++)
76 if (map
->rules
[r
] == 0)
78 assert(r
< CRUSH_MAX_RULES
);
83 if (r
>= map
->max_rules
) {
86 void *_realloc
= NULL
;
87 if (map
->max_rules
+1 > CRUSH_MAX_RULES
)
89 oldsize
= map
->max_rules
;
91 if ((_realloc
= realloc(map
->rules
, map
->max_rules
* sizeof(map
->rules
[0]))) == NULL
) {
94 map
->rules
= _realloc
;
96 memset(map
->rules
+ oldsize
, 0, (map
->max_rules
-oldsize
) * sizeof(map
->rules
[0]));
100 map
->rules
[r
] = rule
;
104 struct crush_rule
*crush_make_rule(int len
, int ruleset
, int type
, int minsize
, int maxsize
)
106 struct crush_rule
*rule
;
107 rule
= malloc(crush_rule_size(len
));
111 rule
->mask
.ruleset
= ruleset
;
112 rule
->mask
.type
= type
;
113 rule
->mask
.min_size
= minsize
;
114 rule
->mask
.max_size
= maxsize
;
119 * be careful; this doesn't verify that the buffer you allocated is big enough!
121 void crush_rule_set_step(struct crush_rule
*rule
, int n
, int op
, int arg1
, int arg2
)
123 assert((__u32
)n
< rule
->len
);
124 rule
->steps
[n
].op
= op
;
125 rule
->steps
[n
].arg1
= arg1
;
126 rule
->steps
[n
].arg2
= arg2
;
131 int crush_get_next_bucket_id(struct crush_map
*map
)
134 for (pos
=0; pos
< map
->max_buckets
; pos
++)
135 if (map
->buckets
[pos
] == 0)
141 int crush_add_bucket(struct crush_map
*map
,
143 struct crush_bucket
*bucket
,
148 /* find a bucket id */
150 id
= crush_get_next_bucket_id(map
);
153 while (pos
>= map
->max_buckets
) {
155 int oldsize
= map
->max_buckets
;
156 if (map
->max_buckets
)
157 map
->max_buckets
*= 2;
159 map
->max_buckets
= 8;
160 void *_realloc
= NULL
;
161 if ((_realloc
= realloc(map
->buckets
, map
->max_buckets
* sizeof(map
->buckets
[0]))) == NULL
) {
164 map
->buckets
= _realloc
;
166 memset(map
->buckets
+ oldsize
, 0, (map
->max_buckets
-oldsize
) * sizeof(map
->buckets
[0]));
169 if (map
->buckets
[pos
] != 0) {
175 map
->buckets
[pos
] = bucket
;
177 if (idout
) *idout
= id
;
181 int crush_remove_bucket(struct crush_map
*map
, struct crush_bucket
*bucket
)
183 int pos
= -1 - bucket
->id
;
184 assert(pos
< map
->max_buckets
);
185 map
->buckets
[pos
] = NULL
;
186 crush_destroy_bucket(bucket
);
193 struct crush_bucket_uniform
*
194 crush_make_uniform_bucket(int hash
, int type
, int size
,
199 struct crush_bucket_uniform
*bucket
;
201 bucket
= malloc(sizeof(*bucket
));
204 memset(bucket
, 0, sizeof(*bucket
));
205 bucket
->h
.alg
= CRUSH_BUCKET_UNIFORM
;
206 bucket
->h
.hash
= hash
;
207 bucket
->h
.type
= type
;
208 bucket
->h
.size
= size
;
210 if (crush_multiplication_is_unsafe(size
, item_weight
))
213 bucket
->h
.weight
= size
* item_weight
;
214 bucket
->item_weight
= item_weight
;
215 bucket
->h
.items
= malloc(sizeof(__s32
)*size
);
217 if (!bucket
->h
.items
)
220 for (i
=0; i
<size
; i
++)
221 bucket
->h
.items
[i
] = items
[i
];
225 free(bucket
->h
.items
);
233 struct crush_bucket_list
*
234 crush_make_list_bucket(int hash
, int type
, int size
,
240 struct crush_bucket_list
*bucket
;
242 bucket
= malloc(sizeof(*bucket
));
245 memset(bucket
, 0, sizeof(*bucket
));
246 bucket
->h
.alg
= CRUSH_BUCKET_LIST
;
247 bucket
->h
.hash
= hash
;
248 bucket
->h
.type
= type
;
249 bucket
->h
.size
= size
;
251 bucket
->h
.items
= malloc(sizeof(__s32
)*size
);
252 if (!bucket
->h
.items
)
256 bucket
->item_weights
= malloc(sizeof(__u32
)*size
);
257 if (!bucket
->item_weights
)
259 bucket
->sum_weights
= malloc(sizeof(__u32
)*size
);
260 if (!bucket
->sum_weights
)
263 for (i
=0; i
<size
; i
++) {
264 bucket
->h
.items
[i
] = items
[i
];
265 bucket
->item_weights
[i
] = weights
[i
];
267 if (crush_addition_is_unsafe(w
, weights
[i
]))
271 bucket
->sum_weights
[i
] = w
;
272 /*dprintk("pos %d item %d weight %d sum %d\n",
273 i, items[i], weights[i], bucket->sum_weights[i]);*/
276 bucket
->h
.weight
= w
;
280 free(bucket
->sum_weights
);
281 free(bucket
->item_weights
);
282 free(bucket
->h
.items
);
290 static int height(int n
) {
292 while ((n
& 1) == 0) {
298 static int on_right(int n
, int h
) {
299 return n
& (1 << (h
+1));
301 static int parent(int n
)
310 static int calc_depth(int size
)
325 struct crush_bucket_tree
*
326 crush_make_tree_bucket(int hash
, int type
, int size
,
327 int *items
, /* in leaf order */
330 struct crush_bucket_tree
*bucket
;
335 bucket
= malloc(sizeof(*bucket
));
338 memset(bucket
, 0, sizeof(*bucket
));
339 bucket
->h
.alg
= CRUSH_BUCKET_TREE
;
340 bucket
->h
.hash
= hash
;
341 bucket
->h
.type
= type
;
342 bucket
->h
.size
= size
;
345 bucket
->h
.items
= NULL
;
346 bucket
->h
.weight
= 0;
347 bucket
->node_weights
= NULL
;
348 bucket
->num_nodes
= 0;
349 /* printf("size 0 depth 0 nodes 0\n"); */
353 bucket
->h
.items
= malloc(sizeof(__s32
)*size
);
354 if (!bucket
->h
.items
)
357 /* calc tree depth */
358 depth
= calc_depth(size
);
359 bucket
->num_nodes
= 1 << depth
;
360 dprintk("size %d depth %d nodes %d\n", size
, depth
, bucket
->num_nodes
);
362 bucket
->node_weights
= malloc(sizeof(__u32
)*bucket
->num_nodes
);
363 if (!bucket
->node_weights
)
366 memset(bucket
->h
.items
, 0, sizeof(__s32
)*bucket
->h
.size
);
367 memset(bucket
->node_weights
, 0, sizeof(__u32
)*bucket
->num_nodes
);
369 for (i
=0; i
<size
; i
++) {
370 bucket
->h
.items
[i
] = items
[i
];
371 node
= crush_calc_tree_node(i
);
372 dprintk("item %d node %d weight %d\n", i
, node
, weights
[i
]);
373 bucket
->node_weights
[node
] = weights
[i
];
375 if (crush_addition_is_unsafe(bucket
->h
.weight
, weights
[i
]))
378 bucket
->h
.weight
+= weights
[i
];
379 for (j
=1; j
<depth
; j
++) {
382 if (crush_addition_is_unsafe(bucket
->node_weights
[node
], weights
[i
]))
385 bucket
->node_weights
[node
] += weights
[i
];
386 dprintk(" node %d weight %d\n", node
, bucket
->node_weights
[node
]);
389 BUG_ON(bucket
->node_weights
[bucket
->num_nodes
/2] != bucket
->h
.weight
);
393 free(bucket
->node_weights
);
394 free(bucket
->h
.items
);
404 * this code was written 8 years ago. i have a vague recollection of
405 * drawing boxes underneath bars of different lengths, where the bar
406 * length represented the probability/weight, and that there was some
407 * trial and error involved in arriving at this implementation.
408 * however, reading the code now after all this time, the intuition
409 * that motivated is lost on me. lame. my only excuse is that I now
410 * know that the approach is fundamentally flawed and am not
411 * particularly motivated to reconstruct the flawed reasoning.
413 * as best as i can remember, the idea is: sort the weights, and start
414 * with the smallest. arbitrarily scale it at 1.0 (16-bit fixed
415 * point). look at the next larger weight, and calculate the scaling
416 * factor for that straw based on the relative difference in weight so
417 * far. what's not clear to me now is why we are looking at wnext
418 * (the delta to the next bigger weight) for all remaining weights,
419 * and slicing things horizontally instead of considering just the
420 * next item or set of items. or why pow() is used the way it is.
422 * note that the original version 1 of this function made special
423 * accomodation for the case where straw lengths were identical. this
424 * is also flawed in a non-obvious way; version 2 drops the special
425 * handling and appears to work just as well.
427 * moral of the story: if you do something clever, write down why it
430 int crush_calc_straw(struct crush_map
*map
, struct crush_bucket_straw
*bucket
)
434 double straw
, wbelow
, lastw
, wnext
, pbelow
;
436 int size
= bucket
->h
.size
;
437 __u32
*weights
= bucket
->item_weights
;
439 /* reverse sort by weight (simple insertion sort) */
440 reverse
= malloc(sizeof(int) * size
);
445 for (i
=1; i
<size
; i
++) {
446 for (j
=0; j
<i
; j
++) {
447 if (weights
[i
] < weights
[reverse
[j
]]) {
450 reverse
[k
] = reverse
[k
-1];
466 if (map
->straw_calc_version
== 0) {
467 /* zero weight items get 0 length straws! */
468 if (weights
[reverse
[i
]] == 0) {
469 bucket
->straws
[reverse
[i
]] = 0;
474 /* set this item's straw */
475 bucket
->straws
[reverse
[i
]] = straw
* 0x10000;
476 dprintk("item %d at %d weight %d straw %d (%lf)\n",
477 bucket
->h
.items
[reverse
[i
]],
478 reverse
[i
], weights
[reverse
[i
]],
479 bucket
->straws
[reverse
[i
]], straw
);
484 /* same weight as previous? */
485 if (weights
[reverse
[i
]] == weights
[reverse
[i
-1]]) {
486 dprintk("same as previous\n");
490 /* adjust straw for next guy */
491 wbelow
+= ((double)weights
[reverse
[i
-1]] - lastw
) *
493 for (j
=i
; j
<size
; j
++)
494 if (weights
[reverse
[j
]] == weights
[reverse
[i
]])
498 wnext
= numleft
* (weights
[reverse
[i
]] -
499 weights
[reverse
[i
-1]]);
500 pbelow
= wbelow
/ (wbelow
+ wnext
);
501 dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n",
502 wbelow
, wnext
, pbelow
, numleft
);
504 straw
*= pow((double)1.0 / pbelow
, (double)1.0 /
507 lastw
= weights
[reverse
[i
-1]];
508 } else if (map
->straw_calc_version
>= 1) {
509 /* zero weight items get 0 length straws! */
510 if (weights
[reverse
[i
]] == 0) {
511 bucket
->straws
[reverse
[i
]] = 0;
517 /* set this item's straw */
518 bucket
->straws
[reverse
[i
]] = straw
* 0x10000;
519 dprintk("item %d at %d weight %d straw %d (%lf)\n",
520 bucket
->h
.items
[reverse
[i
]],
521 reverse
[i
], weights
[reverse
[i
]],
522 bucket
->straws
[reverse
[i
]], straw
);
527 /* adjust straw for next guy */
528 wbelow
+= ((double)weights
[reverse
[i
-1]] - lastw
) *
531 wnext
= numleft
* (weights
[reverse
[i
]] -
532 weights
[reverse
[i
-1]]);
533 pbelow
= wbelow
/ (wbelow
+ wnext
);
534 dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n",
535 wbelow
, wnext
, pbelow
, numleft
);
537 straw
*= pow((double)1.0 / pbelow
, (double)1.0 /
540 lastw
= weights
[reverse
[i
-1]];
548 struct crush_bucket_straw
*
549 crush_make_straw_bucket(struct crush_map
*map
,
556 struct crush_bucket_straw
*bucket
;
559 bucket
= malloc(sizeof(*bucket
));
562 memset(bucket
, 0, sizeof(*bucket
));
563 bucket
->h
.alg
= CRUSH_BUCKET_STRAW
;
564 bucket
->h
.hash
= hash
;
565 bucket
->h
.type
= type
;
566 bucket
->h
.size
= size
;
568 bucket
->h
.items
= malloc(sizeof(__s32
)*size
);
569 if (!bucket
->h
.items
)
571 bucket
->item_weights
= malloc(sizeof(__u32
)*size
);
572 if (!bucket
->item_weights
)
574 bucket
->straws
= malloc(sizeof(__u32
)*size
);
578 bucket
->h
.weight
= 0;
579 for (i
=0; i
<size
; i
++) {
580 bucket
->h
.items
[i
] = items
[i
];
581 bucket
->h
.weight
+= weights
[i
];
582 bucket
->item_weights
[i
] = weights
[i
];
585 if (crush_calc_straw(map
, bucket
) < 0)
590 free(bucket
->straws
);
591 free(bucket
->item_weights
);
592 free(bucket
->h
.items
);
597 struct crush_bucket_straw2
*
598 crush_make_straw2_bucket(struct crush_map
*map
,
605 struct crush_bucket_straw2
*bucket
;
608 bucket
= malloc(sizeof(*bucket
));
611 memset(bucket
, 0, sizeof(*bucket
));
612 bucket
->h
.alg
= CRUSH_BUCKET_STRAW2
;
613 bucket
->h
.hash
= hash
;
614 bucket
->h
.type
= type
;
615 bucket
->h
.size
= size
;
617 bucket
->h
.items
= malloc(sizeof(__s32
)*size
);
618 if (!bucket
->h
.items
)
620 bucket
->item_weights
= malloc(sizeof(__u32
)*size
);
621 if (!bucket
->item_weights
)
624 bucket
->h
.weight
= 0;
625 for (i
=0; i
<size
; i
++) {
626 bucket
->h
.items
[i
] = items
[i
];
627 bucket
->h
.weight
+= weights
[i
];
628 bucket
->item_weights
[i
] = weights
[i
];
633 free(bucket
->item_weights
);
634 free(bucket
->h
.items
);
642 crush_make_bucket(struct crush_map
*map
,
643 int alg
, int hash
, int type
, int size
,
650 case CRUSH_BUCKET_UNIFORM
:
652 item_weight
= weights
[0];
655 return (struct crush_bucket
*)crush_make_uniform_bucket(hash
, type
, size
, items
, item_weight
);
657 case CRUSH_BUCKET_LIST
:
658 return (struct crush_bucket
*)crush_make_list_bucket(hash
, type
, size
, items
, weights
);
660 case CRUSH_BUCKET_TREE
:
661 return (struct crush_bucket
*)crush_make_tree_bucket(hash
, type
, size
, items
, weights
);
663 case CRUSH_BUCKET_STRAW
:
664 return (struct crush_bucket
*)crush_make_straw_bucket(map
, hash
, type
, size
, items
, weights
);
665 case CRUSH_BUCKET_STRAW2
:
666 return (struct crush_bucket
*)crush_make_straw2_bucket(map
, hash
, type
, size
, items
, weights
);
672 /************************************************/
674 int crush_add_uniform_bucket_item(struct crush_bucket_uniform
*bucket
, int item
, int weight
)
676 int newsize
= bucket
->h
.size
+ 1;
677 void *_realloc
= NULL
;
679 /* In such situation 'CRUSH_BUCKET_UNIFORM', the weight
680 provided for the item should be the same as
681 bucket->item_weight defined with 'crush_make_bucket'. This
682 assumption is enforced by the return value which is always
684 if (bucket
->item_weight
!= weight
) {
688 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
691 bucket
->h
.items
= _realloc
;
694 bucket
->h
.items
[newsize
-1] = item
;
696 if (crush_addition_is_unsafe(bucket
->h
.weight
, weight
))
699 bucket
->h
.weight
+= weight
;
705 int crush_add_list_bucket_item(struct crush_bucket_list
*bucket
, int item
, int weight
)
707 int newsize
= bucket
->h
.size
+ 1;
708 void *_realloc
= NULL
;
710 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
713 bucket
->h
.items
= _realloc
;
715 if ((_realloc
= realloc(bucket
->item_weights
, sizeof(__u32
)*newsize
)) == NULL
) {
718 bucket
->item_weights
= _realloc
;
720 if ((_realloc
= realloc(bucket
->sum_weights
, sizeof(__u32
)*newsize
)) == NULL
) {
723 bucket
->sum_weights
= _realloc
;
726 bucket
->h
.items
[newsize
-1] = item
;
727 bucket
->item_weights
[newsize
-1] = weight
;
730 if (crush_addition_is_unsafe(bucket
->sum_weights
[newsize
-2], weight
))
733 bucket
->sum_weights
[newsize
-1] = bucket
->sum_weights
[newsize
-2] + weight
;
737 bucket
->sum_weights
[newsize
-1] = weight
;
740 bucket
->h
.weight
+= weight
;
745 int crush_add_tree_bucket_item(struct crush_bucket_tree
*bucket
, int item
, int weight
)
747 int newsize
= bucket
->h
.size
+ 1;
748 int depth
= calc_depth(newsize
);;
751 void *_realloc
= NULL
;
753 bucket
->num_nodes
= 1 << depth
;
755 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
758 bucket
->h
.items
= _realloc
;
760 if ((_realloc
= realloc(bucket
->node_weights
, sizeof(__u32
)*bucket
->num_nodes
)) == NULL
) {
763 bucket
->node_weights
= _realloc
;
766 node
= crush_calc_tree_node(newsize
-1);
767 bucket
->node_weights
[node
] = weight
;
769 /* if the depth increase, we need to initialize the new root node's weight before add bucket item */
770 int root
= bucket
->num_nodes
/2;
771 if (depth
>= 2 && (node
- 1) == root
) {
772 /* if the new item is the first node in right sub tree, so
773 * the root node initial weight is left sub tree's weight
775 bucket
->node_weights
[root
] = bucket
->node_weights
[root
/2];
778 for (j
=1; j
<depth
; j
++) {
781 if (crush_addition_is_unsafe(bucket
->node_weights
[node
], weight
))
784 bucket
->node_weights
[node
] += weight
;
785 dprintk(" node %d weight %d\n", node
, bucket
->node_weights
[node
]);
789 if (crush_addition_is_unsafe(bucket
->h
.weight
, weight
))
792 bucket
->h
.items
[newsize
-1] = item
;
793 bucket
->h
.weight
+= weight
;
799 int crush_add_straw_bucket_item(struct crush_map
*map
,
800 struct crush_bucket_straw
*bucket
,
801 int item
, int weight
)
803 int newsize
= bucket
->h
.size
+ 1;
805 void *_realloc
= NULL
;
807 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
810 bucket
->h
.items
= _realloc
;
812 if ((_realloc
= realloc(bucket
->item_weights
, sizeof(__u32
)*newsize
)) == NULL
) {
815 bucket
->item_weights
= _realloc
;
817 if ((_realloc
= realloc(bucket
->straws
, sizeof(__u32
)*newsize
)) == NULL
) {
820 bucket
->straws
= _realloc
;
823 bucket
->h
.items
[newsize
-1] = item
;
824 bucket
->item_weights
[newsize
-1] = weight
;
826 if (crush_addition_is_unsafe(bucket
->h
.weight
, weight
))
829 bucket
->h
.weight
+= weight
;
832 return crush_calc_straw(map
, bucket
);
835 int crush_add_straw2_bucket_item(struct crush_map
*map
,
836 struct crush_bucket_straw2
*bucket
,
837 int item
, int weight
)
839 int newsize
= bucket
->h
.size
+ 1;
841 void *_realloc
= NULL
;
843 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
846 bucket
->h
.items
= _realloc
;
848 if ((_realloc
= realloc(bucket
->item_weights
, sizeof(__u32
)*newsize
)) == NULL
) {
851 bucket
->item_weights
= _realloc
;
854 bucket
->h
.items
[newsize
-1] = item
;
855 bucket
->item_weights
[newsize
-1] = weight
;
857 if (crush_addition_is_unsafe(bucket
->h
.weight
, weight
))
860 bucket
->h
.weight
+= weight
;
866 int crush_bucket_add_item(struct crush_map
*map
,
867 struct crush_bucket
*b
, int item
, int weight
)
870 case CRUSH_BUCKET_UNIFORM
:
871 return crush_add_uniform_bucket_item((struct crush_bucket_uniform
*)b
, item
, weight
);
872 case CRUSH_BUCKET_LIST
:
873 return crush_add_list_bucket_item((struct crush_bucket_list
*)b
, item
, weight
);
874 case CRUSH_BUCKET_TREE
:
875 return crush_add_tree_bucket_item((struct crush_bucket_tree
*)b
, item
, weight
);
876 case CRUSH_BUCKET_STRAW
:
877 return crush_add_straw_bucket_item(map
, (struct crush_bucket_straw
*)b
, item
, weight
);
878 case CRUSH_BUCKET_STRAW2
:
879 return crush_add_straw2_bucket_item(map
, (struct crush_bucket_straw2
*)b
, item
, weight
);
885 /************************************************/
887 int crush_remove_uniform_bucket_item(struct crush_bucket_uniform
*bucket
, int item
)
891 void *_realloc
= NULL
;
893 for (i
= 0; i
< bucket
->h
.size
; i
++)
894 if (bucket
->h
.items
[i
] == item
)
896 if (i
== bucket
->h
.size
)
899 for (j
= i
; j
< bucket
->h
.size
; j
++)
900 bucket
->h
.items
[j
] = bucket
->h
.items
[j
+1];
901 newsize
= --bucket
->h
.size
;
902 if (bucket
->item_weight
< bucket
->h
.weight
)
903 bucket
->h
.weight
-= bucket
->item_weight
;
905 bucket
->h
.weight
= 0;
907 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
910 bucket
->h
.items
= _realloc
;
915 int crush_remove_list_bucket_item(struct crush_bucket_list
*bucket
, int item
)
921 for (i
= 0; i
< bucket
->h
.size
; i
++)
922 if (bucket
->h
.items
[i
] == item
)
924 if (i
== bucket
->h
.size
)
927 weight
= bucket
->item_weights
[i
];
928 for (j
= i
; j
< bucket
->h
.size
; j
++) {
929 bucket
->h
.items
[j
] = bucket
->h
.items
[j
+1];
930 bucket
->item_weights
[j
] = bucket
->item_weights
[j
+1];
931 bucket
->sum_weights
[j
] = bucket
->sum_weights
[j
+1] - weight
;
933 if (weight
< bucket
->h
.weight
)
934 bucket
->h
.weight
-= weight
;
936 bucket
->h
.weight
= 0;
937 newsize
= --bucket
->h
.size
;
939 void *_realloc
= NULL
;
941 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
944 bucket
->h
.items
= _realloc
;
946 if ((_realloc
= realloc(bucket
->item_weights
, sizeof(__u32
)*newsize
)) == NULL
) {
949 bucket
->item_weights
= _realloc
;
951 if ((_realloc
= realloc(bucket
->sum_weights
, sizeof(__u32
)*newsize
)) == NULL
) {
954 bucket
->sum_weights
= _realloc
;
959 int crush_remove_tree_bucket_item(struct crush_bucket_tree
*bucket
, int item
)
964 for (i
= 0; i
< bucket
->h
.size
; i
++) {
968 int depth
= calc_depth(bucket
->h
.size
);
970 if (bucket
->h
.items
[i
] != item
)
973 bucket
->h
.items
[i
] = 0;
974 node
= crush_calc_tree_node(i
);
975 weight
= bucket
->node_weights
[node
];
976 bucket
->node_weights
[node
] = 0;
978 for (j
= 1; j
< depth
; j
++) {
980 bucket
->node_weights
[node
] -= weight
;
981 dprintk(" node %d weight %d\n", node
, bucket
->node_weights
[node
]);
983 if (weight
< bucket
->h
.weight
)
984 bucket
->h
.weight
-= weight
;
986 bucket
->h
.weight
= 0;
989 if (i
== bucket
->h
.size
)
992 newsize
= bucket
->h
.size
;
993 while (newsize
> 0) {
994 int node
= crush_calc_tree_node(newsize
- 1);
995 if (bucket
->node_weights
[node
])
1000 if (newsize
!= bucket
->h
.size
) {
1001 int olddepth
, newdepth
;
1003 void *_realloc
= NULL
;
1005 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
1008 bucket
->h
.items
= _realloc
;
1011 olddepth
= calc_depth(bucket
->h
.size
);
1012 newdepth
= calc_depth(newsize
);
1013 if (olddepth
!= newdepth
) {
1014 bucket
->num_nodes
= 1 << newdepth
;
1015 if ((_realloc
= realloc(bucket
->node_weights
,
1016 sizeof(__u32
)*bucket
->num_nodes
)) == NULL
) {
1019 bucket
->node_weights
= _realloc
;
1023 bucket
->h
.size
= newsize
;
1028 int crush_remove_straw_bucket_item(struct crush_map
*map
,
1029 struct crush_bucket_straw
*bucket
, int item
)
1031 int newsize
= bucket
->h
.size
- 1;
1034 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1035 if (bucket
->h
.items
[i
] == item
) {
1037 if (bucket
->item_weights
[i
] < bucket
->h
.weight
)
1038 bucket
->h
.weight
-= bucket
->item_weights
[i
];
1040 bucket
->h
.weight
= 0;
1041 for (j
= i
; j
< bucket
->h
.size
; j
++) {
1042 bucket
->h
.items
[j
] = bucket
->h
.items
[j
+1];
1043 bucket
->item_weights
[j
] = bucket
->item_weights
[j
+1];
1048 if (i
== bucket
->h
.size
)
1051 void *_realloc
= NULL
;
1053 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
1056 bucket
->h
.items
= _realloc
;
1058 if ((_realloc
= realloc(bucket
->item_weights
, sizeof(__u32
)*newsize
)) == NULL
) {
1061 bucket
->item_weights
= _realloc
;
1063 if ((_realloc
= realloc(bucket
->straws
, sizeof(__u32
)*newsize
)) == NULL
) {
1066 bucket
->straws
= _realloc
;
1069 return crush_calc_straw(map
, bucket
);
1072 int crush_remove_straw2_bucket_item(struct crush_map
*map
,
1073 struct crush_bucket_straw2
*bucket
, int item
)
1075 int newsize
= bucket
->h
.size
- 1;
1078 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1079 if (bucket
->h
.items
[i
] == item
) {
1081 if (bucket
->item_weights
[i
] < bucket
->h
.weight
)
1082 bucket
->h
.weight
-= bucket
->item_weights
[i
];
1084 bucket
->h
.weight
= 0;
1085 for (j
= i
; j
< bucket
->h
.size
; j
++) {
1086 bucket
->h
.items
[j
] = bucket
->h
.items
[j
+1];
1087 bucket
->item_weights
[j
] = bucket
->item_weights
[j
+1];
1092 if (i
== bucket
->h
.size
)
1095 void *_realloc
= NULL
;
1097 if ((_realloc
= realloc(bucket
->h
.items
, sizeof(__s32
)*newsize
)) == NULL
) {
1100 bucket
->h
.items
= _realloc
;
1102 if ((_realloc
= realloc(bucket
->item_weights
, sizeof(__u32
)*newsize
)) == NULL
) {
1105 bucket
->item_weights
= _realloc
;
1111 int crush_bucket_remove_item(struct crush_map
*map
, struct crush_bucket
*b
, int item
)
1114 case CRUSH_BUCKET_UNIFORM
:
1115 return crush_remove_uniform_bucket_item((struct crush_bucket_uniform
*)b
, item
);
1116 case CRUSH_BUCKET_LIST
:
1117 return crush_remove_list_bucket_item((struct crush_bucket_list
*)b
, item
);
1118 case CRUSH_BUCKET_TREE
:
1119 return crush_remove_tree_bucket_item((struct crush_bucket_tree
*)b
, item
);
1120 case CRUSH_BUCKET_STRAW
:
1121 return crush_remove_straw_bucket_item(map
, (struct crush_bucket_straw
*)b
, item
);
1122 case CRUSH_BUCKET_STRAW2
:
1123 return crush_remove_straw2_bucket_item(map
, (struct crush_bucket_straw2
*)b
, item
);
1130 /************************************************/
1132 int crush_adjust_uniform_bucket_item_weight(struct crush_bucket_uniform
*bucket
, int item
, int weight
)
1134 int diff
= (weight
- bucket
->item_weight
) * bucket
->h
.size
;
1136 bucket
->item_weight
= weight
;
1137 bucket
->h
.weight
= bucket
->item_weight
* bucket
->h
.size
;
1142 int crush_adjust_list_bucket_item_weight(struct crush_bucket_list
*bucket
, int item
, int weight
)
1147 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1148 if (bucket
->h
.items
[i
] == item
)
1151 if (i
== bucket
->h
.size
)
1154 diff
= weight
- bucket
->item_weights
[i
];
1155 bucket
->item_weights
[i
] = weight
;
1156 bucket
->h
.weight
+= diff
;
1158 for (j
= i
; j
< bucket
->h
.size
; j
++)
1159 bucket
->sum_weights
[j
] += diff
;
1164 int crush_adjust_tree_bucket_item_weight(struct crush_bucket_tree
*bucket
, int item
, int weight
)
1169 unsigned depth
= calc_depth(bucket
->h
.size
);
1171 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1172 if (bucket
->h
.items
[i
] == item
)
1175 if (i
== bucket
->h
.size
)
1178 node
= crush_calc_tree_node(i
);
1179 diff
= weight
- bucket
->node_weights
[node
];
1180 bucket
->node_weights
[node
] = weight
;
1181 bucket
->h
.weight
+= diff
;
1183 for (j
=1; j
<depth
; j
++) {
1184 node
= parent(node
);
1185 bucket
->node_weights
[node
] += diff
;
1191 int crush_adjust_straw_bucket_item_weight(struct crush_map
*map
,
1192 struct crush_bucket_straw
*bucket
,
1193 int item
, int weight
)
1199 for (idx
= 0; idx
< bucket
->h
.size
; idx
++)
1200 if (bucket
->h
.items
[idx
] == item
)
1202 if (idx
== bucket
->h
.size
)
1205 diff
= weight
- bucket
->item_weights
[idx
];
1206 bucket
->item_weights
[idx
] = weight
;
1207 bucket
->h
.weight
+= diff
;
1209 r
= crush_calc_straw(map
, bucket
);
1216 int crush_adjust_straw2_bucket_item_weight(struct crush_map
*map
,
1217 struct crush_bucket_straw2
*bucket
,
1218 int item
, int weight
)
1223 for (idx
= 0; idx
< bucket
->h
.size
; idx
++)
1224 if (bucket
->h
.items
[idx
] == item
)
1226 if (idx
== bucket
->h
.size
)
1229 diff
= weight
- bucket
->item_weights
[idx
];
1230 bucket
->item_weights
[idx
] = weight
;
1231 bucket
->h
.weight
+= diff
;
1236 int crush_bucket_adjust_item_weight(struct crush_map
*map
,
1237 struct crush_bucket
*b
,
1238 int item
, int weight
)
1241 case CRUSH_BUCKET_UNIFORM
:
1242 return crush_adjust_uniform_bucket_item_weight((struct crush_bucket_uniform
*)b
,
1244 case CRUSH_BUCKET_LIST
:
1245 return crush_adjust_list_bucket_item_weight((struct crush_bucket_list
*)b
,
1247 case CRUSH_BUCKET_TREE
:
1248 return crush_adjust_tree_bucket_item_weight((struct crush_bucket_tree
*)b
,
1250 case CRUSH_BUCKET_STRAW
:
1251 return crush_adjust_straw_bucket_item_weight(map
,
1252 (struct crush_bucket_straw
*)b
,
1254 case CRUSH_BUCKET_STRAW2
:
1255 return crush_adjust_straw2_bucket_item_weight(map
,
1256 (struct crush_bucket_straw2
*)b
,
1263 /************************************************/
1265 static int crush_reweight_uniform_bucket(struct crush_map
*map
, struct crush_bucket_uniform
*bucket
)
1268 unsigned sum
= 0, n
= 0, leaves
= 0;
1270 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1271 int id
= bucket
->h
.items
[i
];
1273 struct crush_bucket
*c
= map
->buckets
[-1-id
];
1274 crush_reweight_bucket(map
, c
);
1276 if (crush_addition_is_unsafe(sum
, c
->weight
))
1287 bucket
->item_weight
= sum
/ n
; // more bucket children than leaves, average!
1288 bucket
->h
.weight
= bucket
->item_weight
* bucket
->h
.size
;
1293 static int crush_reweight_list_bucket(struct crush_map
*map
, struct crush_bucket_list
*bucket
)
1297 bucket
->h
.weight
= 0;
1298 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1299 int id
= bucket
->h
.items
[i
];
1301 struct crush_bucket
*c
= map
->buckets
[-1-id
];
1302 crush_reweight_bucket(map
, c
);
1303 bucket
->item_weights
[i
] = c
->weight
;
1306 if (crush_addition_is_unsafe(bucket
->h
.weight
, bucket
->item_weights
[i
]))
1309 bucket
->h
.weight
+= bucket
->item_weights
[i
];
1315 static int crush_reweight_tree_bucket(struct crush_map
*map
, struct crush_bucket_tree
*bucket
)
1319 bucket
->h
.weight
= 0;
1320 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1321 int node
= crush_calc_tree_node(i
);
1322 int id
= bucket
->h
.items
[i
];
1324 struct crush_bucket
*c
= map
->buckets
[-1-id
];
1325 crush_reweight_bucket(map
, c
);
1326 bucket
->node_weights
[node
] = c
->weight
;
1329 if (crush_addition_is_unsafe(bucket
->h
.weight
, bucket
->node_weights
[node
]))
1332 bucket
->h
.weight
+= bucket
->node_weights
[node
];
1340 static int crush_reweight_straw_bucket(struct crush_map
*map
, struct crush_bucket_straw
*bucket
)
1344 bucket
->h
.weight
= 0;
1345 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1346 int id
= bucket
->h
.items
[i
];
1348 struct crush_bucket
*c
= map
->buckets
[-1-id
];
1349 crush_reweight_bucket(map
, c
);
1350 bucket
->item_weights
[i
] = c
->weight
;
1353 if (crush_addition_is_unsafe(bucket
->h
.weight
, bucket
->item_weights
[i
]))
1356 bucket
->h
.weight
+= bucket
->item_weights
[i
];
1358 crush_calc_straw(map
, bucket
);
1363 static int crush_reweight_straw2_bucket(struct crush_map
*map
, struct crush_bucket_straw2
*bucket
)
1367 bucket
->h
.weight
= 0;
1368 for (i
= 0; i
< bucket
->h
.size
; i
++) {
1369 int id
= bucket
->h
.items
[i
];
1371 struct crush_bucket
*c
= map
->buckets
[-1-id
];
1372 crush_reweight_bucket(map
, c
);
1373 bucket
->item_weights
[i
] = c
->weight
;
1376 if (crush_addition_is_unsafe(bucket
->h
.weight
, bucket
->item_weights
[i
]))
1379 bucket
->h
.weight
+= bucket
->item_weights
[i
];
1385 int crush_reweight_bucket(struct crush_map
*map
, struct crush_bucket
*b
)
1388 case CRUSH_BUCKET_UNIFORM
:
1389 return crush_reweight_uniform_bucket(map
, (struct crush_bucket_uniform
*)b
);
1390 case CRUSH_BUCKET_LIST
:
1391 return crush_reweight_list_bucket(map
, (struct crush_bucket_list
*)b
);
1392 case CRUSH_BUCKET_TREE
:
1393 return crush_reweight_tree_bucket(map
, (struct crush_bucket_tree
*)b
);
1394 case CRUSH_BUCKET_STRAW
:
1395 return crush_reweight_straw_bucket(map
, (struct crush_bucket_straw
*)b
);
1396 case CRUSH_BUCKET_STRAW2
:
1397 return crush_reweight_straw2_bucket(map
, (struct crush_bucket_straw2
*)b
);
1403 struct crush_choose_arg
*crush_make_choose_args(struct crush_map
*map
, int num_positions
)
1406 int sum_bucket_size
= 0;
1407 int bucket_count
= 0;
1408 for (b
= 0; b
< map
->max_buckets
; b
++) {
1409 if (map
->buckets
[b
] == 0)
1411 sum_bucket_size
+= map
->buckets
[b
]->size
;
1414 dprintk("sum_bucket_size %d max_buckets %d bucket_count %d\n",
1415 sum_bucket_size
, map
->max_buckets
, bucket_count
);
1416 int size
= (sizeof(struct crush_choose_arg
) * map
->max_buckets
+
1417 sizeof(struct crush_weight_set
) * bucket_count
* num_positions
+
1418 sizeof(__u32
) * sum_bucket_size
* num_positions
+ // weights
1419 sizeof(__u32
) * sum_bucket_size
); // ids
1420 char *space
= malloc(size
);
1421 struct crush_choose_arg
*arg
= (struct crush_choose_arg
*)space
;
1422 struct crush_weight_set
*weight_set
= (struct crush_weight_set
*)(arg
+ map
->max_buckets
);
1423 __u32
*weights
= (__u32
*)(weight_set
+ bucket_count
* num_positions
);
1424 char *weight_set_ends
= (char*)weights
;
1425 int *ids
= (int *)(weights
+ sum_bucket_size
* num_positions
);
1426 char *weights_end
= (char *)ids
;
1427 char *ids_end
= (char *)(ids
+ sum_bucket_size
);
1428 BUG_ON(space
+ size
!= ids_end
);
1429 for (b
= 0; b
< map
->max_buckets
; b
++) {
1430 if (map
->buckets
[b
] == 0) {
1431 memset(&arg
[b
], '\0', sizeof(struct crush_choose_arg
));
1434 struct crush_bucket_straw2
*bucket
= (struct crush_bucket_straw2
*)map
->buckets
[b
];
1437 for (position
= 0; position
< num_positions
; position
++) {
1438 memcpy(weights
, bucket
->item_weights
, sizeof(__u32
) * bucket
->h
.size
);
1439 weight_set
[position
].weights
= weights
;
1440 weight_set
[position
].size
= bucket
->h
.size
;
1441 dprintk("moving weight %d bytes forward\n", (int)((weights
+ bucket
->h
.size
) - weights
));
1442 weights
+= bucket
->h
.size
;
1444 arg
[b
].weight_set
= weight_set
;
1445 arg
[b
].weight_set_size
= num_positions
;
1446 weight_set
+= position
;
1448 memcpy(ids
, bucket
->h
.items
, sizeof(int) * bucket
->h
.size
);
1450 arg
[b
].ids_size
= bucket
->h
.size
;
1451 ids
+= bucket
->h
.size
;
1453 BUG_ON((char*)weight_set_ends
!= (char*)weight_set
);
1454 BUG_ON((char*)weights_end
!= (char*)weights
);
1455 BUG_ON((char*)ids
!= (char*)ids_end
);
1459 void crush_destroy_choose_args(struct crush_choose_arg
*args
)
1464 /***************************/
1466 /* methods to check for safe arithmetic operations */
1468 int crush_addition_is_unsafe(__u32 a
, __u32 b
)
1470 if ((((__u32
)(-1)) - b
) < a
)
1476 int crush_multiplication_is_unsafe(__u32 a
, __u32 b
)
1478 /* prevent division by zero */
1483 if ((((__u32
)(-1)) / b
) < a
)
1489 /***************************/
1491 /* methods to configure crush_map */
1493 void set_legacy_crush_map(struct crush_map
*map
) {
1494 /* initialize legacy tunable values */
1495 map
->choose_local_tries
= 2;
1496 map
->choose_local_fallback_tries
= 5;
1497 map
->choose_total_tries
= 19;
1498 map
->chooseleaf_descend_once
= 0;
1499 map
->chooseleaf_vary_r
= 0;
1500 map
->chooseleaf_stable
= 0;
1501 map
->straw_calc_version
= 0;
1503 // by default, use legacy types, and also exclude tree,
1504 // since it was buggy.
1505 map
->allowed_bucket_algs
= CRUSH_LEGACY_ALLOWED_BUCKET_ALGS
;
1508 void set_optimal_crush_map(struct crush_map
*map
) {
1509 map
->choose_local_tries
= 0;
1510 map
->choose_local_fallback_tries
= 0;
1511 map
->choose_total_tries
= 50;
1512 map
->chooseleaf_descend_once
= 1;
1513 map
->chooseleaf_vary_r
= 1;
1514 map
->chooseleaf_stable
= 1;
1515 map
->allowed_bucket_algs
= (
1516 (1 << CRUSH_BUCKET_UNIFORM
) |
1517 (1 << CRUSH_BUCKET_LIST
) |
1518 (1 << CRUSH_BUCKET_STRAW
) |
1519 (1 << CRUSH_BUCKET_STRAW2
));