]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | #include <string.h> |
7c673cae FG |
2 | #include <math.h> |
3 | #include <stdlib.h> | |
4 | #include <stdio.h> | |
5 | #include <assert.h> | |
6 | #include <errno.h> | |
7 | ||
31f18b77 | 8 | #include "crush/crush.h" |
7c673cae | 9 | #include "builder.h" |
7c673cae FG |
10 | |
11 | #define dprintk(args...) /* printf(args) */ | |
12 | ||
13 | #define BUG_ON(x) assert(!(x)) | |
14 | ||
15 | struct crush_map *crush_create() | |
16 | { | |
17 | struct crush_map *m; | |
18 | m = malloc(sizeof(*m)); | |
19 | if (!m) | |
20 | return NULL; | |
21 | memset(m, 0, sizeof(*m)); | |
22 | ||
23 | set_optimal_crush_map(m); | |
24 | return m; | |
25 | } | |
26 | ||
27 | /* | |
28 | * finalize should be called _after_ all buckets are added to the map. | |
29 | */ | |
30 | void crush_finalize(struct crush_map *map) | |
31 | { | |
32 | int b; | |
33 | __u32 i; | |
34 | ||
35 | /* Calculate the needed working space while we do other | |
36 | finalization tasks. */ | |
37 | map->working_size = sizeof(struct crush_work); | |
38 | /* Space for the array of pointers to per-bucket workspace */ | |
39 | map->working_size += map->max_buckets * | |
40 | sizeof(struct crush_work_bucket *); | |
41 | ||
42 | /* calc max_devices */ | |
43 | map->max_devices = 0; | |
44 | for (b=0; b<map->max_buckets; b++) { | |
45 | if (map->buckets[b] == 0) | |
46 | continue; | |
47 | for (i=0; i<map->buckets[b]->size; i++) | |
48 | if (map->buckets[b]->items[i] >= map->max_devices) | |
49 | map->max_devices = map->buckets[b]->items[i] + 1; | |
50 | ||
51 | switch (map->buckets[b]->alg) { | |
52 | default: | |
53 | /* The base case, permutation variables and | |
54 | the pointer to the permutation array. */ | |
55 | map->working_size += sizeof(struct crush_work_bucket); | |
56 | break; | |
57 | } | |
58 | /* Every bucket has a permutation array. */ | |
59 | map->working_size += map->buckets[b]->size * sizeof(__u32); | |
60 | } | |
61 | } | |
62 | ||
63 | ||
64 | ||
65 | /** rules **/ | |
66 | ||
67 | int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno) | |
68 | { | |
69 | __u32 r; | |
70 | ||
71 | if (ruleno < 0) { | |
72 | for (r=0; r < map->max_rules; r++) | |
73 | if (map->rules[r] == 0) | |
74 | break; | |
75 | assert(r < CRUSH_MAX_RULES); | |
76 | } | |
77 | else | |
78 | r = ruleno; | |
79 | ||
80 | if (r >= map->max_rules) { | |
81 | /* expand array */ | |
82 | int oldsize; | |
83 | void *_realloc = NULL; | |
84 | if (map->max_rules +1 > CRUSH_MAX_RULES) | |
85 | return -ENOSPC; | |
86 | oldsize = map->max_rules; | |
87 | map->max_rules = r+1; | |
88 | if ((_realloc = realloc(map->rules, map->max_rules * sizeof(map->rules[0]))) == NULL) { | |
89 | return -ENOMEM; | |
90 | } else { | |
91 | map->rules = _realloc; | |
92 | } | |
93 | memset(map->rules + oldsize, 0, (map->max_rules-oldsize) * sizeof(map->rules[0])); | |
94 | } | |
95 | ||
96 | /* add it */ | |
97 | map->rules[r] = rule; | |
98 | return r; | |
99 | } | |
100 | ||
101 | struct crush_rule *crush_make_rule(int len, int ruleset, int type, int minsize, int maxsize) | |
102 | { | |
103 | struct crush_rule *rule; | |
104 | rule = malloc(crush_rule_size(len)); | |
105 | if (!rule) | |
106 | return NULL; | |
107 | rule->len = len; | |
108 | rule->mask.ruleset = ruleset; | |
109 | rule->mask.type = type; | |
110 | rule->mask.min_size = minsize; | |
111 | rule->mask.max_size = maxsize; | |
112 | return rule; | |
113 | } | |
114 | ||
115 | /* | |
116 | * be careful; this doesn't verify that the buffer you allocated is big enough! | |
117 | */ | |
118 | void crush_rule_set_step(struct crush_rule *rule, int n, int op, int arg1, int arg2) | |
119 | { | |
120 | assert((__u32)n < rule->len); | |
121 | rule->steps[n].op = op; | |
122 | rule->steps[n].arg1 = arg1; | |
123 | rule->steps[n].arg2 = arg2; | |
124 | } | |
125 | ||
126 | ||
127 | /** buckets **/ | |
128 | int crush_get_next_bucket_id(struct crush_map *map) | |
129 | { | |
130 | int pos; | |
131 | for (pos=0; pos < map->max_buckets; pos++) | |
132 | if (map->buckets[pos] == 0) | |
133 | break; | |
134 | return -1 - pos; | |
135 | } | |
136 | ||
137 | ||
138 | int crush_add_bucket(struct crush_map *map, | |
139 | int id, | |
140 | struct crush_bucket *bucket, | |
141 | int *idout) | |
142 | { | |
143 | int pos; | |
144 | ||
145 | /* find a bucket id */ | |
146 | if (id == 0) | |
147 | id = crush_get_next_bucket_id(map); | |
148 | pos = -1 - id; | |
149 | ||
150 | while (pos >= map->max_buckets) { | |
151 | /* expand array */ | |
152 | int oldsize = map->max_buckets; | |
153 | if (map->max_buckets) | |
154 | map->max_buckets *= 2; | |
155 | else | |
156 | map->max_buckets = 8; | |
157 | void *_realloc = NULL; | |
158 | if ((_realloc = realloc(map->buckets, map->max_buckets * sizeof(map->buckets[0]))) == NULL) { | |
159 | return -ENOMEM; | |
160 | } else { | |
161 | map->buckets = _realloc; | |
162 | } | |
163 | memset(map->buckets + oldsize, 0, (map->max_buckets-oldsize) * sizeof(map->buckets[0])); | |
164 | } | |
165 | ||
166 | if (map->buckets[pos] != 0) { | |
167 | return -EEXIST; | |
168 | } | |
169 | ||
170 | /* add it */ | |
171 | bucket->id = id; | |
172 | map->buckets[pos] = bucket; | |
173 | ||
174 | if (idout) *idout = id; | |
175 | return 0; | |
176 | } | |
177 | ||
178 | int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket) | |
179 | { | |
180 | int pos = -1 - bucket->id; | |
181 | assert(pos < map->max_buckets); | |
182 | map->buckets[pos] = NULL; | |
183 | crush_destroy_bucket(bucket); | |
184 | return 0; | |
185 | } | |
186 | ||
187 | ||
188 | /* uniform bucket */ | |
189 | ||
190 | struct crush_bucket_uniform * | |
191 | crush_make_uniform_bucket(int hash, int type, int size, | |
192 | int *items, | |
193 | int item_weight) | |
194 | { | |
195 | int i; | |
196 | struct crush_bucket_uniform *bucket; | |
197 | ||
198 | bucket = malloc(sizeof(*bucket)); | |
199 | if (!bucket) | |
200 | return NULL; | |
201 | memset(bucket, 0, sizeof(*bucket)); | |
202 | bucket->h.alg = CRUSH_BUCKET_UNIFORM; | |
203 | bucket->h.hash = hash; | |
204 | bucket->h.type = type; | |
205 | bucket->h.size = size; | |
206 | ||
207 | if (crush_multiplication_is_unsafe(size, item_weight)) | |
208 | goto err; | |
209 | ||
210 | bucket->h.weight = size * item_weight; | |
211 | bucket->item_weight = item_weight; | |
f67539c2 TL |
212 | |
213 | if (size == 0) { | |
214 | return bucket; | |
215 | } | |
7c673cae FG |
216 | bucket->h.items = malloc(sizeof(__s32)*size); |
217 | ||
218 | if (!bucket->h.items) | |
219 | goto err; | |
220 | ||
221 | for (i=0; i<size; i++) | |
222 | bucket->h.items[i] = items[i]; | |
223 | ||
224 | return bucket; | |
225 | err: | |
226 | free(bucket->h.items); | |
227 | free(bucket); | |
228 | return NULL; | |
229 | } | |
230 | ||
231 | ||
232 | /* list bucket */ | |
233 | ||
234 | struct crush_bucket_list* | |
235 | crush_make_list_bucket(int hash, int type, int size, | |
236 | int *items, | |
237 | int *weights) | |
238 | { | |
239 | int i; | |
240 | int w; | |
241 | struct crush_bucket_list *bucket; | |
242 | ||
243 | bucket = malloc(sizeof(*bucket)); | |
244 | if (!bucket) | |
245 | return NULL; | |
246 | memset(bucket, 0, sizeof(*bucket)); | |
247 | bucket->h.alg = CRUSH_BUCKET_LIST; | |
248 | bucket->h.hash = hash; | |
249 | bucket->h.type = type; | |
250 | bucket->h.size = size; | |
251 | ||
f67539c2 TL |
252 | if (size == 0) { |
253 | return bucket; | |
254 | } | |
255 | ||
7c673cae FG |
256 | bucket->h.items = malloc(sizeof(__s32)*size); |
257 | if (!bucket->h.items) | |
258 | goto err; | |
259 | ||
260 | ||
261 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
262 | if (!bucket->item_weights) | |
263 | goto err; | |
264 | bucket->sum_weights = malloc(sizeof(__u32)*size); | |
265 | if (!bucket->sum_weights) | |
266 | goto err; | |
267 | w = 0; | |
268 | for (i=0; i<size; i++) { | |
269 | bucket->h.items[i] = items[i]; | |
270 | bucket->item_weights[i] = weights[i]; | |
271 | ||
272 | if (crush_addition_is_unsafe(w, weights[i])) | |
273 | goto err; | |
274 | ||
275 | w += weights[i]; | |
276 | bucket->sum_weights[i] = w; | |
277 | /*dprintk("pos %d item %d weight %d sum %d\n", | |
278 | i, items[i], weights[i], bucket->sum_weights[i]);*/ | |
279 | } | |
280 | ||
281 | bucket->h.weight = w; | |
282 | ||
283 | return bucket; | |
284 | err: | |
285 | free(bucket->sum_weights); | |
286 | free(bucket->item_weights); | |
287 | free(bucket->h.items); | |
288 | free(bucket); | |
289 | return NULL; | |
290 | } | |
291 | ||
292 | ||
293 | /* tree bucket */ | |
294 | ||
295 | static int height(int n) { | |
296 | int h = 0; | |
297 | while ((n & 1) == 0) { | |
298 | h++; | |
299 | n = n >> 1; | |
300 | } | |
301 | return h; | |
302 | } | |
303 | static int on_right(int n, int h) { | |
304 | return n & (1 << (h+1)); | |
305 | } | |
306 | static int parent(int n) | |
307 | { | |
308 | int h = height(n); | |
309 | if (on_right(n, h)) | |
310 | return n - (1<<h); | |
311 | else | |
312 | return n + (1<<h); | |
313 | } | |
314 | ||
315 | static int calc_depth(int size) | |
316 | { | |
317 | if (size == 0) { | |
318 | return 0; | |
319 | } | |
320 | ||
321 | int depth = 1; | |
322 | int t = size - 1; | |
323 | while (t) { | |
324 | t = t >> 1; | |
325 | depth++; | |
326 | } | |
327 | return depth; | |
328 | } | |
329 | ||
330 | struct crush_bucket_tree* | |
331 | crush_make_tree_bucket(int hash, int type, int size, | |
332 | int *items, /* in leaf order */ | |
333 | int *weights) | |
334 | { | |
335 | struct crush_bucket_tree *bucket; | |
336 | int depth; | |
337 | int node; | |
338 | int i, j; | |
339 | ||
340 | bucket = malloc(sizeof(*bucket)); | |
341 | if (!bucket) | |
342 | return NULL; | |
343 | memset(bucket, 0, sizeof(*bucket)); | |
344 | bucket->h.alg = CRUSH_BUCKET_TREE; | |
345 | bucket->h.hash = hash; | |
346 | bucket->h.type = type; | |
347 | bucket->h.size = size; | |
348 | ||
349 | if (size == 0) { | |
7c673cae FG |
350 | /* printf("size 0 depth 0 nodes 0\n"); */ |
351 | return bucket; | |
352 | } | |
353 | ||
354 | bucket->h.items = malloc(sizeof(__s32)*size); | |
355 | if (!bucket->h.items) | |
356 | goto err; | |
357 | ||
358 | /* calc tree depth */ | |
359 | depth = calc_depth(size); | |
360 | bucket->num_nodes = 1 << depth; | |
361 | dprintk("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes); | |
362 | ||
363 | bucket->node_weights = malloc(sizeof(__u32)*bucket->num_nodes); | |
364 | if (!bucket->node_weights) | |
365 | goto err; | |
366 | ||
367 | memset(bucket->h.items, 0, sizeof(__s32)*bucket->h.size); | |
368 | memset(bucket->node_weights, 0, sizeof(__u32)*bucket->num_nodes); | |
369 | ||
370 | for (i=0; i<size; i++) { | |
371 | bucket->h.items[i] = items[i]; | |
372 | node = crush_calc_tree_node(i); | |
373 | dprintk("item %d node %d weight %d\n", i, node, weights[i]); | |
374 | bucket->node_weights[node] = weights[i]; | |
375 | ||
376 | if (crush_addition_is_unsafe(bucket->h.weight, weights[i])) | |
377 | goto err; | |
378 | ||
379 | bucket->h.weight += weights[i]; | |
380 | for (j=1; j<depth; j++) { | |
381 | node = parent(node); | |
382 | ||
383 | if (crush_addition_is_unsafe(bucket->node_weights[node], weights[i])) | |
384 | goto err; | |
385 | ||
386 | bucket->node_weights[node] += weights[i]; | |
387 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
388 | } | |
389 | } | |
390 | BUG_ON(bucket->node_weights[bucket->num_nodes/2] != bucket->h.weight); | |
391 | ||
392 | return bucket; | |
393 | err: | |
394 | free(bucket->node_weights); | |
395 | free(bucket->h.items); | |
396 | free(bucket); | |
397 | return NULL; | |
398 | } | |
399 | ||
400 | ||
401 | ||
402 | /* straw bucket */ | |
403 | ||
404 | /* | |
405 | * this code was written 8 years ago. i have a vague recollection of | |
406 | * drawing boxes underneath bars of different lengths, where the bar | |
407 | * length represented the probability/weight, and that there was some | |
408 | * trial and error involved in arriving at this implementation. | |
409 | * however, reading the code now after all this time, the intuition | |
410 | * that motivated is lost on me. lame. my only excuse is that I now | |
411 | * know that the approach is fundamentally flawed and am not | |
412 | * particularly motivated to reconstruct the flawed reasoning. | |
413 | * | |
414 | * as best as i can remember, the idea is: sort the weights, and start | |
415 | * with the smallest. arbitrarily scale it at 1.0 (16-bit fixed | |
416 | * point). look at the next larger weight, and calculate the scaling | |
417 | * factor for that straw based on the relative difference in weight so | |
418 | * far. what's not clear to me now is why we are looking at wnext | |
419 | * (the delta to the next bigger weight) for all remaining weights, | |
420 | * and slicing things horizontally instead of considering just the | |
421 | * next item or set of items. or why pow() is used the way it is. | |
422 | * | |
423 | * note that the original version 1 of this function made special | |
11fdf7f2 | 424 | * accommodation for the case where straw lengths were identical. this |
7c673cae FG |
425 | * is also flawed in a non-obvious way; version 2 drops the special |
426 | * handling and appears to work just as well. | |
427 | * | |
428 | * moral of the story: if you do something clever, write down why it | |
429 | * works. | |
430 | */ | |
431 | int crush_calc_straw(struct crush_map *map, struct crush_bucket_straw *bucket) | |
432 | { | |
433 | int *reverse; | |
434 | int i, j, k; | |
435 | double straw, wbelow, lastw, wnext, pbelow; | |
436 | int numleft; | |
437 | int size = bucket->h.size; | |
438 | __u32 *weights = bucket->item_weights; | |
439 | ||
440 | /* reverse sort by weight (simple insertion sort) */ | |
441 | reverse = malloc(sizeof(int) * size); | |
442 | if (!reverse) | |
443 | return -ENOMEM; | |
444 | if (size) | |
445 | reverse[0] = 0; | |
446 | for (i=1; i<size; i++) { | |
447 | for (j=0; j<i; j++) { | |
448 | if (weights[i] < weights[reverse[j]]) { | |
449 | /* insert here */ | |
450 | for (k=i; k>j; k--) | |
451 | reverse[k] = reverse[k-1]; | |
452 | reverse[j] = i; | |
453 | break; | |
454 | } | |
455 | } | |
456 | if (j == i) | |
457 | reverse[i] = i; | |
458 | } | |
459 | ||
460 | numleft = size; | |
461 | straw = 1.0; | |
462 | wbelow = 0; | |
463 | lastw = 0; | |
464 | ||
465 | i=0; | |
466 | while (i < size) { | |
467 | if (map->straw_calc_version == 0) { | |
468 | /* zero weight items get 0 length straws! */ | |
469 | if (weights[reverse[i]] == 0) { | |
470 | bucket->straws[reverse[i]] = 0; | |
471 | i++; | |
472 | continue; | |
473 | } | |
474 | ||
475 | /* set this item's straw */ | |
476 | bucket->straws[reverse[i]] = straw * 0x10000; | |
477 | dprintk("item %d at %d weight %d straw %d (%lf)\n", | |
478 | bucket->h.items[reverse[i]], | |
479 | reverse[i], weights[reverse[i]], | |
480 | bucket->straws[reverse[i]], straw); | |
481 | i++; | |
482 | if (i == size) | |
483 | break; | |
484 | ||
485 | /* same weight as previous? */ | |
486 | if (weights[reverse[i]] == weights[reverse[i-1]]) { | |
487 | dprintk("same as previous\n"); | |
488 | continue; | |
489 | } | |
490 | ||
491 | /* adjust straw for next guy */ | |
492 | wbelow += ((double)weights[reverse[i-1]] - lastw) * | |
493 | numleft; | |
494 | for (j=i; j<size; j++) | |
495 | if (weights[reverse[j]] == weights[reverse[i]]) | |
496 | numleft--; | |
497 | else | |
498 | break; | |
499 | wnext = numleft * (weights[reverse[i]] - | |
500 | weights[reverse[i-1]]); | |
501 | pbelow = wbelow / (wbelow + wnext); | |
502 | dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n", | |
503 | wbelow, wnext, pbelow, numleft); | |
504 | ||
505 | straw *= pow((double)1.0 / pbelow, (double)1.0 / | |
506 | (double)numleft); | |
507 | ||
508 | lastw = weights[reverse[i-1]]; | |
509 | } else if (map->straw_calc_version >= 1) { | |
510 | /* zero weight items get 0 length straws! */ | |
511 | if (weights[reverse[i]] == 0) { | |
512 | bucket->straws[reverse[i]] = 0; | |
513 | i++; | |
514 | numleft--; | |
515 | continue; | |
516 | } | |
517 | ||
518 | /* set this item's straw */ | |
519 | bucket->straws[reverse[i]] = straw * 0x10000; | |
520 | dprintk("item %d at %d weight %d straw %d (%lf)\n", | |
521 | bucket->h.items[reverse[i]], | |
522 | reverse[i], weights[reverse[i]], | |
523 | bucket->straws[reverse[i]], straw); | |
524 | i++; | |
525 | if (i == size) | |
526 | break; | |
527 | ||
528 | /* adjust straw for next guy */ | |
529 | wbelow += ((double)weights[reverse[i-1]] - lastw) * | |
530 | numleft; | |
531 | numleft--; | |
532 | wnext = numleft * (weights[reverse[i]] - | |
533 | weights[reverse[i-1]]); | |
534 | pbelow = wbelow / (wbelow + wnext); | |
535 | dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n", | |
536 | wbelow, wnext, pbelow, numleft); | |
537 | ||
538 | straw *= pow((double)1.0 / pbelow, (double)1.0 / | |
539 | (double)numleft); | |
540 | ||
541 | lastw = weights[reverse[i-1]]; | |
542 | } | |
543 | } | |
544 | ||
545 | free(reverse); | |
546 | return 0; | |
547 | } | |
548 | ||
549 | struct crush_bucket_straw * | |
550 | crush_make_straw_bucket(struct crush_map *map, | |
551 | int hash, | |
552 | int type, | |
553 | int size, | |
554 | int *items, | |
555 | int *weights) | |
556 | { | |
557 | struct crush_bucket_straw *bucket; | |
558 | int i; | |
559 | ||
560 | bucket = malloc(sizeof(*bucket)); | |
561 | if (!bucket) | |
562 | return NULL; | |
563 | memset(bucket, 0, sizeof(*bucket)); | |
564 | bucket->h.alg = CRUSH_BUCKET_STRAW; | |
565 | bucket->h.hash = hash; | |
566 | bucket->h.type = type; | |
567 | bucket->h.size = size; | |
568 | ||
569 | bucket->h.items = malloc(sizeof(__s32)*size); | |
570 | if (!bucket->h.items) | |
571 | goto err; | |
572 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
573 | if (!bucket->item_weights) | |
574 | goto err; | |
575 | bucket->straws = malloc(sizeof(__u32)*size); | |
576 | if (!bucket->straws) | |
577 | goto err; | |
578 | ||
7c673cae FG |
579 | for (i=0; i<size; i++) { |
580 | bucket->h.items[i] = items[i]; | |
581 | bucket->h.weight += weights[i]; | |
582 | bucket->item_weights[i] = weights[i]; | |
583 | } | |
584 | ||
585 | if (crush_calc_straw(map, bucket) < 0) | |
586 | goto err; | |
587 | ||
588 | return bucket; | |
589 | err: | |
590 | free(bucket->straws); | |
591 | free(bucket->item_weights); | |
592 | free(bucket->h.items); | |
593 | free(bucket); | |
594 | return NULL; | |
595 | } | |
596 | ||
597 | struct crush_bucket_straw2 * | |
598 | crush_make_straw2_bucket(struct crush_map *map, | |
599 | int hash, | |
600 | int type, | |
601 | int size, | |
602 | int *items, | |
603 | int *weights) | |
604 | { | |
605 | struct crush_bucket_straw2 *bucket; | |
606 | int i; | |
607 | ||
608 | bucket = malloc(sizeof(*bucket)); | |
609 | if (!bucket) | |
610 | return NULL; | |
611 | memset(bucket, 0, sizeof(*bucket)); | |
612 | bucket->h.alg = CRUSH_BUCKET_STRAW2; | |
613 | bucket->h.hash = hash; | |
614 | bucket->h.type = type; | |
615 | bucket->h.size = size; | |
616 | ||
f67539c2 TL |
617 | if (size == 0) { |
618 | return bucket; | |
619 | } | |
620 | ||
7c673cae FG |
621 | bucket->h.items = malloc(sizeof(__s32)*size); |
622 | if (!bucket->h.items) | |
623 | goto err; | |
624 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
625 | if (!bucket->item_weights) | |
626 | goto err; | |
627 | ||
7c673cae FG |
628 | for (i=0; i<size; i++) { |
629 | bucket->h.items[i] = items[i]; | |
630 | bucket->h.weight += weights[i]; | |
631 | bucket->item_weights[i] = weights[i]; | |
632 | } | |
633 | ||
634 | return bucket; | |
635 | err: | |
636 | free(bucket->item_weights); | |
637 | free(bucket->h.items); | |
638 | free(bucket); | |
639 | return NULL; | |
640 | } | |
641 | ||
642 | ||
643 | ||
644 | struct crush_bucket* | |
645 | crush_make_bucket(struct crush_map *map, | |
646 | int alg, int hash, int type, int size, | |
647 | int *items, | |
648 | int *weights) | |
649 | { | |
650 | int item_weight; | |
651 | ||
652 | switch (alg) { | |
653 | case CRUSH_BUCKET_UNIFORM: | |
654 | if (size && weights) | |
655 | item_weight = weights[0]; | |
656 | else | |
657 | item_weight = 0; | |
658 | return (struct crush_bucket *)crush_make_uniform_bucket(hash, type, size, items, item_weight); | |
659 | ||
660 | case CRUSH_BUCKET_LIST: | |
661 | return (struct crush_bucket *)crush_make_list_bucket(hash, type, size, items, weights); | |
662 | ||
663 | case CRUSH_BUCKET_TREE: | |
664 | return (struct crush_bucket *)crush_make_tree_bucket(hash, type, size, items, weights); | |
665 | ||
666 | case CRUSH_BUCKET_STRAW: | |
667 | return (struct crush_bucket *)crush_make_straw_bucket(map, hash, type, size, items, weights); | |
668 | case CRUSH_BUCKET_STRAW2: | |
669 | return (struct crush_bucket *)crush_make_straw2_bucket(map, hash, type, size, items, weights); | |
670 | } | |
671 | return 0; | |
672 | } | |
673 | ||
674 | ||
675 | /************************************************/ | |
676 | ||
677 | int crush_add_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item, int weight) | |
678 | { | |
679 | int newsize = bucket->h.size + 1; | |
680 | void *_realloc = NULL; | |
681 | ||
682 | /* In such situation 'CRUSH_BUCKET_UNIFORM', the weight | |
683 | provided for the item should be the same as | |
684 | bucket->item_weight defined with 'crush_make_bucket'. This | |
685 | assumption is enforced by the return value which is always | |
686 | 0. */ | |
687 | if (bucket->item_weight != weight) { | |
688 | return -EINVAL; | |
689 | } | |
690 | ||
691 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
692 | return -ENOMEM; | |
693 | } else { | |
694 | bucket->h.items = _realloc; | |
695 | } | |
696 | ||
697 | bucket->h.items[newsize-1] = item; | |
698 | ||
699 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
700 | return -ERANGE; | |
701 | ||
702 | bucket->h.weight += weight; | |
703 | bucket->h.size++; | |
704 | ||
705 | return 0; | |
706 | } | |
707 | ||
708 | int crush_add_list_bucket_item(struct crush_bucket_list *bucket, int item, int weight) | |
709 | { | |
710 | int newsize = bucket->h.size + 1; | |
711 | void *_realloc = NULL; | |
712 | ||
713 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
714 | return -ENOMEM; | |
715 | } else { | |
716 | bucket->h.items = _realloc; | |
717 | } | |
718 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
719 | return -ENOMEM; | |
720 | } else { | |
721 | bucket->item_weights = _realloc; | |
722 | } | |
723 | if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) { | |
724 | return -ENOMEM; | |
725 | } else { | |
726 | bucket->sum_weights = _realloc; | |
727 | } | |
728 | ||
729 | bucket->h.items[newsize-1] = item; | |
730 | bucket->item_weights[newsize-1] = weight; | |
731 | if (newsize > 1) { | |
732 | ||
733 | if (crush_addition_is_unsafe(bucket->sum_weights[newsize-2], weight)) | |
734 | return -ERANGE; | |
735 | ||
736 | bucket->sum_weights[newsize-1] = bucket->sum_weights[newsize-2] + weight; | |
737 | } | |
738 | ||
739 | else { | |
740 | bucket->sum_weights[newsize-1] = weight; | |
741 | } | |
742 | ||
743 | bucket->h.weight += weight; | |
744 | bucket->h.size++; | |
745 | return 0; | |
746 | } | |
747 | ||
748 | int crush_add_tree_bucket_item(struct crush_bucket_tree *bucket, int item, int weight) | |
749 | { | |
750 | int newsize = bucket->h.size + 1; | |
751 | int depth = calc_depth(newsize);; | |
752 | int node; | |
753 | int j; | |
754 | void *_realloc = NULL; | |
755 | ||
756 | bucket->num_nodes = 1 << depth; | |
757 | ||
758 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
759 | return -ENOMEM; | |
760 | } else { | |
761 | bucket->h.items = _realloc; | |
762 | } | |
763 | if ((_realloc = realloc(bucket->node_weights, sizeof(__u32)*bucket->num_nodes)) == NULL) { | |
764 | return -ENOMEM; | |
765 | } else { | |
766 | bucket->node_weights = _realloc; | |
767 | } | |
768 | ||
769 | node = crush_calc_tree_node(newsize-1); | |
770 | bucket->node_weights[node] = weight; | |
771 | ||
772 | /* if the depth increase, we need to initialize the new root node's weight before add bucket item */ | |
773 | int root = bucket->num_nodes/2; | |
774 | if (depth >= 2 && (node - 1) == root) { | |
775 | /* if the new item is the first node in right sub tree, so | |
776 | * the root node initial weight is left sub tree's weight | |
777 | */ | |
778 | bucket->node_weights[root] = bucket->node_weights[root/2]; | |
779 | } | |
780 | ||
781 | for (j=1; j<depth; j++) { | |
782 | node = parent(node); | |
783 | ||
784 | if (crush_addition_is_unsafe(bucket->node_weights[node], weight)) | |
785 | return -ERANGE; | |
786 | ||
787 | bucket->node_weights[node] += weight; | |
788 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
789 | } | |
790 | ||
791 | ||
792 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
793 | return -ERANGE; | |
794 | ||
795 | bucket->h.items[newsize-1] = item; | |
796 | bucket->h.weight += weight; | |
797 | bucket->h.size++; | |
798 | ||
799 | return 0; | |
800 | } | |
801 | ||
802 | int crush_add_straw_bucket_item(struct crush_map *map, | |
803 | struct crush_bucket_straw *bucket, | |
804 | int item, int weight) | |
805 | { | |
806 | int newsize = bucket->h.size + 1; | |
807 | ||
808 | void *_realloc = NULL; | |
809 | ||
810 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
811 | return -ENOMEM; | |
812 | } else { | |
813 | bucket->h.items = _realloc; | |
814 | } | |
815 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
816 | return -ENOMEM; | |
817 | } else { | |
818 | bucket->item_weights = _realloc; | |
819 | } | |
820 | if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) { | |
821 | return -ENOMEM; | |
822 | } else { | |
823 | bucket->straws = _realloc; | |
824 | } | |
825 | ||
826 | bucket->h.items[newsize-1] = item; | |
827 | bucket->item_weights[newsize-1] = weight; | |
828 | ||
829 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
830 | return -ERANGE; | |
831 | ||
832 | bucket->h.weight += weight; | |
833 | bucket->h.size++; | |
834 | ||
835 | return crush_calc_straw(map, bucket); | |
836 | } | |
837 | ||
838 | int crush_add_straw2_bucket_item(struct crush_map *map, | |
839 | struct crush_bucket_straw2 *bucket, | |
840 | int item, int weight) | |
841 | { | |
842 | int newsize = bucket->h.size + 1; | |
843 | ||
844 | void *_realloc = NULL; | |
845 | ||
846 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
847 | return -ENOMEM; | |
848 | } else { | |
849 | bucket->h.items = _realloc; | |
850 | } | |
851 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
852 | return -ENOMEM; | |
853 | } else { | |
854 | bucket->item_weights = _realloc; | |
855 | } | |
856 | ||
857 | bucket->h.items[newsize-1] = item; | |
858 | bucket->item_weights[newsize-1] = weight; | |
859 | ||
860 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
861 | return -ERANGE; | |
862 | ||
863 | bucket->h.weight += weight; | |
864 | bucket->h.size++; | |
865 | ||
866 | return 0; | |
867 | } | |
868 | ||
869 | int crush_bucket_add_item(struct crush_map *map, | |
870 | struct crush_bucket *b, int item, int weight) | |
871 | { | |
872 | switch (b->alg) { | |
873 | case CRUSH_BUCKET_UNIFORM: | |
874 | return crush_add_uniform_bucket_item((struct crush_bucket_uniform *)b, item, weight); | |
875 | case CRUSH_BUCKET_LIST: | |
876 | return crush_add_list_bucket_item((struct crush_bucket_list *)b, item, weight); | |
877 | case CRUSH_BUCKET_TREE: | |
878 | return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight); | |
879 | case CRUSH_BUCKET_STRAW: | |
880 | return crush_add_straw_bucket_item(map, (struct crush_bucket_straw *)b, item, weight); | |
881 | case CRUSH_BUCKET_STRAW2: | |
882 | return crush_add_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item, weight); | |
883 | default: | |
884 | return -1; | |
885 | } | |
886 | } | |
887 | ||
888 | /************************************************/ | |
889 | ||
890 | int crush_remove_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item) | |
891 | { | |
892 | unsigned i, j; | |
893 | int newsize; | |
894 | void *_realloc = NULL; | |
895 | ||
896 | for (i = 0; i < bucket->h.size; i++) | |
897 | if (bucket->h.items[i] == item) | |
898 | break; | |
899 | if (i == bucket->h.size) | |
900 | return -ENOENT; | |
901 | ||
902 | for (j = i; j < bucket->h.size; j++) | |
903 | bucket->h.items[j] = bucket->h.items[j+1]; | |
904 | newsize = --bucket->h.size; | |
905 | if (bucket->item_weight < bucket->h.weight) | |
906 | bucket->h.weight -= bucket->item_weight; | |
907 | else | |
908 | bucket->h.weight = 0; | |
909 | ||
910 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
911 | return -ENOMEM; | |
912 | } else { | |
913 | bucket->h.items = _realloc; | |
914 | } | |
915 | return 0; | |
916 | } | |
917 | ||
918 | int crush_remove_list_bucket_item(struct crush_bucket_list *bucket, int item) | |
919 | { | |
920 | unsigned i, j; | |
921 | int newsize; | |
922 | unsigned weight; | |
923 | ||
924 | for (i = 0; i < bucket->h.size; i++) | |
925 | if (bucket->h.items[i] == item) | |
926 | break; | |
927 | if (i == bucket->h.size) | |
928 | return -ENOENT; | |
929 | ||
930 | weight = bucket->item_weights[i]; | |
931 | for (j = i; j < bucket->h.size; j++) { | |
932 | bucket->h.items[j] = bucket->h.items[j+1]; | |
933 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
934 | bucket->sum_weights[j] = bucket->sum_weights[j+1] - weight; | |
935 | } | |
936 | if (weight < bucket->h.weight) | |
937 | bucket->h.weight -= weight; | |
938 | else | |
939 | bucket->h.weight = 0; | |
940 | newsize = --bucket->h.size; | |
941 | ||
942 | void *_realloc = NULL; | |
943 | ||
944 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
945 | return -ENOMEM; | |
946 | } else { | |
947 | bucket->h.items = _realloc; | |
948 | } | |
949 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
950 | return -ENOMEM; | |
951 | } else { | |
952 | bucket->item_weights = _realloc; | |
953 | } | |
954 | if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) { | |
955 | return -ENOMEM; | |
956 | } else { | |
957 | bucket->sum_weights = _realloc; | |
958 | } | |
959 | return 0; | |
960 | } | |
961 | ||
962 | int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item) | |
963 | { | |
964 | unsigned i; | |
965 | unsigned newsize; | |
966 | ||
967 | for (i = 0; i < bucket->h.size; i++) { | |
968 | int node; | |
969 | unsigned weight; | |
970 | int j; | |
971 | int depth = calc_depth(bucket->h.size); | |
972 | ||
973 | if (bucket->h.items[i] != item) | |
974 | continue; | |
975 | ||
976 | bucket->h.items[i] = 0; | |
977 | node = crush_calc_tree_node(i); | |
978 | weight = bucket->node_weights[node]; | |
979 | bucket->node_weights[node] = 0; | |
980 | ||
981 | for (j = 1; j < depth; j++) { | |
982 | node = parent(node); | |
983 | bucket->node_weights[node] -= weight; | |
984 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
985 | } | |
986 | if (weight < bucket->h.weight) | |
987 | bucket->h.weight -= weight; | |
988 | else | |
989 | bucket->h.weight = 0; | |
990 | break; | |
991 | } | |
992 | if (i == bucket->h.size) | |
993 | return -ENOENT; | |
994 | ||
995 | newsize = bucket->h.size; | |
996 | while (newsize > 0) { | |
997 | int node = crush_calc_tree_node(newsize - 1); | |
998 | if (bucket->node_weights[node]) | |
999 | break; | |
1000 | --newsize; | |
1001 | } | |
1002 | ||
1003 | if (newsize != bucket->h.size) { | |
1004 | int olddepth, newdepth; | |
1005 | ||
1006 | void *_realloc = NULL; | |
1007 | ||
1008 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1009 | return -ENOMEM; | |
1010 | } else { | |
1011 | bucket->h.items = _realloc; | |
1012 | } | |
1013 | ||
1014 | olddepth = calc_depth(bucket->h.size); | |
1015 | newdepth = calc_depth(newsize); | |
1016 | if (olddepth != newdepth) { | |
1017 | bucket->num_nodes = 1 << newdepth; | |
1018 | if ((_realloc = realloc(bucket->node_weights, | |
1019 | sizeof(__u32)*bucket->num_nodes)) == NULL) { | |
1020 | return -ENOMEM; | |
1021 | } else { | |
1022 | bucket->node_weights = _realloc; | |
1023 | } | |
1024 | } | |
1025 | ||
1026 | bucket->h.size = newsize; | |
1027 | } | |
1028 | return 0; | |
1029 | } | |
1030 | ||
1031 | int crush_remove_straw_bucket_item(struct crush_map *map, | |
1032 | struct crush_bucket_straw *bucket, int item) | |
1033 | { | |
1034 | int newsize = bucket->h.size - 1; | |
1035 | unsigned i, j; | |
1036 | ||
1037 | for (i = 0; i < bucket->h.size; i++) { | |
1038 | if (bucket->h.items[i] == item) { | |
7c673cae FG |
1039 | if (bucket->item_weights[i] < bucket->h.weight) |
1040 | bucket->h.weight -= bucket->item_weights[i]; | |
1041 | else | |
1042 | bucket->h.weight = 0; | |
35e4c445 | 1043 | for (j = i; j < bucket->h.size - 1; j++) { |
7c673cae FG |
1044 | bucket->h.items[j] = bucket->h.items[j+1]; |
1045 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
1046 | } | |
1047 | break; | |
1048 | } | |
1049 | } | |
1050 | if (i == bucket->h.size) | |
1051 | return -ENOENT; | |
35e4c445 FG |
1052 | bucket->h.size--; |
1053 | if (bucket->h.size == 0) { | |
1054 | /* don't bother reallocating */ | |
1055 | return 0; | |
1056 | } | |
7c673cae FG |
1057 | void *_realloc = NULL; |
1058 | ||
1059 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1060 | return -ENOMEM; | |
1061 | } else { | |
1062 | bucket->h.items = _realloc; | |
1063 | } | |
1064 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
1065 | return -ENOMEM; | |
1066 | } else { | |
1067 | bucket->item_weights = _realloc; | |
1068 | } | |
1069 | if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) { | |
1070 | return -ENOMEM; | |
1071 | } else { | |
1072 | bucket->straws = _realloc; | |
1073 | } | |
1074 | ||
1075 | return crush_calc_straw(map, bucket); | |
1076 | } | |
1077 | ||
1078 | int crush_remove_straw2_bucket_item(struct crush_map *map, | |
1079 | struct crush_bucket_straw2 *bucket, int item) | |
1080 | { | |
1081 | int newsize = bucket->h.size - 1; | |
1082 | unsigned i, j; | |
1083 | ||
1084 | for (i = 0; i < bucket->h.size; i++) { | |
1085 | if (bucket->h.items[i] == item) { | |
7c673cae FG |
1086 | if (bucket->item_weights[i] < bucket->h.weight) |
1087 | bucket->h.weight -= bucket->item_weights[i]; | |
1088 | else | |
1089 | bucket->h.weight = 0; | |
35e4c445 | 1090 | for (j = i; j < bucket->h.size - 1; j++) { |
7c673cae FG |
1091 | bucket->h.items[j] = bucket->h.items[j+1]; |
1092 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
1093 | } | |
1094 | break; | |
1095 | } | |
1096 | } | |
1097 | if (i == bucket->h.size) | |
1098 | return -ENOENT; | |
1099 | ||
35e4c445 FG |
1100 | bucket->h.size--; |
1101 | if (!newsize) { | |
1102 | /* don't bother reallocating a 0-length array. */ | |
1103 | return 0; | |
1104 | } | |
1105 | ||
7c673cae FG |
1106 | void *_realloc = NULL; |
1107 | ||
1108 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1109 | return -ENOMEM; | |
1110 | } else { | |
1111 | bucket->h.items = _realloc; | |
1112 | } | |
1113 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
1114 | return -ENOMEM; | |
1115 | } else { | |
1116 | bucket->item_weights = _realloc; | |
1117 | } | |
1118 | ||
1119 | return 0; | |
1120 | } | |
1121 | ||
1122 | int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item) | |
1123 | { | |
1124 | switch (b->alg) { | |
1125 | case CRUSH_BUCKET_UNIFORM: | |
1126 | return crush_remove_uniform_bucket_item((struct crush_bucket_uniform *)b, item); | |
1127 | case CRUSH_BUCKET_LIST: | |
1128 | return crush_remove_list_bucket_item((struct crush_bucket_list *)b, item); | |
1129 | case CRUSH_BUCKET_TREE: | |
1130 | return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item); | |
1131 | case CRUSH_BUCKET_STRAW: | |
1132 | return crush_remove_straw_bucket_item(map, (struct crush_bucket_straw *)b, item); | |
1133 | case CRUSH_BUCKET_STRAW2: | |
1134 | return crush_remove_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item); | |
1135 | default: | |
1136 | return -1; | |
1137 | } | |
1138 | } | |
1139 | ||
1140 | ||
1141 | /************************************************/ | |
1142 | ||
1143 | int crush_adjust_uniform_bucket_item_weight(struct crush_bucket_uniform *bucket, int item, int weight) | |
1144 | { | |
1145 | int diff = (weight - bucket->item_weight) * bucket->h.size; | |
1146 | ||
1147 | bucket->item_weight = weight; | |
1148 | bucket->h.weight = bucket->item_weight * bucket->h.size; | |
1149 | ||
1150 | return diff; | |
1151 | } | |
1152 | ||
1153 | int crush_adjust_list_bucket_item_weight(struct crush_bucket_list *bucket, int item, int weight) | |
1154 | { | |
1155 | int diff; | |
1156 | unsigned i, j; | |
1157 | ||
1158 | for (i = 0; i < bucket->h.size; i++) { | |
1159 | if (bucket->h.items[i] == item) | |
1160 | break; | |
1161 | } | |
1162 | if (i == bucket->h.size) | |
1163 | return 0; | |
1164 | ||
1165 | diff = weight - bucket->item_weights[i]; | |
1166 | bucket->item_weights[i] = weight; | |
1167 | bucket->h.weight += diff; | |
1168 | ||
1169 | for (j = i; j < bucket->h.size; j++) | |
1170 | bucket->sum_weights[j] += diff; | |
1171 | ||
1172 | return diff; | |
1173 | } | |
1174 | ||
1175 | int crush_adjust_tree_bucket_item_weight(struct crush_bucket_tree *bucket, int item, int weight) | |
1176 | { | |
1177 | int diff; | |
1178 | int node; | |
1179 | unsigned i, j; | |
1180 | unsigned depth = calc_depth(bucket->h.size); | |
1181 | ||
1182 | for (i = 0; i < bucket->h.size; i++) { | |
1183 | if (bucket->h.items[i] == item) | |
1184 | break; | |
1185 | } | |
1186 | if (i == bucket->h.size) | |
1187 | return 0; | |
1188 | ||
1189 | node = crush_calc_tree_node(i); | |
1190 | diff = weight - bucket->node_weights[node]; | |
1191 | bucket->node_weights[node] = weight; | |
1192 | bucket->h.weight += diff; | |
1193 | ||
1194 | for (j=1; j<depth; j++) { | |
1195 | node = parent(node); | |
1196 | bucket->node_weights[node] += diff; | |
1197 | } | |
1198 | ||
1199 | return diff; | |
1200 | } | |
1201 | ||
1202 | int crush_adjust_straw_bucket_item_weight(struct crush_map *map, | |
1203 | struct crush_bucket_straw *bucket, | |
1204 | int item, int weight) | |
1205 | { | |
1206 | unsigned idx; | |
1207 | int diff; | |
1208 | int r; | |
1209 | ||
1210 | for (idx = 0; idx < bucket->h.size; idx++) | |
1211 | if (bucket->h.items[idx] == item) | |
1212 | break; | |
1213 | if (idx == bucket->h.size) | |
1214 | return 0; | |
1215 | ||
1216 | diff = weight - bucket->item_weights[idx]; | |
1217 | bucket->item_weights[idx] = weight; | |
1218 | bucket->h.weight += diff; | |
1219 | ||
1220 | r = crush_calc_straw(map, bucket); | |
1221 | if (r < 0) | |
1222 | return r; | |
1223 | ||
1224 | return diff; | |
1225 | } | |
1226 | ||
1227 | int crush_adjust_straw2_bucket_item_weight(struct crush_map *map, | |
1228 | struct crush_bucket_straw2 *bucket, | |
1229 | int item, int weight) | |
1230 | { | |
1231 | unsigned idx; | |
1232 | int diff; | |
1233 | ||
1234 | for (idx = 0; idx < bucket->h.size; idx++) | |
1235 | if (bucket->h.items[idx] == item) | |
1236 | break; | |
1237 | if (idx == bucket->h.size) | |
1238 | return 0; | |
1239 | ||
1240 | diff = weight - bucket->item_weights[idx]; | |
1241 | bucket->item_weights[idx] = weight; | |
1242 | bucket->h.weight += diff; | |
1243 | ||
1244 | return diff; | |
1245 | } | |
1246 | ||
1247 | int crush_bucket_adjust_item_weight(struct crush_map *map, | |
1248 | struct crush_bucket *b, | |
1249 | int item, int weight) | |
1250 | { | |
1251 | switch (b->alg) { | |
1252 | case CRUSH_BUCKET_UNIFORM: | |
1253 | return crush_adjust_uniform_bucket_item_weight((struct crush_bucket_uniform *)b, | |
1254 | item, weight); | |
1255 | case CRUSH_BUCKET_LIST: | |
1256 | return crush_adjust_list_bucket_item_weight((struct crush_bucket_list *)b, | |
1257 | item, weight); | |
1258 | case CRUSH_BUCKET_TREE: | |
1259 | return crush_adjust_tree_bucket_item_weight((struct crush_bucket_tree *)b, | |
1260 | item, weight); | |
1261 | case CRUSH_BUCKET_STRAW: | |
1262 | return crush_adjust_straw_bucket_item_weight(map, | |
1263 | (struct crush_bucket_straw *)b, | |
1264 | item, weight); | |
1265 | case CRUSH_BUCKET_STRAW2: | |
1266 | return crush_adjust_straw2_bucket_item_weight(map, | |
1267 | (struct crush_bucket_straw2 *)b, | |
1268 | item, weight); | |
1269 | default: | |
1270 | return -1; | |
1271 | } | |
1272 | } | |
1273 | ||
1274 | /************************************************/ | |
1275 | ||
1276 | static int crush_reweight_uniform_bucket(struct crush_map *map, struct crush_bucket_uniform *bucket) | |
1277 | { | |
1278 | unsigned i; | |
1279 | unsigned sum = 0, n = 0, leaves = 0; | |
1280 | ||
1281 | for (i = 0; i < bucket->h.size; i++) { | |
1282 | int id = bucket->h.items[i]; | |
1283 | if (id < 0) { | |
1284 | struct crush_bucket *c = map->buckets[-1-id]; | |
1285 | crush_reweight_bucket(map, c); | |
1286 | ||
1287 | if (crush_addition_is_unsafe(sum, c->weight)) | |
1288 | return -ERANGE; | |
1289 | ||
1290 | sum += c->weight; | |
1291 | n++; | |
1292 | } else { | |
1293 | leaves++; | |
1294 | } | |
1295 | } | |
1296 | ||
1297 | if (n > leaves) | |
1298 | bucket->item_weight = sum / n; // more bucket children than leaves, average! | |
1299 | bucket->h.weight = bucket->item_weight * bucket->h.size; | |
1300 | ||
1301 | return 0; | |
1302 | } | |
1303 | ||
1304 | static int crush_reweight_list_bucket(struct crush_map *map, struct crush_bucket_list *bucket) | |
1305 | { | |
1306 | unsigned i; | |
1307 | ||
1308 | bucket->h.weight = 0; | |
1309 | for (i = 0; i < bucket->h.size; i++) { | |
1310 | int id = bucket->h.items[i]; | |
1311 | if (id < 0) { | |
1312 | struct crush_bucket *c = map->buckets[-1-id]; | |
1313 | crush_reweight_bucket(map, c); | |
1314 | bucket->item_weights[i] = c->weight; | |
1315 | } | |
1316 | ||
1317 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1318 | return -ERANGE; | |
1319 | ||
1320 | bucket->h.weight += bucket->item_weights[i]; | |
1321 | } | |
1322 | ||
1323 | return 0; | |
1324 | } | |
1325 | ||
1326 | static int crush_reweight_tree_bucket(struct crush_map *map, struct crush_bucket_tree *bucket) | |
1327 | { | |
1328 | unsigned i; | |
1329 | ||
1330 | bucket->h.weight = 0; | |
1331 | for (i = 0; i < bucket->h.size; i++) { | |
1332 | int node = crush_calc_tree_node(i); | |
1333 | int id = bucket->h.items[i]; | |
1334 | if (id < 0) { | |
1335 | struct crush_bucket *c = map->buckets[-1-id]; | |
1336 | crush_reweight_bucket(map, c); | |
1337 | bucket->node_weights[node] = c->weight; | |
1338 | } | |
1339 | ||
1340 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->node_weights[node])) | |
1341 | return -ERANGE; | |
1342 | ||
1343 | bucket->h.weight += bucket->node_weights[node]; | |
1344 | ||
1345 | ||
1346 | } | |
1347 | ||
1348 | return 0; | |
1349 | } | |
1350 | ||
1351 | static int crush_reweight_straw_bucket(struct crush_map *map, struct crush_bucket_straw *bucket) | |
1352 | { | |
1353 | unsigned i; | |
1354 | ||
1355 | bucket->h.weight = 0; | |
1356 | for (i = 0; i < bucket->h.size; i++) { | |
1357 | int id = bucket->h.items[i]; | |
1358 | if (id < 0) { | |
1359 | struct crush_bucket *c = map->buckets[-1-id]; | |
1360 | crush_reweight_bucket(map, c); | |
1361 | bucket->item_weights[i] = c->weight; | |
1362 | } | |
1363 | ||
1364 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1365 | return -ERANGE; | |
1366 | ||
1367 | bucket->h.weight += bucket->item_weights[i]; | |
1368 | } | |
1369 | crush_calc_straw(map, bucket); | |
1370 | ||
1371 | return 0; | |
1372 | } | |
1373 | ||
1374 | static int crush_reweight_straw2_bucket(struct crush_map *map, struct crush_bucket_straw2 *bucket) | |
1375 | { | |
1376 | unsigned i; | |
1377 | ||
1378 | bucket->h.weight = 0; | |
1379 | for (i = 0; i < bucket->h.size; i++) { | |
1380 | int id = bucket->h.items[i]; | |
1381 | if (id < 0) { | |
1382 | struct crush_bucket *c = map->buckets[-1-id]; | |
1383 | crush_reweight_bucket(map, c); | |
1384 | bucket->item_weights[i] = c->weight; | |
1385 | } | |
1386 | ||
1387 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1388 | return -ERANGE; | |
1389 | ||
1390 | bucket->h.weight += bucket->item_weights[i]; | |
1391 | } | |
1392 | ||
1393 | return 0; | |
1394 | } | |
1395 | ||
1396 | int crush_reweight_bucket(struct crush_map *map, struct crush_bucket *b) | |
1397 | { | |
1398 | switch (b->alg) { | |
1399 | case CRUSH_BUCKET_UNIFORM: | |
1400 | return crush_reweight_uniform_bucket(map, (struct crush_bucket_uniform *)b); | |
1401 | case CRUSH_BUCKET_LIST: | |
1402 | return crush_reweight_list_bucket(map, (struct crush_bucket_list *)b); | |
1403 | case CRUSH_BUCKET_TREE: | |
1404 | return crush_reweight_tree_bucket(map, (struct crush_bucket_tree *)b); | |
1405 | case CRUSH_BUCKET_STRAW: | |
1406 | return crush_reweight_straw_bucket(map, (struct crush_bucket_straw *)b); | |
1407 | case CRUSH_BUCKET_STRAW2: | |
1408 | return crush_reweight_straw2_bucket(map, (struct crush_bucket_straw2 *)b); | |
1409 | default: | |
1410 | return -1; | |
1411 | } | |
1412 | } | |
1413 | ||
1414 | struct crush_choose_arg *crush_make_choose_args(struct crush_map *map, int num_positions) | |
1415 | { | |
1416 | int b; | |
1417 | int sum_bucket_size = 0; | |
1418 | int bucket_count = 0; | |
1419 | for (b = 0; b < map->max_buckets; b++) { | |
1420 | if (map->buckets[b] == 0) | |
1421 | continue; | |
1422 | sum_bucket_size += map->buckets[b]->size; | |
1423 | bucket_count++; | |
1424 | } | |
1425 | dprintk("sum_bucket_size %d max_buckets %d bucket_count %d\n", | |
1426 | sum_bucket_size, map->max_buckets, bucket_count); | |
1427 | int size = (sizeof(struct crush_choose_arg) * map->max_buckets + | |
1428 | sizeof(struct crush_weight_set) * bucket_count * num_positions + | |
1429 | sizeof(__u32) * sum_bucket_size * num_positions + // weights | |
224ce89b | 1430 | sizeof(__s32) * sum_bucket_size); // ids |
7c673cae FG |
1431 | char *space = malloc(size); |
1432 | struct crush_choose_arg *arg = (struct crush_choose_arg *)space; | |
1433 | struct crush_weight_set *weight_set = (struct crush_weight_set *)(arg + map->max_buckets); | |
1434 | __u32 *weights = (__u32 *)(weight_set + bucket_count * num_positions); | |
11fdf7f2 | 1435 | char *weight_set_ends __attribute__((unused)) = (char*)weights; |
224ce89b | 1436 | __s32 *ids = (__s32 *)(weights + sum_bucket_size * num_positions); |
11fdf7f2 TL |
1437 | char *weights_end __attribute__((unused)) = (char *)ids; |
1438 | char *ids_end __attribute__((unused)) = (char *)(ids + sum_bucket_size); | |
7c673cae FG |
1439 | BUG_ON(space + size != ids_end); |
1440 | for (b = 0; b < map->max_buckets; b++) { | |
1441 | if (map->buckets[b] == 0) { | |
1442 | memset(&arg[b], '\0', sizeof(struct crush_choose_arg)); | |
1443 | continue; | |
1444 | } | |
1445 | struct crush_bucket_straw2 *bucket = (struct crush_bucket_straw2 *)map->buckets[b]; | |
1446 | ||
1447 | int position; | |
1448 | for (position = 0; position < num_positions; position++) { | |
1449 | memcpy(weights, bucket->item_weights, sizeof(__u32) * bucket->h.size); | |
1450 | weight_set[position].weights = weights; | |
1451 | weight_set[position].size = bucket->h.size; | |
1452 | dprintk("moving weight %d bytes forward\n", (int)((weights + bucket->h.size) - weights)); | |
1453 | weights += bucket->h.size; | |
1454 | } | |
1455 | arg[b].weight_set = weight_set; | |
28e407b8 | 1456 | arg[b].weight_set_positions = num_positions; |
7c673cae FG |
1457 | weight_set += position; |
1458 | ||
224ce89b | 1459 | memcpy(ids, bucket->h.items, sizeof(__s32) * bucket->h.size); |
7c673cae FG |
1460 | arg[b].ids = ids; |
1461 | arg[b].ids_size = bucket->h.size; | |
1462 | ids += bucket->h.size; | |
1463 | } | |
1464 | BUG_ON((char*)weight_set_ends != (char*)weight_set); | |
1465 | BUG_ON((char*)weights_end != (char*)weights); | |
1466 | BUG_ON((char*)ids != (char*)ids_end); | |
1467 | return arg; | |
1468 | } | |
1469 | ||
1470 | void crush_destroy_choose_args(struct crush_choose_arg *args) | |
1471 | { | |
1472 | free(args); | |
1473 | } | |
1474 | ||
1475 | /***************************/ | |
1476 | ||
1477 | /* methods to check for safe arithmetic operations */ | |
1478 | ||
1479 | int crush_addition_is_unsafe(__u32 a, __u32 b) | |
1480 | { | |
1481 | if ((((__u32)(-1)) - b) < a) | |
1482 | return 1; | |
1483 | else | |
1484 | return 0; | |
1485 | } | |
1486 | ||
1487 | int crush_multiplication_is_unsafe(__u32 a, __u32 b) | |
1488 | { | |
1489 | /* prevent division by zero */ | |
1490 | if (!a) | |
1491 | return 0; | |
1492 | if (!b) | |
1493 | return 1; | |
1494 | if ((((__u32)(-1)) / b) < a) | |
1495 | return 1; | |
1496 | else | |
1497 | return 0; | |
1498 | } | |
1499 | ||
1500 | /***************************/ | |
1501 | ||
1502 | /* methods to configure crush_map */ | |
1503 | ||
1504 | void set_legacy_crush_map(struct crush_map *map) { | |
1505 | /* initialize legacy tunable values */ | |
1506 | map->choose_local_tries = 2; | |
1507 | map->choose_local_fallback_tries = 5; | |
1508 | map->choose_total_tries = 19; | |
1509 | map->chooseleaf_descend_once = 0; | |
1510 | map->chooseleaf_vary_r = 0; | |
1511 | map->chooseleaf_stable = 0; | |
1512 | map->straw_calc_version = 0; | |
1513 | ||
1514 | // by default, use legacy types, and also exclude tree, | |
1515 | // since it was buggy. | |
1516 | map->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
1517 | } | |
1518 | ||
1519 | void set_optimal_crush_map(struct crush_map *map) { | |
1520 | map->choose_local_tries = 0; | |
1521 | map->choose_local_fallback_tries = 0; | |
1522 | map->choose_total_tries = 50; | |
1523 | map->chooseleaf_descend_once = 1; | |
1524 | map->chooseleaf_vary_r = 1; | |
1525 | map->chooseleaf_stable = 1; | |
1526 | map->allowed_bucket_algs = ( | |
1527 | (1 << CRUSH_BUCKET_UNIFORM) | | |
1528 | (1 << CRUSH_BUCKET_LIST) | | |
1529 | (1 << CRUSH_BUCKET_STRAW) | | |
1530 | (1 << CRUSH_BUCKET_STRAW2)); | |
1531 | } |