]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | #include <string.h> |
7c673cae FG |
2 | #include <math.h> |
3 | #include <stdlib.h> | |
4 | #include <stdio.h> | |
5 | #include <assert.h> | |
6 | #include <errno.h> | |
7 | ||
31f18b77 | 8 | #include "crush/crush.h" |
7c673cae | 9 | #include "builder.h" |
7c673cae FG |
10 | |
11 | #define dprintk(args...) /* printf(args) */ | |
12 | ||
13 | #define BUG_ON(x) assert(!(x)) | |
14 | ||
15 | struct crush_map *crush_create() | |
16 | { | |
17 | struct crush_map *m; | |
18 | m = malloc(sizeof(*m)); | |
19 | if (!m) | |
20 | return NULL; | |
21 | memset(m, 0, sizeof(*m)); | |
22 | ||
23 | set_optimal_crush_map(m); | |
24 | return m; | |
25 | } | |
26 | ||
27 | /* | |
28 | * finalize should be called _after_ all buckets are added to the map. | |
29 | */ | |
30 | void crush_finalize(struct crush_map *map) | |
31 | { | |
32 | int b; | |
33 | __u32 i; | |
34 | ||
35 | /* Calculate the needed working space while we do other | |
36 | finalization tasks. */ | |
37 | map->working_size = sizeof(struct crush_work); | |
38 | /* Space for the array of pointers to per-bucket workspace */ | |
39 | map->working_size += map->max_buckets * | |
40 | sizeof(struct crush_work_bucket *); | |
41 | ||
42 | /* calc max_devices */ | |
43 | map->max_devices = 0; | |
44 | for (b=0; b<map->max_buckets; b++) { | |
45 | if (map->buckets[b] == 0) | |
46 | continue; | |
47 | for (i=0; i<map->buckets[b]->size; i++) | |
48 | if (map->buckets[b]->items[i] >= map->max_devices) | |
49 | map->max_devices = map->buckets[b]->items[i] + 1; | |
50 | ||
51 | switch (map->buckets[b]->alg) { | |
52 | default: | |
53 | /* The base case, permutation variables and | |
54 | the pointer to the permutation array. */ | |
55 | map->working_size += sizeof(struct crush_work_bucket); | |
56 | break; | |
57 | } | |
58 | /* Every bucket has a permutation array. */ | |
59 | map->working_size += map->buckets[b]->size * sizeof(__u32); | |
60 | } | |
61 | } | |
62 | ||
63 | ||
64 | ||
65 | /** rules **/ | |
66 | ||
67 | int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno) | |
68 | { | |
69 | __u32 r; | |
70 | ||
71 | if (ruleno < 0) { | |
72 | for (r=0; r < map->max_rules; r++) | |
73 | if (map->rules[r] == 0) | |
74 | break; | |
75 | assert(r < CRUSH_MAX_RULES); | |
76 | } | |
77 | else | |
78 | r = ruleno; | |
79 | ||
80 | if (r >= map->max_rules) { | |
81 | /* expand array */ | |
82 | int oldsize; | |
83 | void *_realloc = NULL; | |
84 | if (map->max_rules +1 > CRUSH_MAX_RULES) | |
85 | return -ENOSPC; | |
86 | oldsize = map->max_rules; | |
87 | map->max_rules = r+1; | |
88 | if ((_realloc = realloc(map->rules, map->max_rules * sizeof(map->rules[0]))) == NULL) { | |
89 | return -ENOMEM; | |
90 | } else { | |
91 | map->rules = _realloc; | |
92 | } | |
93 | memset(map->rules + oldsize, 0, (map->max_rules-oldsize) * sizeof(map->rules[0])); | |
94 | } | |
95 | ||
96 | /* add it */ | |
97 | map->rules[r] = rule; | |
98 | return r; | |
99 | } | |
100 | ||
101 | struct crush_rule *crush_make_rule(int len, int ruleset, int type, int minsize, int maxsize) | |
102 | { | |
103 | struct crush_rule *rule; | |
104 | rule = malloc(crush_rule_size(len)); | |
105 | if (!rule) | |
106 | return NULL; | |
107 | rule->len = len; | |
108 | rule->mask.ruleset = ruleset; | |
109 | rule->mask.type = type; | |
110 | rule->mask.min_size = minsize; | |
111 | rule->mask.max_size = maxsize; | |
112 | return rule; | |
113 | } | |
114 | ||
115 | /* | |
116 | * be careful; this doesn't verify that the buffer you allocated is big enough! | |
117 | */ | |
118 | void crush_rule_set_step(struct crush_rule *rule, int n, int op, int arg1, int arg2) | |
119 | { | |
120 | assert((__u32)n < rule->len); | |
121 | rule->steps[n].op = op; | |
122 | rule->steps[n].arg1 = arg1; | |
123 | rule->steps[n].arg2 = arg2; | |
124 | } | |
125 | ||
126 | ||
127 | /** buckets **/ | |
128 | int crush_get_next_bucket_id(struct crush_map *map) | |
129 | { | |
130 | int pos; | |
131 | for (pos=0; pos < map->max_buckets; pos++) | |
132 | if (map->buckets[pos] == 0) | |
133 | break; | |
134 | return -1 - pos; | |
135 | } | |
136 | ||
137 | ||
138 | int crush_add_bucket(struct crush_map *map, | |
139 | int id, | |
140 | struct crush_bucket *bucket, | |
141 | int *idout) | |
142 | { | |
143 | int pos; | |
144 | ||
145 | /* find a bucket id */ | |
146 | if (id == 0) | |
147 | id = crush_get_next_bucket_id(map); | |
148 | pos = -1 - id; | |
149 | ||
150 | while (pos >= map->max_buckets) { | |
151 | /* expand array */ | |
152 | int oldsize = map->max_buckets; | |
153 | if (map->max_buckets) | |
154 | map->max_buckets *= 2; | |
155 | else | |
156 | map->max_buckets = 8; | |
157 | void *_realloc = NULL; | |
158 | if ((_realloc = realloc(map->buckets, map->max_buckets * sizeof(map->buckets[0]))) == NULL) { | |
159 | return -ENOMEM; | |
160 | } else { | |
161 | map->buckets = _realloc; | |
162 | } | |
163 | memset(map->buckets + oldsize, 0, (map->max_buckets-oldsize) * sizeof(map->buckets[0])); | |
164 | } | |
165 | ||
166 | if (map->buckets[pos] != 0) { | |
167 | return -EEXIST; | |
168 | } | |
169 | ||
170 | /* add it */ | |
171 | bucket->id = id; | |
172 | map->buckets[pos] = bucket; | |
173 | ||
174 | if (idout) *idout = id; | |
175 | return 0; | |
176 | } | |
177 | ||
178 | int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket) | |
179 | { | |
180 | int pos = -1 - bucket->id; | |
181 | assert(pos < map->max_buckets); | |
182 | map->buckets[pos] = NULL; | |
183 | crush_destroy_bucket(bucket); | |
184 | return 0; | |
185 | } | |
186 | ||
187 | ||
188 | /* uniform bucket */ | |
189 | ||
190 | struct crush_bucket_uniform * | |
191 | crush_make_uniform_bucket(int hash, int type, int size, | |
192 | int *items, | |
193 | int item_weight) | |
194 | { | |
195 | int i; | |
196 | struct crush_bucket_uniform *bucket; | |
197 | ||
198 | bucket = malloc(sizeof(*bucket)); | |
199 | if (!bucket) | |
200 | return NULL; | |
201 | memset(bucket, 0, sizeof(*bucket)); | |
202 | bucket->h.alg = CRUSH_BUCKET_UNIFORM; | |
203 | bucket->h.hash = hash; | |
204 | bucket->h.type = type; | |
205 | bucket->h.size = size; | |
206 | ||
207 | if (crush_multiplication_is_unsafe(size, item_weight)) | |
208 | goto err; | |
209 | ||
210 | bucket->h.weight = size * item_weight; | |
211 | bucket->item_weight = item_weight; | |
212 | bucket->h.items = malloc(sizeof(__s32)*size); | |
213 | ||
214 | if (!bucket->h.items) | |
215 | goto err; | |
216 | ||
217 | for (i=0; i<size; i++) | |
218 | bucket->h.items[i] = items[i]; | |
219 | ||
220 | return bucket; | |
221 | err: | |
222 | free(bucket->h.items); | |
223 | free(bucket); | |
224 | return NULL; | |
225 | } | |
226 | ||
227 | ||
228 | /* list bucket */ | |
229 | ||
230 | struct crush_bucket_list* | |
231 | crush_make_list_bucket(int hash, int type, int size, | |
232 | int *items, | |
233 | int *weights) | |
234 | { | |
235 | int i; | |
236 | int w; | |
237 | struct crush_bucket_list *bucket; | |
238 | ||
239 | bucket = malloc(sizeof(*bucket)); | |
240 | if (!bucket) | |
241 | return NULL; | |
242 | memset(bucket, 0, sizeof(*bucket)); | |
243 | bucket->h.alg = CRUSH_BUCKET_LIST; | |
244 | bucket->h.hash = hash; | |
245 | bucket->h.type = type; | |
246 | bucket->h.size = size; | |
247 | ||
248 | bucket->h.items = malloc(sizeof(__s32)*size); | |
249 | if (!bucket->h.items) | |
250 | goto err; | |
251 | ||
252 | ||
253 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
254 | if (!bucket->item_weights) | |
255 | goto err; | |
256 | bucket->sum_weights = malloc(sizeof(__u32)*size); | |
257 | if (!bucket->sum_weights) | |
258 | goto err; | |
259 | w = 0; | |
260 | for (i=0; i<size; i++) { | |
261 | bucket->h.items[i] = items[i]; | |
262 | bucket->item_weights[i] = weights[i]; | |
263 | ||
264 | if (crush_addition_is_unsafe(w, weights[i])) | |
265 | goto err; | |
266 | ||
267 | w += weights[i]; | |
268 | bucket->sum_weights[i] = w; | |
269 | /*dprintk("pos %d item %d weight %d sum %d\n", | |
270 | i, items[i], weights[i], bucket->sum_weights[i]);*/ | |
271 | } | |
272 | ||
273 | bucket->h.weight = w; | |
274 | ||
275 | return bucket; | |
276 | err: | |
277 | free(bucket->sum_weights); | |
278 | free(bucket->item_weights); | |
279 | free(bucket->h.items); | |
280 | free(bucket); | |
281 | return NULL; | |
282 | } | |
283 | ||
284 | ||
285 | /* tree bucket */ | |
286 | ||
287 | static int height(int n) { | |
288 | int h = 0; | |
289 | while ((n & 1) == 0) { | |
290 | h++; | |
291 | n = n >> 1; | |
292 | } | |
293 | return h; | |
294 | } | |
295 | static int on_right(int n, int h) { | |
296 | return n & (1 << (h+1)); | |
297 | } | |
298 | static int parent(int n) | |
299 | { | |
300 | int h = height(n); | |
301 | if (on_right(n, h)) | |
302 | return n - (1<<h); | |
303 | else | |
304 | return n + (1<<h); | |
305 | } | |
306 | ||
307 | static int calc_depth(int size) | |
308 | { | |
309 | if (size == 0) { | |
310 | return 0; | |
311 | } | |
312 | ||
313 | int depth = 1; | |
314 | int t = size - 1; | |
315 | while (t) { | |
316 | t = t >> 1; | |
317 | depth++; | |
318 | } | |
319 | return depth; | |
320 | } | |
321 | ||
322 | struct crush_bucket_tree* | |
323 | crush_make_tree_bucket(int hash, int type, int size, | |
324 | int *items, /* in leaf order */ | |
325 | int *weights) | |
326 | { | |
327 | struct crush_bucket_tree *bucket; | |
328 | int depth; | |
329 | int node; | |
330 | int i, j; | |
331 | ||
332 | bucket = malloc(sizeof(*bucket)); | |
333 | if (!bucket) | |
334 | return NULL; | |
335 | memset(bucket, 0, sizeof(*bucket)); | |
336 | bucket->h.alg = CRUSH_BUCKET_TREE; | |
337 | bucket->h.hash = hash; | |
338 | bucket->h.type = type; | |
339 | bucket->h.size = size; | |
340 | ||
341 | if (size == 0) { | |
342 | bucket->h.items = NULL; | |
343 | bucket->h.weight = 0; | |
344 | bucket->node_weights = NULL; | |
345 | bucket->num_nodes = 0; | |
346 | /* printf("size 0 depth 0 nodes 0\n"); */ | |
347 | return bucket; | |
348 | } | |
349 | ||
350 | bucket->h.items = malloc(sizeof(__s32)*size); | |
351 | if (!bucket->h.items) | |
352 | goto err; | |
353 | ||
354 | /* calc tree depth */ | |
355 | depth = calc_depth(size); | |
356 | bucket->num_nodes = 1 << depth; | |
357 | dprintk("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes); | |
358 | ||
359 | bucket->node_weights = malloc(sizeof(__u32)*bucket->num_nodes); | |
360 | if (!bucket->node_weights) | |
361 | goto err; | |
362 | ||
363 | memset(bucket->h.items, 0, sizeof(__s32)*bucket->h.size); | |
364 | memset(bucket->node_weights, 0, sizeof(__u32)*bucket->num_nodes); | |
365 | ||
366 | for (i=0; i<size; i++) { | |
367 | bucket->h.items[i] = items[i]; | |
368 | node = crush_calc_tree_node(i); | |
369 | dprintk("item %d node %d weight %d\n", i, node, weights[i]); | |
370 | bucket->node_weights[node] = weights[i]; | |
371 | ||
372 | if (crush_addition_is_unsafe(bucket->h.weight, weights[i])) | |
373 | goto err; | |
374 | ||
375 | bucket->h.weight += weights[i]; | |
376 | for (j=1; j<depth; j++) { | |
377 | node = parent(node); | |
378 | ||
379 | if (crush_addition_is_unsafe(bucket->node_weights[node], weights[i])) | |
380 | goto err; | |
381 | ||
382 | bucket->node_weights[node] += weights[i]; | |
383 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
384 | } | |
385 | } | |
386 | BUG_ON(bucket->node_weights[bucket->num_nodes/2] != bucket->h.weight); | |
387 | ||
388 | return bucket; | |
389 | err: | |
390 | free(bucket->node_weights); | |
391 | free(bucket->h.items); | |
392 | free(bucket); | |
393 | return NULL; | |
394 | } | |
395 | ||
396 | ||
397 | ||
398 | /* straw bucket */ | |
399 | ||
400 | /* | |
401 | * this code was written 8 years ago. i have a vague recollection of | |
402 | * drawing boxes underneath bars of different lengths, where the bar | |
403 | * length represented the probability/weight, and that there was some | |
404 | * trial and error involved in arriving at this implementation. | |
405 | * however, reading the code now after all this time, the intuition | |
406 | * that motivated is lost on me. lame. my only excuse is that I now | |
407 | * know that the approach is fundamentally flawed and am not | |
408 | * particularly motivated to reconstruct the flawed reasoning. | |
409 | * | |
410 | * as best as i can remember, the idea is: sort the weights, and start | |
411 | * with the smallest. arbitrarily scale it at 1.0 (16-bit fixed | |
412 | * point). look at the next larger weight, and calculate the scaling | |
413 | * factor for that straw based on the relative difference in weight so | |
414 | * far. what's not clear to me now is why we are looking at wnext | |
415 | * (the delta to the next bigger weight) for all remaining weights, | |
416 | * and slicing things horizontally instead of considering just the | |
417 | * next item or set of items. or why pow() is used the way it is. | |
418 | * | |
419 | * note that the original version 1 of this function made special | |
420 | * accomodation for the case where straw lengths were identical. this | |
421 | * is also flawed in a non-obvious way; version 2 drops the special | |
422 | * handling and appears to work just as well. | |
423 | * | |
424 | * moral of the story: if you do something clever, write down why it | |
425 | * works. | |
426 | */ | |
427 | int crush_calc_straw(struct crush_map *map, struct crush_bucket_straw *bucket) | |
428 | { | |
429 | int *reverse; | |
430 | int i, j, k; | |
431 | double straw, wbelow, lastw, wnext, pbelow; | |
432 | int numleft; | |
433 | int size = bucket->h.size; | |
434 | __u32 *weights = bucket->item_weights; | |
435 | ||
436 | /* reverse sort by weight (simple insertion sort) */ | |
437 | reverse = malloc(sizeof(int) * size); | |
438 | if (!reverse) | |
439 | return -ENOMEM; | |
440 | if (size) | |
441 | reverse[0] = 0; | |
442 | for (i=1; i<size; i++) { | |
443 | for (j=0; j<i; j++) { | |
444 | if (weights[i] < weights[reverse[j]]) { | |
445 | /* insert here */ | |
446 | for (k=i; k>j; k--) | |
447 | reverse[k] = reverse[k-1]; | |
448 | reverse[j] = i; | |
449 | break; | |
450 | } | |
451 | } | |
452 | if (j == i) | |
453 | reverse[i] = i; | |
454 | } | |
455 | ||
456 | numleft = size; | |
457 | straw = 1.0; | |
458 | wbelow = 0; | |
459 | lastw = 0; | |
460 | ||
461 | i=0; | |
462 | while (i < size) { | |
463 | if (map->straw_calc_version == 0) { | |
464 | /* zero weight items get 0 length straws! */ | |
465 | if (weights[reverse[i]] == 0) { | |
466 | bucket->straws[reverse[i]] = 0; | |
467 | i++; | |
468 | continue; | |
469 | } | |
470 | ||
471 | /* set this item's straw */ | |
472 | bucket->straws[reverse[i]] = straw * 0x10000; | |
473 | dprintk("item %d at %d weight %d straw %d (%lf)\n", | |
474 | bucket->h.items[reverse[i]], | |
475 | reverse[i], weights[reverse[i]], | |
476 | bucket->straws[reverse[i]], straw); | |
477 | i++; | |
478 | if (i == size) | |
479 | break; | |
480 | ||
481 | /* same weight as previous? */ | |
482 | if (weights[reverse[i]] == weights[reverse[i-1]]) { | |
483 | dprintk("same as previous\n"); | |
484 | continue; | |
485 | } | |
486 | ||
487 | /* adjust straw for next guy */ | |
488 | wbelow += ((double)weights[reverse[i-1]] - lastw) * | |
489 | numleft; | |
490 | for (j=i; j<size; j++) | |
491 | if (weights[reverse[j]] == weights[reverse[i]]) | |
492 | numleft--; | |
493 | else | |
494 | break; | |
495 | wnext = numleft * (weights[reverse[i]] - | |
496 | weights[reverse[i-1]]); | |
497 | pbelow = wbelow / (wbelow + wnext); | |
498 | dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n", | |
499 | wbelow, wnext, pbelow, numleft); | |
500 | ||
501 | straw *= pow((double)1.0 / pbelow, (double)1.0 / | |
502 | (double)numleft); | |
503 | ||
504 | lastw = weights[reverse[i-1]]; | |
505 | } else if (map->straw_calc_version >= 1) { | |
506 | /* zero weight items get 0 length straws! */ | |
507 | if (weights[reverse[i]] == 0) { | |
508 | bucket->straws[reverse[i]] = 0; | |
509 | i++; | |
510 | numleft--; | |
511 | continue; | |
512 | } | |
513 | ||
514 | /* set this item's straw */ | |
515 | bucket->straws[reverse[i]] = straw * 0x10000; | |
516 | dprintk("item %d at %d weight %d straw %d (%lf)\n", | |
517 | bucket->h.items[reverse[i]], | |
518 | reverse[i], weights[reverse[i]], | |
519 | bucket->straws[reverse[i]], straw); | |
520 | i++; | |
521 | if (i == size) | |
522 | break; | |
523 | ||
524 | /* adjust straw for next guy */ | |
525 | wbelow += ((double)weights[reverse[i-1]] - lastw) * | |
526 | numleft; | |
527 | numleft--; | |
528 | wnext = numleft * (weights[reverse[i]] - | |
529 | weights[reverse[i-1]]); | |
530 | pbelow = wbelow / (wbelow + wnext); | |
531 | dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n", | |
532 | wbelow, wnext, pbelow, numleft); | |
533 | ||
534 | straw *= pow((double)1.0 / pbelow, (double)1.0 / | |
535 | (double)numleft); | |
536 | ||
537 | lastw = weights[reverse[i-1]]; | |
538 | } | |
539 | } | |
540 | ||
541 | free(reverse); | |
542 | return 0; | |
543 | } | |
544 | ||
545 | struct crush_bucket_straw * | |
546 | crush_make_straw_bucket(struct crush_map *map, | |
547 | int hash, | |
548 | int type, | |
549 | int size, | |
550 | int *items, | |
551 | int *weights) | |
552 | { | |
553 | struct crush_bucket_straw *bucket; | |
554 | int i; | |
555 | ||
556 | bucket = malloc(sizeof(*bucket)); | |
557 | if (!bucket) | |
558 | return NULL; | |
559 | memset(bucket, 0, sizeof(*bucket)); | |
560 | bucket->h.alg = CRUSH_BUCKET_STRAW; | |
561 | bucket->h.hash = hash; | |
562 | bucket->h.type = type; | |
563 | bucket->h.size = size; | |
564 | ||
565 | bucket->h.items = malloc(sizeof(__s32)*size); | |
566 | if (!bucket->h.items) | |
567 | goto err; | |
568 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
569 | if (!bucket->item_weights) | |
570 | goto err; | |
571 | bucket->straws = malloc(sizeof(__u32)*size); | |
572 | if (!bucket->straws) | |
573 | goto err; | |
574 | ||
575 | bucket->h.weight = 0; | |
576 | for (i=0; i<size; i++) { | |
577 | bucket->h.items[i] = items[i]; | |
578 | bucket->h.weight += weights[i]; | |
579 | bucket->item_weights[i] = weights[i]; | |
580 | } | |
581 | ||
582 | if (crush_calc_straw(map, bucket) < 0) | |
583 | goto err; | |
584 | ||
585 | return bucket; | |
586 | err: | |
587 | free(bucket->straws); | |
588 | free(bucket->item_weights); | |
589 | free(bucket->h.items); | |
590 | free(bucket); | |
591 | return NULL; | |
592 | } | |
593 | ||
594 | struct crush_bucket_straw2 * | |
595 | crush_make_straw2_bucket(struct crush_map *map, | |
596 | int hash, | |
597 | int type, | |
598 | int size, | |
599 | int *items, | |
600 | int *weights) | |
601 | { | |
602 | struct crush_bucket_straw2 *bucket; | |
603 | int i; | |
604 | ||
605 | bucket = malloc(sizeof(*bucket)); | |
606 | if (!bucket) | |
607 | return NULL; | |
608 | memset(bucket, 0, sizeof(*bucket)); | |
609 | bucket->h.alg = CRUSH_BUCKET_STRAW2; | |
610 | bucket->h.hash = hash; | |
611 | bucket->h.type = type; | |
612 | bucket->h.size = size; | |
613 | ||
614 | bucket->h.items = malloc(sizeof(__s32)*size); | |
615 | if (!bucket->h.items) | |
616 | goto err; | |
617 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
618 | if (!bucket->item_weights) | |
619 | goto err; | |
620 | ||
621 | bucket->h.weight = 0; | |
622 | for (i=0; i<size; i++) { | |
623 | bucket->h.items[i] = items[i]; | |
624 | bucket->h.weight += weights[i]; | |
625 | bucket->item_weights[i] = weights[i]; | |
626 | } | |
627 | ||
628 | return bucket; | |
629 | err: | |
630 | free(bucket->item_weights); | |
631 | free(bucket->h.items); | |
632 | free(bucket); | |
633 | return NULL; | |
634 | } | |
635 | ||
636 | ||
637 | ||
638 | struct crush_bucket* | |
639 | crush_make_bucket(struct crush_map *map, | |
640 | int alg, int hash, int type, int size, | |
641 | int *items, | |
642 | int *weights) | |
643 | { | |
644 | int item_weight; | |
645 | ||
646 | switch (alg) { | |
647 | case CRUSH_BUCKET_UNIFORM: | |
648 | if (size && weights) | |
649 | item_weight = weights[0]; | |
650 | else | |
651 | item_weight = 0; | |
652 | return (struct crush_bucket *)crush_make_uniform_bucket(hash, type, size, items, item_weight); | |
653 | ||
654 | case CRUSH_BUCKET_LIST: | |
655 | return (struct crush_bucket *)crush_make_list_bucket(hash, type, size, items, weights); | |
656 | ||
657 | case CRUSH_BUCKET_TREE: | |
658 | return (struct crush_bucket *)crush_make_tree_bucket(hash, type, size, items, weights); | |
659 | ||
660 | case CRUSH_BUCKET_STRAW: | |
661 | return (struct crush_bucket *)crush_make_straw_bucket(map, hash, type, size, items, weights); | |
662 | case CRUSH_BUCKET_STRAW2: | |
663 | return (struct crush_bucket *)crush_make_straw2_bucket(map, hash, type, size, items, weights); | |
664 | } | |
665 | return 0; | |
666 | } | |
667 | ||
668 | ||
669 | /************************************************/ | |
670 | ||
671 | int crush_add_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item, int weight) | |
672 | { | |
673 | int newsize = bucket->h.size + 1; | |
674 | void *_realloc = NULL; | |
675 | ||
676 | /* In such situation 'CRUSH_BUCKET_UNIFORM', the weight | |
677 | provided for the item should be the same as | |
678 | bucket->item_weight defined with 'crush_make_bucket'. This | |
679 | assumption is enforced by the return value which is always | |
680 | 0. */ | |
681 | if (bucket->item_weight != weight) { | |
682 | return -EINVAL; | |
683 | } | |
684 | ||
685 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
686 | return -ENOMEM; | |
687 | } else { | |
688 | bucket->h.items = _realloc; | |
689 | } | |
690 | ||
691 | bucket->h.items[newsize-1] = item; | |
692 | ||
693 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
694 | return -ERANGE; | |
695 | ||
696 | bucket->h.weight += weight; | |
697 | bucket->h.size++; | |
698 | ||
699 | return 0; | |
700 | } | |
701 | ||
702 | int crush_add_list_bucket_item(struct crush_bucket_list *bucket, int item, int weight) | |
703 | { | |
704 | int newsize = bucket->h.size + 1; | |
705 | void *_realloc = NULL; | |
706 | ||
707 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
708 | return -ENOMEM; | |
709 | } else { | |
710 | bucket->h.items = _realloc; | |
711 | } | |
712 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
713 | return -ENOMEM; | |
714 | } else { | |
715 | bucket->item_weights = _realloc; | |
716 | } | |
717 | if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) { | |
718 | return -ENOMEM; | |
719 | } else { | |
720 | bucket->sum_weights = _realloc; | |
721 | } | |
722 | ||
723 | bucket->h.items[newsize-1] = item; | |
724 | bucket->item_weights[newsize-1] = weight; | |
725 | if (newsize > 1) { | |
726 | ||
727 | if (crush_addition_is_unsafe(bucket->sum_weights[newsize-2], weight)) | |
728 | return -ERANGE; | |
729 | ||
730 | bucket->sum_weights[newsize-1] = bucket->sum_weights[newsize-2] + weight; | |
731 | } | |
732 | ||
733 | else { | |
734 | bucket->sum_weights[newsize-1] = weight; | |
735 | } | |
736 | ||
737 | bucket->h.weight += weight; | |
738 | bucket->h.size++; | |
739 | return 0; | |
740 | } | |
741 | ||
742 | int crush_add_tree_bucket_item(struct crush_bucket_tree *bucket, int item, int weight) | |
743 | { | |
744 | int newsize = bucket->h.size + 1; | |
745 | int depth = calc_depth(newsize);; | |
746 | int node; | |
747 | int j; | |
748 | void *_realloc = NULL; | |
749 | ||
750 | bucket->num_nodes = 1 << depth; | |
751 | ||
752 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
753 | return -ENOMEM; | |
754 | } else { | |
755 | bucket->h.items = _realloc; | |
756 | } | |
757 | if ((_realloc = realloc(bucket->node_weights, sizeof(__u32)*bucket->num_nodes)) == NULL) { | |
758 | return -ENOMEM; | |
759 | } else { | |
760 | bucket->node_weights = _realloc; | |
761 | } | |
762 | ||
763 | node = crush_calc_tree_node(newsize-1); | |
764 | bucket->node_weights[node] = weight; | |
765 | ||
766 | /* if the depth increase, we need to initialize the new root node's weight before add bucket item */ | |
767 | int root = bucket->num_nodes/2; | |
768 | if (depth >= 2 && (node - 1) == root) { | |
769 | /* if the new item is the first node in right sub tree, so | |
770 | * the root node initial weight is left sub tree's weight | |
771 | */ | |
772 | bucket->node_weights[root] = bucket->node_weights[root/2]; | |
773 | } | |
774 | ||
775 | for (j=1; j<depth; j++) { | |
776 | node = parent(node); | |
777 | ||
778 | if (crush_addition_is_unsafe(bucket->node_weights[node], weight)) | |
779 | return -ERANGE; | |
780 | ||
781 | bucket->node_weights[node] += weight; | |
782 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
783 | } | |
784 | ||
785 | ||
786 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
787 | return -ERANGE; | |
788 | ||
789 | bucket->h.items[newsize-1] = item; | |
790 | bucket->h.weight += weight; | |
791 | bucket->h.size++; | |
792 | ||
793 | return 0; | |
794 | } | |
795 | ||
796 | int crush_add_straw_bucket_item(struct crush_map *map, | |
797 | struct crush_bucket_straw *bucket, | |
798 | int item, int weight) | |
799 | { | |
800 | int newsize = bucket->h.size + 1; | |
801 | ||
802 | void *_realloc = NULL; | |
803 | ||
804 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
805 | return -ENOMEM; | |
806 | } else { | |
807 | bucket->h.items = _realloc; | |
808 | } | |
809 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
810 | return -ENOMEM; | |
811 | } else { | |
812 | bucket->item_weights = _realloc; | |
813 | } | |
814 | if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) { | |
815 | return -ENOMEM; | |
816 | } else { | |
817 | bucket->straws = _realloc; | |
818 | } | |
819 | ||
820 | bucket->h.items[newsize-1] = item; | |
821 | bucket->item_weights[newsize-1] = weight; | |
822 | ||
823 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
824 | return -ERANGE; | |
825 | ||
826 | bucket->h.weight += weight; | |
827 | bucket->h.size++; | |
828 | ||
829 | return crush_calc_straw(map, bucket); | |
830 | } | |
831 | ||
832 | int crush_add_straw2_bucket_item(struct crush_map *map, | |
833 | struct crush_bucket_straw2 *bucket, | |
834 | int item, int weight) | |
835 | { | |
836 | int newsize = bucket->h.size + 1; | |
837 | ||
838 | void *_realloc = NULL; | |
839 | ||
840 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
841 | return -ENOMEM; | |
842 | } else { | |
843 | bucket->h.items = _realloc; | |
844 | } | |
845 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
846 | return -ENOMEM; | |
847 | } else { | |
848 | bucket->item_weights = _realloc; | |
849 | } | |
850 | ||
851 | bucket->h.items[newsize-1] = item; | |
852 | bucket->item_weights[newsize-1] = weight; | |
853 | ||
854 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
855 | return -ERANGE; | |
856 | ||
857 | bucket->h.weight += weight; | |
858 | bucket->h.size++; | |
859 | ||
860 | return 0; | |
861 | } | |
862 | ||
863 | int crush_bucket_add_item(struct crush_map *map, | |
864 | struct crush_bucket *b, int item, int weight) | |
865 | { | |
866 | switch (b->alg) { | |
867 | case CRUSH_BUCKET_UNIFORM: | |
868 | return crush_add_uniform_bucket_item((struct crush_bucket_uniform *)b, item, weight); | |
869 | case CRUSH_BUCKET_LIST: | |
870 | return crush_add_list_bucket_item((struct crush_bucket_list *)b, item, weight); | |
871 | case CRUSH_BUCKET_TREE: | |
872 | return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight); | |
873 | case CRUSH_BUCKET_STRAW: | |
874 | return crush_add_straw_bucket_item(map, (struct crush_bucket_straw *)b, item, weight); | |
875 | case CRUSH_BUCKET_STRAW2: | |
876 | return crush_add_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item, weight); | |
877 | default: | |
878 | return -1; | |
879 | } | |
880 | } | |
881 | ||
882 | /************************************************/ | |
883 | ||
884 | int crush_remove_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item) | |
885 | { | |
886 | unsigned i, j; | |
887 | int newsize; | |
888 | void *_realloc = NULL; | |
889 | ||
890 | for (i = 0; i < bucket->h.size; i++) | |
891 | if (bucket->h.items[i] == item) | |
892 | break; | |
893 | if (i == bucket->h.size) | |
894 | return -ENOENT; | |
895 | ||
896 | for (j = i; j < bucket->h.size; j++) | |
897 | bucket->h.items[j] = bucket->h.items[j+1]; | |
898 | newsize = --bucket->h.size; | |
899 | if (bucket->item_weight < bucket->h.weight) | |
900 | bucket->h.weight -= bucket->item_weight; | |
901 | else | |
902 | bucket->h.weight = 0; | |
903 | ||
904 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
905 | return -ENOMEM; | |
906 | } else { | |
907 | bucket->h.items = _realloc; | |
908 | } | |
909 | return 0; | |
910 | } | |
911 | ||
912 | int crush_remove_list_bucket_item(struct crush_bucket_list *bucket, int item) | |
913 | { | |
914 | unsigned i, j; | |
915 | int newsize; | |
916 | unsigned weight; | |
917 | ||
918 | for (i = 0; i < bucket->h.size; i++) | |
919 | if (bucket->h.items[i] == item) | |
920 | break; | |
921 | if (i == bucket->h.size) | |
922 | return -ENOENT; | |
923 | ||
924 | weight = bucket->item_weights[i]; | |
925 | for (j = i; j < bucket->h.size; j++) { | |
926 | bucket->h.items[j] = bucket->h.items[j+1]; | |
927 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
928 | bucket->sum_weights[j] = bucket->sum_weights[j+1] - weight; | |
929 | } | |
930 | if (weight < bucket->h.weight) | |
931 | bucket->h.weight -= weight; | |
932 | else | |
933 | bucket->h.weight = 0; | |
934 | newsize = --bucket->h.size; | |
935 | ||
936 | void *_realloc = NULL; | |
937 | ||
938 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
939 | return -ENOMEM; | |
940 | } else { | |
941 | bucket->h.items = _realloc; | |
942 | } | |
943 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
944 | return -ENOMEM; | |
945 | } else { | |
946 | bucket->item_weights = _realloc; | |
947 | } | |
948 | if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) { | |
949 | return -ENOMEM; | |
950 | } else { | |
951 | bucket->sum_weights = _realloc; | |
952 | } | |
953 | return 0; | |
954 | } | |
955 | ||
956 | int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item) | |
957 | { | |
958 | unsigned i; | |
959 | unsigned newsize; | |
960 | ||
961 | for (i = 0; i < bucket->h.size; i++) { | |
962 | int node; | |
963 | unsigned weight; | |
964 | int j; | |
965 | int depth = calc_depth(bucket->h.size); | |
966 | ||
967 | if (bucket->h.items[i] != item) | |
968 | continue; | |
969 | ||
970 | bucket->h.items[i] = 0; | |
971 | node = crush_calc_tree_node(i); | |
972 | weight = bucket->node_weights[node]; | |
973 | bucket->node_weights[node] = 0; | |
974 | ||
975 | for (j = 1; j < depth; j++) { | |
976 | node = parent(node); | |
977 | bucket->node_weights[node] -= weight; | |
978 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
979 | } | |
980 | if (weight < bucket->h.weight) | |
981 | bucket->h.weight -= weight; | |
982 | else | |
983 | bucket->h.weight = 0; | |
984 | break; | |
985 | } | |
986 | if (i == bucket->h.size) | |
987 | return -ENOENT; | |
988 | ||
989 | newsize = bucket->h.size; | |
990 | while (newsize > 0) { | |
991 | int node = crush_calc_tree_node(newsize - 1); | |
992 | if (bucket->node_weights[node]) | |
993 | break; | |
994 | --newsize; | |
995 | } | |
996 | ||
997 | if (newsize != bucket->h.size) { | |
998 | int olddepth, newdepth; | |
999 | ||
1000 | void *_realloc = NULL; | |
1001 | ||
1002 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1003 | return -ENOMEM; | |
1004 | } else { | |
1005 | bucket->h.items = _realloc; | |
1006 | } | |
1007 | ||
1008 | olddepth = calc_depth(bucket->h.size); | |
1009 | newdepth = calc_depth(newsize); | |
1010 | if (olddepth != newdepth) { | |
1011 | bucket->num_nodes = 1 << newdepth; | |
1012 | if ((_realloc = realloc(bucket->node_weights, | |
1013 | sizeof(__u32)*bucket->num_nodes)) == NULL) { | |
1014 | return -ENOMEM; | |
1015 | } else { | |
1016 | bucket->node_weights = _realloc; | |
1017 | } | |
1018 | } | |
1019 | ||
1020 | bucket->h.size = newsize; | |
1021 | } | |
1022 | return 0; | |
1023 | } | |
1024 | ||
1025 | int crush_remove_straw_bucket_item(struct crush_map *map, | |
1026 | struct crush_bucket_straw *bucket, int item) | |
1027 | { | |
1028 | int newsize = bucket->h.size - 1; | |
1029 | unsigned i, j; | |
1030 | ||
1031 | for (i = 0; i < bucket->h.size; i++) { | |
1032 | if (bucket->h.items[i] == item) { | |
7c673cae FG |
1033 | if (bucket->item_weights[i] < bucket->h.weight) |
1034 | bucket->h.weight -= bucket->item_weights[i]; | |
1035 | else | |
1036 | bucket->h.weight = 0; | |
35e4c445 | 1037 | for (j = i; j < bucket->h.size - 1; j++) { |
7c673cae FG |
1038 | bucket->h.items[j] = bucket->h.items[j+1]; |
1039 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
1040 | } | |
1041 | break; | |
1042 | } | |
1043 | } | |
1044 | if (i == bucket->h.size) | |
1045 | return -ENOENT; | |
35e4c445 FG |
1046 | bucket->h.size--; |
1047 | if (bucket->h.size == 0) { | |
1048 | /* don't bother reallocating */ | |
1049 | return 0; | |
1050 | } | |
7c673cae FG |
1051 | void *_realloc = NULL; |
1052 | ||
1053 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1054 | return -ENOMEM; | |
1055 | } else { | |
1056 | bucket->h.items = _realloc; | |
1057 | } | |
1058 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
1059 | return -ENOMEM; | |
1060 | } else { | |
1061 | bucket->item_weights = _realloc; | |
1062 | } | |
1063 | if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) { | |
1064 | return -ENOMEM; | |
1065 | } else { | |
1066 | bucket->straws = _realloc; | |
1067 | } | |
1068 | ||
1069 | return crush_calc_straw(map, bucket); | |
1070 | } | |
1071 | ||
1072 | int crush_remove_straw2_bucket_item(struct crush_map *map, | |
1073 | struct crush_bucket_straw2 *bucket, int item) | |
1074 | { | |
1075 | int newsize = bucket->h.size - 1; | |
1076 | unsigned i, j; | |
1077 | ||
1078 | for (i = 0; i < bucket->h.size; i++) { | |
1079 | if (bucket->h.items[i] == item) { | |
7c673cae FG |
1080 | if (bucket->item_weights[i] < bucket->h.weight) |
1081 | bucket->h.weight -= bucket->item_weights[i]; | |
1082 | else | |
1083 | bucket->h.weight = 0; | |
35e4c445 | 1084 | for (j = i; j < bucket->h.size - 1; j++) { |
7c673cae FG |
1085 | bucket->h.items[j] = bucket->h.items[j+1]; |
1086 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
1087 | } | |
1088 | break; | |
1089 | } | |
1090 | } | |
1091 | if (i == bucket->h.size) | |
1092 | return -ENOENT; | |
1093 | ||
35e4c445 FG |
1094 | bucket->h.size--; |
1095 | if (!newsize) { | |
1096 | /* don't bother reallocating a 0-length array. */ | |
1097 | return 0; | |
1098 | } | |
1099 | ||
7c673cae FG |
1100 | void *_realloc = NULL; |
1101 | ||
1102 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1103 | return -ENOMEM; | |
1104 | } else { | |
1105 | bucket->h.items = _realloc; | |
1106 | } | |
1107 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
1108 | return -ENOMEM; | |
1109 | } else { | |
1110 | bucket->item_weights = _realloc; | |
1111 | } | |
1112 | ||
1113 | return 0; | |
1114 | } | |
1115 | ||
1116 | int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item) | |
1117 | { | |
1118 | switch (b->alg) { | |
1119 | case CRUSH_BUCKET_UNIFORM: | |
1120 | return crush_remove_uniform_bucket_item((struct crush_bucket_uniform *)b, item); | |
1121 | case CRUSH_BUCKET_LIST: | |
1122 | return crush_remove_list_bucket_item((struct crush_bucket_list *)b, item); | |
1123 | case CRUSH_BUCKET_TREE: | |
1124 | return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item); | |
1125 | case CRUSH_BUCKET_STRAW: | |
1126 | return crush_remove_straw_bucket_item(map, (struct crush_bucket_straw *)b, item); | |
1127 | case CRUSH_BUCKET_STRAW2: | |
1128 | return crush_remove_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item); | |
1129 | default: | |
1130 | return -1; | |
1131 | } | |
1132 | } | |
1133 | ||
1134 | ||
1135 | /************************************************/ | |
1136 | ||
1137 | int crush_adjust_uniform_bucket_item_weight(struct crush_bucket_uniform *bucket, int item, int weight) | |
1138 | { | |
1139 | int diff = (weight - bucket->item_weight) * bucket->h.size; | |
1140 | ||
1141 | bucket->item_weight = weight; | |
1142 | bucket->h.weight = bucket->item_weight * bucket->h.size; | |
1143 | ||
1144 | return diff; | |
1145 | } | |
1146 | ||
1147 | int crush_adjust_list_bucket_item_weight(struct crush_bucket_list *bucket, int item, int weight) | |
1148 | { | |
1149 | int diff; | |
1150 | unsigned i, j; | |
1151 | ||
1152 | for (i = 0; i < bucket->h.size; i++) { | |
1153 | if (bucket->h.items[i] == item) | |
1154 | break; | |
1155 | } | |
1156 | if (i == bucket->h.size) | |
1157 | return 0; | |
1158 | ||
1159 | diff = weight - bucket->item_weights[i]; | |
1160 | bucket->item_weights[i] = weight; | |
1161 | bucket->h.weight += diff; | |
1162 | ||
1163 | for (j = i; j < bucket->h.size; j++) | |
1164 | bucket->sum_weights[j] += diff; | |
1165 | ||
1166 | return diff; | |
1167 | } | |
1168 | ||
1169 | int crush_adjust_tree_bucket_item_weight(struct crush_bucket_tree *bucket, int item, int weight) | |
1170 | { | |
1171 | int diff; | |
1172 | int node; | |
1173 | unsigned i, j; | |
1174 | unsigned depth = calc_depth(bucket->h.size); | |
1175 | ||
1176 | for (i = 0; i < bucket->h.size; i++) { | |
1177 | if (bucket->h.items[i] == item) | |
1178 | break; | |
1179 | } | |
1180 | if (i == bucket->h.size) | |
1181 | return 0; | |
1182 | ||
1183 | node = crush_calc_tree_node(i); | |
1184 | diff = weight - bucket->node_weights[node]; | |
1185 | bucket->node_weights[node] = weight; | |
1186 | bucket->h.weight += diff; | |
1187 | ||
1188 | for (j=1; j<depth; j++) { | |
1189 | node = parent(node); | |
1190 | bucket->node_weights[node] += diff; | |
1191 | } | |
1192 | ||
1193 | return diff; | |
1194 | } | |
1195 | ||
1196 | int crush_adjust_straw_bucket_item_weight(struct crush_map *map, | |
1197 | struct crush_bucket_straw *bucket, | |
1198 | int item, int weight) | |
1199 | { | |
1200 | unsigned idx; | |
1201 | int diff; | |
1202 | int r; | |
1203 | ||
1204 | for (idx = 0; idx < bucket->h.size; idx++) | |
1205 | if (bucket->h.items[idx] == item) | |
1206 | break; | |
1207 | if (idx == bucket->h.size) | |
1208 | return 0; | |
1209 | ||
1210 | diff = weight - bucket->item_weights[idx]; | |
1211 | bucket->item_weights[idx] = weight; | |
1212 | bucket->h.weight += diff; | |
1213 | ||
1214 | r = crush_calc_straw(map, bucket); | |
1215 | if (r < 0) | |
1216 | return r; | |
1217 | ||
1218 | return diff; | |
1219 | } | |
1220 | ||
1221 | int crush_adjust_straw2_bucket_item_weight(struct crush_map *map, | |
1222 | struct crush_bucket_straw2 *bucket, | |
1223 | int item, int weight) | |
1224 | { | |
1225 | unsigned idx; | |
1226 | int diff; | |
1227 | ||
1228 | for (idx = 0; idx < bucket->h.size; idx++) | |
1229 | if (bucket->h.items[idx] == item) | |
1230 | break; | |
1231 | if (idx == bucket->h.size) | |
1232 | return 0; | |
1233 | ||
1234 | diff = weight - bucket->item_weights[idx]; | |
1235 | bucket->item_weights[idx] = weight; | |
1236 | bucket->h.weight += diff; | |
1237 | ||
1238 | return diff; | |
1239 | } | |
1240 | ||
1241 | int crush_bucket_adjust_item_weight(struct crush_map *map, | |
1242 | struct crush_bucket *b, | |
1243 | int item, int weight) | |
1244 | { | |
1245 | switch (b->alg) { | |
1246 | case CRUSH_BUCKET_UNIFORM: | |
1247 | return crush_adjust_uniform_bucket_item_weight((struct crush_bucket_uniform *)b, | |
1248 | item, weight); | |
1249 | case CRUSH_BUCKET_LIST: | |
1250 | return crush_adjust_list_bucket_item_weight((struct crush_bucket_list *)b, | |
1251 | item, weight); | |
1252 | case CRUSH_BUCKET_TREE: | |
1253 | return crush_adjust_tree_bucket_item_weight((struct crush_bucket_tree *)b, | |
1254 | item, weight); | |
1255 | case CRUSH_BUCKET_STRAW: | |
1256 | return crush_adjust_straw_bucket_item_weight(map, | |
1257 | (struct crush_bucket_straw *)b, | |
1258 | item, weight); | |
1259 | case CRUSH_BUCKET_STRAW2: | |
1260 | return crush_adjust_straw2_bucket_item_weight(map, | |
1261 | (struct crush_bucket_straw2 *)b, | |
1262 | item, weight); | |
1263 | default: | |
1264 | return -1; | |
1265 | } | |
1266 | } | |
1267 | ||
1268 | /************************************************/ | |
1269 | ||
1270 | static int crush_reweight_uniform_bucket(struct crush_map *map, struct crush_bucket_uniform *bucket) | |
1271 | { | |
1272 | unsigned i; | |
1273 | unsigned sum = 0, n = 0, leaves = 0; | |
1274 | ||
1275 | for (i = 0; i < bucket->h.size; i++) { | |
1276 | int id = bucket->h.items[i]; | |
1277 | if (id < 0) { | |
1278 | struct crush_bucket *c = map->buckets[-1-id]; | |
1279 | crush_reweight_bucket(map, c); | |
1280 | ||
1281 | if (crush_addition_is_unsafe(sum, c->weight)) | |
1282 | return -ERANGE; | |
1283 | ||
1284 | sum += c->weight; | |
1285 | n++; | |
1286 | } else { | |
1287 | leaves++; | |
1288 | } | |
1289 | } | |
1290 | ||
1291 | if (n > leaves) | |
1292 | bucket->item_weight = sum / n; // more bucket children than leaves, average! | |
1293 | bucket->h.weight = bucket->item_weight * bucket->h.size; | |
1294 | ||
1295 | return 0; | |
1296 | } | |
1297 | ||
1298 | static int crush_reweight_list_bucket(struct crush_map *map, struct crush_bucket_list *bucket) | |
1299 | { | |
1300 | unsigned i; | |
1301 | ||
1302 | bucket->h.weight = 0; | |
1303 | for (i = 0; i < bucket->h.size; i++) { | |
1304 | int id = bucket->h.items[i]; | |
1305 | if (id < 0) { | |
1306 | struct crush_bucket *c = map->buckets[-1-id]; | |
1307 | crush_reweight_bucket(map, c); | |
1308 | bucket->item_weights[i] = c->weight; | |
1309 | } | |
1310 | ||
1311 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1312 | return -ERANGE; | |
1313 | ||
1314 | bucket->h.weight += bucket->item_weights[i]; | |
1315 | } | |
1316 | ||
1317 | return 0; | |
1318 | } | |
1319 | ||
1320 | static int crush_reweight_tree_bucket(struct crush_map *map, struct crush_bucket_tree *bucket) | |
1321 | { | |
1322 | unsigned i; | |
1323 | ||
1324 | bucket->h.weight = 0; | |
1325 | for (i = 0; i < bucket->h.size; i++) { | |
1326 | int node = crush_calc_tree_node(i); | |
1327 | int id = bucket->h.items[i]; | |
1328 | if (id < 0) { | |
1329 | struct crush_bucket *c = map->buckets[-1-id]; | |
1330 | crush_reweight_bucket(map, c); | |
1331 | bucket->node_weights[node] = c->weight; | |
1332 | } | |
1333 | ||
1334 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->node_weights[node])) | |
1335 | return -ERANGE; | |
1336 | ||
1337 | bucket->h.weight += bucket->node_weights[node]; | |
1338 | ||
1339 | ||
1340 | } | |
1341 | ||
1342 | return 0; | |
1343 | } | |
1344 | ||
1345 | static int crush_reweight_straw_bucket(struct crush_map *map, struct crush_bucket_straw *bucket) | |
1346 | { | |
1347 | unsigned i; | |
1348 | ||
1349 | bucket->h.weight = 0; | |
1350 | for (i = 0; i < bucket->h.size; i++) { | |
1351 | int id = bucket->h.items[i]; | |
1352 | if (id < 0) { | |
1353 | struct crush_bucket *c = map->buckets[-1-id]; | |
1354 | crush_reweight_bucket(map, c); | |
1355 | bucket->item_weights[i] = c->weight; | |
1356 | } | |
1357 | ||
1358 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1359 | return -ERANGE; | |
1360 | ||
1361 | bucket->h.weight += bucket->item_weights[i]; | |
1362 | } | |
1363 | crush_calc_straw(map, bucket); | |
1364 | ||
1365 | return 0; | |
1366 | } | |
1367 | ||
1368 | static int crush_reweight_straw2_bucket(struct crush_map *map, struct crush_bucket_straw2 *bucket) | |
1369 | { | |
1370 | unsigned i; | |
1371 | ||
1372 | bucket->h.weight = 0; | |
1373 | for (i = 0; i < bucket->h.size; i++) { | |
1374 | int id = bucket->h.items[i]; | |
1375 | if (id < 0) { | |
1376 | struct crush_bucket *c = map->buckets[-1-id]; | |
1377 | crush_reweight_bucket(map, c); | |
1378 | bucket->item_weights[i] = c->weight; | |
1379 | } | |
1380 | ||
1381 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1382 | return -ERANGE; | |
1383 | ||
1384 | bucket->h.weight += bucket->item_weights[i]; | |
1385 | } | |
1386 | ||
1387 | return 0; | |
1388 | } | |
1389 | ||
1390 | int crush_reweight_bucket(struct crush_map *map, struct crush_bucket *b) | |
1391 | { | |
1392 | switch (b->alg) { | |
1393 | case CRUSH_BUCKET_UNIFORM: | |
1394 | return crush_reweight_uniform_bucket(map, (struct crush_bucket_uniform *)b); | |
1395 | case CRUSH_BUCKET_LIST: | |
1396 | return crush_reweight_list_bucket(map, (struct crush_bucket_list *)b); | |
1397 | case CRUSH_BUCKET_TREE: | |
1398 | return crush_reweight_tree_bucket(map, (struct crush_bucket_tree *)b); | |
1399 | case CRUSH_BUCKET_STRAW: | |
1400 | return crush_reweight_straw_bucket(map, (struct crush_bucket_straw *)b); | |
1401 | case CRUSH_BUCKET_STRAW2: | |
1402 | return crush_reweight_straw2_bucket(map, (struct crush_bucket_straw2 *)b); | |
1403 | default: | |
1404 | return -1; | |
1405 | } | |
1406 | } | |
1407 | ||
1408 | struct crush_choose_arg *crush_make_choose_args(struct crush_map *map, int num_positions) | |
1409 | { | |
1410 | int b; | |
1411 | int sum_bucket_size = 0; | |
1412 | int bucket_count = 0; | |
1413 | for (b = 0; b < map->max_buckets; b++) { | |
1414 | if (map->buckets[b] == 0) | |
1415 | continue; | |
1416 | sum_bucket_size += map->buckets[b]->size; | |
1417 | bucket_count++; | |
1418 | } | |
1419 | dprintk("sum_bucket_size %d max_buckets %d bucket_count %d\n", | |
1420 | sum_bucket_size, map->max_buckets, bucket_count); | |
1421 | int size = (sizeof(struct crush_choose_arg) * map->max_buckets + | |
1422 | sizeof(struct crush_weight_set) * bucket_count * num_positions + | |
1423 | sizeof(__u32) * sum_bucket_size * num_positions + // weights | |
224ce89b | 1424 | sizeof(__s32) * sum_bucket_size); // ids |
7c673cae FG |
1425 | char *space = malloc(size); |
1426 | struct crush_choose_arg *arg = (struct crush_choose_arg *)space; | |
1427 | struct crush_weight_set *weight_set = (struct crush_weight_set *)(arg + map->max_buckets); | |
1428 | __u32 *weights = (__u32 *)(weight_set + bucket_count * num_positions); | |
1429 | char *weight_set_ends = (char*)weights; | |
224ce89b | 1430 | __s32 *ids = (__s32 *)(weights + sum_bucket_size * num_positions); |
7c673cae FG |
1431 | char *weights_end = (char *)ids; |
1432 | char *ids_end = (char *)(ids + sum_bucket_size); | |
1433 | BUG_ON(space + size != ids_end); | |
1434 | for (b = 0; b < map->max_buckets; b++) { | |
1435 | if (map->buckets[b] == 0) { | |
1436 | memset(&arg[b], '\0', sizeof(struct crush_choose_arg)); | |
1437 | continue; | |
1438 | } | |
1439 | struct crush_bucket_straw2 *bucket = (struct crush_bucket_straw2 *)map->buckets[b]; | |
1440 | ||
1441 | int position; | |
1442 | for (position = 0; position < num_positions; position++) { | |
1443 | memcpy(weights, bucket->item_weights, sizeof(__u32) * bucket->h.size); | |
1444 | weight_set[position].weights = weights; | |
1445 | weight_set[position].size = bucket->h.size; | |
1446 | dprintk("moving weight %d bytes forward\n", (int)((weights + bucket->h.size) - weights)); | |
1447 | weights += bucket->h.size; | |
1448 | } | |
1449 | arg[b].weight_set = weight_set; | |
1450 | arg[b].weight_set_size = num_positions; | |
1451 | weight_set += position; | |
1452 | ||
224ce89b | 1453 | memcpy(ids, bucket->h.items, sizeof(__s32) * bucket->h.size); |
7c673cae FG |
1454 | arg[b].ids = ids; |
1455 | arg[b].ids_size = bucket->h.size; | |
1456 | ids += bucket->h.size; | |
1457 | } | |
1458 | BUG_ON((char*)weight_set_ends != (char*)weight_set); | |
1459 | BUG_ON((char*)weights_end != (char*)weights); | |
1460 | BUG_ON((char*)ids != (char*)ids_end); | |
1461 | return arg; | |
1462 | } | |
1463 | ||
1464 | void crush_destroy_choose_args(struct crush_choose_arg *args) | |
1465 | { | |
1466 | free(args); | |
1467 | } | |
1468 | ||
1469 | /***************************/ | |
1470 | ||
1471 | /* methods to check for safe arithmetic operations */ | |
1472 | ||
1473 | int crush_addition_is_unsafe(__u32 a, __u32 b) | |
1474 | { | |
1475 | if ((((__u32)(-1)) - b) < a) | |
1476 | return 1; | |
1477 | else | |
1478 | return 0; | |
1479 | } | |
1480 | ||
1481 | int crush_multiplication_is_unsafe(__u32 a, __u32 b) | |
1482 | { | |
1483 | /* prevent division by zero */ | |
1484 | if (!a) | |
1485 | return 0; | |
1486 | if (!b) | |
1487 | return 1; | |
1488 | if ((((__u32)(-1)) / b) < a) | |
1489 | return 1; | |
1490 | else | |
1491 | return 0; | |
1492 | } | |
1493 | ||
1494 | /***************************/ | |
1495 | ||
1496 | /* methods to configure crush_map */ | |
1497 | ||
1498 | void set_legacy_crush_map(struct crush_map *map) { | |
1499 | /* initialize legacy tunable values */ | |
1500 | map->choose_local_tries = 2; | |
1501 | map->choose_local_fallback_tries = 5; | |
1502 | map->choose_total_tries = 19; | |
1503 | map->chooseleaf_descend_once = 0; | |
1504 | map->chooseleaf_vary_r = 0; | |
1505 | map->chooseleaf_stable = 0; | |
1506 | map->straw_calc_version = 0; | |
1507 | ||
1508 | // by default, use legacy types, and also exclude tree, | |
1509 | // since it was buggy. | |
1510 | map->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
1511 | } | |
1512 | ||
1513 | void set_optimal_crush_map(struct crush_map *map) { | |
1514 | map->choose_local_tries = 0; | |
1515 | map->choose_local_fallback_tries = 0; | |
1516 | map->choose_total_tries = 50; | |
1517 | map->chooseleaf_descend_once = 1; | |
1518 | map->chooseleaf_vary_r = 1; | |
1519 | map->chooseleaf_stable = 1; | |
1520 | map->allowed_bucket_algs = ( | |
1521 | (1 << CRUSH_BUCKET_UNIFORM) | | |
1522 | (1 << CRUSH_BUCKET_LIST) | | |
1523 | (1 << CRUSH_BUCKET_STRAW) | | |
1524 | (1 << CRUSH_BUCKET_STRAW2)); | |
1525 | } |