]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | #include <string.h> |
2 | #include <limits.h> | |
3 | #include <math.h> | |
4 | #include <stdlib.h> | |
5 | #include <stdio.h> | |
6 | #include <assert.h> | |
7 | #include <errno.h> | |
8 | ||
9 | #include "include/int_types.h" | |
10 | ||
11 | #include "builder.h" | |
12 | #include "hash.h" | |
13 | ||
14 | #define dprintk(args...) /* printf(args) */ | |
15 | ||
16 | #define BUG_ON(x) assert(!(x)) | |
17 | ||
18 | struct crush_map *crush_create() | |
19 | { | |
20 | struct crush_map *m; | |
21 | m = malloc(sizeof(*m)); | |
22 | if (!m) | |
23 | return NULL; | |
24 | memset(m, 0, sizeof(*m)); | |
25 | ||
26 | set_optimal_crush_map(m); | |
27 | return m; | |
28 | } | |
29 | ||
30 | /* | |
31 | * finalize should be called _after_ all buckets are added to the map. | |
32 | */ | |
33 | void crush_finalize(struct crush_map *map) | |
34 | { | |
35 | int b; | |
36 | __u32 i; | |
37 | ||
38 | /* Calculate the needed working space while we do other | |
39 | finalization tasks. */ | |
40 | map->working_size = sizeof(struct crush_work); | |
41 | /* Space for the array of pointers to per-bucket workspace */ | |
42 | map->working_size += map->max_buckets * | |
43 | sizeof(struct crush_work_bucket *); | |
44 | ||
45 | /* calc max_devices */ | |
46 | map->max_devices = 0; | |
47 | for (b=0; b<map->max_buckets; b++) { | |
48 | if (map->buckets[b] == 0) | |
49 | continue; | |
50 | for (i=0; i<map->buckets[b]->size; i++) | |
51 | if (map->buckets[b]->items[i] >= map->max_devices) | |
52 | map->max_devices = map->buckets[b]->items[i] + 1; | |
53 | ||
54 | switch (map->buckets[b]->alg) { | |
55 | default: | |
56 | /* The base case, permutation variables and | |
57 | the pointer to the permutation array. */ | |
58 | map->working_size += sizeof(struct crush_work_bucket); | |
59 | break; | |
60 | } | |
61 | /* Every bucket has a permutation array. */ | |
62 | map->working_size += map->buckets[b]->size * sizeof(__u32); | |
63 | } | |
64 | } | |
65 | ||
66 | ||
67 | ||
68 | /** rules **/ | |
69 | ||
70 | int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno) | |
71 | { | |
72 | __u32 r; | |
73 | ||
74 | if (ruleno < 0) { | |
75 | for (r=0; r < map->max_rules; r++) | |
76 | if (map->rules[r] == 0) | |
77 | break; | |
78 | assert(r < CRUSH_MAX_RULES); | |
79 | } | |
80 | else | |
81 | r = ruleno; | |
82 | ||
83 | if (r >= map->max_rules) { | |
84 | /* expand array */ | |
85 | int oldsize; | |
86 | void *_realloc = NULL; | |
87 | if (map->max_rules +1 > CRUSH_MAX_RULES) | |
88 | return -ENOSPC; | |
89 | oldsize = map->max_rules; | |
90 | map->max_rules = r+1; | |
91 | if ((_realloc = realloc(map->rules, map->max_rules * sizeof(map->rules[0]))) == NULL) { | |
92 | return -ENOMEM; | |
93 | } else { | |
94 | map->rules = _realloc; | |
95 | } | |
96 | memset(map->rules + oldsize, 0, (map->max_rules-oldsize) * sizeof(map->rules[0])); | |
97 | } | |
98 | ||
99 | /* add it */ | |
100 | map->rules[r] = rule; | |
101 | return r; | |
102 | } | |
103 | ||
104 | struct crush_rule *crush_make_rule(int len, int ruleset, int type, int minsize, int maxsize) | |
105 | { | |
106 | struct crush_rule *rule; | |
107 | rule = malloc(crush_rule_size(len)); | |
108 | if (!rule) | |
109 | return NULL; | |
110 | rule->len = len; | |
111 | rule->mask.ruleset = ruleset; | |
112 | rule->mask.type = type; | |
113 | rule->mask.min_size = minsize; | |
114 | rule->mask.max_size = maxsize; | |
115 | return rule; | |
116 | } | |
117 | ||
118 | /* | |
119 | * be careful; this doesn't verify that the buffer you allocated is big enough! | |
120 | */ | |
121 | void crush_rule_set_step(struct crush_rule *rule, int n, int op, int arg1, int arg2) | |
122 | { | |
123 | assert((__u32)n < rule->len); | |
124 | rule->steps[n].op = op; | |
125 | rule->steps[n].arg1 = arg1; | |
126 | rule->steps[n].arg2 = arg2; | |
127 | } | |
128 | ||
129 | ||
130 | /** buckets **/ | |
131 | int crush_get_next_bucket_id(struct crush_map *map) | |
132 | { | |
133 | int pos; | |
134 | for (pos=0; pos < map->max_buckets; pos++) | |
135 | if (map->buckets[pos] == 0) | |
136 | break; | |
137 | return -1 - pos; | |
138 | } | |
139 | ||
140 | ||
141 | int crush_add_bucket(struct crush_map *map, | |
142 | int id, | |
143 | struct crush_bucket *bucket, | |
144 | int *idout) | |
145 | { | |
146 | int pos; | |
147 | ||
148 | /* find a bucket id */ | |
149 | if (id == 0) | |
150 | id = crush_get_next_bucket_id(map); | |
151 | pos = -1 - id; | |
152 | ||
153 | while (pos >= map->max_buckets) { | |
154 | /* expand array */ | |
155 | int oldsize = map->max_buckets; | |
156 | if (map->max_buckets) | |
157 | map->max_buckets *= 2; | |
158 | else | |
159 | map->max_buckets = 8; | |
160 | void *_realloc = NULL; | |
161 | if ((_realloc = realloc(map->buckets, map->max_buckets * sizeof(map->buckets[0]))) == NULL) { | |
162 | return -ENOMEM; | |
163 | } else { | |
164 | map->buckets = _realloc; | |
165 | } | |
166 | memset(map->buckets + oldsize, 0, (map->max_buckets-oldsize) * sizeof(map->buckets[0])); | |
167 | } | |
168 | ||
169 | if (map->buckets[pos] != 0) { | |
170 | return -EEXIST; | |
171 | } | |
172 | ||
173 | /* add it */ | |
174 | bucket->id = id; | |
175 | map->buckets[pos] = bucket; | |
176 | ||
177 | if (idout) *idout = id; | |
178 | return 0; | |
179 | } | |
180 | ||
181 | int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket) | |
182 | { | |
183 | int pos = -1 - bucket->id; | |
184 | assert(pos < map->max_buckets); | |
185 | map->buckets[pos] = NULL; | |
186 | crush_destroy_bucket(bucket); | |
187 | return 0; | |
188 | } | |
189 | ||
190 | ||
191 | /* uniform bucket */ | |
192 | ||
193 | struct crush_bucket_uniform * | |
194 | crush_make_uniform_bucket(int hash, int type, int size, | |
195 | int *items, | |
196 | int item_weight) | |
197 | { | |
198 | int i; | |
199 | struct crush_bucket_uniform *bucket; | |
200 | ||
201 | bucket = malloc(sizeof(*bucket)); | |
202 | if (!bucket) | |
203 | return NULL; | |
204 | memset(bucket, 0, sizeof(*bucket)); | |
205 | bucket->h.alg = CRUSH_BUCKET_UNIFORM; | |
206 | bucket->h.hash = hash; | |
207 | bucket->h.type = type; | |
208 | bucket->h.size = size; | |
209 | ||
210 | if (crush_multiplication_is_unsafe(size, item_weight)) | |
211 | goto err; | |
212 | ||
213 | bucket->h.weight = size * item_weight; | |
214 | bucket->item_weight = item_weight; | |
215 | bucket->h.items = malloc(sizeof(__s32)*size); | |
216 | ||
217 | if (!bucket->h.items) | |
218 | goto err; | |
219 | ||
220 | for (i=0; i<size; i++) | |
221 | bucket->h.items[i] = items[i]; | |
222 | ||
223 | return bucket; | |
224 | err: | |
225 | free(bucket->h.items); | |
226 | free(bucket); | |
227 | return NULL; | |
228 | } | |
229 | ||
230 | ||
231 | /* list bucket */ | |
232 | ||
233 | struct crush_bucket_list* | |
234 | crush_make_list_bucket(int hash, int type, int size, | |
235 | int *items, | |
236 | int *weights) | |
237 | { | |
238 | int i; | |
239 | int w; | |
240 | struct crush_bucket_list *bucket; | |
241 | ||
242 | bucket = malloc(sizeof(*bucket)); | |
243 | if (!bucket) | |
244 | return NULL; | |
245 | memset(bucket, 0, sizeof(*bucket)); | |
246 | bucket->h.alg = CRUSH_BUCKET_LIST; | |
247 | bucket->h.hash = hash; | |
248 | bucket->h.type = type; | |
249 | bucket->h.size = size; | |
250 | ||
251 | bucket->h.items = malloc(sizeof(__s32)*size); | |
252 | if (!bucket->h.items) | |
253 | goto err; | |
254 | ||
255 | ||
256 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
257 | if (!bucket->item_weights) | |
258 | goto err; | |
259 | bucket->sum_weights = malloc(sizeof(__u32)*size); | |
260 | if (!bucket->sum_weights) | |
261 | goto err; | |
262 | w = 0; | |
263 | for (i=0; i<size; i++) { | |
264 | bucket->h.items[i] = items[i]; | |
265 | bucket->item_weights[i] = weights[i]; | |
266 | ||
267 | if (crush_addition_is_unsafe(w, weights[i])) | |
268 | goto err; | |
269 | ||
270 | w += weights[i]; | |
271 | bucket->sum_weights[i] = w; | |
272 | /*dprintk("pos %d item %d weight %d sum %d\n", | |
273 | i, items[i], weights[i], bucket->sum_weights[i]);*/ | |
274 | } | |
275 | ||
276 | bucket->h.weight = w; | |
277 | ||
278 | return bucket; | |
279 | err: | |
280 | free(bucket->sum_weights); | |
281 | free(bucket->item_weights); | |
282 | free(bucket->h.items); | |
283 | free(bucket); | |
284 | return NULL; | |
285 | } | |
286 | ||
287 | ||
288 | /* tree bucket */ | |
289 | ||
290 | static int height(int n) { | |
291 | int h = 0; | |
292 | while ((n & 1) == 0) { | |
293 | h++; | |
294 | n = n >> 1; | |
295 | } | |
296 | return h; | |
297 | } | |
298 | static int on_right(int n, int h) { | |
299 | return n & (1 << (h+1)); | |
300 | } | |
301 | static int parent(int n) | |
302 | { | |
303 | int h = height(n); | |
304 | if (on_right(n, h)) | |
305 | return n - (1<<h); | |
306 | else | |
307 | return n + (1<<h); | |
308 | } | |
309 | ||
310 | static int calc_depth(int size) | |
311 | { | |
312 | if (size == 0) { | |
313 | return 0; | |
314 | } | |
315 | ||
316 | int depth = 1; | |
317 | int t = size - 1; | |
318 | while (t) { | |
319 | t = t >> 1; | |
320 | depth++; | |
321 | } | |
322 | return depth; | |
323 | } | |
324 | ||
325 | struct crush_bucket_tree* | |
326 | crush_make_tree_bucket(int hash, int type, int size, | |
327 | int *items, /* in leaf order */ | |
328 | int *weights) | |
329 | { | |
330 | struct crush_bucket_tree *bucket; | |
331 | int depth; | |
332 | int node; | |
333 | int i, j; | |
334 | ||
335 | bucket = malloc(sizeof(*bucket)); | |
336 | if (!bucket) | |
337 | return NULL; | |
338 | memset(bucket, 0, sizeof(*bucket)); | |
339 | bucket->h.alg = CRUSH_BUCKET_TREE; | |
340 | bucket->h.hash = hash; | |
341 | bucket->h.type = type; | |
342 | bucket->h.size = size; | |
343 | ||
344 | if (size == 0) { | |
345 | bucket->h.items = NULL; | |
346 | bucket->h.weight = 0; | |
347 | bucket->node_weights = NULL; | |
348 | bucket->num_nodes = 0; | |
349 | /* printf("size 0 depth 0 nodes 0\n"); */ | |
350 | return bucket; | |
351 | } | |
352 | ||
353 | bucket->h.items = malloc(sizeof(__s32)*size); | |
354 | if (!bucket->h.items) | |
355 | goto err; | |
356 | ||
357 | /* calc tree depth */ | |
358 | depth = calc_depth(size); | |
359 | bucket->num_nodes = 1 << depth; | |
360 | dprintk("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes); | |
361 | ||
362 | bucket->node_weights = malloc(sizeof(__u32)*bucket->num_nodes); | |
363 | if (!bucket->node_weights) | |
364 | goto err; | |
365 | ||
366 | memset(bucket->h.items, 0, sizeof(__s32)*bucket->h.size); | |
367 | memset(bucket->node_weights, 0, sizeof(__u32)*bucket->num_nodes); | |
368 | ||
369 | for (i=0; i<size; i++) { | |
370 | bucket->h.items[i] = items[i]; | |
371 | node = crush_calc_tree_node(i); | |
372 | dprintk("item %d node %d weight %d\n", i, node, weights[i]); | |
373 | bucket->node_weights[node] = weights[i]; | |
374 | ||
375 | if (crush_addition_is_unsafe(bucket->h.weight, weights[i])) | |
376 | goto err; | |
377 | ||
378 | bucket->h.weight += weights[i]; | |
379 | for (j=1; j<depth; j++) { | |
380 | node = parent(node); | |
381 | ||
382 | if (crush_addition_is_unsafe(bucket->node_weights[node], weights[i])) | |
383 | goto err; | |
384 | ||
385 | bucket->node_weights[node] += weights[i]; | |
386 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
387 | } | |
388 | } | |
389 | BUG_ON(bucket->node_weights[bucket->num_nodes/2] != bucket->h.weight); | |
390 | ||
391 | return bucket; | |
392 | err: | |
393 | free(bucket->node_weights); | |
394 | free(bucket->h.items); | |
395 | free(bucket); | |
396 | return NULL; | |
397 | } | |
398 | ||
399 | ||
400 | ||
401 | /* straw bucket */ | |
402 | ||
403 | /* | |
404 | * this code was written 8 years ago. i have a vague recollection of | |
405 | * drawing boxes underneath bars of different lengths, where the bar | |
406 | * length represented the probability/weight, and that there was some | |
407 | * trial and error involved in arriving at this implementation. | |
408 | * however, reading the code now after all this time, the intuition | |
409 | * that motivated is lost on me. lame. my only excuse is that I now | |
410 | * know that the approach is fundamentally flawed and am not | |
411 | * particularly motivated to reconstruct the flawed reasoning. | |
412 | * | |
413 | * as best as i can remember, the idea is: sort the weights, and start | |
414 | * with the smallest. arbitrarily scale it at 1.0 (16-bit fixed | |
415 | * point). look at the next larger weight, and calculate the scaling | |
416 | * factor for that straw based on the relative difference in weight so | |
417 | * far. what's not clear to me now is why we are looking at wnext | |
418 | * (the delta to the next bigger weight) for all remaining weights, | |
419 | * and slicing things horizontally instead of considering just the | |
420 | * next item or set of items. or why pow() is used the way it is. | |
421 | * | |
422 | * note that the original version 1 of this function made special | |
423 | * accomodation for the case where straw lengths were identical. this | |
424 | * is also flawed in a non-obvious way; version 2 drops the special | |
425 | * handling and appears to work just as well. | |
426 | * | |
427 | * moral of the story: if you do something clever, write down why it | |
428 | * works. | |
429 | */ | |
430 | int crush_calc_straw(struct crush_map *map, struct crush_bucket_straw *bucket) | |
431 | { | |
432 | int *reverse; | |
433 | int i, j, k; | |
434 | double straw, wbelow, lastw, wnext, pbelow; | |
435 | int numleft; | |
436 | int size = bucket->h.size; | |
437 | __u32 *weights = bucket->item_weights; | |
438 | ||
439 | /* reverse sort by weight (simple insertion sort) */ | |
440 | reverse = malloc(sizeof(int) * size); | |
441 | if (!reverse) | |
442 | return -ENOMEM; | |
443 | if (size) | |
444 | reverse[0] = 0; | |
445 | for (i=1; i<size; i++) { | |
446 | for (j=0; j<i; j++) { | |
447 | if (weights[i] < weights[reverse[j]]) { | |
448 | /* insert here */ | |
449 | for (k=i; k>j; k--) | |
450 | reverse[k] = reverse[k-1]; | |
451 | reverse[j] = i; | |
452 | break; | |
453 | } | |
454 | } | |
455 | if (j == i) | |
456 | reverse[i] = i; | |
457 | } | |
458 | ||
459 | numleft = size; | |
460 | straw = 1.0; | |
461 | wbelow = 0; | |
462 | lastw = 0; | |
463 | ||
464 | i=0; | |
465 | while (i < size) { | |
466 | if (map->straw_calc_version == 0) { | |
467 | /* zero weight items get 0 length straws! */ | |
468 | if (weights[reverse[i]] == 0) { | |
469 | bucket->straws[reverse[i]] = 0; | |
470 | i++; | |
471 | continue; | |
472 | } | |
473 | ||
474 | /* set this item's straw */ | |
475 | bucket->straws[reverse[i]] = straw * 0x10000; | |
476 | dprintk("item %d at %d weight %d straw %d (%lf)\n", | |
477 | bucket->h.items[reverse[i]], | |
478 | reverse[i], weights[reverse[i]], | |
479 | bucket->straws[reverse[i]], straw); | |
480 | i++; | |
481 | if (i == size) | |
482 | break; | |
483 | ||
484 | /* same weight as previous? */ | |
485 | if (weights[reverse[i]] == weights[reverse[i-1]]) { | |
486 | dprintk("same as previous\n"); | |
487 | continue; | |
488 | } | |
489 | ||
490 | /* adjust straw for next guy */ | |
491 | wbelow += ((double)weights[reverse[i-1]] - lastw) * | |
492 | numleft; | |
493 | for (j=i; j<size; j++) | |
494 | if (weights[reverse[j]] == weights[reverse[i]]) | |
495 | numleft--; | |
496 | else | |
497 | break; | |
498 | wnext = numleft * (weights[reverse[i]] - | |
499 | weights[reverse[i-1]]); | |
500 | pbelow = wbelow / (wbelow + wnext); | |
501 | dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n", | |
502 | wbelow, wnext, pbelow, numleft); | |
503 | ||
504 | straw *= pow((double)1.0 / pbelow, (double)1.0 / | |
505 | (double)numleft); | |
506 | ||
507 | lastw = weights[reverse[i-1]]; | |
508 | } else if (map->straw_calc_version >= 1) { | |
509 | /* zero weight items get 0 length straws! */ | |
510 | if (weights[reverse[i]] == 0) { | |
511 | bucket->straws[reverse[i]] = 0; | |
512 | i++; | |
513 | numleft--; | |
514 | continue; | |
515 | } | |
516 | ||
517 | /* set this item's straw */ | |
518 | bucket->straws[reverse[i]] = straw * 0x10000; | |
519 | dprintk("item %d at %d weight %d straw %d (%lf)\n", | |
520 | bucket->h.items[reverse[i]], | |
521 | reverse[i], weights[reverse[i]], | |
522 | bucket->straws[reverse[i]], straw); | |
523 | i++; | |
524 | if (i == size) | |
525 | break; | |
526 | ||
527 | /* adjust straw for next guy */ | |
528 | wbelow += ((double)weights[reverse[i-1]] - lastw) * | |
529 | numleft; | |
530 | numleft--; | |
531 | wnext = numleft * (weights[reverse[i]] - | |
532 | weights[reverse[i-1]]); | |
533 | pbelow = wbelow / (wbelow + wnext); | |
534 | dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n", | |
535 | wbelow, wnext, pbelow, numleft); | |
536 | ||
537 | straw *= pow((double)1.0 / pbelow, (double)1.0 / | |
538 | (double)numleft); | |
539 | ||
540 | lastw = weights[reverse[i-1]]; | |
541 | } | |
542 | } | |
543 | ||
544 | free(reverse); | |
545 | return 0; | |
546 | } | |
547 | ||
548 | struct crush_bucket_straw * | |
549 | crush_make_straw_bucket(struct crush_map *map, | |
550 | int hash, | |
551 | int type, | |
552 | int size, | |
553 | int *items, | |
554 | int *weights) | |
555 | { | |
556 | struct crush_bucket_straw *bucket; | |
557 | int i; | |
558 | ||
559 | bucket = malloc(sizeof(*bucket)); | |
560 | if (!bucket) | |
561 | return NULL; | |
562 | memset(bucket, 0, sizeof(*bucket)); | |
563 | bucket->h.alg = CRUSH_BUCKET_STRAW; | |
564 | bucket->h.hash = hash; | |
565 | bucket->h.type = type; | |
566 | bucket->h.size = size; | |
567 | ||
568 | bucket->h.items = malloc(sizeof(__s32)*size); | |
569 | if (!bucket->h.items) | |
570 | goto err; | |
571 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
572 | if (!bucket->item_weights) | |
573 | goto err; | |
574 | bucket->straws = malloc(sizeof(__u32)*size); | |
575 | if (!bucket->straws) | |
576 | goto err; | |
577 | ||
578 | bucket->h.weight = 0; | |
579 | for (i=0; i<size; i++) { | |
580 | bucket->h.items[i] = items[i]; | |
581 | bucket->h.weight += weights[i]; | |
582 | bucket->item_weights[i] = weights[i]; | |
583 | } | |
584 | ||
585 | if (crush_calc_straw(map, bucket) < 0) | |
586 | goto err; | |
587 | ||
588 | return bucket; | |
589 | err: | |
590 | free(bucket->straws); | |
591 | free(bucket->item_weights); | |
592 | free(bucket->h.items); | |
593 | free(bucket); | |
594 | return NULL; | |
595 | } | |
596 | ||
597 | struct crush_bucket_straw2 * | |
598 | crush_make_straw2_bucket(struct crush_map *map, | |
599 | int hash, | |
600 | int type, | |
601 | int size, | |
602 | int *items, | |
603 | int *weights) | |
604 | { | |
605 | struct crush_bucket_straw2 *bucket; | |
606 | int i; | |
607 | ||
608 | bucket = malloc(sizeof(*bucket)); | |
609 | if (!bucket) | |
610 | return NULL; | |
611 | memset(bucket, 0, sizeof(*bucket)); | |
612 | bucket->h.alg = CRUSH_BUCKET_STRAW2; | |
613 | bucket->h.hash = hash; | |
614 | bucket->h.type = type; | |
615 | bucket->h.size = size; | |
616 | ||
617 | bucket->h.items = malloc(sizeof(__s32)*size); | |
618 | if (!bucket->h.items) | |
619 | goto err; | |
620 | bucket->item_weights = malloc(sizeof(__u32)*size); | |
621 | if (!bucket->item_weights) | |
622 | goto err; | |
623 | ||
624 | bucket->h.weight = 0; | |
625 | for (i=0; i<size; i++) { | |
626 | bucket->h.items[i] = items[i]; | |
627 | bucket->h.weight += weights[i]; | |
628 | bucket->item_weights[i] = weights[i]; | |
629 | } | |
630 | ||
631 | return bucket; | |
632 | err: | |
633 | free(bucket->item_weights); | |
634 | free(bucket->h.items); | |
635 | free(bucket); | |
636 | return NULL; | |
637 | } | |
638 | ||
639 | ||
640 | ||
641 | struct crush_bucket* | |
642 | crush_make_bucket(struct crush_map *map, | |
643 | int alg, int hash, int type, int size, | |
644 | int *items, | |
645 | int *weights) | |
646 | { | |
647 | int item_weight; | |
648 | ||
649 | switch (alg) { | |
650 | case CRUSH_BUCKET_UNIFORM: | |
651 | if (size && weights) | |
652 | item_weight = weights[0]; | |
653 | else | |
654 | item_weight = 0; | |
655 | return (struct crush_bucket *)crush_make_uniform_bucket(hash, type, size, items, item_weight); | |
656 | ||
657 | case CRUSH_BUCKET_LIST: | |
658 | return (struct crush_bucket *)crush_make_list_bucket(hash, type, size, items, weights); | |
659 | ||
660 | case CRUSH_BUCKET_TREE: | |
661 | return (struct crush_bucket *)crush_make_tree_bucket(hash, type, size, items, weights); | |
662 | ||
663 | case CRUSH_BUCKET_STRAW: | |
664 | return (struct crush_bucket *)crush_make_straw_bucket(map, hash, type, size, items, weights); | |
665 | case CRUSH_BUCKET_STRAW2: | |
666 | return (struct crush_bucket *)crush_make_straw2_bucket(map, hash, type, size, items, weights); | |
667 | } | |
668 | return 0; | |
669 | } | |
670 | ||
671 | ||
672 | /************************************************/ | |
673 | ||
674 | int crush_add_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item, int weight) | |
675 | { | |
676 | int newsize = bucket->h.size + 1; | |
677 | void *_realloc = NULL; | |
678 | ||
679 | /* In such situation 'CRUSH_BUCKET_UNIFORM', the weight | |
680 | provided for the item should be the same as | |
681 | bucket->item_weight defined with 'crush_make_bucket'. This | |
682 | assumption is enforced by the return value which is always | |
683 | 0. */ | |
684 | if (bucket->item_weight != weight) { | |
685 | return -EINVAL; | |
686 | } | |
687 | ||
688 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
689 | return -ENOMEM; | |
690 | } else { | |
691 | bucket->h.items = _realloc; | |
692 | } | |
693 | ||
694 | bucket->h.items[newsize-1] = item; | |
695 | ||
696 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
697 | return -ERANGE; | |
698 | ||
699 | bucket->h.weight += weight; | |
700 | bucket->h.size++; | |
701 | ||
702 | return 0; | |
703 | } | |
704 | ||
705 | int crush_add_list_bucket_item(struct crush_bucket_list *bucket, int item, int weight) | |
706 | { | |
707 | int newsize = bucket->h.size + 1; | |
708 | void *_realloc = NULL; | |
709 | ||
710 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
711 | return -ENOMEM; | |
712 | } else { | |
713 | bucket->h.items = _realloc; | |
714 | } | |
715 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
716 | return -ENOMEM; | |
717 | } else { | |
718 | bucket->item_weights = _realloc; | |
719 | } | |
720 | if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) { | |
721 | return -ENOMEM; | |
722 | } else { | |
723 | bucket->sum_weights = _realloc; | |
724 | } | |
725 | ||
726 | bucket->h.items[newsize-1] = item; | |
727 | bucket->item_weights[newsize-1] = weight; | |
728 | if (newsize > 1) { | |
729 | ||
730 | if (crush_addition_is_unsafe(bucket->sum_weights[newsize-2], weight)) | |
731 | return -ERANGE; | |
732 | ||
733 | bucket->sum_weights[newsize-1] = bucket->sum_weights[newsize-2] + weight; | |
734 | } | |
735 | ||
736 | else { | |
737 | bucket->sum_weights[newsize-1] = weight; | |
738 | } | |
739 | ||
740 | bucket->h.weight += weight; | |
741 | bucket->h.size++; | |
742 | return 0; | |
743 | } | |
744 | ||
745 | int crush_add_tree_bucket_item(struct crush_bucket_tree *bucket, int item, int weight) | |
746 | { | |
747 | int newsize = bucket->h.size + 1; | |
748 | int depth = calc_depth(newsize);; | |
749 | int node; | |
750 | int j; | |
751 | void *_realloc = NULL; | |
752 | ||
753 | bucket->num_nodes = 1 << depth; | |
754 | ||
755 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
756 | return -ENOMEM; | |
757 | } else { | |
758 | bucket->h.items = _realloc; | |
759 | } | |
760 | if ((_realloc = realloc(bucket->node_weights, sizeof(__u32)*bucket->num_nodes)) == NULL) { | |
761 | return -ENOMEM; | |
762 | } else { | |
763 | bucket->node_weights = _realloc; | |
764 | } | |
765 | ||
766 | node = crush_calc_tree_node(newsize-1); | |
767 | bucket->node_weights[node] = weight; | |
768 | ||
769 | /* if the depth increase, we need to initialize the new root node's weight before add bucket item */ | |
770 | int root = bucket->num_nodes/2; | |
771 | if (depth >= 2 && (node - 1) == root) { | |
772 | /* if the new item is the first node in right sub tree, so | |
773 | * the root node initial weight is left sub tree's weight | |
774 | */ | |
775 | bucket->node_weights[root] = bucket->node_weights[root/2]; | |
776 | } | |
777 | ||
778 | for (j=1; j<depth; j++) { | |
779 | node = parent(node); | |
780 | ||
781 | if (crush_addition_is_unsafe(bucket->node_weights[node], weight)) | |
782 | return -ERANGE; | |
783 | ||
784 | bucket->node_weights[node] += weight; | |
785 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
786 | } | |
787 | ||
788 | ||
789 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
790 | return -ERANGE; | |
791 | ||
792 | bucket->h.items[newsize-1] = item; | |
793 | bucket->h.weight += weight; | |
794 | bucket->h.size++; | |
795 | ||
796 | return 0; | |
797 | } | |
798 | ||
799 | int crush_add_straw_bucket_item(struct crush_map *map, | |
800 | struct crush_bucket_straw *bucket, | |
801 | int item, int weight) | |
802 | { | |
803 | int newsize = bucket->h.size + 1; | |
804 | ||
805 | void *_realloc = NULL; | |
806 | ||
807 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
808 | return -ENOMEM; | |
809 | } else { | |
810 | bucket->h.items = _realloc; | |
811 | } | |
812 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
813 | return -ENOMEM; | |
814 | } else { | |
815 | bucket->item_weights = _realloc; | |
816 | } | |
817 | if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) { | |
818 | return -ENOMEM; | |
819 | } else { | |
820 | bucket->straws = _realloc; | |
821 | } | |
822 | ||
823 | bucket->h.items[newsize-1] = item; | |
824 | bucket->item_weights[newsize-1] = weight; | |
825 | ||
826 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
827 | return -ERANGE; | |
828 | ||
829 | bucket->h.weight += weight; | |
830 | bucket->h.size++; | |
831 | ||
832 | return crush_calc_straw(map, bucket); | |
833 | } | |
834 | ||
835 | int crush_add_straw2_bucket_item(struct crush_map *map, | |
836 | struct crush_bucket_straw2 *bucket, | |
837 | int item, int weight) | |
838 | { | |
839 | int newsize = bucket->h.size + 1; | |
840 | ||
841 | void *_realloc = NULL; | |
842 | ||
843 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
844 | return -ENOMEM; | |
845 | } else { | |
846 | bucket->h.items = _realloc; | |
847 | } | |
848 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
849 | return -ENOMEM; | |
850 | } else { | |
851 | bucket->item_weights = _realloc; | |
852 | } | |
853 | ||
854 | bucket->h.items[newsize-1] = item; | |
855 | bucket->item_weights[newsize-1] = weight; | |
856 | ||
857 | if (crush_addition_is_unsafe(bucket->h.weight, weight)) | |
858 | return -ERANGE; | |
859 | ||
860 | bucket->h.weight += weight; | |
861 | bucket->h.size++; | |
862 | ||
863 | return 0; | |
864 | } | |
865 | ||
866 | int crush_bucket_add_item(struct crush_map *map, | |
867 | struct crush_bucket *b, int item, int weight) | |
868 | { | |
869 | switch (b->alg) { | |
870 | case CRUSH_BUCKET_UNIFORM: | |
871 | return crush_add_uniform_bucket_item((struct crush_bucket_uniform *)b, item, weight); | |
872 | case CRUSH_BUCKET_LIST: | |
873 | return crush_add_list_bucket_item((struct crush_bucket_list *)b, item, weight); | |
874 | case CRUSH_BUCKET_TREE: | |
875 | return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight); | |
876 | case CRUSH_BUCKET_STRAW: | |
877 | return crush_add_straw_bucket_item(map, (struct crush_bucket_straw *)b, item, weight); | |
878 | case CRUSH_BUCKET_STRAW2: | |
879 | return crush_add_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item, weight); | |
880 | default: | |
881 | return -1; | |
882 | } | |
883 | } | |
884 | ||
885 | /************************************************/ | |
886 | ||
887 | int crush_remove_uniform_bucket_item(struct crush_bucket_uniform *bucket, int item) | |
888 | { | |
889 | unsigned i, j; | |
890 | int newsize; | |
891 | void *_realloc = NULL; | |
892 | ||
893 | for (i = 0; i < bucket->h.size; i++) | |
894 | if (bucket->h.items[i] == item) | |
895 | break; | |
896 | if (i == bucket->h.size) | |
897 | return -ENOENT; | |
898 | ||
899 | for (j = i; j < bucket->h.size; j++) | |
900 | bucket->h.items[j] = bucket->h.items[j+1]; | |
901 | newsize = --bucket->h.size; | |
902 | if (bucket->item_weight < bucket->h.weight) | |
903 | bucket->h.weight -= bucket->item_weight; | |
904 | else | |
905 | bucket->h.weight = 0; | |
906 | ||
907 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
908 | return -ENOMEM; | |
909 | } else { | |
910 | bucket->h.items = _realloc; | |
911 | } | |
912 | return 0; | |
913 | } | |
914 | ||
915 | int crush_remove_list_bucket_item(struct crush_bucket_list *bucket, int item) | |
916 | { | |
917 | unsigned i, j; | |
918 | int newsize; | |
919 | unsigned weight; | |
920 | ||
921 | for (i = 0; i < bucket->h.size; i++) | |
922 | if (bucket->h.items[i] == item) | |
923 | break; | |
924 | if (i == bucket->h.size) | |
925 | return -ENOENT; | |
926 | ||
927 | weight = bucket->item_weights[i]; | |
928 | for (j = i; j < bucket->h.size; j++) { | |
929 | bucket->h.items[j] = bucket->h.items[j+1]; | |
930 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
931 | bucket->sum_weights[j] = bucket->sum_weights[j+1] - weight; | |
932 | } | |
933 | if (weight < bucket->h.weight) | |
934 | bucket->h.weight -= weight; | |
935 | else | |
936 | bucket->h.weight = 0; | |
937 | newsize = --bucket->h.size; | |
938 | ||
939 | void *_realloc = NULL; | |
940 | ||
941 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
942 | return -ENOMEM; | |
943 | } else { | |
944 | bucket->h.items = _realloc; | |
945 | } | |
946 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
947 | return -ENOMEM; | |
948 | } else { | |
949 | bucket->item_weights = _realloc; | |
950 | } | |
951 | if ((_realloc = realloc(bucket->sum_weights, sizeof(__u32)*newsize)) == NULL) { | |
952 | return -ENOMEM; | |
953 | } else { | |
954 | bucket->sum_weights = _realloc; | |
955 | } | |
956 | return 0; | |
957 | } | |
958 | ||
959 | int crush_remove_tree_bucket_item(struct crush_bucket_tree *bucket, int item) | |
960 | { | |
961 | unsigned i; | |
962 | unsigned newsize; | |
963 | ||
964 | for (i = 0; i < bucket->h.size; i++) { | |
965 | int node; | |
966 | unsigned weight; | |
967 | int j; | |
968 | int depth = calc_depth(bucket->h.size); | |
969 | ||
970 | if (bucket->h.items[i] != item) | |
971 | continue; | |
972 | ||
973 | bucket->h.items[i] = 0; | |
974 | node = crush_calc_tree_node(i); | |
975 | weight = bucket->node_weights[node]; | |
976 | bucket->node_weights[node] = 0; | |
977 | ||
978 | for (j = 1; j < depth; j++) { | |
979 | node = parent(node); | |
980 | bucket->node_weights[node] -= weight; | |
981 | dprintk(" node %d weight %d\n", node, bucket->node_weights[node]); | |
982 | } | |
983 | if (weight < bucket->h.weight) | |
984 | bucket->h.weight -= weight; | |
985 | else | |
986 | bucket->h.weight = 0; | |
987 | break; | |
988 | } | |
989 | if (i == bucket->h.size) | |
990 | return -ENOENT; | |
991 | ||
992 | newsize = bucket->h.size; | |
993 | while (newsize > 0) { | |
994 | int node = crush_calc_tree_node(newsize - 1); | |
995 | if (bucket->node_weights[node]) | |
996 | break; | |
997 | --newsize; | |
998 | } | |
999 | ||
1000 | if (newsize != bucket->h.size) { | |
1001 | int olddepth, newdepth; | |
1002 | ||
1003 | void *_realloc = NULL; | |
1004 | ||
1005 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1006 | return -ENOMEM; | |
1007 | } else { | |
1008 | bucket->h.items = _realloc; | |
1009 | } | |
1010 | ||
1011 | olddepth = calc_depth(bucket->h.size); | |
1012 | newdepth = calc_depth(newsize); | |
1013 | if (olddepth != newdepth) { | |
1014 | bucket->num_nodes = 1 << newdepth; | |
1015 | if ((_realloc = realloc(bucket->node_weights, | |
1016 | sizeof(__u32)*bucket->num_nodes)) == NULL) { | |
1017 | return -ENOMEM; | |
1018 | } else { | |
1019 | bucket->node_weights = _realloc; | |
1020 | } | |
1021 | } | |
1022 | ||
1023 | bucket->h.size = newsize; | |
1024 | } | |
1025 | return 0; | |
1026 | } | |
1027 | ||
1028 | int crush_remove_straw_bucket_item(struct crush_map *map, | |
1029 | struct crush_bucket_straw *bucket, int item) | |
1030 | { | |
1031 | int newsize = bucket->h.size - 1; | |
1032 | unsigned i, j; | |
1033 | ||
1034 | for (i = 0; i < bucket->h.size; i++) { | |
1035 | if (bucket->h.items[i] == item) { | |
1036 | bucket->h.size--; | |
1037 | if (bucket->item_weights[i] < bucket->h.weight) | |
1038 | bucket->h.weight -= bucket->item_weights[i]; | |
1039 | else | |
1040 | bucket->h.weight = 0; | |
1041 | for (j = i; j < bucket->h.size; j++) { | |
1042 | bucket->h.items[j] = bucket->h.items[j+1]; | |
1043 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
1044 | } | |
1045 | break; | |
1046 | } | |
1047 | } | |
1048 | if (i == bucket->h.size) | |
1049 | return -ENOENT; | |
1050 | ||
1051 | void *_realloc = NULL; | |
1052 | ||
1053 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1054 | return -ENOMEM; | |
1055 | } else { | |
1056 | bucket->h.items = _realloc; | |
1057 | } | |
1058 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
1059 | return -ENOMEM; | |
1060 | } else { | |
1061 | bucket->item_weights = _realloc; | |
1062 | } | |
1063 | if ((_realloc = realloc(bucket->straws, sizeof(__u32)*newsize)) == NULL) { | |
1064 | return -ENOMEM; | |
1065 | } else { | |
1066 | bucket->straws = _realloc; | |
1067 | } | |
1068 | ||
1069 | return crush_calc_straw(map, bucket); | |
1070 | } | |
1071 | ||
1072 | int crush_remove_straw2_bucket_item(struct crush_map *map, | |
1073 | struct crush_bucket_straw2 *bucket, int item) | |
1074 | { | |
1075 | int newsize = bucket->h.size - 1; | |
1076 | unsigned i, j; | |
1077 | ||
1078 | for (i = 0; i < bucket->h.size; i++) { | |
1079 | if (bucket->h.items[i] == item) { | |
1080 | bucket->h.size--; | |
1081 | if (bucket->item_weights[i] < bucket->h.weight) | |
1082 | bucket->h.weight -= bucket->item_weights[i]; | |
1083 | else | |
1084 | bucket->h.weight = 0; | |
1085 | for (j = i; j < bucket->h.size; j++) { | |
1086 | bucket->h.items[j] = bucket->h.items[j+1]; | |
1087 | bucket->item_weights[j] = bucket->item_weights[j+1]; | |
1088 | } | |
1089 | break; | |
1090 | } | |
1091 | } | |
1092 | if (i == bucket->h.size) | |
1093 | return -ENOENT; | |
1094 | ||
1095 | void *_realloc = NULL; | |
1096 | ||
1097 | if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) { | |
1098 | return -ENOMEM; | |
1099 | } else { | |
1100 | bucket->h.items = _realloc; | |
1101 | } | |
1102 | if ((_realloc = realloc(bucket->item_weights, sizeof(__u32)*newsize)) == NULL) { | |
1103 | return -ENOMEM; | |
1104 | } else { | |
1105 | bucket->item_weights = _realloc; | |
1106 | } | |
1107 | ||
1108 | return 0; | |
1109 | } | |
1110 | ||
1111 | int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item) | |
1112 | { | |
1113 | switch (b->alg) { | |
1114 | case CRUSH_BUCKET_UNIFORM: | |
1115 | return crush_remove_uniform_bucket_item((struct crush_bucket_uniform *)b, item); | |
1116 | case CRUSH_BUCKET_LIST: | |
1117 | return crush_remove_list_bucket_item((struct crush_bucket_list *)b, item); | |
1118 | case CRUSH_BUCKET_TREE: | |
1119 | return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item); | |
1120 | case CRUSH_BUCKET_STRAW: | |
1121 | return crush_remove_straw_bucket_item(map, (struct crush_bucket_straw *)b, item); | |
1122 | case CRUSH_BUCKET_STRAW2: | |
1123 | return crush_remove_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item); | |
1124 | default: | |
1125 | return -1; | |
1126 | } | |
1127 | } | |
1128 | ||
1129 | ||
1130 | /************************************************/ | |
1131 | ||
1132 | int crush_adjust_uniform_bucket_item_weight(struct crush_bucket_uniform *bucket, int item, int weight) | |
1133 | { | |
1134 | int diff = (weight - bucket->item_weight) * bucket->h.size; | |
1135 | ||
1136 | bucket->item_weight = weight; | |
1137 | bucket->h.weight = bucket->item_weight * bucket->h.size; | |
1138 | ||
1139 | return diff; | |
1140 | } | |
1141 | ||
1142 | int crush_adjust_list_bucket_item_weight(struct crush_bucket_list *bucket, int item, int weight) | |
1143 | { | |
1144 | int diff; | |
1145 | unsigned i, j; | |
1146 | ||
1147 | for (i = 0; i < bucket->h.size; i++) { | |
1148 | if (bucket->h.items[i] == item) | |
1149 | break; | |
1150 | } | |
1151 | if (i == bucket->h.size) | |
1152 | return 0; | |
1153 | ||
1154 | diff = weight - bucket->item_weights[i]; | |
1155 | bucket->item_weights[i] = weight; | |
1156 | bucket->h.weight += diff; | |
1157 | ||
1158 | for (j = i; j < bucket->h.size; j++) | |
1159 | bucket->sum_weights[j] += diff; | |
1160 | ||
1161 | return diff; | |
1162 | } | |
1163 | ||
1164 | int crush_adjust_tree_bucket_item_weight(struct crush_bucket_tree *bucket, int item, int weight) | |
1165 | { | |
1166 | int diff; | |
1167 | int node; | |
1168 | unsigned i, j; | |
1169 | unsigned depth = calc_depth(bucket->h.size); | |
1170 | ||
1171 | for (i = 0; i < bucket->h.size; i++) { | |
1172 | if (bucket->h.items[i] == item) | |
1173 | break; | |
1174 | } | |
1175 | if (i == bucket->h.size) | |
1176 | return 0; | |
1177 | ||
1178 | node = crush_calc_tree_node(i); | |
1179 | diff = weight - bucket->node_weights[node]; | |
1180 | bucket->node_weights[node] = weight; | |
1181 | bucket->h.weight += diff; | |
1182 | ||
1183 | for (j=1; j<depth; j++) { | |
1184 | node = parent(node); | |
1185 | bucket->node_weights[node] += diff; | |
1186 | } | |
1187 | ||
1188 | return diff; | |
1189 | } | |
1190 | ||
1191 | int crush_adjust_straw_bucket_item_weight(struct crush_map *map, | |
1192 | struct crush_bucket_straw *bucket, | |
1193 | int item, int weight) | |
1194 | { | |
1195 | unsigned idx; | |
1196 | int diff; | |
1197 | int r; | |
1198 | ||
1199 | for (idx = 0; idx < bucket->h.size; idx++) | |
1200 | if (bucket->h.items[idx] == item) | |
1201 | break; | |
1202 | if (idx == bucket->h.size) | |
1203 | return 0; | |
1204 | ||
1205 | diff = weight - bucket->item_weights[idx]; | |
1206 | bucket->item_weights[idx] = weight; | |
1207 | bucket->h.weight += diff; | |
1208 | ||
1209 | r = crush_calc_straw(map, bucket); | |
1210 | if (r < 0) | |
1211 | return r; | |
1212 | ||
1213 | return diff; | |
1214 | } | |
1215 | ||
1216 | int crush_adjust_straw2_bucket_item_weight(struct crush_map *map, | |
1217 | struct crush_bucket_straw2 *bucket, | |
1218 | int item, int weight) | |
1219 | { | |
1220 | unsigned idx; | |
1221 | int diff; | |
1222 | ||
1223 | for (idx = 0; idx < bucket->h.size; idx++) | |
1224 | if (bucket->h.items[idx] == item) | |
1225 | break; | |
1226 | if (idx == bucket->h.size) | |
1227 | return 0; | |
1228 | ||
1229 | diff = weight - bucket->item_weights[idx]; | |
1230 | bucket->item_weights[idx] = weight; | |
1231 | bucket->h.weight += diff; | |
1232 | ||
1233 | return diff; | |
1234 | } | |
1235 | ||
1236 | int crush_bucket_adjust_item_weight(struct crush_map *map, | |
1237 | struct crush_bucket *b, | |
1238 | int item, int weight) | |
1239 | { | |
1240 | switch (b->alg) { | |
1241 | case CRUSH_BUCKET_UNIFORM: | |
1242 | return crush_adjust_uniform_bucket_item_weight((struct crush_bucket_uniform *)b, | |
1243 | item, weight); | |
1244 | case CRUSH_BUCKET_LIST: | |
1245 | return crush_adjust_list_bucket_item_weight((struct crush_bucket_list *)b, | |
1246 | item, weight); | |
1247 | case CRUSH_BUCKET_TREE: | |
1248 | return crush_adjust_tree_bucket_item_weight((struct crush_bucket_tree *)b, | |
1249 | item, weight); | |
1250 | case CRUSH_BUCKET_STRAW: | |
1251 | return crush_adjust_straw_bucket_item_weight(map, | |
1252 | (struct crush_bucket_straw *)b, | |
1253 | item, weight); | |
1254 | case CRUSH_BUCKET_STRAW2: | |
1255 | return crush_adjust_straw2_bucket_item_weight(map, | |
1256 | (struct crush_bucket_straw2 *)b, | |
1257 | item, weight); | |
1258 | default: | |
1259 | return -1; | |
1260 | } | |
1261 | } | |
1262 | ||
1263 | /************************************************/ | |
1264 | ||
1265 | static int crush_reweight_uniform_bucket(struct crush_map *map, struct crush_bucket_uniform *bucket) | |
1266 | { | |
1267 | unsigned i; | |
1268 | unsigned sum = 0, n = 0, leaves = 0; | |
1269 | ||
1270 | for (i = 0; i < bucket->h.size; i++) { | |
1271 | int id = bucket->h.items[i]; | |
1272 | if (id < 0) { | |
1273 | struct crush_bucket *c = map->buckets[-1-id]; | |
1274 | crush_reweight_bucket(map, c); | |
1275 | ||
1276 | if (crush_addition_is_unsafe(sum, c->weight)) | |
1277 | return -ERANGE; | |
1278 | ||
1279 | sum += c->weight; | |
1280 | n++; | |
1281 | } else { | |
1282 | leaves++; | |
1283 | } | |
1284 | } | |
1285 | ||
1286 | if (n > leaves) | |
1287 | bucket->item_weight = sum / n; // more bucket children than leaves, average! | |
1288 | bucket->h.weight = bucket->item_weight * bucket->h.size; | |
1289 | ||
1290 | return 0; | |
1291 | } | |
1292 | ||
1293 | static int crush_reweight_list_bucket(struct crush_map *map, struct crush_bucket_list *bucket) | |
1294 | { | |
1295 | unsigned i; | |
1296 | ||
1297 | bucket->h.weight = 0; | |
1298 | for (i = 0; i < bucket->h.size; i++) { | |
1299 | int id = bucket->h.items[i]; | |
1300 | if (id < 0) { | |
1301 | struct crush_bucket *c = map->buckets[-1-id]; | |
1302 | crush_reweight_bucket(map, c); | |
1303 | bucket->item_weights[i] = c->weight; | |
1304 | } | |
1305 | ||
1306 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1307 | return -ERANGE; | |
1308 | ||
1309 | bucket->h.weight += bucket->item_weights[i]; | |
1310 | } | |
1311 | ||
1312 | return 0; | |
1313 | } | |
1314 | ||
1315 | static int crush_reweight_tree_bucket(struct crush_map *map, struct crush_bucket_tree *bucket) | |
1316 | { | |
1317 | unsigned i; | |
1318 | ||
1319 | bucket->h.weight = 0; | |
1320 | for (i = 0; i < bucket->h.size; i++) { | |
1321 | int node = crush_calc_tree_node(i); | |
1322 | int id = bucket->h.items[i]; | |
1323 | if (id < 0) { | |
1324 | struct crush_bucket *c = map->buckets[-1-id]; | |
1325 | crush_reweight_bucket(map, c); | |
1326 | bucket->node_weights[node] = c->weight; | |
1327 | } | |
1328 | ||
1329 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->node_weights[node])) | |
1330 | return -ERANGE; | |
1331 | ||
1332 | bucket->h.weight += bucket->node_weights[node]; | |
1333 | ||
1334 | ||
1335 | } | |
1336 | ||
1337 | return 0; | |
1338 | } | |
1339 | ||
1340 | static int crush_reweight_straw_bucket(struct crush_map *map, struct crush_bucket_straw *bucket) | |
1341 | { | |
1342 | unsigned i; | |
1343 | ||
1344 | bucket->h.weight = 0; | |
1345 | for (i = 0; i < bucket->h.size; i++) { | |
1346 | int id = bucket->h.items[i]; | |
1347 | if (id < 0) { | |
1348 | struct crush_bucket *c = map->buckets[-1-id]; | |
1349 | crush_reweight_bucket(map, c); | |
1350 | bucket->item_weights[i] = c->weight; | |
1351 | } | |
1352 | ||
1353 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1354 | return -ERANGE; | |
1355 | ||
1356 | bucket->h.weight += bucket->item_weights[i]; | |
1357 | } | |
1358 | crush_calc_straw(map, bucket); | |
1359 | ||
1360 | return 0; | |
1361 | } | |
1362 | ||
1363 | static int crush_reweight_straw2_bucket(struct crush_map *map, struct crush_bucket_straw2 *bucket) | |
1364 | { | |
1365 | unsigned i; | |
1366 | ||
1367 | bucket->h.weight = 0; | |
1368 | for (i = 0; i < bucket->h.size; i++) { | |
1369 | int id = bucket->h.items[i]; | |
1370 | if (id < 0) { | |
1371 | struct crush_bucket *c = map->buckets[-1-id]; | |
1372 | crush_reweight_bucket(map, c); | |
1373 | bucket->item_weights[i] = c->weight; | |
1374 | } | |
1375 | ||
1376 | if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[i])) | |
1377 | return -ERANGE; | |
1378 | ||
1379 | bucket->h.weight += bucket->item_weights[i]; | |
1380 | } | |
1381 | ||
1382 | return 0; | |
1383 | } | |
1384 | ||
1385 | int crush_reweight_bucket(struct crush_map *map, struct crush_bucket *b) | |
1386 | { | |
1387 | switch (b->alg) { | |
1388 | case CRUSH_BUCKET_UNIFORM: | |
1389 | return crush_reweight_uniform_bucket(map, (struct crush_bucket_uniform *)b); | |
1390 | case CRUSH_BUCKET_LIST: | |
1391 | return crush_reweight_list_bucket(map, (struct crush_bucket_list *)b); | |
1392 | case CRUSH_BUCKET_TREE: | |
1393 | return crush_reweight_tree_bucket(map, (struct crush_bucket_tree *)b); | |
1394 | case CRUSH_BUCKET_STRAW: | |
1395 | return crush_reweight_straw_bucket(map, (struct crush_bucket_straw *)b); | |
1396 | case CRUSH_BUCKET_STRAW2: | |
1397 | return crush_reweight_straw2_bucket(map, (struct crush_bucket_straw2 *)b); | |
1398 | default: | |
1399 | return -1; | |
1400 | } | |
1401 | } | |
1402 | ||
1403 | struct crush_choose_arg *crush_make_choose_args(struct crush_map *map, int num_positions) | |
1404 | { | |
1405 | int b; | |
1406 | int sum_bucket_size = 0; | |
1407 | int bucket_count = 0; | |
1408 | for (b = 0; b < map->max_buckets; b++) { | |
1409 | if (map->buckets[b] == 0) | |
1410 | continue; | |
1411 | sum_bucket_size += map->buckets[b]->size; | |
1412 | bucket_count++; | |
1413 | } | |
1414 | dprintk("sum_bucket_size %d max_buckets %d bucket_count %d\n", | |
1415 | sum_bucket_size, map->max_buckets, bucket_count); | |
1416 | int size = (sizeof(struct crush_choose_arg) * map->max_buckets + | |
1417 | sizeof(struct crush_weight_set) * bucket_count * num_positions + | |
1418 | sizeof(__u32) * sum_bucket_size * num_positions + // weights | |
1419 | sizeof(__u32) * sum_bucket_size); // ids | |
1420 | char *space = malloc(size); | |
1421 | struct crush_choose_arg *arg = (struct crush_choose_arg *)space; | |
1422 | struct crush_weight_set *weight_set = (struct crush_weight_set *)(arg + map->max_buckets); | |
1423 | __u32 *weights = (__u32 *)(weight_set + bucket_count * num_positions); | |
1424 | char *weight_set_ends = (char*)weights; | |
1425 | int *ids = (int *)(weights + sum_bucket_size * num_positions); | |
1426 | char *weights_end = (char *)ids; | |
1427 | char *ids_end = (char *)(ids + sum_bucket_size); | |
1428 | BUG_ON(space + size != ids_end); | |
1429 | for (b = 0; b < map->max_buckets; b++) { | |
1430 | if (map->buckets[b] == 0) { | |
1431 | memset(&arg[b], '\0', sizeof(struct crush_choose_arg)); | |
1432 | continue; | |
1433 | } | |
1434 | struct crush_bucket_straw2 *bucket = (struct crush_bucket_straw2 *)map->buckets[b]; | |
1435 | ||
1436 | int position; | |
1437 | for (position = 0; position < num_positions; position++) { | |
1438 | memcpy(weights, bucket->item_weights, sizeof(__u32) * bucket->h.size); | |
1439 | weight_set[position].weights = weights; | |
1440 | weight_set[position].size = bucket->h.size; | |
1441 | dprintk("moving weight %d bytes forward\n", (int)((weights + bucket->h.size) - weights)); | |
1442 | weights += bucket->h.size; | |
1443 | } | |
1444 | arg[b].weight_set = weight_set; | |
1445 | arg[b].weight_set_size = num_positions; | |
1446 | weight_set += position; | |
1447 | ||
1448 | memcpy(ids, bucket->h.items, sizeof(int) * bucket->h.size); | |
1449 | arg[b].ids = ids; | |
1450 | arg[b].ids_size = bucket->h.size; | |
1451 | ids += bucket->h.size; | |
1452 | } | |
1453 | BUG_ON((char*)weight_set_ends != (char*)weight_set); | |
1454 | BUG_ON((char*)weights_end != (char*)weights); | |
1455 | BUG_ON((char*)ids != (char*)ids_end); | |
1456 | return arg; | |
1457 | } | |
1458 | ||
1459 | void crush_destroy_choose_args(struct crush_choose_arg *args) | |
1460 | { | |
1461 | free(args); | |
1462 | } | |
1463 | ||
1464 | /***************************/ | |
1465 | ||
1466 | /* methods to check for safe arithmetic operations */ | |
1467 | ||
1468 | int crush_addition_is_unsafe(__u32 a, __u32 b) | |
1469 | { | |
1470 | if ((((__u32)(-1)) - b) < a) | |
1471 | return 1; | |
1472 | else | |
1473 | return 0; | |
1474 | } | |
1475 | ||
1476 | int crush_multiplication_is_unsafe(__u32 a, __u32 b) | |
1477 | { | |
1478 | /* prevent division by zero */ | |
1479 | if (!a) | |
1480 | return 0; | |
1481 | if (!b) | |
1482 | return 1; | |
1483 | if ((((__u32)(-1)) / b) < a) | |
1484 | return 1; | |
1485 | else | |
1486 | return 0; | |
1487 | } | |
1488 | ||
1489 | /***************************/ | |
1490 | ||
1491 | /* methods to configure crush_map */ | |
1492 | ||
1493 | void set_legacy_crush_map(struct crush_map *map) { | |
1494 | /* initialize legacy tunable values */ | |
1495 | map->choose_local_tries = 2; | |
1496 | map->choose_local_fallback_tries = 5; | |
1497 | map->choose_total_tries = 19; | |
1498 | map->chooseleaf_descend_once = 0; | |
1499 | map->chooseleaf_vary_r = 0; | |
1500 | map->chooseleaf_stable = 0; | |
1501 | map->straw_calc_version = 0; | |
1502 | ||
1503 | // by default, use legacy types, and also exclude tree, | |
1504 | // since it was buggy. | |
1505 | map->allowed_bucket_algs = CRUSH_LEGACY_ALLOWED_BUCKET_ALGS; | |
1506 | } | |
1507 | ||
1508 | void set_optimal_crush_map(struct crush_map *map) { | |
1509 | map->choose_local_tries = 0; | |
1510 | map->choose_local_fallback_tries = 0; | |
1511 | map->choose_total_tries = 50; | |
1512 | map->chooseleaf_descend_once = 1; | |
1513 | map->chooseleaf_vary_r = 1; | |
1514 | map->chooseleaf_stable = 1; | |
1515 | map->allowed_bucket_algs = ( | |
1516 | (1 << CRUSH_BUCKET_UNIFORM) | | |
1517 | (1 << CRUSH_BUCKET_LIST) | | |
1518 | (1 << CRUSH_BUCKET_STRAW) | | |
1519 | (1 << CRUSH_BUCKET_STRAW2)); | |
1520 | } |