git.proxmox.com Git - ceph.git/blob - ceph/src/crush/builder.h
1 #ifndef CEPH_CRUSH_BUILDER_H
2 #define CEPH_CRUSH_BUILDER_H
4 #include "include/int_types.h"
7 struct crush_choose_arg
;
13 * Allocate a crush_map with __malloc(3)__ and initialize it. The
14 * caller is responsible for deallocating the crush_map with
17 * The content of the allocated crush_map is set with
18 * set_optimal_crush_map(). The caller is responsible for setting each
19 * tunable in the __crush_map__ for backward compatibility or mapping stability.
22 * @returns a pointer to the newly created crush_map or NULL
24 extern struct crush_map
* crush_create ();
27 * Analyze the content of __map__ and set the internal values required
28 * before it can be used to map values with crush_do_rule(). The caller
29 * must make sure it is run before crush_do_rule() and after any
30 * function that modifies the __map__ (crush_add_bucket(), etc.).
32 * @param map the crush_map
34 extern void crush_finalize ( struct crush_map
* map
);
39 * Allocate an empty crush_rule structure large enough to store __len__ steps.
40 * Steps can be added to a rule via crush_rule_set_step().
42 * The caller is responsible for deallocating the returned pointer via
43 * crush_destroy_rule().
45 * If __malloc(3)__ fails, return NULL.
47 * @param len number of steps in the rule
48 * @param type user defined value
50 * @returns a pointer to the newly created rule or NULL
52 extern struct crush_rule
* crush_make_rule ( int len
, int type
);
55 * Set the __pos__ step of the __rule__ to an operand and up to two arguments.
56 * The value of the operand __op__ determines if the arguments are used and how:
58 * - __CRUSH_RULE_NOOP__ do nothing.
59 * - __CRUSH_RULE_TAKE__ select the __arg1__ item
60 * - __CRUSH_RULE_EMIT__ append the selection to the results and clear the selection.
63 * - __CRUSH_RULE_CHOOSE_FIRSTN__ and __CRUSH_RULE_CHOOSE_INDEP__
64 * recursively explore each bucket currently selected, looking for
65 * __arg1__ items of type __arg2__ and select them.
66 * - __CRUSH_RULE_CHOOSELEAF_FIRSTN__ and __CRUSH_RULE_CHOOSELEAF_INDEP__
67 * recursively explore each bucket currently selected, looking for
68 * __arg1__ leaves within all the buckets of type __arg2__ and select them.
71 * In all __CHOOSE__ steps, if __arg1__ is less than or equal to zero,
72 * the number of items to select is equal to the __max_result__ argument
73 * of crush_do_rule() minus __arg1__. It is common to set __arg1__ to zero
74 * to select as many items as requested by __max_result__.
76 * - __CRUSH_RULE_SET_CHOOSE_TRIES__ and __CRUSH_RULE_SET_CHOOSELEAF_TRIES__
78 * The CHOOSE_FIRSTN and CHOOSE_INDEP rule steps look for buckets of
79 * a given type, randomly selecting them. If they are unlucky and
80 * find the same bucket twice, they will try N+1 times (N being the
81 * value of the choose_total_tries tunable). If there is a previous
82 * SET_CHOOSE_TRIES step in the same rule, it will try C times
83 * instead (C being the value of the argument of the
84 * SET_CHOOSE_TRIES step).
86 * Note: the __choose_total_tries__ tunable defined in crush_map is
87 * the number of retries, not the number of tries. The number of tries
88 * is the number of retries + 1. The SET_CHOOSE_TRIES rule step sets the
89 * number of tries and does not need the + 1. This confusing
90 * difference is inherited from an off-by-one bug from years ago.
92 * The CHOOSELEAF_FIRSTN and CHOOSELEAF_INDEP rule steps do the same
93 * as CHOOSE_FIRSTN and CHOOSE_INDEP but also recursively explore
94 * each bucket found, looking for a single device. The same device
95 * may be found in two different buckets because the crush map is
96 * not a strict hierarchy, it is a DAG. When such a collision
97 * happens, they will try again. The number of times they try to
98 * find a non colliding device is:
100 * - If FIRSTN and there is no previous SET_CHOOSELEAF_TRIES rule
101 * step: try N + 1 times (N being the value of the
102 * __choose_total_tries__ tunable defined in crush_map)
104 * - If FIRSTN and there is a previous SET_CHOOSELEAF_TRIES rule
105 * step: try P times (P being the value of the argument of the
106 * SET_CHOOSELEAF_TRIES rule step)
108 * - If INDEP and there is no previous SET_CHOOSELEAF_TRIES rule
111 * - If INDEP and there is a previous SET_CHOOSELEAF_TRIES rule step: try
112 * P times (P being the value of the argument of the SET_CHOOSELEAF_TRIES rule step)
115 * @param rule the rule in which the step is inserted
116 * @param pos the zero based step index
117 * @param op one of __CRUSH_RULE_NOOP__, __CRUSH_RULE_TAKE__, __CRUSH_RULE_CHOOSE_FIRSTN__, __CRUSH_RULE_CHOOSE_INDEP__, __CRUSH_RULE_CHOOSELEAF_FIRSTN__, __CRUSH_RULE_CHOOSELEAF_INDEP__, __CRUSH_RULE_SET_CHOOSE_TRIES__, __CRUSH_RULE_SET_CHOOSELEAF_TRIES__ or __CRUSH_RULE_EMIT__
118 * @param arg1 first argument for __op__
119 * @param arg2 second argument for __op__
121 extern void crush_rule_set_step ( struct crush_rule
* rule
, int pos
, int op
, int arg1
, int arg2
);
124 * Add the __rule__ into the crush __map__ and assign it the
125 * __ruleno__ unique identifier. If __ruleno__ is -1, the function will
126 * assign the lowest available identifier. The __ruleno__ value must be
127 * a positive integer lower than __CRUSH_MAX_RULES__.
129 * - return -ENOSPC if the rule identifier is >= __CRUSH_MAX_RULES__
130 * - return -ENOMEM if __realloc(3)__ fails to expand the array of
131 * rules in the __map__
133 * @param map the crush_map
134 * @param rule the rule to add to the __map__
135 * @param ruleno a positive integer < __CRUSH_MAX_RULES__ or -1
137 * @returns the rule unique identifier on success, < 0 on error
139 extern int crush_add_rule ( struct crush_map
* map
, struct crush_rule
* rule
, int ruleno
);
142 extern int crush_get_next_bucket_id ( struct crush_map
* map
);
145 * Add __bucket__ into the crush __map__ and assign it the
146 * __bucketno__ unique identifier. If __bucketno__ is 0, the function
147 * will assign the lowest available identifier. The bucket identifier
148 * must be a negative integer. The bucket identifier is returned via __idout__.
151 * - return -ENOMEM if __realloc(3)__ fails to expand the array of
152 * buckets in the __map__
153 * - return -EEXIST if the __bucketno__ identifier is already assigned
156 * @param[in] map the crush_map
157 * @param[in] bucketno the bucket unique identifier or 0
158 * @param[in] bucket the bucket to add to the __map__
159 * @param[out] idout a pointer to the bucket identifier
161 * @returns 0 on success, < 0 on error
163 extern int crush_add_bucket ( struct crush_map
* map
,
165 struct crush_bucket
* bucket
, int * idout
);
168 * Allocate a crush_bucket with __malloc(3)__ and initialize it. The
169 * content of the bucket is filled with __size__ items from
170 * __items__. The item selection is set to use __alg__ which is one of
171 * ::CRUSH_BUCKET_UNIFORM, ::CRUSH_BUCKET_LIST or
172 * ::CRUSH_BUCKET_STRAW2. The initial __items__ are assigned a
173 * weight from the __weights__ array, depending on the value of
174 * __alg__. If __alg__ is ::CRUSH_BUCKET_UNIFORM, all items are set
175 * to have a weight equal to __weights[0]__, otherwise the weight of
176 * __items[x]__ is set to be the value of __weights[x]__.
178 * The caller is responsible for deallocating the returned pointer via
179 * crush_destroy_bucket().
181 * @param map __unused__
182 * @param alg algorithm for item selection
183 * @param hash always set to CRUSH_HASH_RJENKINS1
184 * @param type user defined bucket type
185 * @param size number of entries in the __items__ array
186 * @param items array of __size__ items
187 * @param weights the weight of each item in __items__, depending on __alg__
189 * @returns a pointer to the newly created bucket or NULL
191 struct crush_bucket
* crush_make_bucket ( struct crush_map
* map
, int alg
, int hash
, int type
, int size
, int * items
, int * weights
);
192 extern struct crush_choose_arg
* crush_make_choose_args ( struct crush_map
* map
, int num_positions
);
193 extern void crush_destroy_choose_args ( struct crush_choose_arg
* args
);
196 * Add __item__ to __bucket__ with __weight__. The weight of the new
197 * item is added to the weight of the bucket so that it reflects
198 * the total weight of all items.
200 * If __bucket->alg__ is ::CRUSH_BUCKET_UNIFORM, the value of __weight__ must be equal to
201 * __(struct crush_bucket_uniform *)bucket->item_weight__.
203 * - return -ENOMEM if the __bucket__ cannot be resized with __realloc(3)__.
204 * - return -ERANGE if adding __weight__ to the weight of the bucket overflows.
205 * - return -EINVAL if __bucket->alg__ is ::CRUSH_BUCKET_UNIFORM and
206 * the __weight__ is not equal to __(struct crush_bucket_uniform *)bucket->item_weight__.
207 * - return -1 if the value of __bucket->alg__ is unknown.
209 * @returns 0 on success, < 0 on error
211 extern int crush_bucket_add_item ( struct crush_map
* map
, struct crush_bucket
* bucket
, int item
, int weight
);
214 * If __bucket->alg__ is ::CRUSH_BUCKET_UNIFORM,
215 * __(struct crush_bucket_uniform *)bucket->item_weight__ is set to __weight__ and the
216 * weight of the bucket is set to be the number of items in the bucket times the weight.
217 * The return value is the difference between the new bucket weight and the former
218 * bucket weight. The __item__ argument is ignored.
220 * If __bucket->alg__ is different from ::CRUSH_BUCKET_UNIFORM,
221 * set the __weight__ of __item__ in __bucket__. The former weight of the
222 * item is subtracted from the weight of the bucket and the new weight is added.
223 * The return value is the difference between the new item weight and the former item weight.
226 * @returns the difference between the new weight and the former weight
228 extern int crush_bucket_adjust_item_weight ( struct crush_map
* map
, struct crush_bucket
* bucket
, int item
, int weight
);
231 * Recursively update the weight of __bucket__ and its children, depth
232 * first. The __bucket__ weight is set to the sum of the weight of the items it contains.
235 * - return -ERANGE if the sum of the weight of the items in __bucket__ overflows.
236 * - return -1 if the value of __bucket->alg__ is unknown.
238 * @param map a crush_map containing __bucket__
239 * @param bucket the root of the tree to reweight
240 * @returns 0 on success, < 0 on error
242 extern int crush_reweight_bucket ( struct crush_map
* map
, struct crush_bucket
* bucket
);
245 * Remove __bucket__ from __map__ and deallocate it via crush_destroy_bucket().
246 * __assert(3)__ that __bucket__ is in __map__. The caller is responsible for
247 * making sure the bucket is not the child of any other bucket in the __map__.
249 * @param map a crush_map containing __bucket__
250 * @param bucket the bucket to remove from __map__
253 extern int crush_remove_bucket ( struct crush_map
* map
, struct crush_bucket
* bucket
);
256 * Remove __item__ from __bucket__ and subtract the item weight from
257 * the bucket weight. If the weight of the item is greater than the
258 * weight of the bucket, silently set the bucket weight to zero.
260 * - return -ENOMEM if the __bucket__ cannot be sized down with __realloc(3)__.
261 * - return -1 if the value of __bucket->alg__ is unknown.
263 * @param map __unused__
264 * @param bucket the bucket from which __item__ is removed
265 * @param item the item to remove from __bucket__
266 * @returns 0 on success, < 0 on error
268 extern int crush_bucket_remove_item ( struct crush_map
* map
, struct crush_bucket
* bucket
, int item
);
270 struct crush_bucket_uniform
*
271 crush_make_uniform_bucket ( int hash
, int type
, int size
,
274 struct crush_bucket_list
*
275 crush_make_list_bucket ( int hash
, int type
, int size
,
278 struct crush_bucket_tree
*
279 crush_make_tree_bucket ( int hash
, int type
, int size
,
280 int * items
, /* in leaf order */
282 struct crush_bucket_straw
*
283 crush_make_straw_bucket ( struct crush_map
* map
,
284 int hash
, int type
, int size
,
288 extern int crush_addition_is_unsafe ( __u32 a
, __u32 b
);
289 extern int crush_multiplication_is_unsafe ( __u32 a
, __u32 b
);
293 * Set the __map__ tunables to implement the most ancient behavior,
294 * for backward compatibility purposes only.
296 * - choose_local_tries == 2
297 * - choose_local_fallback_tries == 5
298 * - choose_total_tries == 19
299 * - chooseleaf_descend_once == 0
300 * - chooseleaf_vary_r == 0
301 * - straw_calc_version == 0
302 * - chooseleaf_stable == 0
304 * See the __crush_map__ documentation for more information about each tunable.
307 * @param map a crush_map
309 extern void set_legacy_crush_map ( struct crush_map
* map
);
312 * Set the __map__ tunables to implement the optimal behavior. These
313 * are the values set by crush_create(). It does not guarantee a
314 * stable mapping after an upgrade.
316 * For instance when a bug is fixed it may significantly change the
317 * mapping. In that case a new tunable (say tunable_new) is added so
318 * the caller can control when the bug fix is activated. The
319 * set_optimal_crush_map() function will always set all tunables,
320 * including tunable_new, to fix all bugs even if it means changing
321 * the mapping. If the caller needs fine grained control on the
322 * tunables to upgrade to a new version without changing the mapping,
323 * it needs to set the __crush_map__ tunables individually.
325 * See the __crush_map__ documentation for more information about each tunable.
328 * @param map a crush_map
330 extern void set_optimal_crush_map ( struct crush_map
* map
);