#include "include/assert.h"
#include "include/err.h"
#include "include/encoding.h"
-
+#include "include/mempool.h"
#include "common/Mutex.h"
class Formatter;
}
+namespace CrushTreeDumper {
+ typedef mempool::osdmap::map<int64_t,string> name_map_t;
+}
+
WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's
inline static void encode(const crush_rule_step &s, bufferlist &bl)
using namespace std;
class CrushWrapper {
public:
+ // magic value used by OSDMap for a "default" fallback choose_args, used if
+ // the choose_arg_map passed to do_rule does not exist. if this also
+ // doesn't exist, fall back to canonical weights.
+ enum {
+ DEFAULT_CHOOSE_ARGS = -1
+ };
+
std::map<int32_t, string> type_map; /* bucket/device type names */
std::map<int32_t, string> name_map; /* bucket/device names */
std::map<int32_t, string> rule_name_map;
std::map<int32_t, string> class_name; /* class id -> class name */
std::map<string, int32_t> class_rname; /* class name -> class id */
std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */
- std::map<uint64_t, crush_choose_arg_map> choose_args;
+ std::map<int64_t, crush_choose_arg_map> choose_args;
private:
struct crush_map *crush;
/// true if any ruleset has more than 1 rule
bool has_multirule_rulesets() const;
+ /// true if any buckets that aren't straw2
+ bool has_non_straw2_buckets() const;
+
// tunables
void set_tunables_argonaut() {
crush->choose_local_tries = 2;
name_rmap[bn] = a;
}
}
- bool id_has_class(int i) {
- int idout;
- int classout;
- if (split_id_class(i, &idout, &classout) != 0)
- return false;
- return classout != -1;
- }
int split_id_class(int i, int *idout, int *classout) const;
bool class_exists(const string& name) const {
return 0;
}
- int rename_class(const string& srcname, const string& dstname) {
- auto p = class_rname.find(srcname);
- if (p == class_rname.end())
- return -ENOENT;
- int class_id = p->second;
- auto q = class_name.find(class_id);
- if (q == class_name.end())
- return -ENOENT;
- class_rname.erase(srcname);
- class_name.erase(class_id);
- class_rname[dstname] = class_id;
- class_name[class_id] = dstname;
- return 0;
- }
-
int32_t _alloc_class_id() const;
int get_or_create_class_id(const string& name) {
if (have_rmaps)
rule_name_rmap[name] = i;
}
+ bool is_shadow_item(int id) const {
+ const char *name = get_item_name(id);
+ return name && !is_valid_crush_name(name);
+ }
/**
*/
void find_roots(set<int>& roots) const;
+
+ /**
+ * find tree roots that contain shadow (device class) items only
+ */
+ void find_shadow_roots(set<int>& roots) const {
+ set<int> all;
+ find_roots(all);
+ for (auto& p: all) {
+ if (is_shadow_item(p)) {
+ roots.insert(p);
+ }
+ }
+ }
+
/**
* find tree roots that are not shadow (device class) items
*
* These are parentless nodes in the map that are not shadow
* items for device classes.
*/
- void find_nonshadow_roots(set<int>& roots) const;
+ void find_nonshadow_roots(set<int>& roots) const {
+ set<int> all;
+ find_roots(all);
+ for (auto& p: all) {
+ if (!is_shadow_item(p)) {
+ roots.insert(p);
+ }
+ }
+ }
/**
* see if an item is contained within a subtree
* FIXME: ambiguous for items that occur multiple times in the map
*/
pair<string,string> get_immediate_parent(int id, int *ret = NULL);
+
int get_immediate_parent_id(int id, int *parent) const;
/**
int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap);
/* modifiers */
- int add_rule(int len, int ruleset, int type, int minsize, int maxsize, int ruleno) {
+
+ int add_rule(int ruleno, int len, int type, int minsize, int maxsize) {
if (!crush) return -ENOENT;
- crush_rule *n = crush_make_rule(len, ruleset, type, minsize, maxsize);
+ crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize);
assert(n);
ruleno = crush_add_rule(crush, n, ruleno);
return ruleno;
/** buckets **/
-private:
const crush_bucket *get_bucket(int id) const {
if (!crush)
return (crush_bucket *)(-EINVAL);
return (crush_bucket *)(-ENOENT);
return ret;
}
+private:
crush_bucket *get_bucket(int id) {
if (!crush)
return (crush_bucket *)(-EINVAL);
*
* returns the weight of the detached bucket
**/
- int detach_bucket(CephContext *cct, int item){
- if (!crush)
- return (-EINVAL);
-
- if (item >= 0)
- return (-EINVAL);
-
- // check that the bucket that we want to detach exists
- assert(bucket_exists(item));
-
- // get the bucket's weight
- crush_bucket *b = get_bucket(item);
- unsigned bucket_weight = b->weight;
-
- // get where the bucket is located
- pair<string, string> bucket_location = get_immediate_parent(item);
-
- // get the id of the parent bucket
- int parent_id = get_item_id(bucket_location.second);
-
- // get the parent bucket
- crush_bucket *parent_bucket = get_bucket(parent_id);
-
- if (!IS_ERR(parent_bucket)) {
- // zero out the bucket weight
- bucket_adjust_item_weight(cct, parent_bucket, item, 0);
- adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight);
-
- // remove the bucket from the parent
- bucket_remove_item(parent_bucket, item);
- } else if (PTR_ERR(parent_bucket) != -ENOENT) {
- return PTR_ERR(parent_bucket);
- }
-
- // check that we're happy
- int test_weight = 0;
- map<string,string> test_location;
- test_location[ bucket_location.first ] = (bucket_location.second);
-
- bool successful_detach = !(check_item_loc(cct, item, test_location, &test_weight));
- assert(successful_detach);
- assert(test_weight == 0);
-
- return bucket_weight;
- }
+ int detach_bucket(CephContext *cct, int item);
public:
int get_max_buckets() const {
/* modifiers */
int add_bucket(int bucketno, int alg, int hash, int type, int size,
- int *items, int *weights, int *idout) {
- if (alg == 0) {
- alg = get_default_bucket_alg();
- if (alg == 0)
- return -EINVAL;
- }
- crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items, weights);
- assert(b);
- return crush_add_bucket(crush, bucketno, b, idout);
- }
-
+ int *items, int *weights, int *idout);
int bucket_add_item(crush_bucket *bucket, int item, int weight);
int bucket_remove_item(struct crush_bucket *bucket, int item);
int bucket_adjust_item_weight(CephContext *cct, struct crush_bucket *bucket, int item, int weight);
}
int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
+ int remove_device_class(CephContext *cct, int id, ostream *ss);
int device_class_clone(int original, int device_class, int *clone);
- bool class_is_in_use(int class_id);
+ bool class_is_in_use(int class_id, ostream *ss = nullptr);
int populate_classes();
int rebuild_roots_with_classes();
/* remove unused roots generated for class devices */
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one to alloc.
*/
- crush->choose_tries = (__u32 *)malloc(sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
+ crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries),
+ (crush->choose_total_tries + 1));
memset(crush->choose_tries, 0,
sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
}
return result;
}
- crush_choose_arg_map choose_args_get(uint64_t choose_args_index) const {
+ bool have_choose_args(int64_t choose_args_index) const {
+ return choose_args.count(choose_args_index);
+ }
+
+ crush_choose_arg_map choose_args_get_with_fallback(
+ int64_t choose_args_index) const {
+ auto i = choose_args.find(choose_args_index);
+ if (i == choose_args.end()) {
+ i = choose_args.find(DEFAULT_CHOOSE_ARGS);
+ }
+ if (i == choose_args.end()) {
+ crush_choose_arg_map arg_map;
+ arg_map.args = NULL;
+ arg_map.size = 0;
+ return arg_map;
+ } else {
+ return i->second;
+ }
+ }
+ crush_choose_arg_map choose_args_get(int64_t choose_args_index) const {
auto i = choose_args.find(choose_args_index);
if (i == choose_args.end()) {
crush_choose_arg_map arg_map;
}
free(arg_map.args);
}
-
+
+ void create_choose_args(int64_t id, int positions) {
+ if (choose_args.count(id))
+ return;
+ assert(positions);
+ auto &cmap = choose_args[id];
+ cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg),
+ crush->max_buckets);
+ cmap.size = crush->max_buckets;
+ for (int bidx=0; bidx < crush->max_buckets; ++bidx) {
+ crush_bucket *b = crush->buckets[bidx];
+ auto &carg = cmap.args[bidx];
+ carg.ids = NULL;
+ carg.ids_size = 0;
+ if (b && b->alg == CRUSH_BUCKET_STRAW2) {
+ crush_bucket_straw2 *sb = (crush_bucket_straw2*)b;
+ carg.weight_set_size = positions;
+ carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set),
+ carg.weight_set_size);
+ // initialize with canonical weights
+ for (int pos = 0; pos < positions; ++pos) {
+ carg.weight_set[pos].size = b->size;
+ carg.weight_set[pos].weights = (__u32*)calloc(4, b->size);
+ for (unsigned i = 0; i < b->size; ++i) {
+ carg.weight_set[pos].weights[i] = sb->item_weights[i];
+ }
+ }
+ } else {
+ carg.weight_set = NULL;
+ carg.weight_set_size = 0;
+ }
+ }
+ }
+
+ void rm_choose_args(int64_t id) {
+ auto p = choose_args.find(id);
+ if (p != choose_args.end()) {
+ destroy_choose_args(p->second);
+ choose_args.erase(p);
+ }
+ }
+
void choose_args_clear() {
for (auto w : choose_args)
destroy_choose_args(w.second);
choose_args.clear();
}
+ // adjust choose_args_map weight, preserving the hierarchical summation
+ // property. used by callers optimizing layouts by tweaking weights.
+ int _choose_args_adjust_item_weight_in_bucket(
+ CephContext *cct,
+ crush_choose_arg_map cmap,
+ int bucketid,
+ int id,
+ const vector<int>& weight,
+ ostream *ss);
+ int choose_args_adjust_item_weight(
+ CephContext *cct,
+ crush_choose_arg_map cmap,
+ int id, const vector<int>& weight,
+ ostream *ss);
+ int choose_args_adjust_item_weightf(
+ CephContext *cct,
+ crush_choose_arg_map cmap,
+ int id, const vector<double>& weightf,
+ ostream *ss) {
+ vector<int> weight(weightf.size());
+ for (unsigned i = 0; i < weightf.size(); ++i) {
+ weight[i] = (int)(weightf[i] * (float)0x10000);
+ }
+ return choose_args_adjust_item_weight(cct, cmap, id, weight, ss);
+ }
+
+ int get_choose_args_positions(crush_choose_arg_map cmap) {
+ // infer positions from other buckets
+ for (unsigned j = 0; j < cmap.size; ++j) {
+ if (cmap.args[j].weight_set_size) {
+ return cmap.args[j].weight_set_size;
+ }
+ }
+ return 1;
+ }
+
template<typename WeightVector>
void do_rule(int rule, int x, vector<int>& out, int maxout,
const WeightVector& weight,
int rawout[maxout];
char work[crush_work_size(crush, maxout)];
crush_init_workspace(crush, work);
- crush_choose_arg_map arg_map = choose_args_get(choose_args_index);
+ crush_choose_arg_map arg_map = choose_args_get_with_fallback(
+ choose_args_index);
int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
weight.size(), work, arg_map.args);
if (numrep < 0)
void dump_tunables(Formatter *f) const;
void dump_choose_args(Formatter *f) const;
void list_rules(Formatter *f) const;
- void dump_tree(ostream *out, Formatter *f) const;
- void dump_tree(Formatter *f) const;
+ void list_rules(ostream *ss) const;
+ void dump_tree(ostream *out,
+ Formatter *f,
+ const CrushTreeDumper::name_map_t& ws,
+ bool show_shadow = false) const;
+ void dump_tree(ostream *out, Formatter *f) {
+ dump_tree(out, f, CrushTreeDumper::name_map_t());
+ }
+ void dump_tree(Formatter *f,
+ const CrushTreeDumper::name_map_t& ws) const;
static void generate_test_instances(list<CrushWrapper*>& o);
int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);