#include "include/assert.h"
#include "include/err.h"
#include "include/encoding.h"
-
+#include "include/mempool.h"
#include "common/Mutex.h"
class Formatter;
}
+namespace CrushTreeDumper {
+ typedef mempool::osdmap::map<int64_t,string> name_map_t;
+}
+
WRITE_RAW_ENCODER(crush_rule_mask) // it's all u8's
inline static void encode(const crush_rule_step &s, bufferlist &bl)
using namespace std;
class CrushWrapper {
public:
+ // magic value used by OSDMap for a "default" fallback choose_args, used if
+ // the choose_arg_map passed to do_rule does not exist. if this also
+ // doesn't exist, fall back to canonical weights.
+ enum {
+ DEFAULT_CHOOSE_ARGS = -1
+ };
+
std::map<int32_t, string> type_map; /* bucket/device type names */
std::map<int32_t, string> name_map; /* bucket/device names */
std::map<int32_t, string> rule_name_map;
+
std::map<int32_t, int32_t> class_map; /* item id -> class id */
std::map<int32_t, string> class_name; /* class id -> class name */
std::map<string, int32_t> class_rname; /* class name -> class id */
std::map<int32_t, map<int32_t, int32_t> > class_bucket; /* bucket[id][class] == id */
- std::map<uint64_t, crush_choose_arg_map> choose_args;
+ std::map<int64_t, crush_choose_arg_map> choose_args;
private:
struct crush_map *crush;
/// true if any ruleset has more than 1 rule
bool has_multirule_rulesets() const;
+ /// true if any buckets that aren't straw2
+ bool has_non_straw2_buckets() const;
+
// tunables
void set_tunables_argonaut() {
crush->choose_local_tries = 2;
name_rmap[bn] = a;
}
}
- bool id_has_class(int i) {
- int idout;
- int classout;
- if (split_id_class(i, &idout, &classout) != 0)
- return false;
- return classout != -1;
- }
int split_id_class(int i, int *idout, int *classout) const;
bool class_exists(const string& name) const {
return class_rname.count(name);
}
const char *get_class_name(int i) const {
- std::map<int,string>::const_iterator p = class_name.find(i);
+ auto p = class_name.find(i);
if (p != class_name.end())
return p->second.c_str();
return 0;
}
int get_class_id(const string& name) const {
- std::map<string,int>::const_iterator p = class_rname.find(name);
+ auto p = class_rname.find(name);
if (p != class_rname.end())
return p->second;
else
return -EINVAL;
}
int remove_class_name(const string& name) {
- std::map<string,int>::const_iterator p = class_rname.find(name);
+ auto p = class_rname.find(name);
if (p == class_rname.end())
return -ENOENT;
int class_id = p->second;
- std::map<int,string>::const_iterator q = class_name.find(class_id);
+ auto q = class_name.find(class_id);
if (q == class_name.end())
return -ENOENT;
class_rname.erase(name);
class_name.erase(class_id);
return 0;
}
+
+ int32_t _alloc_class_id() const;
+
int get_or_create_class_id(const string& name) {
int c = get_class_id(name);
if (c < 0) {
- int i = class_name.size();
+ int i = _alloc_class_id();
class_name[i] = name;
class_rname[name] = i;
return i;
class_map[i] = c;
return c;
}
-
+ void get_devices_by_class(const string &name, set<int> *devices) const {
+ assert(devices);
+ devices->clear();
+ if (!class_exists(name)) {
+ return;
+ }
+ auto cid = get_class_id(name);
+ for (auto& p : class_map) {
+ if (p.first >= 0 && p.second == cid) {
+ devices->insert(p.first);
+ }
+ }
+ }
+ void class_remove_item(int i) {
+ auto it = class_map.find(i);
+ if (it == class_map.end()) {
+ return;
+ }
+ class_map.erase(it);
+ }
int can_rename_item(const string& srcname,
const string& dstname,
ostream *ss) const;
ostream *ss);
// rule names
+ int rename_rule(const string& srcname,
+ const string& dstname,
+ ostream *ss);
bool rule_exists(string name) const {
build_rmaps();
return rule_name_rmap.count(name);
if (have_rmaps)
rule_name_rmap[name] = i;
}
+ bool is_shadow_item(int id) const {
+ const char *name = get_item_name(id);
+ return name && !is_valid_crush_name(name);
+ }
/**
*/
void find_roots(set<int>& roots) const;
+
+ /**
+ * find tree roots that contain shadow (device class) items only
+ */
+ void find_shadow_roots(set<int>& roots) const {
+ set<int> all;
+ find_roots(all);
+ for (auto& p: all) {
+ if (is_shadow_item(p)) {
+ roots.insert(p);
+ }
+ }
+ }
+
+ /**
+ * find tree roots that are not shadow (device class) items
+ *
+ * These are parentless nodes in the map that are not shadow
+ * items for device classes.
+ */
+ void find_nonshadow_roots(set<int>& roots) const {
+ set<int> all;
+ find_roots(all);
+ for (auto& p: all) {
+ if (!is_shadow_item(p)) {
+ roots.insert(p);
+ }
+ }
+ }
+
/**
* see if an item is contained within a subtree
*
* FIXME: ambiguous for items that occur multiple times in the map
*/
pair<string,string> get_immediate_parent(int id, int *ret = NULL);
+
int get_immediate_parent_id(int id, int *parent) const;
/**
* when a bucket is in use.
*
* @param item id to remove
- * @param unused true if only unused items should be removed
* @return 0 on success, negative on error
*/
- int remove_root(int item, bool unused);
+ int remove_root(int item);
/**
* remove all instances of an item nested beneath a certain point from the map
return (float)get_item_weight_in_loc(id, loc) / (float)0x10000;
}
+ int validate_weightf(float weight) {
+ uint64_t iweight = weight * 0x10000;
+ if (iweight > std::numeric_limits<int>::max()) {
+ return -EOVERFLOW;
+ }
+ return 0;
+ }
int adjust_item_weight(CephContext *cct, int id, int weight);
int adjust_item_weightf(CephContext *cct, int id, float weight) {
+ int r = validate_weightf(weight);
+ if (r < 0) {
+ return r;
+ }
return adjust_item_weight(cct, id, (int)(weight * (float)0x10000));
}
int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc);
int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map<string,string>& loc) {
+ int r = validate_weightf(weight);
+ if (r < 0) {
+ return r;
+ }
return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc);
}
void reweight(CephContext *cct);
int adjust_subtree_weight(CephContext *cct, int id, int weight);
int adjust_subtree_weightf(CephContext *cct, int id, float weight) {
+ int r = validate_weightf(weight);
+ if (r < 0) {
+ return r;
+ }
return adjust_subtree_weight(cct, id, (int)(weight * (float)0x10000));
}
int get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap);
/* modifiers */
- int add_rule(int len, int ruleset, int type, int minsize, int maxsize, int ruleno) {
+
+ int add_rule(int ruleno, int len, int type, int minsize, int maxsize) {
if (!crush) return -ENOENT;
- crush_rule *n = crush_make_rule(len, ruleset, type, minsize, maxsize);
+ crush_rule *n = crush_make_rule(len, ruleno, type, minsize, maxsize);
assert(n);
ruleno = crush_add_rule(crush, n, ruleno);
return ruleno;
int add_simple_rule(
string name, string root_name, string failure_domain_type,
+ string device_class,
string mode, int rule_type, ostream *err = 0);
/**
*/
int add_simple_rule_at(
string name, string root_name,
- string failure_domain_type, string mode,
+ string failure_domain_type, string device_class, string mode,
int rule_type, int rno, ostream *err = 0);
int remove_rule(int ruleno);
/** buckets **/
-private:
const crush_bucket *get_bucket(int id) const {
if (!crush)
return (crush_bucket *)(-EINVAL);
return (crush_bucket *)(-ENOENT);
return ret;
}
+private:
crush_bucket *get_bucket(int id) {
if (!crush)
return (crush_bucket *)(-EINVAL);
*
* returns the weight of the detached bucket
**/
- int detach_bucket(CephContext *cct, int item){
- if (!crush)
- return (-EINVAL);
-
- if (item >= 0)
- return (-EINVAL);
-
- // check that the bucket that we want to detach exists
- assert(bucket_exists(item));
-
- // get the bucket's weight
- crush_bucket *b = get_bucket(item);
- unsigned bucket_weight = b->weight;
-
- // get where the bucket is located
- pair<string, string> bucket_location = get_immediate_parent(item);
-
- // get the id of the parent bucket
- int parent_id = get_item_id(bucket_location.second);
-
- // get the parent bucket
- crush_bucket *parent_bucket = get_bucket(parent_id);
-
- if (!IS_ERR(parent_bucket)) {
- // zero out the bucket weight
- bucket_adjust_item_weight(cct, parent_bucket, item, 0);
- adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight);
-
- // remove the bucket from the parent
- bucket_remove_item(parent_bucket, item);
- } else if (PTR_ERR(parent_bucket) != -ENOENT) {
- return PTR_ERR(parent_bucket);
- }
-
- // check that we're happy
- int test_weight = 0;
- map<string,string> test_location;
- test_location[ bucket_location.first ] = (bucket_location.second);
-
- bool successful_detach = !(check_item_loc(cct, item, test_location, &test_weight));
- assert(successful_detach);
- assert(test_weight == 0);
-
- return bucket_weight;
- }
+ int detach_bucket(CephContext *cct, int item);
public:
int get_max_buckets() const {
/* modifiers */
int add_bucket(int bucketno, int alg, int hash, int type, int size,
- int *items, int *weights, int *idout) {
- if (alg == 0) {
- alg = get_default_bucket_alg();
- if (alg == 0)
- return -EINVAL;
- }
- crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items, weights);
- assert(b);
- return crush_add_bucket(crush, bucketno, b, idout);
- }
-
+ int *items, int *weights, int *idout);
int bucket_add_item(crush_bucket *bucket, int item, int weight);
int bucket_remove_item(struct crush_bucket *bucket, int item);
int bucket_adjust_item_weight(CephContext *cct, struct crush_bucket *bucket, int item, int weight);
have_uniform_rules = !has_legacy_rulesets();
}
- int update_device_class(CephContext *cct, int id, const string& class_name, const string& name);
- int device_class_clone(int original, int device_class, int *clone);
- bool class_is_in_use(int class_id);
- int populate_classes();
+ int update_device_class(int id, const string& class_name, const string& name, ostream *ss);
+ int remove_device_class(CephContext *cct, int id, ostream *ss);
+ int device_class_clone(
+ int original, int device_class,
+ const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
+ const std::set<int32_t>& used_ids,
+ int *clone,
+ map<int,map<int,vector<int>>> *cmap_item_weight);
+ int rename_class(const string& srcname, const string& dstname);
+ int populate_classes(
+ const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
+ int get_rules_by_class(const string &class_name, set<int> *rules);
+ bool _class_is_dead(int class_id);
+ void cleanup_dead_classes();
int rebuild_roots_with_classes();
/* remove unused roots generated for class devices */
- int trim_roots_with_class(bool unused);
- int cleanup_classes();
+ int trim_roots_with_class();
void start_choose_profile() {
free(crush->choose_tries);
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one to alloc.
*/
- crush->choose_tries = (__u32 *)malloc(sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
+ crush->choose_tries = (__u32 *)calloc(sizeof(*crush->choose_tries),
+ (crush->choose_total_tries + 1));
memset(crush->choose_tries, 0,
sizeof(*crush->choose_tries) * (crush->choose_total_tries + 1));
}
}
}
- bool ruleset_exists(int const ruleset) const {
+ bool ruleset_exists(const int ruleset) const {
for (size_t i = 0; i < crush->max_rules; ++i) {
if (rule_exists(i) && crush->rules[i]->mask.ruleset == ruleset) {
return true;
return result;
}
- crush_choose_arg_map choose_args_get(uint64_t choose_args_index) const {
+ bool have_choose_args(int64_t choose_args_index) const {
+ return choose_args.count(choose_args_index);
+ }
+
+ crush_choose_arg_map choose_args_get_with_fallback(
+ int64_t choose_args_index) const {
+ auto i = choose_args.find(choose_args_index);
+ if (i == choose_args.end()) {
+ i = choose_args.find(DEFAULT_CHOOSE_ARGS);
+ }
+ if (i == choose_args.end()) {
+ crush_choose_arg_map arg_map;
+ arg_map.args = NULL;
+ arg_map.size = 0;
+ return arg_map;
+ } else {
+ return i->second;
+ }
+ }
+ crush_choose_arg_map choose_args_get(int64_t choose_args_index) const {
auto i = choose_args.find(choose_args_index);
if (i == choose_args.end()) {
crush_choose_arg_map arg_map;
}
free(arg_map.args);
}
-
+
+ void create_choose_args(int64_t id, int positions) {
+ if (choose_args.count(id))
+ return;
+ assert(positions);
+ auto &cmap = choose_args[id];
+ cmap.args = (crush_choose_arg*)calloc(sizeof(crush_choose_arg),
+ crush->max_buckets);
+ cmap.size = crush->max_buckets;
+ for (int bidx=0; bidx < crush->max_buckets; ++bidx) {
+ crush_bucket *b = crush->buckets[bidx];
+ auto &carg = cmap.args[bidx];
+ carg.ids = NULL;
+ carg.ids_size = 0;
+ if (b && b->alg == CRUSH_BUCKET_STRAW2) {
+ crush_bucket_straw2 *sb = (crush_bucket_straw2*)b;
+ carg.weight_set_size = positions;
+ carg.weight_set = (crush_weight_set*)calloc(sizeof(crush_weight_set),
+ carg.weight_set_size);
+ // initialize with canonical weights
+ for (int pos = 0; pos < positions; ++pos) {
+ carg.weight_set[pos].size = b->size;
+ carg.weight_set[pos].weights = (__u32*)calloc(4, b->size);
+ for (unsigned i = 0; i < b->size; ++i) {
+ carg.weight_set[pos].weights[i] = sb->item_weights[i];
+ }
+ }
+ } else {
+ carg.weight_set = NULL;
+ carg.weight_set_size = 0;
+ }
+ }
+ }
+
+ void rm_choose_args(int64_t id) {
+ auto p = choose_args.find(id);
+ if (p != choose_args.end()) {
+ destroy_choose_args(p->second);
+ choose_args.erase(p);
+ }
+ }
+
void choose_args_clear() {
for (auto w : choose_args)
destroy_choose_args(w.second);
choose_args.clear();
}
+ // adjust choose_args_map weight, preserving the hierarchical summation
+ // property. used by callers optimizing layouts by tweaking weights.
+ int _choose_args_adjust_item_weight_in_bucket(
+ CephContext *cct,
+ crush_choose_arg_map cmap,
+ int bucketid,
+ int id,
+ const vector<int>& weight,
+ ostream *ss);
+ int choose_args_adjust_item_weight(
+ CephContext *cct,
+ crush_choose_arg_map cmap,
+ int id, const vector<int>& weight,
+ ostream *ss);
+ int choose_args_adjust_item_weightf(
+ CephContext *cct,
+ crush_choose_arg_map cmap,
+ int id, const vector<double>& weightf,
+ ostream *ss) {
+ vector<int> weight(weightf.size());
+ for (unsigned i = 0; i < weightf.size(); ++i) {
+ weight[i] = (int)(weightf[i] * (float)0x10000);
+ }
+ return choose_args_adjust_item_weight(cct, cmap, id, weight, ss);
+ }
+
+ int get_choose_args_positions(crush_choose_arg_map cmap) {
+ // infer positions from other buckets
+ for (unsigned j = 0; j < cmap.size; ++j) {
+ if (cmap.args[j].weight_set_size) {
+ return cmap.args[j].weight_set_size;
+ }
+ }
+ return 1;
+ }
+
template<typename WeightVector>
void do_rule(int rule, int x, vector<int>& out, int maxout,
const WeightVector& weight,
int rawout[maxout];
char work[crush_work_size(crush, maxout)];
crush_init_workspace(crush, work);
- crush_choose_arg_map arg_map = choose_args_get(choose_args_index);
+ crush_choose_arg_map arg_map = choose_args_get_with_fallback(
+ choose_args_index);
int numrep = crush_do_rule(crush, rule, x, rawout, maxout, &weight[0],
weight.size(), work, arg_map.args);
if (numrep < 0)
void dump_tunables(Formatter *f) const;
void dump_choose_args(Formatter *f) const;
void list_rules(Formatter *f) const;
- void dump_tree(ostream *out, Formatter *f) const;
- void dump_tree(Formatter *f) const;
+ void list_rules(ostream *ss) const;
+ void dump_tree(ostream *out,
+ Formatter *f,
+ const CrushTreeDumper::name_map_t& ws,
+ bool show_shadow = false) const;
+ void dump_tree(ostream *out, Formatter *f) {
+ dump_tree(out, f, CrushTreeDumper::name_map_t());
+ }
+ void dump_tree(Formatter *f,
+ const CrushTreeDumper::name_map_t& ws) const;
static void generate_test_instances(list<CrushWrapper*>& o);
int get_osd_pool_default_crush_replicated_ruleset(CephContext *cct);