#include "common/debug.h"
#include "common/Formatter.h"
#include "common/errno.h"
+#include "common/TextTable.h"
#include "include/stringify.h"
#include "CrushWrapper.h"
#define dout_subsys ceph_subsys_crush
+// Report whether any rule is stored in a slot whose index differs from the
+// ruleset number recorded in that rule's mask.
+bool CrushWrapper::has_legacy_rulesets() const
+{
+  for (unsigned slot = 0; slot < crush->max_rules; ++slot) {
+    const crush_rule *rule = crush->rules[slot];
+    if (!rule)
+      continue;  // empty slot
+    if (rule->mask.ruleset != slot)
+      return true;
+  }
+  return false;
+}
+
+// Re-index the rule table so that every rule lives in the slot equal to its
+// mask.ruleset value.
+//
+// Returns 0 on success, -EINVAL if two rules carry the same ruleset (the
+// existing table is left untouched), or -ENOMEM if the replacement table
+// cannot be allocated.
+int CrushWrapper::renumber_rules_by_ruleset()
+{
+  // the new table must be able to hold the highest ruleset in use
+  int max_ruleset = 0;
+  for (unsigned i = 0; i < crush->max_rules; ++i) {
+    const crush_rule *r = crush->rules[i];
+    if (r && r->mask.ruleset >= max_ruleset) {
+      max_ruleset = r->mask.ruleset + 1;
+    }
+  }
+
+  // calloc(count, size) lets the allocator do the overflow check for us
+  crush_rule **newrules =
+    static_cast<crush_rule**>(calloc(max_ruleset, sizeof(crush_rule*)));
+  if (!newrules && max_ruleset > 0) {
+    // allocation failure; bail out before the loop below dereferences it
+    return -ENOMEM;
+  }
+
+  for (unsigned i = 0; i < crush->max_rules; ++i) {
+    crush_rule *r = crush->rules[i];
+    if (!r)
+      continue;
+    if (newrules[r->mask.ruleset]) {
+      // collision, we can't do it.
+      free(newrules);
+      return -EINVAL;
+    }
+    newrules[r->mask.ruleset] = r;
+  }
+
+  // success, swap!
+  free(crush->rules);
+  crush->rules = newrules;
+  crush->max_rules = max_ruleset;
+  return 0;
+}
+
+// Report whether at least two distinct rules share the same mask.ruleset.
+bool CrushWrapper::has_multirule_rulesets() const
+{
+  const unsigned limit = crush->max_rules;
+  for (unsigned first = 0; first < limit; ++first) {
+    const crush_rule *lhs = crush->rules[first];
+    if (lhs) {
+      // compare against every later, non-empty slot
+      for (unsigned second = first + 1; second < limit; ++second) {
+        const crush_rule *rhs = crush->rules[second];
+        if (rhs && lhs->mask.ruleset == rhs->mask.ruleset)
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Report whether any allocated bucket uses an algorithm other than
+// CRUSH_BUCKET_STRAW2.
+bool CrushWrapper::has_non_straw2_buckets() const
+{
+  for (int idx = 0; idx < crush->max_buckets; ++idx) {
+    const crush_bucket *bucket = crush->buckets[idx];
+    if (bucket && bucket->alg != CRUSH_BUCKET_STRAW2)
+      return true;
+  }
+  return false;
+}
+
bool CrushWrapper::has_v2_rules() const
{
for (unsigned i=0; i<crush->max_rules; i++) {
return false;
}
-bool CrushWrapper::has_chooseargs() const
+bool CrushWrapper::has_choose_args() const
{
return !choose_args.empty();
}
-bool CrushWrapper::has_incompat_chooseargs() const
+bool CrushWrapper::has_incompat_choose_args() const
{
- // FIXME: if the chooseargs all have 1 position *and* do not remap IDs then
- // we can fabricate a compatible crush map for legacy clients by swapping the
- // choose_args weights in for the real weights. until then,
- return has_chooseargs();
+ if (choose_args.empty())
+ return false;
+ if (choose_args.size() > 1)
+ return true;
+ if (choose_args.begin()->first != DEFAULT_CHOOSE_ARGS)
+ return true;
+ crush_choose_arg_map arg_map = choose_args.begin()->second;
+ for (__u32 i = 0; i < arg_map.size; i++) {
+ crush_choose_arg *arg = &arg_map.args[i];
+ if (arg->weight_set_size == 0 &&
+ arg->ids_size == 0)
+ continue;
+ if (arg->weight_set_size != 1)
+ return true;
+ if (arg->ids_size != 0)
+ return true;
+ }
+ return false;
}
int CrushWrapper::split_id_class(int i, int *idout, int *classout) const
crush_remove_bucket(crush, t);
if (class_bucket.count(item) != 0)
class_bucket.erase(item);
+ class_remove_item(item);
}
if ((item >= 0 || !unlink_only) && name_map.count(item)) {
ldout(cct, 5) << "_maybe_remove_last_instance removing name for item " << item << dendl;
name_map.erase(item);
have_rmaps = false;
+ if (item >= 0 && !unlink_only) {
+ class_remove_item(item);
+ }
}
+ rebuild_roots_with_classes();
return true;
}
return 0;
crush_bucket *b = get_bucket(item);
- if (IS_ERR(b))
- return -ENOENT;
+ if (IS_ERR(b)) {
+ // should be idempotent
+ // e.g.: we use 'crush link' to link same host into
+ // different roots, which as a result can cause different
+ // shadow trees reference same hosts too. This means
+ // we may need to destroy the same buckets (hosts, racks, etc.)
+ // multiple times during rebuilding all shadow trees.
+ return 0;
+ }
for (unsigned n = 0; n < b->size; n++) {
if (b->items[n] >= 0)
}
if (class_bucket.count(item) != 0)
class_bucket.erase(item);
+ class_remove_item(item);
return 0;
}
int CrushWrapper::remove_item(CephContext *cct, int item, bool unlink_only)
{
- if (choose_args.size() > 0) {
- ldout(cct, 1) << "remove_item not implemented when choose_args is not empty" << dendl;
- return -EDOM;
- }
-
- ldout(cct, 5) << "remove_item " << item << (unlink_only ? " unlink_only":"") << dendl;
+ ldout(cct, 5) << "remove_item " << item
+ << (unlink_only ? " unlink_only":"") << dendl;
int ret = -ENOENT;
if (item < 0 && !unlink_only) {
crush_bucket *t = get_bucket(item);
if (IS_ERR(t)) {
- ldout(cct, 1) << "remove_item bucket " << item << " does not exist" << dendl;
+ ldout(cct, 1) << "remove_item bucket " << item << " does not exist"
+ << dendl;
return -ENOENT;
}
if (id == item) {
ldout(cct, 5) << "remove_item removing item " << item
<< " from bucket " << b->id << dendl;
- crush_bucket_remove_item(crush, b, item);
+ for (auto& p : choose_args) {
+ // weight down each weight-set to 0 before we remove the item
+ vector<int> weightv(get_choose_args_positions(p.second), 0);
+ choose_args_adjust_item_weight(cct, p.second, item, weightv, nullptr);
+ }
+ bucket_remove_item(b, item);
adjust_item_weight(cct, b->id, b->weight);
ret = 0;
}
return false;
}
-int CrushWrapper::_remove_item_under(CephContext *cct, int item, int ancestor, bool unlink_only)
+int CrushWrapper::_remove_item_under(
+ CephContext *cct, int item, int ancestor, bool unlink_only)
{
ldout(cct, 5) << "_remove_item_under " << item << " under " << ancestor
<< (unlink_only ? " unlink_only":"") << dendl;
for (unsigned i=0; i<b->size; ++i) {
int id = b->items[i];
if (id == item) {
- ldout(cct, 5) << "_remove_item_under removing item " << item << " from bucket " << b->id << dendl;
- crush_bucket_remove_item(crush, b, item);
+ ldout(cct, 5) << "_remove_item_under removing item " << item
+ << " from bucket " << b->id << dendl;
+ bucket_remove_item(b, item);
+ for (auto& p : choose_args) {
+ // weight down each weight-set to 0 before we remove the item
+ vector<int> weightv(get_choose_args_positions(p.second), 0);
+ _choose_args_adjust_item_weight_in_bucket(
+ cct, p.second, b->id, item, weightv, nullptr);
+ }
adjust_item_weight(cct, b->id, b->weight);
ret = 0;
} else if (id < 0) {
return ret;
}
-int CrushWrapper::remove_item_under(CephContext *cct, int item, int ancestor, bool unlink_only)
+int CrushWrapper::remove_item_under(
+ CephContext *cct, int item, int ancestor, bool unlink_only)
{
ldout(cct, 5) << "remove_item_under " << item << " under " << ancestor
<< (unlink_only ? " unlink_only":"") << dendl;
return 0;
}
+// Render the location of `id` as a comma-separated list of "first=second"
+// pairs, in the reverse of the order produced by get_full_location_ordered().
+string CrushWrapper::get_full_location_ordered_string(int id)
+{
+  vector<pair<string, string> > ordered;
+  get_full_location_ordered(id, ordered);
+  reverse(begin(ordered), end(ordered));
+
+  string joined;
+  for (auto it = ordered.begin(); it != ordered.end(); ++it) {
+    // separator goes between entries, never after the last one
+    if (it != ordered.begin())
+      joined += ",";
+    joined += it->first;
+    joined += "=";
+    joined += it->second;
+  }
+  return joined;
+}
map<int, string> CrushWrapper::get_parent_hierarchy(int id)
{
return b->size;
}
+// Append every leaf (non-negative id) reachable from `id` to *leaves, walking
+// nested buckets depth-first in item order.
+//
+// Returns 0 on success, or -ENOENT if a bucket id encountered on the way
+// cannot be resolved by get_bucket().
+int CrushWrapper::_get_leaves(int id, list<int> *leaves)
+{
+  assert(leaves);
+
+  // a non-negative id is itself a leaf
+  if (id >= 0) {
+    leaves->push_back(id);
+    return 0;
+  }
+
+  crush_bucket *bucket = get_bucket(id);
+  if (IS_ERR(bucket))
+    return -ENOENT;
+
+  // the recursive call handles both child leaves and child buckets
+  for (unsigned pos = 0; pos < bucket->size; ++pos) {
+    const int r = _get_leaves(bucket->items[pos], leaves);
+    if (r < 0)
+      return r;
+  }
+  return 0;
+}
-int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string name,
- const map<string,string>& loc) // typename -> bucketname
+int CrushWrapper::get_leaves(const string &name, set<int> *leaves)
{
- if (choose_args.size() > 0) {
- ldout(cct, 1) << "insert_item not implemented when choose_args is not empty" << dendl;
- return -EDOM;
+ assert(leaves);
+ leaves->clear();
+
+ if (!name_exists(name)) {
+ return -ENOENT;
+ }
+
+ int id = get_item_id(name);
+ if (id >= 0) {
+ // already leaf
+ leaves->insert(id);
+ return 0;
+ }
+
+ list<int> unordered;
+ int r = _get_leaves(id, &unordered);
+ if (r < 0) {
+ return r;
}
+ for (auto &p : unordered) {
+ leaves->insert(p);
+ }
+
+ return 0;
+}
+
+int CrushWrapper::insert_item(
+ CephContext *cct, int item, float weight, string name,
+ const map<string,string>& loc) // typename -> bucketname
+{
ldout(cct, 5) << "insert_item item " << item << " weight " << weight
<< " name " << name << " loc " << loc << dendl;
if (!is_valid_crush_loc(cct, loc))
return -EINVAL;
+ int r = validate_weightf(weight);
+ if (r < 0) {
+ return r;
+ }
+
if (name_exists(name)) {
if (get_item_id(name) != item) {
ldout(cct, 10) << "device name '" << name << "' already exists as id "
int cur = item;
- // create locations if locations don't exist and add child in location with 0 weight
- // the more detail in the insert_item method declaration in CrushWrapper.h
- for (map<int,string>::iterator p = type_map.begin(); p != type_map.end(); ++p) {
+ // create locations if they don't exist and add the child to each
+ // location with 0 weight; more detail is in the insert_item method
+ // declaration in CrushWrapper.h
+ for (auto p = type_map.begin(); p != type_map.end(); ++p) {
// ignore device type
if (p->first == 0)
continue;
// skip types that are unspecified
map<string,string>::const_iterator q = loc.find(p->second);
if (q == loc.end()) {
- ldout(cct, 2) << "warning: did not specify location for '" << p->second << "' level (levels are "
+ ldout(cct, 2) << "warning: did not specify location for '"
+ << p->second << "' level (levels are "
<< type_map << ")" << dendl;
continue;
}
int r = add_bucket(0, 0,
CRUSH_HASH_DEFAULT, p->first, 1, &cur, &empty, &newid);
if (r < 0) {
- ldout(cct, 1) << "add_bucket failure error: " << cpp_strerror(r) << dendl;
+ ldout(cct, 1) << "add_bucket failure error: " << cpp_strerror(r)
+ << dendl;
return r;
}
set_item_name(newid, q->second);
// check that we aren't creating a cycle.
if (subtree_contains(id, cur)) {
- ldout(cct, 1) << "insert_item item " << cur << " already exists beneath " << id << dendl;
+ ldout(cct, 1) << "insert_item item " << cur << " already exists beneath "
+ << id << dendl;
return -EINVAL;
}
ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight
<< " to bucket " << id << dendl;
- int r = crush_bucket_add_item(crush, b, cur, 0);
+ int r = bucket_add_item(b, cur, 0);
assert (!r);
break;
}
// adjust the item's weight in location
- if(adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) {
+ if (adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) {
if (item >= crush->max_devices) {
crush->max_devices = item + 1;
- ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices << dendl;
+ ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices
+ << dendl;
+ }
+ r = rebuild_roots_with_classes();
+ if (r < 0) {
+ ldout(cct, 0) << __func__ << " unable to rebuild roots with classes: "
+ << cpp_strerror(r) << dendl;
+ return r;
}
return 0;
}
- ldout(cct, 1) << "error: didn't find anywhere to add item " << item << " in " << loc << dendl;
+ ldout(cct, 1) << "error: didn't find anywhere to add item " << item
+ << " in " << loc << dendl;
return -EINVAL;
}
-int CrushWrapper::move_bucket(CephContext *cct, int id, const map<string,string>& loc)
-{
- if (choose_args.size() > 0) {
- ldout(cct, 1) << "move_bucket not implemented when choose_args is not empty" << dendl;
- return -EDOM;
- }
+int CrushWrapper::move_bucket(
+ CephContext *cct, int id, const map<string,string>& loc)
+{
// sorry this only works for buckets
if (id >= 0)
return -EINVAL;
return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
}
-int CrushWrapper::link_bucket(CephContext *cct, int id, const map<string,string>& loc)
+int CrushWrapper::detach_bucket(CephContext *cct, int item)
{
- if (choose_args.size() > 0) {
- ldout(cct, 1) << "link_bucket not implemented when choose_args is not empty" << dendl;
- return -EDOM;
+ if (!crush)
+ return (-EINVAL);
+
+ if (item >= 0)
+ return (-EINVAL);
+
+ // check that the bucket that we want to detach exists
+ assert(bucket_exists(item));
+
+ // get the bucket's weight
+ crush_bucket *b = get_bucket(item);
+ unsigned bucket_weight = b->weight;
+
+ // get where the bucket is located
+ pair<string, string> bucket_location = get_immediate_parent(item);
+
+ // get the id of the parent bucket
+ int parent_id = get_item_id(bucket_location.second);
+
+ // get the parent bucket
+ crush_bucket *parent_bucket = get_bucket(parent_id);
+
+ if (!IS_ERR(parent_bucket)) {
+ // zero out the bucket weight
+ bucket_adjust_item_weight(cct, parent_bucket, item, 0);
+ adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight);
+ for (auto& p : choose_args) {
+ // weight down each weight-set to 0 before we remove the item
+ vector<int> weightv(get_choose_args_positions(p.second), 0);
+ choose_args_adjust_item_weight(cct, p.second, item, weightv, nullptr);
+ }
+
+ // remove the bucket from the parent
+ bucket_remove_item(parent_bucket, item);
+ } else if (PTR_ERR(parent_bucket) != -ENOENT) {
+ return PTR_ERR(parent_bucket);
}
+ // check that we're happy
+ int test_weight = 0;
+ map<string,string> test_location;
+ test_location[ bucket_location.first ] = (bucket_location.second);
+
+ bool successful_detach = !(check_item_loc(cct, item, test_location,
+ &test_weight));
+ assert(successful_detach);
+ assert(test_weight == 0);
+
+ return bucket_weight;
+}
+
+// Exchange the contents (items and per-item weights), the weights and the
+// names of buckets `src` and `dst`.
+//
+// Returns 0 on success, or -EINVAL if either id is non-negative or does not
+// exist.
+int CrushWrapper::swap_bucket(CephContext *cct, int src, int dst)
+{
+  const bool both_negative = (src < 0 && dst < 0);
+  if (!both_negative)
+    return -EINVAL;
+  if (!item_exists(src) || !item_exists(dst))
+    return -EINVAL;
+
+  crush_bucket *first = get_bucket(src);
+  crush_bucket *second = get_bucket(dst);
+  const unsigned first_weight = first->weight;
+  const unsigned second_weight = second->weight;
+
+  // swap weights
+  adjust_item_weight(cct, first->id, second_weight);
+  adjust_item_weight(cct, second->id, first_weight);
+
+  // swap items: park the first bucket's items, move the second bucket's
+  // items across, then re-home the parked ones
+  const unsigned first_count = first->size;
+  const unsigned second_count = second->size;
+  map<int,unsigned> parked;
+  for (unsigned n = 0; n < first_count; ++n) {
+    // always take the head; each removal shifts the rest down
+    const int item = first->items[0];
+    const int item_weight = crush_get_bucket_item_weight(first, 0);
+    parked[item] = item_weight;
+    bucket_remove_item(first, item);
+  }
+  assert(first->size == 0);
+  assert(second->size == second_count);
+
+  for (unsigned n = 0; n < second_count; ++n) {
+    const int item = second->items[0];
+    const int item_weight = crush_get_bucket_item_weight(second, 0);
+    bucket_remove_item(second, item);
+    bucket_add_item(first, item, item_weight);
+  }
+  assert(first->size == second_count);
+  assert(second->size == 0);
+
+  for (const auto& entry : parked) {
+    bucket_add_item(second, entry.first, entry.second);
+  }
+  assert(first->size == second_count);
+  assert(second->size == first_count);
+
+  // swap names
+  swap_names(src, dst);
+  return 0;
+}
+
+int CrushWrapper::link_bucket(
+ CephContext *cct, int id, const map<string,string>& loc)
+{
// sorry this only works for buckets
if (id >= 0)
return -EINVAL;
return insert_item(cct, id, bucket_weight / (float)0x10000, id_name, loc);
}
-int CrushWrapper::create_or_move_item(CephContext *cct, int item, float weight, string name,
- const map<string,string>& loc) // typename -> bucketname
+int CrushWrapper::create_or_move_item(
+ CephContext *cct, int item, float weight, string name,
+ const map<string,string>& loc) // typename -> bucketname
{
- if (choose_args.size() > 0) {
- ldout(cct, 1) << "create_or_move_item not implemented when choose_args is not empty" << dendl;
- return -EDOM;
- }
-
int ret = 0;
int old_iweight;
return -EINVAL;
if (check_item_loc(cct, item, loc, &old_iweight)) {
- ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc << dendl;
+ ldout(cct, 5) << "create_or_move_item " << item << " already at " << loc
+ << dendl;
} else {
if (_search_item_exists(item)) {
weight = get_item_weightf(item);
- ldout(cct, 10) << "create_or_move_item " << item << " exists with weight " << weight << dendl;
+ ldout(cct, 10) << "create_or_move_item " << item
+ << " exists with weight " << weight << dendl;
remove_item(cct, item, true);
}
- ldout(cct, 5) << "create_or_move_item adding " << item << " weight " << weight
+ ldout(cct, 5) << "create_or_move_item adding " << item
+ << " weight " << weight
<< " at " << loc << dendl;
ret = insert_item(cct, item, weight, name, loc);
if (ret == 0)
return ret;
}
-int CrushWrapper::update_item(CephContext *cct, int item, float weight, string name,
- const map<string,string>& loc) // typename -> bucketname
+int CrushWrapper::update_item(
+ CephContext *cct, int item, float weight, string name,
+ const map<string,string>& loc) // typename -> bucketname
{
- if (choose_args.size() > 0) {
- ldout(cct, 1) << "update_item not implemented when choose_args is not empty" << dendl;
- return -EDOM;
- }
-
ldout(cct, 5) << "update_item item " << item << " weight " << weight
<< " name " << name << " loc " << loc << dendl;
int ret = 0;
if (!is_valid_crush_loc(cct, loc))
return -EINVAL;
+ ret = validate_weightf(weight);
+ if (ret < 0) {
+ return ret;
+ }
+
// compare quantized (fixed-point integer) weights!
int iweight = (int)(weight * (float)0x10000);
int old_iweight;
ldout(cct, 5) << "update_item " << item << " already at " << loc << dendl;
if (old_iweight != iweight) {
ldout(cct, 5) << "update_item " << item << " adjusting weight "
- << ((float)old_iweight/(float)0x10000) << " -> " << weight << dendl;
+ << ((float)old_iweight/(float)0x10000) << " -> " << weight
+ << dendl;
adjust_item_weight_in_loc(cct, item, iweight, loc);
ret = 1;
}
if (get_item_name(item) != name) {
- ldout(cct, 5) << "update_item setting " << item << " name to " << name << dendl;
+ ldout(cct, 5) << "update_item setting " << item << " name to " << name
+ << dendl;
set_item_name(item, name);
ret = 1;
}
continue;
for (unsigned i = 0; i < b->size; i++) {
if (b->items[i] == id) {
- int diff = crush_bucket_adjust_item_weight(crush, b, id, weight);
- ldout(cct, 5) << "adjust_item_weight " << id << " diff " << diff << " in bucket " << bidx << dendl;
+ int diff = bucket_adjust_item_weight(cct, b, id, weight);
+ ldout(cct, 5) << "adjust_item_weight " << id << " diff " << diff
+ << " in bucket " << bidx << dendl;
adjust_item_weight(cct, -1 - bidx, b->weight);
changed++;
}
int CrushWrapper::adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map<string,string>& loc)
{
- ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight << " in " << loc << dendl;
+ ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight
+ << " in " << loc << dendl;
int changed = 0;
- for (map<string,string>::const_iterator l = loc.begin(); l != loc.end(); ++l) {
+ for (auto l = loc.begin(); l != loc.end(); ++l) {
int bid = get_item_id(l->second);
if (!bucket_exists(bid))
continue;
crush_bucket *b = get_bucket(bid);
for (unsigned int i = 0; i < b->size; i++) {
if (b->items[i] == id) {
- int diff = crush_bucket_adjust_item_weight(crush, b, id, weight);
- ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff << " in bucket " << bid << dendl;
+ int diff = bucket_adjust_item_weight(cct, b, id, weight);
+ ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff
+ << " in bucket " << bid << dendl;
adjust_item_weight(cct, bid, b->weight);
changed++;
}
for (unsigned i=0; i<b->size; ++i) {
int n = b->items[i];
if (n >= 0) {
- crush_bucket_adjust_item_weight(crush, b, n, weight);
+ bucket_adjust_item_weight(cct, b, n, weight);
++changed;
++local_changed;
} else {
crush_bucket *b = crush->buckets[bidx];
if (b == 0)
continue;
+ if (is_shadow_item(b->id))
+ continue;
for (unsigned i = 0; i < b->size; i++)
if (b->items[i] == id) {
string parent_id = name_map[b->id];
crush_bucket *b = crush->buckets[bidx];
if (b == 0)
continue;
+ if (is_shadow_item(b->id))
+ continue;
for (unsigned i = 0; i < b->size; i++) {
if (b->items[i] == id) {
*parent = b->id;
return -ENOENT;
}
-bool CrushWrapper::class_is_in_use(int class_id)
+int CrushWrapper::get_parent_of_type(int item, int type) const
{
- for (auto &i : class_bucket)
- for (auto &j : i.second)
- if (j.first == class_id)
- return true;
-
- for (auto &i : class_map)
- if (i.second == class_id)
- return true;
-
- return false;
+ do {
+ int r = get_immediate_parent_id(item, &item);
+ if (r < 0) {
+ return 0;
+ }
+ } while (get_bucket_type(item) != type);
+ return item;
}
-int CrushWrapper::populate_classes()
+int CrushWrapper::populate_classes(
+ const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket)
{
+ // build set of previous used shadow ids
+ set<int32_t> used_ids;
+ for (auto& p : old_class_bucket) {
+ for (auto& q : p.second) {
+ used_ids.insert(q.second);
+ }
+ }
set<int> roots;
- find_roots(roots);
+ find_nonshadow_roots(roots);
for (auto &r : roots) {
if (r >= 0)
continue;
- if (id_has_class(r))
- continue;
for (auto &c : class_name) {
int clone;
- int res = device_class_clone(r, c.first, &clone);
+ int res = device_class_clone(r, c.first, old_class_bucket, used_ids,
+ &clone);
if (res < 0)
return res;
}
return 0;
}
-int CrushWrapper::cleanup_classes()
-{
- return trim_roots_with_class(true);
-}
-
int CrushWrapper::trim_roots_with_class(bool unused)
{
set<int> roots;
- find_roots(roots);
+ find_shadow_roots(roots);
for (auto &r : roots) {
if (r >= 0)
continue;
- if (!id_has_class(r))
- continue;
int res = remove_root(r, unused);
if (res)
return res;
return 0;
}
+// Pick a class id that is not currently a key of class_name.
+//
+// Normally this is "highest id in use + 1". When that would leave the
+// non-negative int32 range, a random starting point is chosen and the whole
+// range [0, INT32_MAX] is searched for a free id. All arithmetic is done
+// without relying on signed overflow, which is undefined behaviour.
+//
+// Asserts if every id is taken; the trailing return only exists so that the
+// function is well-formed when assertions are compiled out.
+int32_t CrushWrapper::_alloc_class_id() const {
+  if (class_name.empty()) {
+    return 0;
+  }
+  const int32_t max_id = numeric_limits<int32_t>::max();
+  const int32_t highest = class_name.rbegin()->first;
+  // highest + 1 is representable and non-negative exactly in this range
+  if (highest >= -1 && highest < max_id) {
+    return highest + 1;
+  }
+  // wrapped, pick a random start and do exhaustive search
+  const uint32_t upperlimit = static_cast<uint32_t>(max_id) + 1;
+  int32_t class_id =
+    static_cast<int32_t>(static_cast<uint32_t>(rand()) % upperlimit);
+  const int32_t start = class_id;
+  do {
+    if (!class_name.count(class_id)) {
+      return class_id;
+    }
+    // advance with explicit wrap-around to 0
+    class_id = (class_id == max_id) ? 0 : class_id + 1;
+  } while (class_id != start);
+  assert(0 == "no available class id");
+  return -ENOSPC;
+}
+
void CrushWrapper::reweight(CephContext *cct)
{
set<int> roots;
}
}
-int CrushWrapper::add_simple_ruleset_at(string name, string root_name,
- string failure_domain_name,
- string mode, int rule_type,
- int rno, ostream *err)
+int CrushWrapper::add_simple_rule_at(
+ string name, string root_name,
+ string failure_domain_name,
+ string device_class,
+ string mode, int rule_type,
+ int rno,
+ ostream *err)
{
if (rule_exists(name)) {
if (err)
return -EINVAL;
}
}
+ if (device_class.size()) {
+ if (!class_exists(device_class)) {
+ if (err)
+ *err << "device class " << device_class << " does not exist";
+ return -EINVAL;
+ }
+ int c = get_class_id(device_class);
+ if (class_bucket.count(root) == 0 ||
+ class_bucket[root].count(c) == 0) {
+ if (err)
+ *err << "root " << root_name << " has no devices with class "
+ << device_class;
+ return -EINVAL;
+ }
+ root = class_bucket[root][c];
+ }
if (mode != "firstn" && mode != "indep") {
if (err)
*err << "unknown mode " << mode;
return rno;
}
-int CrushWrapper::add_simple_ruleset(string name, string root_name,
- string failure_domain_name,
- string mode, int rule_type,
- ostream *err)
+int CrushWrapper::add_simple_rule(
+ string name, string root_name,
+ string failure_domain_name,
+ string device_class,
+ string mode, int rule_type,
+ ostream *err)
{
- return add_simple_ruleset_at(name, root_name, failure_domain_name, mode,
- rule_type, -1, err);
+ return add_simple_rule_at(name, root_name, failure_domain_name, device_class,
+ mode,
+ rule_type, -1, err);
}
int CrushWrapper::get_rule_weight_osd_map(unsigned ruleno, map<int,float> *pmap)
crush_rule *rule = crush->rules[ruleno];
// build a weight map for each TAKE in the rule, and then merge them
+
+ // FIXME: if there are multiple takes that place a different number of
+ // objects we do not take that into account. (Also, note that doing this
+ // right is also a function of the pool, since the crush rule
+ // might choose 2 + choose 2 but pool size may only be 3.)
for (unsigned i=0; i<rule->len; ++i) {
map<int,float> m;
float sum = 0;
return 0;
}
-int CrushWrapper::update_device_class(CephContext *cct, int id, const string& class_name, const string& name)
+int CrushWrapper::bucket_adjust_item_weight(CephContext *cct, crush_bucket *bucket, int item, int weight)
{
- int class_id = get_class_id(class_name);
- if (class_id < 0) {
- ldout(cct, 0) << "update_device_class class " << class_name << " does not exist " << dendl;
- return -ENOENT;
+ if (cct->_conf->osd_crush_update_weight_set) {
+ unsigned position;
+ for (position = 0; position < bucket->size; position++)
+ if (bucket->items[position] == item)
+ break;
+ assert(position != bucket->size);
+ for (auto w : choose_args) {
+ crush_choose_arg_map arg_map = w.second;
+ crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
+ for (__u32 j = 0; j < arg->weight_set_size; j++) {
+ crush_weight_set *weight_set = &arg->weight_set[j];
+ weight_set->weights[position] = weight;
+ }
+ }
}
+ return crush_bucket_adjust_item_weight(crush, bucket, item, weight);
+}
+
+// Create a bucket, register it in the crush map and give it an entry in
+// every choose_args map.
+//
+// bucketno  requested bucket id, passed through to crush_add_bucket()
+// alg       bucket algorithm; 0 selects get_default_bucket_alg()
+// hash/type/size/items/weights  forwarded to crush_make_bucket()
+// idout     if non-null, receives the id of the new bucket on success
+//
+// Returns the result of crush_add_bucket(), or -EINVAL when no bucket
+// algorithm can be determined.
+int CrushWrapper::add_bucket(
+  int bucketno, int alg, int hash, int type, int size,
+  int *items, int *weights, int *idout)
+{
+  if (alg == 0) {
+    alg = get_default_bucket_alg();
+    if (alg == 0)
+      return -EINVAL;
+  }
+  crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items,
+                                      weights);
+  assert(b);
+
+  int id = 0;
+  int r = crush_add_bucket(crush, bucketno, b, &id);
+  if (r < 0) {
+    // no id was assigned, so there is nothing to index choose_args with.
+    // NOTE(review): `b` is not released on this path (it was not in the
+    // original either) -- confirm who owns it when crush_add_bucket fails.
+    return r;
+  }
+  if (idout)
+    *idout = id;
+
+  // choose_args arrays are indexed by bucket position (-1 - id), exactly
+  // like crush->buckets; the raw id must not be used as an index.
+  const int pos = -1 - id;
+  for (auto& p : choose_args) {
+    crush_choose_arg_map& cmap = p.second;
+    if (cmap.args) {
+      if ((int)cmap.size <= pos) {
+        // grow the array and zero the newly added tail
+        cmap.args = static_cast<crush_choose_arg*>(realloc(
+          cmap.args,
+          sizeof(crush_choose_arg) * (pos + 1)));
+        assert(cmap.args);
+        memset(&cmap.args[cmap.size], 0,
+               sizeof(crush_choose_arg) * (pos + 1 - cmap.size));
+        cmap.size = pos + 1;
+      }
+    } else {
+      cmap.args = static_cast<crush_choose_arg*>(
+        calloc(pos + 1, sizeof(crush_choose_arg)));
+      assert(cmap.args);
+      cmap.size = pos + 1;
+    }
+    if (size > 0) {
+      // seed every position's weight set with the bucket's real weights
+      int positions = get_choose_args_positions(cmap);
+      crush_choose_arg& carg = cmap.args[pos];
+      // one crush_weight_set per position (not per item)
+      carg.weight_set = static_cast<crush_weight_set*>(
+        calloc(positions, sizeof(crush_weight_set)));
+      carg.weight_set_size = positions;
+      for (int ppos = 0; ppos < positions; ++ppos) {
+        carg.weight_set[ppos].weights =
+          static_cast<__u32*>(calloc(size, sizeof(__u32)));
+        carg.weight_set[ppos].size = size;
+        for (int bpos = 0; bpos < size; ++bpos) {
+          carg.weight_set[ppos].weights[bpos] = weights[bpos];
+        }
+      }
+    }
+  }
+  return r;
+}
+
+// Append `item` with `weight` to `bucket`, first extending the bucket's
+// entry in every choose_args map (each weight set, and the ids array when
+// one is present) by one slot so they stay the same length as the bucket.
+//
+// Returns the result of crush_bucket_add_item().
+int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
+{
+  const __u32 new_size = bucket->size + 1;
+  // choose_args arrays are indexed by bucket position
+  const int pos = -1 - bucket->id;
+  for (auto& w : choose_args) {
+    // work on the stored map directly instead of copying each entry
+    crush_choose_arg_map& arg_map = w.second;
+    if (!arg_map.args || pos < 0 || (__u32)pos >= arg_map.size) {
+      // this map has no entry for the bucket (e.g. the bucket has not been
+      // registered with crush_add_bucket yet); nothing to keep in sync
+      continue;
+    }
+    crush_choose_arg *arg = &arg_map.args[pos];
+    for (__u32 j = 0; j < arg->weight_set_size; j++) {
+      crush_weight_set *weight_set = &arg->weight_set[j];
+      weight_set->weights = static_cast<__u32*>(
+        realloc(weight_set->weights, new_size * sizeof(__u32)));
+      assert(weight_set->size + 1 == new_size);
+      weight_set->weights[weight_set->size] = weight;
+      weight_set->size = new_size;
+    }
+    if (arg->ids_size) {
+      arg->ids = static_cast<__s32*>(
+        realloc(arg->ids, new_size * sizeof(__s32)));
+      assert(arg->ids_size + 1 == new_size);
+      arg->ids[arg->ids_size] = item;
+      arg->ids_size = new_size;
+    }
+  }
+  return crush_bucket_add_item(crush, bucket, item, weight);
+}
+
+// Remove `item` from `bucket`, first dropping the matching slot from the
+// bucket's entry in every choose_args map (each weight set, and the ids
+// array when one is present).
+//
+// Asserts that `item` is present in `bucket`. Returns the result of
+// crush_bucket_remove_item().
+int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
+{
+  const __u32 new_size = bucket->size - 1;
+  // locate the slot the item occupies
+  unsigned position;
+  for (position = 0; position < bucket->size; position++)
+    if (bucket->items[position] == item)
+      break;
+  assert(position != bucket->size);
+
+  // choose_args arrays are indexed by bucket position
+  const int pos = -1 - bucket->id;
+  for (auto& w : choose_args) {
+    // work on the stored map directly instead of copying each entry
+    crush_choose_arg_map& arg_map = w.second;
+    if (!arg_map.args || pos < 0 || (__u32)pos >= arg_map.size) {
+      // this map has no entry for the bucket; nothing to keep in sync
+      continue;
+    }
+    crush_choose_arg *arg = &arg_map.args[pos];
+    for (__u32 j = 0; j < arg->weight_set_size; j++) {
+      crush_weight_set *weight_set = &arg->weight_set[j];
+      assert(weight_set->size - 1 == new_size);
+      // close the gap left by the removed slot
+      for (__u32 k = position; k < new_size; k++)
+        weight_set->weights[k] = weight_set->weights[k+1];
+      if (new_size == 0) {
+        // realloc(p, 0) is implementation-defined; release explicitly
+        free(weight_set->weights);
+        weight_set->weights = nullptr;
+      } else {
+        weight_set->weights = static_cast<__u32*>(
+          realloc(weight_set->weights, new_size * sizeof(__u32)));
+      }
+      weight_set->size = new_size;
+    }
+    if (arg->ids_size) {
+      assert(arg->ids_size - 1 == new_size);
+      for (__u32 k = position; k < new_size; k++)
+        arg->ids[k] = arg->ids[k+1];
+      if (new_size == 0) {
+        free(arg->ids);
+        arg->ids = nullptr;
+      } else {
+        arg->ids = static_cast<__s32*>(
+          realloc(arg->ids, new_size * sizeof(__s32)));
+      }
+      arg->ids_size = new_size;
+    }
+  }
+  return crush_bucket_remove_item(crush, bucket, item);
+}
+
+int CrushWrapper::update_device_class(int id,
+ const string& class_name,
+ const string& name,
+ ostream *ss)
+{
+ assert(item_exists(id));
+ auto old_class_name = get_item_class(id);
+ if (old_class_name && old_class_name != class_name) {
+ *ss << "osd." << id << " has already bound to class '" << old_class_name
+ << "', can not reset class to '" << class_name << "'; "
+ << "use 'ceph osd crush rm-device-class <osd>' to "
+ << "remove old class first";
+ return -EBUSY;
+ }
+
+ int class_id = get_or_create_class_id(class_name);
if (id < 0) {
- ldout(cct, 0) << "update_device_class " << name << " id " << id << " is negative " << dendl;
+ *ss << name << " id " << id << " is negative";
return -EINVAL;
}
- assert(item_exists(id));
if (class_map.count(id) != 0 && class_map[id] == class_id) {
- ldout(cct, 5) << "update_device_class " << name << " already set to class " << class_name << dendl;
+ *ss << name << " already set to class " << class_name;
return 0;
}
return 1;
}
-int CrushWrapper::device_class_clone(int original_id, int device_class, int *clone)
+// Unbind item `id` from its device class and rebuild the per-class roots.
+//
+// cct  unused by this function
+// id   item whose class binding is removed
+// ss   receives a human-readable message on failure or no-op; must not be
+//      null
+//
+// Returns 0 on success or when the item has no class, -ENOENT when the item
+// has no name, or the error from rebuild_roots_with_classes().
+int CrushWrapper::remove_device_class(CephContext *cct, int id, ostream *ss)
+{
+  assert(ss);
+  const char *name = get_item_name(id);
+  if (!name) {
+    *ss << "osd." << id << " does not have a name";
+    return -ENOENT;
+  }
+
+  const char *bound_class = get_item_class(id);
+  if (!bound_class) {
+    *ss << "osd." << id << " has not been bound to a specific class yet";
+    return 0;
+  }
+  // Take a copy before mutating anything: rebuild_roots_with_classes() runs
+  // cleanup_dead_classes(), which may remove this class name, so the pointer
+  // returned above must not be relied on afterwards.
+  // NOTE(review): presumably get_item_class() points into class_name
+  // storage -- confirm; the copy is harmless either way.
+  const string old_class(bound_class);
+  class_remove_item(id);
+
+  int r = rebuild_roots_with_classes();
+  if (r < 0) {
+    *ss << "unable to rebuild roots with class '" << old_class << "' "
+        << "of osd." << id << ": " << cpp_strerror(r);
+    return r;
+  }
+  return 0;
+}
+
+int CrushWrapper::device_class_clone(
+ int original_id, int device_class,
+ const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
+ const std::set<int32_t>& used_ids,
+ int *clone)
{
const char *item_name = get_item_name(original_id);
if (item_name == NULL)
return 0;
}
crush_bucket *original = get_bucket(original_id);
- if (original == NULL)
- return -ENOENT;
+ assert(!IS_ERR(original));
crush_bucket *copy = crush_make_bucket(crush,
original->alg,
original->hash,
original->type,
0, NULL, NULL);
- if(copy == NULL)
- return -ENOMEM;
+ assert(copy);
for (unsigned i = 0; i < original->size; i++) {
int item = original->items[i];
int weight = crush_get_bucket_item_weight(original, i);
if (item >= 0) {
if (class_map.count(item) != 0 && class_map[item] == device_class) {
- int res = crush_bucket_add_item(crush, copy, item, weight);
+ int res = bucket_add_item(copy, item, weight);
if (res)
return res;
}
} else {
int child_copy_id;
- int res = device_class_clone(item, device_class, &child_copy_id);
+ int res = device_class_clone(item, device_class, old_class_bucket,
+ used_ids, &child_copy_id);
if (res < 0)
return res;
crush_bucket *child_copy = get_bucket(child_copy_id);
- if (IS_ERR(child_copy))
- return -ENOENT;
- res = crush_bucket_add_item(crush, copy, child_copy_id, child_copy->weight);
+ assert(!IS_ERR(child_copy));
+ res = bucket_add_item(copy, child_copy_id, child_copy->weight);
if (res)
return res;
}
}
- int res = crush_add_bucket(crush, 0, copy, clone);
+ int bno = 0;
+ if (old_class_bucket.count(original_id) &&
+ old_class_bucket.at(original_id).count(device_class)) {
+ bno = old_class_bucket.at(original_id).at(device_class);
+ } else {
+ // pick a new shadow bucket id that is not used by the current map
+ // *or* any previous shadow buckets.
+ bno = -1;
+ while (((-1-bno) < crush->max_buckets && crush->buckets[-1-bno]) ||
+ used_ids.count(bno)) {
+ --bno;
+ }
+ }
+ int res = crush_add_bucket(crush, bno, copy, clone);
if (res)
return res;
+ assert(!bno || bno == *clone);
res = set_item_class(*clone, device_class);
if (res < 0)
return res;
return 0;
}
+// A class is "dead" when no entry of class_map with a non-negative key is
+// bound to it and no rule TAKE step targets one of the buckets recorded for
+// it in class_bucket.
+bool CrushWrapper::_class_is_dead(int class_id)
+{
+  // still bound to some item with a non-negative id?
+  for (const auto& binding : class_map) {
+    if (binding.first >= 0 && binding.second == class_id)
+      return false;
+  }
+
+  // still the target of a TAKE step in some rule?
+  for (unsigned ruleno = 0; ruleno < crush->max_rules; ++ruleno) {
+    const crush_rule *rule = crush->rules[ruleno];
+    if (!rule)
+      continue;
+    for (unsigned step = 0; step < rule->len; ++step) {
+      if (rule->steps[step].op != CRUSH_RULE_TAKE)
+        continue;
+      const int root = rule->steps[step].arg1;
+      for (const auto& per_bucket : class_bucket) {
+        const auto& by_class = per_bucket.second;
+        const auto hit = by_class.find(class_id);
+        if (hit != by_class.end() && hit->second == root)
+          return false;
+      }
+    }
+  }
+
+  // no more referenced by any devices or crush rules
+  return true;
+}
+
+// Remove the name of every class for which _class_is_dead() is true.
+//
+// remove_class_name() is called while walking class_name, so the iterator is
+// advanced and the name copied *before* the call: a range-for here would
+// keep using an iterator (and a string reference) to an entry that may just
+// have been erased.
+// NOTE(review): presumably remove_class_name() erases only the named entry
+// from class_name -- confirm against its definition.
+void CrushWrapper::cleanup_dead_classes()
+{
+  auto p = class_name.begin();
+  while (p != class_name.end()) {
+    if (_class_is_dead(p->first)) {
+      const string doomed = p->second;
+      ++p;
+      remove_class_name(doomed);
+    } else {
+      ++p;
+    }
+  }
+}
+
int CrushWrapper::rebuild_roots_with_classes()
{
+ std::map<int32_t, map<int32_t, int32_t> > old_class_bucket = class_bucket;  // snapshot taken before class_bucket is cleared below; handed to populate_classes()
+ cleanup_dead_classes();  // drop names of classes that _class_is_dead() reports as unreferenced
int r = trim_roots_with_class(false);
if (r < 0)
return r;
- r = populate_classes();
- if (r < 0)
- return r;
- return trim_roots_with_class(true);
+ class_bucket.clear();  // start from an empty mapping; presumably populate_classes() refills it -- TODO confirm
+ return populate_classes(old_class_bucket);  // its result (0 or negative errno, presumably) is this function's return value
}
void CrushWrapper::encode(bufferlist& bl, uint64_t features) const
::encode(crush->max_rules, bl);
::encode(crush->max_devices, bl);
+ bool encode_compat_choose_args = false;
+ crush_choose_arg_map arg_map;
+ memset(&arg_map, '\0', sizeof(arg_map));
+ if (has_choose_args() &&
+ !HAVE_FEATURE(features, CRUSH_CHOOSE_ARGS)) {
+ assert(!has_incompat_choose_args());
+ encode_compat_choose_args = true;
+ arg_map = choose_args.begin()->second;
+ }
+
// buckets
for (int i=0; i<crush->max_buckets; i++) {
__u32 alg = 0;
break;
case CRUSH_BUCKET_STRAW2:
- for (unsigned j=0; j<crush->buckets[i]->size; j++) {
- ::encode((reinterpret_cast<crush_bucket_straw2*>(crush->buckets[i]))->item_weights[j], bl);
+ {
+ __u32 *weights;
+ if (encode_compat_choose_args &&
+ arg_map.args[i].weight_set_size > 0) {
+ weights = arg_map.args[i].weight_set[0].weights;
+ } else {
+ weights = (reinterpret_cast<crush_bucket_straw2*>(crush->buckets[i]))->item_weights;
+ }
+ for (unsigned j=0; j<crush->buckets[i]->size; j++) {
+ ::encode(weights[j], bl);
+ }
}
break;
::encode(class_name, bl);
::encode(class_bucket, bl);
- ::encode(choose_args.size(), bl);
+ // choose args
+ __u32 size = (__u32)choose_args.size();
+ ::encode(size, bl);
for (auto c : choose_args) {
::encode(c.first, bl);
crush_choose_arg_map arg_map = c.second;
- __u32 size = 0;
+ size = 0;
for (__u32 i = 0; i < arg_map.size; i++) {
crush_choose_arg *arg = &arg_map.args[i];
if (arg->weight_set_size == 0 &&
for (auto &c : class_name)
class_rname[c.second] = c.first;
::decode(class_bucket, blp);
- cleanup_classes();
}
if (!blp.end()) {
- size_t choose_args_size;
+ __u32 choose_args_size;
::decode(choose_args_size, blp);
- for (size_t i = 0; i < choose_args_size; i++) {
+ for (__u32 i = 0; i < choose_args_size; i++) {
uint64_t choose_args_index;
::decode(choose_args_index, blp);
crush_choose_arg_map arg_map;
arg_map.size = crush->max_buckets;
- arg_map.args = (crush_choose_arg*)calloc(arg_map.size, sizeof(crush_choose_arg));
+ arg_map.args = (crush_choose_arg*)calloc(
+ arg_map.size, sizeof(crush_choose_arg));
__u32 size;
::decode(size, blp);
for (__u32 j = 0; j < size; j++) {
assert(bucket_index < arg_map.size);
crush_choose_arg *arg = &arg_map.args[bucket_index];
::decode(arg->weight_set_size, blp);
- arg->weight_set = (crush_weight_set*)calloc(arg->weight_set_size, sizeof(crush_weight_set));
- for (__u32 k = 0; k < arg->weight_set_size; k++) {
- crush_weight_set *weight_set = &arg->weight_set[k];
- ::decode(weight_set->size, blp);
- weight_set->weights = (__u32*)calloc(weight_set->size, sizeof(__u32));
- for (__u32 l = 0; l < weight_set->size; l++)
- ::decode(weight_set->weights[l], blp);
+ if (arg->weight_set_size) {
+ arg->weight_set = (crush_weight_set*)calloc(
+ arg->weight_set_size, sizeof(crush_weight_set));
+ for (__u32 k = 0; k < arg->weight_set_size; k++) {
+ crush_weight_set *weight_set = &arg->weight_set[k];
+ ::decode(weight_set->size, blp);
+ weight_set->weights = (__u32*)calloc(
+ weight_set->size, sizeof(__u32));
+ for (__u32 l = 0; l < weight_set->size; l++)
+ ::decode(weight_set->weights[l], blp);
+ }
}
::decode(arg->ids_size, blp);
- arg->ids = (int*)calloc(arg->ids_size, sizeof(int));
- for (__u32 k = 0; k < arg->ids_size; k++)
- ::decode(arg->ids[k], blp);
+ if (arg->ids_size) {
+ assert(arg->ids_size == crush->buckets[bucket_index]->size);
+ arg->ids = (__s32 *)calloc(arg->ids_size, sizeof(__s32));
+ for (__u32 k = 0; k < arg->ids_size; k++)
+ ::decode(arg->ids[k], blp);
+ }
}
choose_args[choose_args_index] = arg_map;
}
class TreeDumper {
typedef CrushTreeDumper::Item Item;
const CrushWrapper *crush;
+ const CrushTreeDumper::name_map_t& weight_set_names;
public:
- explicit TreeDumper(const CrushWrapper *crush)
- : crush(crush) {}
+ explicit TreeDumper(const CrushWrapper *crush,
+ const CrushTreeDumper::name_map_t& wsnames)
+ : crush(crush), weight_set_names(wsnames) {}
void dump(Formatter *f) {
set<int> roots;
crush->find_roots(roots);
for (set<int>::iterator root = roots.begin(); root != roots.end(); ++root) {
- dump_item(Item(*root, 0, crush->get_bucket_weightf(*root)), f);
+ dump_item(Item(*root, 0, 0, crush->get_bucket_weightf(*root)), f);
}
}
void dump_item(const Item& qi, Formatter* f) {
if (qi.is_bucket()) {
f->open_object_section("bucket");
- CrushTreeDumper::dump_item_fields(crush, qi, f);
+ CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f);
dump_bucket_children(qi, f);
f->close_section();
} else {
f->open_object_section("device");
- CrushTreeDumper::dump_item_fields(crush, qi, f);
+ CrushTreeDumper::dump_item_fields(crush, weight_set_names, qi, f);
f->close_section();
}
}
for (int pos = 0; pos < max_pos; pos++) {
int id = crush->get_bucket_item(parent.id, pos);
float weight = crush->get_bucket_item_weightf(parent.id, pos);
- dump_item(Item(id, parent.depth + 1, weight), f);
+ dump_item(Item(id, parent.id, parent.depth + 1, weight), f);
}
f->close_section();
}
};
}
-void CrushWrapper::dump_tree(Formatter *f) const
+void CrushWrapper::dump_tree(  // Formatter-only overload: hands the whole job to the file-local TreeDumper
+ Formatter *f,  // destination formatter; must be non-null (asserted below)
+ const CrushTreeDumper::name_map_t& weight_set_names) const  // passed through to TreeDumper unchanged
{
assert(f);
- TreeDumper(this).dump(f);
+ TreeDumper(this, weight_set_names).dump(f);  // temporary dumper; dump() does the traversal
}
void CrushWrapper::dump_tunables(Formatter *f) const
}
}
-class CrushTreePlainDumper : public CrushTreeDumper::Dumper<ostream> {
-public:
- typedef CrushTreeDumper::Dumper<ostream> Parent;
-
- explicit CrushTreePlainDumper(const CrushWrapper *crush)
- : Parent(crush) {}
+// Write the name of every existing rule to *ss, one per line, in increasing
+// rule-id order.  Ids for which rule_exists() is false are skipped.
+void CrushWrapper::list_rules(ostream *ss) const
+{
+  for (int ruleno = 0; ruleno < get_max_rules(); ++ruleno) {
+    if (rule_exists(ruleno)) {
+      *ss << get_rule_name(ruleno) << "\n";
+    }
+  }
+}
- void dump(ostream *out) {
- *out << "ID\tWEIGHT\tTYPE NAME\n";
- Parent::dump(out);
+// Renders the CRUSH tree into a TextTable: ID, CLASS and WEIGHT columns, one
+// column per entry of crush->choose_args, then an indented TYPE NAME column.
+class CrushTreePlainDumper : public CrushTreeDumper::Dumper<TextTable> {
+public:
+  typedef CrushTreeDumper::Dumper<TextTable> Parent;
+
+  explicit CrushTreePlainDumper(const CrushWrapper *crush,
+                                const CrushTreeDumper::name_map_t& wsnames)
+    : Parent(crush, wsnames) {}
+  explicit CrushTreePlainDumper(const CrushWrapper *crush,
+                                const CrushTreeDumper::name_map_t& wsnames,
+                                bool show_shadow)
+    : Parent(crush, wsnames, show_shadow) {}
+
+
+  // Define the table columns, then let the base dumper walk the tree.
+  // A choose_args entry keyed DEFAULT_CHOOSE_ARGS is titled "(compat)";
+  // any other entry uses its name from weight_set_names, or its numeric
+  // key when no name is registered.
+  void dump(TextTable *tbl) {
+    tbl->define_column("ID", TextTable::LEFT, TextTable::RIGHT);
+    tbl->define_column("CLASS", TextTable::LEFT, TextTable::RIGHT);
+    tbl->define_column("WEIGHT", TextTable::LEFT, TextTable::RIGHT);
+    for (auto& p : crush->choose_args) {
+      if (p.first == CrushWrapper::DEFAULT_CHOOSE_ARGS) {
+        tbl->define_column("(compat)", TextTable::LEFT, TextTable::RIGHT);
+      } else {
+        string name;
+        auto q = weight_set_names.find(p.first);
+        name = q != weight_set_names.end() ? q->second :
+          stringify(p.first);
+        tbl->define_column(name.c_str(), TextTable::LEFT, TextTable::RIGHT);
+      }
+    }
+    tbl->define_column("TYPE NAME", TextTable::LEFT, TextTable::LEFT);
+    Parent::dump(tbl);
}
protected:
- void dump_item(const CrushTreeDumper::Item &qi, ostream *out) override {
- *out << qi.id << "\t"
- << weightf_t(qi.weight) << "\t";
-
- for (int k=0; k < qi.depth; k++)
- *out << "\t";
-
- if (qi.is_bucket())
- {
- *out << crush->get_type_name(crush->get_bucket_type(qi.id)) << " "
- << crush->get_item_name(qi.id);
+  // Emit one table row for qi: id, class name (empty if none), weight, one
+  // cell per choose_args entry, and the indented type/name label.
+  void dump_item(const CrushTreeDumper::Item &qi, TextTable *tbl) override {
+    const char *c = crush->get_item_class(qi.id);
+    if (!c)
+      c = "";
+    *tbl << qi.id
+         << c
+         << weightf_t(qi.weight);
+    // exactly one cell per weight-set column defined in dump(); the cell is
+    // left empty when no weight-set value can be located for this item
+    for (auto& p : crush->choose_args) {
+      if (qi.parent < 0) {
+        const crush_choose_arg_map cmap = crush->choose_args_get(p.first);
+        int bidx = -1 - qi.parent;
+        const crush_bucket *b = crush->get_bucket(qi.parent);
+        // get_bucket() results are tested with IS_ERR elsewhere in this
+        // file, so a plain null test is not enough to reject a failed lookup
+        if (b && !IS_ERR(b) &&
+            bidx < (int)cmap.size &&
+            cmap.args[bidx].weight_set &&
+            cmap.args[bidx].weight_set_size >= 1) {
+          const crush_weight_set& ws = cmap.args[bidx].weight_set[0];
+          // locate qi.id among the parent's items without running past the
+          // end of either the item array or the weight array
+          unsigned pos = 0;
+          while (pos < ws.size && pos < b->size && b->items[pos] != qi.id)
+            ++pos;
+          if (pos < ws.size && pos < b->size) {
+            *tbl << weightf_t((float)ws.weights[pos] / (float)0x10000);
+            continue;
+          }
+        }
+      }
+      *tbl << "";
}
- else
- {
- *out << "osd." << qi.id;
+    ostringstream ss;
+    for (int k=0; k < qi.depth; k++) {
+      ss << " ";
+    }
+    if (qi.is_bucket()) {
+      ss << crush->get_type_name(crush->get_bucket_type(qi.id)) << " "
+         << crush->get_item_name(qi.id);
+    } else {
+      ss << "osd." << qi.id;
}
- *out << "\n";
+    *tbl << ss.str();
+    *tbl << TextTable::endrow;
}
};
public:
typedef CrushTreeDumper::FormattingDumper Parent;
- explicit CrushTreeFormattingDumper(const CrushWrapper *crush)
- : Parent(crush) {}
+ explicit CrushTreeFormattingDumper(
+ const CrushWrapper *crush,
+ const CrushTreeDumper::name_map_t& wsnames)
+ : Parent(crush, wsnames) {}
+
+ explicit CrushTreeFormattingDumper(
+ const CrushWrapper *crush,
+ const CrushTreeDumper::name_map_t& wsnames,
+ bool show_shadow)
+ : Parent(crush, wsnames, show_shadow) {}
void dump(Formatter *f) {
f->open_array_section("nodes");
};
-void CrushWrapper::dump_tree(ostream *out, Formatter *f) const
+void CrushWrapper::dump_tree(  // dump the tree as plain text and/or through a Formatter; each output is optional
+ ostream *out,  // if non-null, receives the rendered TextTable
+ Formatter *f,  // if non-null, receives the CrushTreeFormattingDumper output
+ const CrushTreeDumper::name_map_t& weight_set_names,  // forwarded unchanged to both dumpers
+ bool show_shadow) const  // forwarded unchanged to both dumpers; presumably toggles shadow buckets -- TODO confirm in CrushTreeDumper
{
- if (out)
- CrushTreePlainDumper(this).dump(out);
- if (f)
- CrushTreeFormattingDumper(this).dump(f);
+ if (out) {
+ TextTable tbl;  // the plain dumper fills a table first ...
+ CrushTreePlainDumper(this, weight_set_names, show_shadow).dump(&tbl);
+ *out << tbl;  // ... which is then streamed to the caller's ostream in one go
+ }
+ if (f) {
+ CrushTreeFormattingDumper(this, weight_set_names, show_shadow).dump(f);
+ }
}
void CrushWrapper::generate_test_instances(list<CrushWrapper*>& o)
// fixme
}
-int CrushWrapper::_get_osd_pool_default_crush_replicated_ruleset(CephContext *cct,
- bool quiet)
-{
- int crush_ruleset = cct->_conf->osd_pool_default_crush_rule;
- if (crush_ruleset == -1) {
- crush_ruleset = cct->_conf->osd_pool_default_crush_replicated_ruleset;
- } else if (!quiet) {
- ldout(cct, 0) << "osd_pool_default_crush_rule is deprecated "
- << "use osd_pool_default_crush_replicated_ruleset instead"
- << dendl;
- ldout(cct, 0) << "osd_pool_default_crush_rule = "
- << cct->_conf-> osd_pool_default_crush_rule << " overrides "
- << "osd_pool_default_crush_replicated_ruleset = "
- << cct->_conf->osd_pool_default_crush_replicated_ruleset
- << dendl;
- }
-
- return crush_ruleset;
-}
-
/**
* Determine the default CRUSH ruleset ID to be used with
* newly created replicated pools.
*/
int CrushWrapper::get_osd_pool_default_crush_replicated_ruleset(CephContext *cct)
{
- int crush_ruleset = _get_osd_pool_default_crush_replicated_ruleset(cct,
- false);
- if (crush_ruleset == CEPH_DEFAULT_CRUSH_REPLICATED_RULESET) {
+ int crush_ruleset = cct->_conf->osd_pool_default_crush_rule;  // the only config option consulted now
+ if (crush_ruleset < 0) {  // any negative value means "not configured": fall back to find_first_ruleset() for replicated pools
crush_ruleset = find_first_ruleset(pg_pool_t::TYPE_REPLICATED);
} else if (!ruleset_exists(crush_ruleset)) {
crush_ruleset = -1; // match find_first_ruleset() retval
}
-
return crush_ruleset;
}
ldout(cct, 10) << __func__ << " cumulative_fanout " << cumulative_fanout
<< dendl;
+ // identify underfull targets for each intermediate level.
+ // this serves two purposes:
+ // 1. we can tell when we are selecting a bucket that does not have any underfull
+ // devices beneath it. that means that if the current input includes an overfull
+ // device, we won't be able to find an underfull device with this parent to
+ // swap for it.
+ // 2. when we decide we should reject a bucket due to the above, this list gives us
+ // a list of peers to consider that *do* have underfull devices available.. (we
+ // are careful to pick one that has the same parent.)
+ vector<set<int>> underfull_buckets; // level -> set of buckets with >0 underfull item(s)
+ underfull_buckets.resize(stack.size() - 1);
+ for (auto osd : underfull) {
+ int item = osd;
+ for (int j = (int)stack.size() - 2; j >= 0; --j) {
+ int type = stack[j].first;
+ item = get_parent_of_type(item, type);
+ ldout(cct, 10) << __func__ << " underfull " << osd << " type " << type
+ << " is " << item << dendl;
+ underfull_buckets[j].insert(item);
+ }
+ }
+ ldout(cct, 20) << __func__ << " underfull_buckets " << underfull_buckets << dendl;
+
for (unsigned j = 0; j < stack.size(); ++j) {
int type = stack[j].first;
int fanout = stack[j].second;
auto tmpi = i;
for (auto from : w) {
ldout(cct, 10) << " from " << from << dendl;
-
+ // identify leaves under each choice. we use this to check whether any of these
+ // leaves are overfull. (if so, we need to make sure there are underfull candidates
+ // to swap for them.)
+ vector<set<int>> leaves;
+ leaves.resize(fanout);
for (int pos = 0; pos < fanout; ++pos) {
if (type > 0) {
// non-leaf
- int item = *tmpi;
- do {
- int r = get_immediate_parent_id(item, &item);
- if (r < 0) {
- ldout(cct, 10) << __func__ << " parent of " << item << " got "
- << cpp_strerror(r) << dendl;
- return -EINVAL;
- }
- } while (get_bucket_type(item) != type);
+ int item = get_parent_of_type(*tmpi, type);
o.push_back(item);
- ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item
- << " of type " << type << dendl;
int n = cum_fanout;
- while (n-- && tmpi != orig.end())
- ++tmpi;
+ while (n-- && tmpi != orig.end()) {
+ leaves[pos].insert(*tmpi++);
+ }
+ ldout(cct, 10) << __func__ << " from " << *tmpi << " got " << item
+ << " of type " << type << " over leaves " << leaves[pos] << dendl;
} else {
// leaf
bool replaced = false;
}
}
}
+ if (j + 1 < stack.size()) {
+ // check if any buckets have overfull leaves but no underfull candidates
+ for (int pos = 0; pos < fanout; ++pos) {
+ if (underfull_buckets[j].count(o[pos]) == 0) {
+ // are any leaves overfull?
+ bool any_overfull = false;
+ for (auto osd : leaves[pos]) {
+ if (overfull.count(osd)) {
+ any_overfull = true;
+ }
+ }
+ if (any_overfull) {
+ ldout(cct, 10) << " bucket " << o[pos] << " has no underfull targets and "
+ << ">0 leaves " << leaves[pos] << " is overfull; alts "
+ << underfull_buckets[j]
+ << dendl;
+ for (auto alt : underfull_buckets[j]) {
+ if (std::find(o.begin(), o.end(), alt) == o.end()) {
+ // see if alt has the same parent
+ if (j == 0 ||
+ get_parent_of_type(o[pos], stack[j-1].first) ==
+ get_parent_of_type(alt, stack[j-1].first)) {
+ if (j)
+ ldout(cct, 10) << " replacing " << o[pos]
+ << " (which has no underfull leaves) with " << alt
+ << " (same parent "
+ << get_parent_of_type(alt, stack[j-1].first) << " type "
+ << type << ")" << dendl;
+ else
+ ldout(cct, 10) << " replacing " << o[pos]
+ << " (which has no underfull leaves) with " << alt
+ << " (first level)" << dendl;
+ o[pos] = alt;
+ break;
+ } else {
+ ldout(cct, 30) << " alt " << alt << " for " << o[pos]
+ << " has different parent, skipping" << dendl;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
if (i == orig.end()) {
ldout(cct, 10) << __func__ << " end of orig, break 2" << dendl;
break;
return 0;
}
+
+
+// Overwrite the weight-set values of item `id` inside a single bucket, then
+// hand the bucket's new per-position totals to
+// choose_args_adjust_item_weight() under the bucket's own id.
+//
+// `weight` carries one value per weight-set position.  Nothing is modified
+// (and 0 is returned, with a short reason written to *ss when ss is
+// non-null) if the bucket has no slot in cmap, no weight_set, or a
+// weight_set_size different from weight.size().  Otherwise the return value
+// is the number of entries in the bucket that matched `id`.
+int CrushWrapper::_choose_args_adjust_item_weight_in_bucket(
+  CephContext *cct,
+  crush_choose_arg_map cmap,
+  int bucketid,
+  int id,
+  const vector<int>& weight,
+  ostream *ss)
+{
+  const int slot = -1 - bucketid;
+  crush_bucket *bucket = crush->buckets[slot];
+  if (slot >= (int)cmap.size) {
+    if (ss)
+      *ss << "no weight-set for bucket " << bucket->id;
+    ldout(cct, 10) << __func__ << " no crush_choose_arg for bucket " << bucket->id
+                   << dendl;
+    return 0;
+  }
+  crush_choose_arg *arg = &cmap.args[slot];
+  if (!arg->weight_set) {
+    if (ss)
+      *ss << "no weight-set for bucket " << bucket->id;
+    ldout(cct, 10) << __func__ << " no weight_set for bucket " << bucket->id
+                   << dendl;
+    return 0;
+  }
+  if (arg->weight_set_size != weight.size()) {
+    if (ss)
+      *ss << "weight_set_size != " << weight.size() << " for bucket " << bucket->id;
+    ldout(cct, 10) << __func__ << " weight_set_size != " << weight.size()
+                   << " for bucket " << bucket->id << dendl;
+    return 0;
+  }
+
+  // apply the new values to every entry of the bucket that is `id`
+  int matched = 0;
+  for (unsigned idx = 0; idx < bucket->size; ++idx) {
+    if (bucket->items[idx] != id)
+      continue;
+    for (unsigned pos = 0; pos < weight.size(); ++pos) {
+      arg->weight_set[pos].weights[idx] = weight[pos];
+    }
+    ldout(cct, 5) << __func__ << " set " << id << " to " << weight
+                  << " in bucket " << bucket->id << dendl;
+    ++matched;
+  }
+  if (!matched)
+    return 0;
+
+  // re-sum each weight-set position over the whole bucket and adjust the
+  // bucket itself (as an item) with those totals; that call's result is
+  // deliberately not inspected, exactly as before
+  vector<int> totals(weight.size(), 0);
+  for (unsigned pos = 0; pos < weight.size(); ++pos) {
+    for (unsigned idx = 0; idx < bucket->size; ++idx) {
+      totals[pos] += arg->weight_set[pos].weights[idx];
+    }
+  }
+  choose_args_adjust_item_weight(cct, cmap, bucket->id, totals, nullptr);
+  return matched;
+}
+
+// Apply `weight` (one value per weight-set position) to item `id` in every
+// existing bucket, via _choose_args_adjust_item_weight_in_bucket().
+//
+// Returns the summed per-bucket change count.  When that sum is zero,
+// "item <id> not found in crush map" is written to *ss (if ss is non-null)
+// and -ENOENT is returned.
+int CrushWrapper::choose_args_adjust_item_weight(
+  CephContext *cct,
+  crush_choose_arg_map cmap,
+  int id,
+  const vector<int>& weight,
+  ostream *ss)
+{
+  ldout(cct, 5) << __func__ << " " << id << " weight " << weight << dendl;
+  int total = 0;
+  for (int slot = 0; slot < crush->max_buckets; ++slot) {
+    crush_bucket *bucket = crush->buckets[slot];
+    if (bucket != nullptr) {
+      total += _choose_args_adjust_item_weight_in_bucket(
+        cct, cmap, bucket->id, id, weight, ss);
+    }
+  }
+  if (total)
+    return total;
+  if (ss)
+    *ss << "item " << id << " not found in crush map";
+  return -ENOENT;
+}