cmake_minimum_required(VERSION 2.8.11)
project(ceph)
-set(VERSION 12.1.3)
+set(VERSION 12.1.4)
if(POLICY CMP0046)
# Tweak policies (this one disables "missing" dependency warning)
# Contributor: John Coyle <dx9err@gmail.com>
# Maintainer: John Coyle <dx9err@gmail.com>
pkgname=ceph
-pkgver=12.1.3
+pkgver=12.1.4
pkgrel=0
pkgdesc="Ceph is a distributed object store and file system"
pkgusers="ceph"
xmlstarlet
yasm
"
-source="ceph-12.1.3.tar.bz2"
+source="ceph-12.1.4.tar.bz2"
subpackages="
$pkgname-base
$pkgname-common
_udevrulesdir=/etc/udev/rules.d
_python_sitelib=/usr/lib/python2.7/site-packages
-builddir=$srcdir/ceph-12.1.3
+builddir=$srcdir/ceph-12.1.4
build() {
export CEPH_BUILD_VIRTUALENV=$builddir
# main package definition
#################################################################################
Name: ceph
-Version: 12.1.3
+Version: 12.1.4
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
Group: System/Filesystems
%endif
URL: http://ceph.com/
-Source0: http://ceph.com/download/ceph-12.1.3.tar.bz2
+Source0: http://ceph.com/download/ceph-12.1.4.tar.bz2
%if 0%{?suse_version}
%if 0%{?is_opensuse}
ExclusiveArch: x86_64 aarch64 ppc64 ppc64le
# common
#################################################################################
%prep
-%autosetup -p1 -n ceph-12.1.3
+%autosetup -p1 -n ceph-12.1.4
%build
%if 0%{with cephfs_java}
+ceph (12.1.4-1) stable; urgency=medium
+
+ * New upstream release
+
+ -- Ceph Release Team <ceph-maintainers@ceph.com> Tue, 15 Aug 2017 13:45:08 +0000
+
ceph (12.1.3-1) stable; urgency=medium
* New upstream release
| **ceph** **mon_status**
-| **ceph** **osd** [ *blacklist* \| *blocked-by* \| *create* \| *new* \| *deep-scrub* \| *df* \| *down* \| *dump* \| *erasure-code-profile* \| *find* \| *getcrushmap* \| *getmap* \| *getmaxosd* \| *in* \| *lspools* \| *map* \| *metadata* \| *out* \| *pause* \| *perf* \| *pg-temp* \| *force-create-pg* \| *primary-affinity* \| *primary-temp* \| *repair* \| *reweight* \| *reweight-by-pg* \| *rm* \| *destroy* \| *purge* \| *scrub* \| *set* \| *setcrushmap* \| *setmaxosd* \| *stat* \| *tree* \| *unpause* \| *unset* ] ...
+| **ceph** **osd** [ *blacklist* \| *blocked-by* \| *create* \| *new* \| *deep-scrub* \| *df* \| *down* \| *dump* \| *erasure-code-profile* \| *find* \| *getcrushmap* \| *getmap* \| *getmaxosd* \| *in* \| *lspools* \| *map* \| *metadata* \| *ok-to-stop* \| *out* \| *pause* \| *perf* \| *pg-temp* \| *force-create-pg* \| *primary-affinity* \| *primary-temp* \| *repair* \| *reweight* \| *reweight-by-pg* \| *rm* \| *destroy* \| *purge* \| *safe-to-destroy* \| *scrub* \| *set* \| *setcrushmap* \| *setmaxosd* \| *stat* \| *tree* \| *unpause* \| *unset* ] ...
| **ceph** **osd** **crush** [ *add* \| *add-bucket* \| *create-or-move* \| *dump* \| *get-tunable* \| *link* \| *move* \| *remove* \| *rename-bucket* \| *reweight* \| *reweight-all* \| *reweight-subtree* \| *rm* \| *rule* \| *set* \| *set-tunable* \| *show-tunables* \| *tunables* \| *unlink* ] ...
ceph osd out <ids> [<ids>...]
+Subcommand ``ok-to-stop`` checks whether the list of OSD(s) can be
+stopped without immediately making data unavailable. That is, all
+data should remain readable and writeable, although data redundancy
+may be reduced as some PGs may end up in a degraded (but active)
+state. It will return a success code if it is okay to stop the
+OSD(s), or an error code and informative message if it is not or if no
+conclusion can be drawn at the current time.
+
+Usage::
+
+ ceph osd ok-to-stop <id> [<ids>...]
+
Subcommand ``pause`` pauses osd.
Usage::
ceph osd purge <id> {--yes-i-really-mean-it}
+Subcommand ``safe-to-destroy`` checks whether it is safe to remove or
+destroy an OSD without reducing overall data redundancy or durability.
+It will return a success code if it is definitely safe, or an error
+code and informative message if it is not or if no conclusion can be
+drawn at the current time.
+
+Usage::
+
+ ceph osd safe-to-destroy <id> [<ids>...]
+
Subcommand ``scrub`` initiates scrub on specified osd.
Usage::
- ``ceph osd {add,rm}-{noout,noin,nodown,noup}`` allow the
`noout`, `noin`, `nodown`, and `noup` flags to be applied to
specific OSDs.
+ - ``ceph osd safe-to-destroy <osd(s)>`` will report whether it is safe to
+ remove or destroy OSD(s) without reducing data durability or redundancy.
+ - ``ceph osd ok-to-stop <osd(s)>`` will report whether it is okay to stop
+ OSD(s) without immediately compromising availability (i.e., all PGs
+ should remain active but may be degraded).
- ``ceph log last [n]`` will output the last *n* lines of the cluster
log.
- ``ceph mgr dump`` will dump the MgrMap, including the currently active
# test set-device-class implicitly change class
ceph osd crush set-device-class hdd osd.0 || return 1
expect_failure $dir EBUSY ceph osd crush set-device-class nvme osd.0 || return 1
+
+ # test class rename
+ ceph osd crush rm-device-class all || return 1
+ ceph osd crush set-device-class class_1 all || return 1
+ ceph osd crush class ls | grep 'class_1' || return 1
+ ceph osd crush tree --show-shadow | grep 'class_1' || return 1
+ ceph osd crush rule create-replicated class_1_rule default host class_1 || return 1
+ ceph osd crush class rename class_1 class_2
+ ceph osd crush class ls | grep 'class_1' && return 1
+ ceph osd crush tree --show-shadow | grep 'class_1' && return 1
+ ceph osd crush class ls | grep 'class_2' || return 1
+ ceph osd crush tree --show-shadow | grep 'class_2' || return 1
}
main crush-classes "$@"
- .*clock.*skew.*
- clocks not synchronized
- overall HEALTH_
- - (MON_CLOCK_SKEW)
+ - \(MON_CLOCK_SKEW\)
+ - \(MGR_DOWN\)
+ - \(PG_
+ - No standby daemons available
- mon_clock_skew_check:
expect-skew: true
osd heartbeat grace: 40
mon:
mon osd crush smoke test: false
+ log-whitelist:
+ - overall HEALTH_
+# valgrind is slow; we might get PGs stuck peering, etc.
+ - \(PG_
valgrind:
mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
osd: [--tool=memcheck]
data_pool_name, pgs_per_fs_pool.__str__())
self.mon_manager.raw_cluster_cmd('fs', 'new',
self.name, self.metadata_pool_name, data_pool_name)
+ self.check_pool_application(self.metadata_pool_name)
+ self.check_pool_application(data_pool_name)
# Turn off spurious standby count warnings from modifying max_mds in tests.
try:
self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0')
self.getinfo(refresh = True)
+
+ def check_pool_application(self, pool_name):
+ osd_map = self.mon_manager.get_osd_dump_json()
+ for pool in osd_map['pools']:
+ if pool['pool_name'] == pool_name:
+ if "application_metadata" in pool:
+ if not "cephfs" in pool['application_metadata']:
+ raise RuntimeError("Pool %p does not name cephfs as application!".\
+ format(pool_name))
+
+
def __del__(self):
if getattr(self._ctx, "filesystem", None) == self:
delattr(self._ctx, "filesystem")
- \(OBJECT_
- \(REQUEST_SLOW\)
- \(TOO_FEW_PGS\)
+ - \(MON_DOWN\)
ceph osd crush weight-set ls | grep '(compat)'
ceph osd crush weight-set rm-compat
+# weight set vs device classes
+ceph osd pool create cool 2
+ceph osd pool create cold 2
+ceph osd pool set cold size 2
+ceph osd crush weight-set create-compat
+ceph osd crush weight-set create cool flat
+ceph osd crush weight-set create cold positional
+ceph osd crush rm-device-class osd.0
+ceph osd crush weight-set reweight-compat osd.0 10.5
+ceph osd crush weight-set reweight cool osd.0 11.5
+ceph osd crush weight-set reweight cold osd.0 12.5 12.4
+ceph osd crush set-device-class fish osd.0
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 10\\.
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 11\\.
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 12\\.
+ceph osd crush rm-device-class osd.0
+ceph osd crush set-device-class globster osd.0
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 10\\.
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 11\\.
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 12\\.
+ceph osd crush weight-set reweight-compat osd.0 7.5
+ceph osd crush weight-set reweight cool osd.0 8.5
+ceph osd crush weight-set reweight cold osd.0 6.5 6.6
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 7\\.
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 8\\.
+ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 6\\.
+ceph osd crush rm-device-class osd.0
+ceph osd pool rm cool cool --yes-i-really-really-mean-it
+ceph osd pool rm cold cold --yes-i-really-really-mean-it
+ceph osd crush weight-set rm-compat
+
echo OK
-c56d9c07b342c08419bbc18dcf2a4c5fae62b9cf
-v12.1.3
+a5f84b37668fc8e03165aaf5cbb380c78e4deba4
+v12.1.4
.set_description(""),
Option("memstore_page_set", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
- .set_default(true)
+ .set_default(false)
.set_description(""),
Option("memstore_page_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
return true;
}
-int CrushWrapper::remove_root(int item, bool unused)
+int CrushWrapper::remove_root(int item)
{
- if (unused && _bucket_is_in_use(item))
- return 0;
-
crush_bucket *b = get_bucket(item);
if (IS_ERR(b)) {
// should be idempotent
for (unsigned n = 0; n < b->size; n++) {
if (b->items[n] >= 0)
continue;
- int r = remove_root(b->items[n], unused);
+ int r = remove_root(b->items[n]);
if (r < 0)
return r;
}
if (id == item) {
ldout(cct, 5) << "_remove_item_under removing item " << item
<< " from bucket " << b->id << dendl;
- bucket_remove_item(b, item);
for (auto& p : choose_args) {
// weight down each weight-set to 0 before we remove the item
vector<int> weightv(get_choose_args_positions(p.second), 0);
_choose_args_adjust_item_weight_in_bucket(
cct, p.second, b->id, item, weightv, nullptr);
}
+ bucket_remove_item(b, item);
adjust_item_weight(cct, b->id, b->weight);
ret = 0;
} else if (id < 0) {
return item;
}
+// Rename a device class from srcname to dstname.
+// Returns -ENOENT if srcname does not exist, -EEXIST if dstname is taken.
+// Also rewrites the names of any shadow buckets ("<bucket>~<class>") that
+// belong to the class so they keep matching the new class name.
+int CrushWrapper::rename_class(const string& srcname, const string& dstname)
+{
+  auto i = class_rname.find(srcname);
+  if (i == class_rname.end())
+    return -ENOENT;
+  auto j = class_rname.find(dstname);
+  if (j != class_rname.end())
+    return -EEXIST;
+
+  int class_id = i->second;
+  assert(class_name.count(class_id));
+  // rename any shadow buckets of old class name
+  for (auto &it: class_map) {
+    if (it.first < 0 && it.second == class_id) {
+      string old_name = get_item_name(it.first);
+      // shadow bucket names always embed the class after a '~'
+      size_t pos = old_name.find("~");
+      assert(pos != string::npos);
+      string name_no_class = old_name.substr(0, pos);
+      string old_class_name = old_name.substr(pos + 1);
+      assert(old_class_name == srcname);
+      string new_name = name_no_class + "~" + dstname;
+      // we do not use set_item_name
+      // because the name is intentionally invalid
+      name_map[it.first] = new_name;
+      have_rmaps = false;
+    }
+  }
+
+  // rename class: drop both old mappings before inserting the new ones so
+  // the forward (id->name) and reverse (name->id) maps stay consistent
+  class_rname.erase(srcname);
+  class_name.erase(class_id);
+  class_rname[dstname] = class_id;
+  class_name[class_id] = dstname;
+  return 0;
+}
+
int CrushWrapper::populate_classes(
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket)
{
used_ids.insert(q.second);
}
}
+ // accumulate weight values for each carg and bucket as we go. because it is
+ // depth first, we will have the nested bucket weights we need when we
+ // finish constructing the containing buckets.
+ map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
set<int> roots;
find_nonshadow_roots(roots);
for (auto &r : roots) {
for (auto &c : class_name) {
int clone;
int res = device_class_clone(r, c.first, old_class_bucket, used_ids,
- &clone);
+ &clone, &cmap_item_weight);
if (res < 0)
return res;
}
return 0;
}
-int CrushWrapper::trim_roots_with_class(bool unused)
+int CrushWrapper::trim_roots_with_class()
{
set<int> roots;
find_shadow_roots(roots);
for (auto &r : roots) {
if (r >= 0)
continue;
- int res = remove_root(r, unused);
+ int res = remove_root(r);
if (res)
return res;
}
int CrushWrapper::bucket_add_item(crush_bucket *bucket, int item, int weight)
{
__u32 new_size = bucket->size + 1;
+ int r = crush_bucket_add_item(crush, bucket, item, weight);
+ if (r < 0) {
+ return r;
+ }
for (auto w : choose_args) {
crush_choose_arg_map arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
arg->ids_size = new_size;
}
}
- return crush_bucket_add_item(crush, bucket, item, weight);
+ return 0;
}
int CrushWrapper::bucket_remove_item(crush_bucket *bucket, int item)
if (bucket->items[position] == item)
break;
assert(position != bucket->size);
+ int r = crush_bucket_remove_item(crush, bucket, item);
+ if (r < 0) {
+ return r;
+ }
for (auto w : choose_args) {
crush_choose_arg_map arg_map = w.second;
crush_choose_arg *arg = &arg_map.args[-1-bucket->id];
assert(weight_set->size - 1 == new_size);
for (__u32 k = position; k < new_size; k++)
weight_set->weights[k] = weight_set->weights[k+1];
- weight_set->weights = (__u32*)realloc(weight_set->weights,
- new_size * sizeof(__u32));
+ if (new_size) {
+ weight_set->weights = (__u32*)realloc(weight_set->weights,
+ new_size * sizeof(__u32));
+ } else {
+ weight_set->weights = NULL;
+ }
weight_set->size = new_size;
}
if (arg->ids_size) {
assert(arg->ids_size - 1 == new_size);
for (__u32 k = position; k < new_size; k++)
arg->ids[k] = arg->ids[k+1];
- arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
+ if (new_size) {
+ arg->ids = (__s32 *)realloc(arg->ids, new_size * sizeof(__s32));
+ } else {
+ arg->ids = NULL;
+ }
arg->ids_size = new_size;
}
}
- return crush_bucket_remove_item(crush, bucket, item);
+ return 0;
}
int CrushWrapper::update_device_class(int id,
int original_id, int device_class,
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
const std::set<int32_t>& used_ids,
- int *clone)
+ int *clone,
+ map<int,map<int,vector<int>>> *cmap_item_weight)
{
const char *item_name = get_item_name(original_id);
if (item_name == NULL)
*clone = get_item_id(copy_name);
return 0;
}
+
crush_bucket *original = get_bucket(original_id);
assert(!IS_ERR(original));
crush_bucket *copy = crush_make_bucket(crush,
original->type,
0, NULL, NULL);
assert(copy);
+
+ vector<unsigned> item_orig_pos; // new item pos -> orig item pos
for (unsigned i = 0; i < original->size; i++) {
int item = original->items[i];
int weight = crush_get_bucket_item_weight(original, i);
if (item >= 0) {
if (class_map.count(item) != 0 && class_map[item] == device_class) {
- int res = bucket_add_item(copy, item, weight);
+ int res = crush_bucket_add_item(crush, copy, item, weight);
if (res)
return res;
+ } else {
+ continue;
}
} else {
int child_copy_id;
int res = device_class_clone(item, device_class, old_class_bucket,
- used_ids, &child_copy_id);
+ used_ids, &child_copy_id,
+ cmap_item_weight);
if (res < 0)
return res;
crush_bucket *child_copy = get_bucket(child_copy_id);
assert(!IS_ERR(child_copy));
- res = bucket_add_item(copy, child_copy_id, child_copy->weight);
+ res = crush_bucket_add_item(crush, copy, child_copy_id,
+ child_copy->weight);
if (res)
return res;
}
+ item_orig_pos.push_back(i);
}
+ assert(item_orig_pos.size() == copy->size);
+
int bno = 0;
if (old_class_bucket.count(original_id) &&
old_class_bucket.at(original_id).count(device_class)) {
if (res)
return res;
assert(!bno || bno == *clone);
+
res = set_item_class(*clone, device_class);
if (res < 0)
return res;
+
// we do not use set_item_name because the name is intentionally invalid
name_map[*clone] = copy_name;
if (have_rmaps)
name_rmap[copy_name] = *clone;
class_bucket[original_id][device_class] = *clone;
+
+ // set up choose_args for the new bucket.
+ for (auto& w : choose_args) {
+ crush_choose_arg_map& cmap = w.second;
+ if (-1-bno >= (int)cmap.size) {
+ unsigned new_size = -1-bno + 1;
+ cmap.args = (crush_choose_arg*)realloc(cmap.args,
+ new_size * sizeof(cmap.args[0]));
+ memset(cmap.args + cmap.size, 0,
+ (new_size - cmap.size) * sizeof(cmap.args[0]));
+ }
+ auto& o = cmap.args[-1-original_id];
+ auto& n = cmap.args[-1-bno];
+ n.ids_size = 0; // FIXME: implement me someday
+ n.weight_set_size = o.weight_set_size;
+ n.weight_set = (crush_weight_set*)calloc(
+ n.weight_set_size, sizeof(crush_weight_set));
+ for (size_t s = 0; s < n.weight_set_size; ++s) {
+ n.weight_set[s].size = copy->size;
+ n.weight_set[s].weights = (__u32*)calloc(copy->size, sizeof(__u32));
+ }
+ for (size_t s = 0; s < n.weight_set_size; ++s) {
+ vector<int> bucket_weights(n.weight_set_size);
+ for (size_t i = 0; i < copy->size; ++i) {
+ int item = copy->items[i];
+ if (item >= 0) {
+ n.weight_set[s].weights[i] = o.weight_set[s].weights[item_orig_pos[i]];
+ } else {
+ n.weight_set[s].weights[i] = (*cmap_item_weight)[w.first][item][s];
+ }
+ bucket_weights[s] += n.weight_set[s].weights[i];
+ }
+ (*cmap_item_weight)[w.first][bno] = bucket_weights;
+ }
+ }
return 0;
}
// Remove any device classes that no longer have live users.
void CrushWrapper::cleanup_dead_classes()
{
-  for (auto &c: class_name) {
-    if (_class_is_dead(c.first))
-      remove_class_name(c.second);
+  // remove_class_name() erases entries from class_name, which would
+  // invalidate the iterator we are holding (the old range-for mutated the
+  // map mid-iteration); copy the name and advance *before* erasing.
+  auto p = class_name.begin();
+  while (p != class_name.end()) {
+    if (_class_is_dead(p->first)) {
+      string n = p->second;
+      ++p;
+      remove_class_name(n);
+    } else {
+      ++p;
+    }
  }
}
{
std::map<int32_t, map<int32_t, int32_t> > old_class_bucket = class_bucket;
cleanup_dead_classes();
- int r = trim_roots_with_class(false);
+ int r = trim_roots_with_class();
if (r < 0)
return r;
class_bucket.clear();
* when a bucket is in use.
*
* @param item id to remove
- * @param unused true if only unused items should be removed
* @return 0 on success, negative on error
*/
- int remove_root(int item, bool unused);
+ int remove_root(int item);
/**
* remove all instances of an item nested beneath a certain point from the map
int original, int device_class,
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket,
const std::set<int32_t>& used_ids,
- int *clone);
+ int *clone,
+ map<int,map<int,vector<int>>> *cmap_item_weight);
+ int rename_class(const string& srcname, const string& dstname);
int populate_classes(
const std::map<int32_t, map<int32_t, int32_t>>& old_class_bucket);
bool _class_is_dead(int class_id);
void cleanup_dead_classes();
int rebuild_roots_with_classes();
/* remove unused roots generated for class devices */
- int trim_roots_with_class(bool unused);
+ int trim_roots_with_class();
void start_choose_profile() {
free(crush->choose_tries);
for (i = 0; i < bucket->h.size; i++) {
if (bucket->h.items[i] == item) {
- bucket->h.size--;
if (bucket->item_weights[i] < bucket->h.weight)
bucket->h.weight -= bucket->item_weights[i];
else
bucket->h.weight = 0;
- for (j = i; j < bucket->h.size; j++) {
+ for (j = i; j < bucket->h.size - 1; j++) {
bucket->h.items[j] = bucket->h.items[j+1];
bucket->item_weights[j] = bucket->item_weights[j+1];
}
}
if (i == bucket->h.size)
return -ENOENT;
-
+ bucket->h.size--;
+ if (bucket->h.size == 0) {
+ /* don't bother reallocating */
+ return 0;
+ }
void *_realloc = NULL;
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
for (i = 0; i < bucket->h.size; i++) {
if (bucket->h.items[i] == item) {
- bucket->h.size--;
if (bucket->item_weights[i] < bucket->h.weight)
bucket->h.weight -= bucket->item_weights[i];
else
bucket->h.weight = 0;
- for (j = i; j < bucket->h.size; j++) {
+ for (j = i; j < bucket->h.size - 1; j++) {
bucket->h.items[j] = bucket->h.items[j+1];
bucket->item_weights[j] = bucket->item_weights[j+1];
}
if (i == bucket->h.size)
return -ENOENT;
+ bucket->h.size--;
+ if (!newsize) {
+ /* don't bother reallocating a 0-length array. */
+ return 0;
+ }
+
void *_realloc = NULL;
if ((_realloc = realloc(bucket->h.items, sizeof(__s32)*newsize)) == NULL) {
});
cmdctx->reply(r, "");
return true;
+ } else if (prefix == "osd safe-to-destroy") {
+ vector<string> ids;
+ cmd_getval(g_ceph_context, cmdctx->cmdmap, "ids", ids);
+ set<int> osds;
+ int r;
+ cluster_state.with_osdmap([&](const OSDMap& osdmap) {
+ r = osdmap.parse_osd_id_list(ids, &osds, &ss);
+ });
+ if (!r && osds.empty()) {
+ ss << "must specify one or more OSDs";
+ r = -EINVAL;
+ }
+ if (r < 0) {
+ cmdctx->reply(r, ss);
+ return true;
+ }
+ set<int> active_osds, missing_stats, stored_pgs;
+ int affected_pgs = 0;
+ cluster_state.with_pgmap([&](const PGMap& pg_map) {
+ if (pg_map.num_pg_unknown > 0) {
+ ss << pg_map.num_pg_unknown << " pgs have unknown state; cannot draw"
+ << " any conclusions";
+ r = -EAGAIN;
+ return;
+ }
+ int num_active_clean = 0;
+ for (auto& p : pg_map.num_pg_by_state) {
+ unsigned want = PG_STATE_ACTIVE|PG_STATE_CLEAN;
+ if ((p.first & want) == want) {
+ num_active_clean += p.second;
+ }
+ }
+ cluster_state.with_osdmap([&](const OSDMap& osdmap) {
+ for (auto osd : osds) {
+ if (!osdmap.exists(osd)) {
+ continue; // clearly safe to destroy
+ }
+ auto q = pg_map.num_pg_by_osd.find(osd);
+ if (q != pg_map.num_pg_by_osd.end()) {
+ if (q->second.acting > 0 || q->second.up > 0) {
+ active_osds.insert(osd);
+ affected_pgs += q->second.acting + q->second.up;
+ continue;
+ }
+ }
+ if (num_active_clean < pg_map.num_pg) {
+ // all pgs aren't active+clean; we need to be careful.
+ auto p = pg_map.osd_stat.find(osd);
+ if (p == pg_map.osd_stat.end()) {
+ missing_stats.insert(osd);
+ }
+ if (p->second.num_pgs > 0) {
+ stored_pgs.insert(osd);
+ }
+ }
+ }
+ });
+ });
+ if (!r && !active_osds.empty()) {
+ ss << "OSD(s) " << active_osds << " have " << affected_pgs
+ << " pgs currently mapped to them";
+ r = -EBUSY;
+ } else if (!missing_stats.empty()) {
+ ss << "OSD(s) " << missing_stats << " have no reported stats, and not all"
+ << " PGs are active+clean; we cannot draw any conclusions";
+ r = -EAGAIN;
+ } else if (!stored_pgs.empty()) {
+ ss << "OSD(s) " << stored_pgs << " last reported they still store some PG"
+ << " data, and not all PGs are active+clean; we cannot be sure they"
+ << " aren't still needed.";
+ r = -EBUSY;
+ }
+ if (r) {
+ cmdctx->reply(r, ss);
+ return true;
+ }
+ ss << "OSD(s) " << osds << " are safe to destroy without reducing data"
+ << " durability.";
+ cmdctx->reply(0, ss);
+ return true;
+ } else if (prefix == "osd ok-to-stop") {
+ vector<string> ids;
+ cmd_getval(g_ceph_context, cmdctx->cmdmap, "ids", ids);
+ set<int> osds;
+ int r;
+ cluster_state.with_osdmap([&](const OSDMap& osdmap) {
+ r = osdmap.parse_osd_id_list(ids, &osds, &ss);
+ });
+ if (!r && osds.empty()) {
+ ss << "must specify one or more OSDs";
+ r = -EINVAL;
+ }
+ if (r < 0) {
+ cmdctx->reply(r, ss);
+ return true;
+ }
+ map<pg_t,int> pg_delta; // pgid -> net acting set size change
+ int dangerous_pgs = 0;
+ cluster_state.with_pgmap([&](const PGMap& pg_map) {
+ return cluster_state.with_osdmap([&](const OSDMap& osdmap) {
+ if (pg_map.num_pg_unknown > 0) {
+ ss << pg_map.num_pg_unknown << " pgs have unknown state; "
+ << "cannot draw any conclusions";
+ r = -EAGAIN;
+ return;
+ }
+ for (auto osd : osds) {
+ auto p = pg_map.pg_by_osd.find(osd);
+ if (p != pg_map.pg_by_osd.end()) {
+ for (auto& pgid : p->second) {
+ --pg_delta[pgid];
+ }
+ }
+ }
+ for (auto& p : pg_delta) {
+ auto q = pg_map.pg_stat.find(p.first);
+ if (q == pg_map.pg_stat.end()) {
+ ss << "missing information about " << p.first << "; cannot draw"
+ << " any conclusions";
+ r = -EAGAIN;
+ return;
+ }
+ if (!(q->second.state & PG_STATE_ACTIVE) ||
+ (q->second.state & PG_STATE_DEGRADED)) {
+ // we don't currently have a good way to tell *how* degraded
+ // a degraded PG is, so we have to assume we cannot remove
+ // any more replicas/shards.
+ ++dangerous_pgs;
+ continue;
+ }
+ const pg_pool_t *pi = osdmap.get_pg_pool(p.first.pool());
+ if (!pi) {
+ ++dangerous_pgs; // pool is creating or deleting
+ } else {
+ if (q->second.acting.size() + p.second < pi->min_size) {
+ ++dangerous_pgs;
+ }
+ }
+ }
+ });
+ });
+ if (r) {
+ cmdctx->reply(r, ss);
+ return true;
+ }
+ if (dangerous_pgs) {
+ ss << dangerous_pgs << " PGs are already degraded or might become "
+ << "unavailable";
+ cmdctx->reply(-EBUSY, ss);
+ return true;
+ }
+ ss << "OSD(s) " << osds << " are ok to stop without reducing"
+ << " availability, provided there are no other concurrent failures"
+ << " or interventions. " << pg_delta.size() << " PGs are likely to be"
+ << " degraded (but remain available) as a result.";
+ cmdctx->reply(0, ss);
+ return true;
} else if (prefix == "pg force-recovery" ||
prefix == "pg force-backfill" ||
prefix == "pg cancel-force-recovery" ||
"dry run of reweight OSDs by PG distribution [overload-percentage-for-consideration, default 120]", \
"osd", "r", "cli,rest")
+COMMAND("osd safe-to-destroy name=ids,type=CephString,n=N",
+ "check whether osd(s) can be safely destroyed without reducing data durability",
+ "osd", "r", "cli,rest")
+COMMAND("osd ok-to-stop name=ids,type=CephString,n=N",
+ "check whether osd(s) can be safely stopped without reducing immediate"\
+ " data availability", "osd", "r", "cli,rest")
+
COMMAND("osd scrub " \
"name=who,type=CephString", \
"initiate scrub on osd <who>, or use <all|any|*> to scrub all", \
pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata);
assert(metadata_pool != NULL); // Checked it existed above
- // we must make these checks before we even allow ourselves to *think*
- // about requesting a proposal to the osdmonitor and bail out now if
- // we believe we must. bailing out *after* we request the proposal is
- // bad business as we could have changed the osdmon's state and ending up
- // returning an error to the user.
int r = _check_pool(mon->osdmon()->osdmap, data, false, force, &ss);
if (r < 0) {
return r;
if (r < 0) {
return r;
}
-
- mon->osdmon()->do_application_enable(data,
- pg_pool_t::APPLICATION_NAME_CEPHFS);
- mon->osdmon()->do_application_enable(metadata,
- pg_pool_t::APPLICATION_NAME_CEPHFS);
+
+ // if we're running as luminous, we have to set the pool application metadata
+ if (mon->osdmon()->osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS ||
+ mon->osdmon()->pending_inc.new_require_osd_release >= CEPH_RELEASE_LUMINOUS) {
+ if (!mon->osdmon()->is_writeable()) {
+ // not allowed to write yet, so retry when we can
+ mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
+ return -EAGAIN;
+ }
+ mon->osdmon()->do_application_enable(data,
+ pg_pool_t::APPLICATION_NAME_CEPHFS);
+ mon->osdmon()->do_application_enable(metadata,
+ pg_pool_t::APPLICATION_NAME_CEPHFS);
+ mon->osdmon()->propose_pending();
+ }
// All checks passed, go ahead and create.
fsmap.create_filesystem(fs_name, metadata, data,
"remove class of the osd(s) <id> [<id>...]," \
"or use <all|any|*> to remove all.", \
"osd", "rw", "cli,rest")
+COMMAND("osd crush class rename " \
+ "name=srcname,type=CephString,goodchars=[A-Za-z0-9-_] " \
+ "name=dstname,type=CephString,goodchars=[A-Za-z0-9-_]", \
+ "rename crush device class <srcname> to <dstname>", \
+ "osd", "rw", "cli,rest")
COMMAND("osd crush create-or-move " \
"name=id,type=CephOsdName " \
"name=weight,type=CephFloat,range=0.0 " \
mdsmon()->count_metadata("ceph_version", &mds);
f->open_object_section("mds");
- for (auto& p : mon) {
+ for (auto& p : mds) {
f->dump_int(p.first.c_str(), p.second);
overall[p.first] += p.second;
}
void OSDMonitor::do_application_enable(int64_t pool_id,
const std::string &app_name)
{
- assert(paxos->is_plugged());
+ assert(paxos->is_plugged() && is_writeable());
dout(20) << __func__ << ": pool_id=" << pool_id << ", app_name=" << app_name
<< dendl;
+ assert(osdmap.require_osd_release >= CEPH_RELEASE_LUMINOUS ||
+ pending_inc.new_require_osd_release >= CEPH_RELEASE_LUMINOUS);
+
auto pp = osdmap.get_pg_pool(pool_id);
assert(pp != nullptr);
new Monitor::C_Command(mon,op, 0, rs, get_last_committed() + 1));
return true;
}
+ } else if (prefix == "osd crush class rename") {
+ string srcname, dstname;
+ if (!cmd_getval(g_ceph_context, cmdmap, "srcname", srcname)) {
+ err = -EINVAL;
+ goto reply;
+ }
+ if (!cmd_getval(g_ceph_context, cmdmap, "dstname", dstname)) {
+ err = -EINVAL;
+ goto reply;
+ }
+
+ CrushWrapper newcrush;
+ _get_pending_crush(newcrush);
+
+ if (!newcrush.class_exists(srcname)) {
+ err = -ENOENT;
+ ss << "class '" << srcname << "' does not exist";
+ goto reply;
+ }
+
+ if (newcrush.class_exists(dstname)) {
+ err = -EEXIST;
+ ss << "class '" << dstname << "' already exists";
+ goto reply;
+ }
+ err = newcrush.rename_class(srcname, dstname);
+ if (err < 0) {
+ ss << "fail to rename '" << srcname << "' to '" << dstname << "' : "
+ << cpp_strerror(err);
+ goto reply;
+ }
+
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
+ ss << "rename class '" << srcname << "' to '" << dstname << "'";
+ goto update;
} else if (prefix == "osd crush add-bucket") {
// os crush add-bucket <name> <type>
string name, typestr;
pending_inc.old_pg_upmap_items.insert(p.first);
}
}
+
+ // remove any choose_args for this pool
+ CrushWrapper newcrush;
+ _get_pending_crush(newcrush);
+ if (newcrush.have_choose_args(pool)) {
+ dout(10) << __func__ << " removing choose_args for pool " << pool << dendl;
+ newcrush.rm_choose_args(pool);
+ pending_inc.crush.clear();
+ newcrush.encode(pending_inc.crush, mon->get_quorum_con_features());
+ }
return 0;
}
void finish(int r) override {
assert(r >= 0);
Mutex::Locker l(paxos->mon->lock);
+ if (paxos->is_shutdown()) {
+ paxos->abort_commit();
+ return;
+ }
paxos->commit_finish();
}
};
+// Handle a commit that completes after shutdown has begun: the result is
+// discarded; we only drop the in-flight commit count and, once the last
+// outstanding commit drains, wake the thread blocked in shutdown().
+void Paxos::abort_commit()
+{
+  assert(commits_started > 0);
+  --commits_started;
+  if (commits_started == 0)
+    shutdown_cond.Signal();
+}
+
void Paxos::commit_start()
{
dout(10) << __func__ << " " << (last_committed+1) << dendl;
state = STATE_WRITING;
else
ceph_abort();
+ ++commits_started;
if (mon->get_quorum().size() > 1) {
// cancel timeout event
// it doesn't need to flush the store queue
assert(is_writing() || is_writing_previous());
state = STATE_REFRESH;
+ assert(commits_started > 0);
+ --commits_started;
if (do_refresh()) {
commit_proposal();
{
dout(10) << __func__ << " cancel all contexts" << dendl;
+ state = STATE_SHUTDOWN;
+
// discard pending transaction
pending_proposal.reset();
+ // Let store finish commits in progress
+ // XXX: I assume I can't use finish_contexts() because the store
+ // is going to trigger
+ while(commits_started > 0)
+ shutdown_cond.Wait(mon->lock);
+
finish_contexts(g_ceph_context, waiting_for_writeable, -ECANCELED);
finish_contexts(g_ceph_context, waiting_for_commit, -ECANCELED);
finish_contexts(g_ceph_context, waiting_for_readable, -ECANCELED);
STATE_WRITING_PREVIOUS,
// leader: refresh following a commit
STATE_REFRESH,
+ // Shutdown after WRITING or WRITING_PREVIOUS
+ STATE_SHUTDOWN
};
/**
return "writing-previous";
case STATE_REFRESH:
return "refresh";
+ case STATE_SHUTDOWN:
+ return "shutdown";
default:
return "UNKNOWN";
}
/**
* @}
*/
+ int commits_started = 0;
+
+ Cond shutdown_cond;
public:
/**
/// @return 'true' if we are refreshing an update just committed
bool is_refresh() const { return state == STATE_REFRESH; }
+ /// @return 'true' if we are in the process of shutting down
+ bool is_shutdown() const { return state == STATE_SHUTDOWN; }
+
private:
/**
* @defgroup Paxos_h_recovery_vars Common recovery-related member variables
*/
void commit_start();
void commit_finish(); ///< finish a commit after txn becomes durable
+ void abort_commit(); ///< Handle commit finish after shutdown started
/**
* Commit the new value to stable storage as being the latest available
* version.
* Child must populate this during encode_pending() by calling encode_health().
*/
health_check_map_t health_checks;
-public:
- const health_check_map_t& get_health_checks() {
- return health_checks;
- }
-
protected:
/**
* format of our state in leveldb, 0 for default
*/
version_t format_version;
+public:
+ const health_check_map_t& get_health_checks() {
+ return health_checks;
+ }
+
/**
* @defgroup PaxosService_h_callbacks Callback classes
* @{
* @}
*/
-public:
/**
* @param mn A Monitor instance
* @param p A Paxos instance
if (b.is_spanning()) {
out << " spanning " << b.id;
}
- out << " " << b.get_blob() << " " << b.get_blob_use_tracker()
- << " " << *b.shared_blob
- << ")";
+ out << " " << b.get_blob() << " " << b.get_blob_use_tracker();
+ if (b.shared_blob) {
+ out << " " << *b.shared_blob;
+ } else {
+ out << " (shared_blob=NULL)";
+ }
+ out << ")";
return out;
}
case Transaction::OP_TRUNCATE:
{
uint64_t off = op->off;
- _truncate(txc, c, o, off);
+ r = _truncate(txc, c, o, off);
}
break;
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< dendl;
- _assign_nid(txc, o);
- int r = _do_write(txc, c, o, offset, length, bl, fadvise_flags);
- txc->write_onode(o);
-
+ int r = 0;
+ if (offset + length >= OBJECT_MAX_SIZE) {
+ r = -E2BIG;
+ } else {
+ _assign_nid(txc, o);
+ r = _do_write(txc, c, o, offset, length, bl, fadvise_flags);
+ txc->write_onode(o);
+ }
dout(10) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< " = " << r << dendl;
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< dendl;
- _assign_nid(txc, o);
- int r = _do_zero(txc, c, o, offset, length);
+ int r = 0;
+ if (offset + length >= OBJECT_MAX_SIZE) {
+ r = -E2BIG;
+ } else {
+ _assign_nid(txc, o);
+ r = _do_zero(txc, c, o, offset, length);
+ }
dout(10) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << "~" << length << std::dec
<< " = " << r << dendl;
txc->write_onode(o);
}
-void BlueStore::_truncate(TransContext *txc,
+int BlueStore::_truncate(TransContext *txc,
CollectionRef& c,
OnodeRef& o,
uint64_t offset)
dout(15) << __func__ << " " << c->cid << " " << o->oid
<< " 0x" << std::hex << offset << std::dec
<< dendl;
- _do_truncate(txc, c, o, offset);
+ int r = 0;
+ if (offset >= OBJECT_MAX_SIZE) {
+ r = -E2BIG;
+ } else {
+ _do_truncate(txc, c, o, offset);
+ }
+ dout(10) << __func__ << " " << c->cid << " " << o->oid
+ << " 0x" << std::hex << offset << std::dec
+ << " = " << r << dendl;
+ return r;
}
int BlueStore::_do_remove(
uint64_t end = srcoff + length;
uint32_t dirty_range_begin = 0;
uint32_t dirty_range_end = 0;
+ bool src_dirty = false;
for (auto ep = oldo->extent_map.seek_lextent(srcoff);
ep != oldo->extent_map.extent_map.end();
++ep) {
// make sure it is shared
if (!blob.is_shared()) {
c->make_blob_shared(_assign_blobid(txc), e.blob);
- if (dirty_range_begin == 0 && dirty_range_end == 0) {
+ if (!src_dirty) {
+ src_dirty = true;
dirty_range_begin = e.logical_offset;
}
assert(e.logical_end() > 0);
dout(20) << __func__ << " dst " << *ne << dendl;
++n;
}
- if (dirty_range_end > dirty_range_begin) {
+ if (src_dirty) {
oldo->extent_map.dirty_range(dirty_range_begin,
dirty_range_end - dirty_range_begin);
txc->write_onode(oldo);
<< " to offset 0x" << dstoff << std::dec << dendl;
int r = 0;
+ if (srcoff + length >= OBJECT_MAX_SIZE ||
+ dstoff + length >= OBJECT_MAX_SIZE) {
+ r = -E2BIG;
+ goto out;
+ }
if (srcoff + length > oldo->onode.size) {
r = -EINVAL;
goto out;
OnodeRef o,
uint64_t offset,
set<SharedBlob*> *maybe_unshared_blobs=0);
- void _truncate(TransContext *txc,
+ int _truncate(TransContext *txc,
CollectionRef& c,
OnodeRef& o,
uint64_t offset);
}
osd_stat_t OSDService::set_osd_stat(const struct store_statfs_t &stbuf,
- vector<int>& hb_peers)
+ vector<int>& hb_peers,
+ int num_pgs)
{
uint64_t bytes = stbuf.total;
uint64_t used = bytes - stbuf.available;
osd_stat.kb = bytes >> 10;
osd_stat.kb_used = used >> 10;
osd_stat.kb_avail = avail >> 10;
+ osd_stat.num_pgs = num_pgs;
return osd_stat;
}
}
return;
}
- auto new_stat = set_osd_stat(stbuf, hb_peers);
+ auto new_stat = set_osd_stat(stbuf, hb_peers, osd->get_num_pgs());
dout(20) << "update_osd_stat " << new_stat << dendl;
assert(new_stat.kb);
float ratio = ((float)new_stat.kb_used) / ((float)new_stat.kb);
void update_osd_stat(vector<int>& hb_peers);
osd_stat_t set_osd_stat(const struct store_statfs_t &stbuf,
- vector<int>& hb_peers);
+ vector<int>& hb_peers,
+ int num_pgs);
osd_stat_t get_osd_stat() {
Mutex::Locker l(stat_lock);
++seq;
public:
PG *lookup_lock_pg(spg_t pgid);
+ int get_num_pgs() {
+ RWLock::RLocker l(pg_map_lock);
+ return pg_map.size();
+ }
+
protected:
PG *_open_lock_pg(OSDMapRef createmap,
spg_t pg, bool no_lockdep_check=false);
}
}
}
+
+int OSDMap::parse_osd_id_list(const vector<string>& ls, set<int> *out,
+ ostream *ss) const
+{
+ out->clear();
+ for (auto i = ls.begin(); i != ls.end(); ++i) {
+ if (i == ls.begin() &&
+ (*i == "any" || *i == "all" || *i == "*")) {
+ get_all_osds(*out);
+ break;
+ }
+ long osd = parse_osd_id(i->c_str(), ss);
+ if (osd < 0) {
+ *ss << "invalid osd id '" << *i << "'";
+ return -EINVAL;
+ }
+ out->insert(osd);
+ }
+ return 0;
+}
bool check_new_blacklist_entries() const { return new_blacklist_entries; }
void check_health(health_check_map_t *checks) const;
+
+ int parse_osd_id_list(const vector<string>& ls,
+ set<int> *out,
+ ostream *ss) const;
};
WRITE_CLASS_ENCODER_FEATURES(OSDMap)
WRITE_CLASS_ENCODER_FEATURES(OSDMap::Incremental)
eversion_t on_disk_rollback_info_trimmed_to = eversion_t();
ObjectMap::ObjectMapIterator p = store->get_omap_iterator(log_coll, log_oid);
map<eversion_t, hobject_t> divergent_priors;
+ bool must_rebuild = force_rebuild_missing;
missing.may_include_deletes = false;
list<pg_log_entry_t> entries;
list<pg_log_dup_t> dups;
::decode(divergent_priors, bp);
ldpp_dout(dpp, 20) << "read_log_and_missing " << divergent_priors.size()
<< " divergent_priors" << dendl;
- assert(force_rebuild_missing);
+ must_rebuild = true;
debug_verify_stored_missing = false;
} else if (p->key() == "can_rollback_to") {
::decode(on_disk_can_rollback_to, bp);
std::move(entries),
std::move(dups));
- if (force_rebuild_missing || debug_verify_stored_missing) {
+ if (must_rebuild || debug_verify_stored_missing) {
// build missing
if (debug_verify_stored_missing || info.last_complete < info.last_update) {
ldpp_dout(dpp, 10)
}
}
} else {
- assert(force_rebuild_missing);
+ assert(must_rebuild);
for (map<eversion_t, hobject_t>::reverse_iterator i =
divergent_priors.rbegin();
i != divergent_priors.rend();
}
}
- if (!force_rebuild_missing) {
+ if (!must_rebuild) {
if (clear_divergent_priors)
(*clear_divergent_priors) = false;
missing.flush();
{
f->dump_unsigned("up_from", up_from);
f->dump_unsigned("seq", seq);
+ f->dump_unsigned("num_pgs", num_pgs);
f->dump_unsigned("kb", kb);
f->dump_unsigned("kb_used", kb_used);
f->dump_unsigned("kb_avail", kb_avail);
void osd_stat_t::encode(bufferlist &bl) const
{
- ENCODE_START(6, 2, bl);
+ ENCODE_START(7, 2, bl);
::encode(kb, bl);
::encode(kb_used, bl);
::encode(kb_avail, bl);
::encode(os_perf_stat, bl);
::encode(up_from, bl);
::encode(seq, bl);
+ ::encode(num_pgs, bl);
ENCODE_FINISH(bl);
}
::decode(up_from, bl);
::decode(seq, bl);
}
+ if (struct_v >= 7) {
+ ::decode(num_pgs, bl);
+ }
DECODE_FINISH(bl);
}
epoch_t up_from = 0;
uint64_t seq = 0;
+ uint32_t num_pgs = 0;
+
osd_stat_t() : kb(0), kb_used(0), kb_avail(0),
snap_trim_queue_len(0), num_snap_trimming(0) {}
num_snap_trimming += o.num_snap_trimming;
op_queue_age_hist.add(o.op_queue_age_hist);
os_perf_stat.add(o.os_perf_stat);
+ num_pgs += o.num_pgs;
}
void sub(const osd_stat_t& o) {
kb -= o.kb;
num_snap_trimming -= o.num_snap_trimming;
op_queue_age_hist.sub(o.op_queue_age_hist);
os_perf_stat.sub(o.os_perf_stat);
+ num_pgs -= o.num_pgs;
}
void dump(Formatter *f) const;
l.num_snap_trimming == r.num_snap_trimming &&
l.hb_peers == r.hb_peers &&
l.op_queue_age_hist == r.op_queue_age_hist &&
- l.os_perf_stat == r.os_perf_stat;
+ l.os_perf_stat == r.os_perf_stat &&
+ l.num_pgs == r.num_pgs;
}
inline bool operator!=(const osd_stat_t& l, const osd_stat_t& r) {
return !(l == r);
}
}
-TEST(CrushWrapper, remove_unused_root) {
+TEST(CrushWrapper, remove_root) {
CrushWrapper c;
c.create();
c.set_type_name(1, "host");
ASSERT_TRUE(c.name_exists("default"));
ASSERT_TRUE(c.name_exists("r11"));
ASSERT_TRUE(c.name_exists("r12"));
- ASSERT_EQ(c.remove_root(c.get_item_id("default"), true), 0);
+ ASSERT_EQ(c.remove_root(c.get_item_id("default")), 0);
ASSERT_FALSE(c.name_exists("default"));
- ASSERT_TRUE(c.name_exists("r11"));
+ ASSERT_FALSE(c.name_exists("r11"));
ASSERT_FALSE(c.name_exists("r12"));
}
int root_id = c.get_item_id("default");
int clone_id;
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
+ map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
set<int32_t> used_ids;
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
- &clone_id), 0);
+ &clone_id, &cmap_item_weight), 0);
ASSERT_TRUE(c.name_exists("default"));
ASSERT_TRUE(c.name_exists("default~ssd"));
- c.trim_roots_with_class(true); // do nothing because still in use
- ASSERT_TRUE(c.name_exists("default"));
- ASSERT_TRUE(c.name_exists("default~ssd"));
- c.class_bucket.clear();
- c.trim_roots_with_class(true); // do nothing because still in use
+ c.trim_roots_with_class();
ASSERT_TRUE(c.name_exists("default"));
ASSERT_FALSE(c.name_exists("default~ssd"));
}
c.reweight(g_ceph_context);
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
+ map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
set<int32_t> used_ids;
int root_id = c.get_item_id("default");
int clone_id;
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
- &clone_id), 0);
+ &clone_id, &cmap_item_weight), 0);
ASSERT_TRUE(c.name_exists("default~ssd"));
ASSERT_EQ(clone_id, c.get_item_id("default~ssd"));
ASSERT_TRUE(c.subtree_contains(clone_id, item));
// cloning again does nothing and returns the existing one
int other_clone_id;
ASSERT_EQ(c.device_class_clone(root_id, cl, old_class_bucket, used_ids,
- &other_clone_id), 0);
+ &other_clone_id, &cmap_item_weight), 0);
ASSERT_EQ(clone_id, other_clone_id);
// invalid arguments
ASSERT_EQ(c.device_class_clone(12345, cl, old_class_bucket, used_ids,
- &other_clone_id), -ECHILD);
+ &other_clone_id, &cmap_item_weight), -ECHILD);
ASSERT_EQ(c.device_class_clone(root_id, 12345, old_class_bucket, used_ids,
- &other_clone_id), -EBADF);
+ &other_clone_id, &cmap_item_weight), -EBADF);
}
TEST(CrushWrapper, split_id_class) {
c.class_map[item] = class_id;
map<int32_t, map<int32_t, int32_t>> old_class_bucket;
+ map<int,map<int,vector<int>>> cmap_item_weight; // cargs -> bno -> weights
set<int32_t> used_ids;
int item_id = c.get_item_id("default");
int clone_id;
ASSERT_EQ(c.device_class_clone(item_id, class_id, old_class_bucket, used_ids,
- &clone_id), 0);
+ &clone_id, &cmap_item_weight), 0);
int retrieved_item_id;
int retrieved_class_id;
ASSERT_EQ(c.split_id_class(clone_id, &retrieved_item_id, &retrieved_class_id), 0);
t.remove_collection(cid);
cerr << "Invalid rm coll" << std::endl;
PrCtl unset_dumpable;
- EXPECT_DEATH(apply_transaction(store, &osr, std::move(t)), ".*Directory not empty.*");
+ EXPECT_DEATH(apply_transaction(store, &osr, std::move(t)), "");
}
{
ObjectStore::Transaction t;
t.remove(cid, hoid2);
t.remove_collection(cid);
PrCtl unset_dumpable;
- EXPECT_DEATH(apply_transaction(store, &osr, std::move(t)), ".*Directory not empty.*");
+ EXPECT_DEATH(apply_transaction(store, &osr, std::move(t)), "");
}
{
ObjectStore::Transaction t;
}
}
+TEST_F(OSDMapTest, parse_osd_id_list) {
+ set_up_map();
+ set<int> out;
+ set<int> all;
+ osdmap.get_all_osds(all);
+
+ ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, &cout));
+ ASSERT_EQ(1, out.size());
+ ASSERT_EQ(0, *out.begin());
+
+ ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout));
+ ASSERT_EQ(1, out.size());
+ ASSERT_EQ(1, *out.begin());
+
+ ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout));
+ ASSERT_EQ(2, out.size());
+ ASSERT_EQ(0, *out.begin());
+ ASSERT_EQ(1, *out.rbegin());
+
+ ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout));
+ ASSERT_EQ(2, out.size());
+ ASSERT_EQ(0, *out.begin());
+ ASSERT_EQ(1, *out.rbegin());
+
+ ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout));
+ ASSERT_EQ(all.size(), out.size());
+ ASSERT_EQ(all, out);
+
+ ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout));
+ ASSERT_EQ(all, out);
+
+ ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout));
+ ASSERT_EQ(all, out);
+
+ ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout));
+ ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout));
+}
+
TEST(PGTempMap, basic)
{
PGTempMap m;
ASSERT_EQ(m.find(b), m.end());
ASSERT_EQ(998u, m.size());
}
+
" getxattr <obj-name> attr\n"
" setxattr <obj-name> attr val\n"
" rmxattr <obj-name> attr\n"
-" stat objname stat the named object\n"
+" stat <obj-name> stat the named object\n"
" mapext <obj-name>\n"
" rollback <obj-name> <snap-name> roll back object to snap <snap-name>\n"
"\n"