#include <errno.h>
#include <fcntl.h>
#include <iostream>
-#include <map>
+#include <memory>
+#include <optional>
#include <poll.h>
#include <sstream>
#include <stdio.h>
#include <string.h>
#include <string>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <sys/types.h>
+#include <tuple>
#include <unistd.h>
+#include <utility>
#include "auth/KeyRing.h"
#include "common/errno.h"
#include "common/safe_io.h"
#include "common/secret.h"
#include "common/TextTable.h"
-#include "include/assert.h"
+#include "common/Thread.h"
+#include "include/ceph_assert.h"
#include "include/stringify.h"
#include "include/krbd.h"
#include "mon/MonMap.h"
#include <blkid/blkid.h>
+#include <boost/algorithm/string/predicate.hpp>
#include <libudev.h>
-using namespace std;
+static const int UDEV_BUF_SIZE = 1 << 20; /* doubled to 2M (SO_RCVBUFFORCE) */
+static const char DEVNODE_PREFIX[] = "/dev/rbd";
+static const char SNAP_HEAD_NAME[] = "-";
-const static int POLL_TIMEOUT=120000;
+#define DEFINE_UDEV_UPTR(what) \
+struct udev_##what##_deleter { \
+ void operator()(udev_##what *p) { \
+ udev_##what##_unref(p); \
+ } \
+}; \
+using udev_##what##_uptr = \
+ std::unique_ptr<udev_##what, udev_##what##_deleter>;
+
+DEFINE_UDEV_UPTR(monitor) /* udev_monitor_uptr */
+DEFINE_UDEV_UPTR(enumerate) /* udev_enumerate_uptr */
+DEFINE_UDEV_UPTR(device) /* udev_device_uptr */
struct krbd_ctx {
CephContext *cct;
struct udev *udev;
};
-static string get_kernel_rbd_name(const char *id)
+struct krbd_spec {
+ std::string pool_name;
+ std::string nspace_name;
+ std::string image_name;
+ std::string snap_name;
+
+ krbd_spec(const char *pool_name, const char *nspace_name,
+ const char *image_name, const char *snap_name)
+ : pool_name(pool_name),
+ nspace_name(nspace_name),
+ image_name(image_name),
+ snap_name(*snap_name ? snap_name : SNAP_HEAD_NAME) { }
+
+ bool operator==(const krbd_spec& rhs) const {
+ return pool_name == rhs.pool_name &&
+ nspace_name == rhs.nspace_name &&
+ image_name == rhs.image_name &&
+ snap_name == rhs.snap_name;
+ }
+};
+
+static std::ostream& operator<<(std::ostream& os, const krbd_spec& spec)
+{
+ os << spec.pool_name << "/";
+ if (!spec.nspace_name.empty())
+ os << spec.nspace_name << "/";
+ os << spec.image_name;
+ if (spec.snap_name != SNAP_HEAD_NAME)
+ os << "@" << spec.snap_name;
+ return os;
+}
+
+static std::optional<krbd_spec> spec_from_dev(udev_device *dev)
+{
+ const char *pool_name = udev_device_get_sysattr_value(dev, "pool");
+ const char *nspace_name = udev_device_get_sysattr_value(dev, "pool_ns");
+ const char *image_name = udev_device_get_sysattr_value(dev, "name");
+ const char *snap_name = udev_device_get_sysattr_value(dev, "current_snap");
+
+ if (!pool_name || !image_name || !snap_name)
+ return std::nullopt;
+
+ return std::make_optional<krbd_spec>(
+ pool_name, nspace_name ?: "", image_name, snap_name);
+}
+
+static udev_device_uptr dev_from_list_entry(udev *udev, udev_list_entry *l)
+{
+ return udev_device_uptr(
+ udev_device_new_from_syspath(udev, udev_list_entry_get_name(l)));
+}
+
+static std::string get_devnode(udev_device *dev)
{
- return string("/dev/rbd") + id;
+ std::string devnode = DEVNODE_PREFIX;
+ devnode += udev_device_get_sysname(dev);
+ return devnode;
}
static int sysfs_write_rbd(const char *which, const string& buf)
return access("/sys/module/rbd/parameters/single_major", F_OK) == 0;
}
-static int build_map_buf(CephContext *cct, const char *pool, const char *image,
- const char *snap, const char *options, string *pbuf)
+static int build_map_buf(CephContext *cct, const krbd_spec& spec,
+ const char *options, string *pbuf)
{
ostringstream oss;
int r;
MonMap monmap;
- r = monmap.build_initial(cct, cerr);
+ r = monmap.build_initial(cct, false, cerr);
if (r < 0)
return r;
oss << " name=" << cct->_conf->name.get_id();
KeyRing keyring;
- r = keyring.from_ceph_context(cct);
- if (r == -ENOENT && !(cct->_conf->keyfile.length() ||
- cct->_conf->key.length()))
- r = 0;
- if (r < 0) {
- cerr << "rbd: failed to get secret" << std::endl;
- return r;
+ auto auth_client_required =
+ cct->_conf.get_val<std::string>("auth_client_required");
+ if (auth_client_required != "none") {
+ r = keyring.from_ceph_context(cct);
+ auto keyfile = cct->_conf.get_val<std::string>("keyfile");
+ auto key = cct->_conf.get_val<std::string>("key");
+ if (r == -ENOENT && keyfile.empty() && key.empty())
+ r = 0;
+ if (r < 0) {
+ cerr << "rbd: failed to get secret" << std::endl;
+ return r;
+ }
}
CryptoKey secret;
if (strcmp(options, "") != 0)
oss << "," << options;
+ if (!spec.nspace_name.empty())
+ oss << ",_pool_ns=" << spec.nspace_name;
- oss << " " << pool << " " << image << " " << snap;
+ oss << " " << spec.pool_name << " " << spec.image_name << " "
+ << spec.snap_name;
*pbuf = oss.str();
return 0;
}
-static int wait_for_udev_add(struct udev_monitor *mon, const char *pool,
- const char *image, const char *snap,
- string *pname)
+/*
+ * Return:
+ * <kernel error, false> - didn't map
+ * <0 or udev error, true> - mapped
+ */
+template <typename F>
+static std::pair<int, bool> wait_for_mapping(int sysfs_r_fd, udev_monitor *mon,
+ F udev_device_handler)
{
- struct udev_device *bus_dev = NULL;
+ struct pollfd fds[2];
+ int sysfs_r = INT_MAX, udev_r = INT_MAX;
+ int r;
+
+ fds[0].fd = sysfs_r_fd;
+ fds[0].events = POLLIN;
+ fds[1].fd = udev_monitor_get_fd(mon);
+ fds[1].events = POLLIN;
- /*
- * Catch /sys/devices/rbd/<id>/ and wait for the corresponding
- * block device to show up. This is necessary because rbd devices
- * and block devices aren't linked together in our sysfs layout.
- */
for (;;) {
- struct pollfd fds[1];
- struct udev_device *dev;
+ if (poll(fds, 2, -1) < 0) {
+ ceph_abort_msgf("poll failed: %d", -errno);
+ }
- fds[0].fd = udev_monitor_get_fd(mon);
- fds[0].events = POLLIN;
- if (poll(fds, 1, POLL_TIMEOUT) < 0)
- return -errno;
+ if (fds[0].revents) {
+ r = safe_read_exact(sysfs_r_fd, &sysfs_r, sizeof(sysfs_r));
+ if (r < 0) {
+ ceph_abort_msgf("safe_read_exact failed: %d", r);
+ }
+ if (sysfs_r < 0) {
+ return std::make_pair(sysfs_r, false);
+ }
+ if (udev_r != INT_MAX) {
+ ceph_assert(!sysfs_r);
+ return std::make_pair(udev_r, true);
+ }
+ fds[0].fd = -1;
+ }
- dev = udev_monitor_receive_device(mon);
- if (!dev)
- continue;
-
- if (strcmp(udev_device_get_action(dev), "add") != 0)
- goto next;
-
- if (!bus_dev) {
- if (strcmp(udev_device_get_subsystem(dev), "rbd") == 0) {
- const char *this_pool = udev_device_get_sysattr_value(dev, "pool");
- const char *this_image = udev_device_get_sysattr_value(dev, "name");
- const char *this_snap = udev_device_get_sysattr_value(dev,
- "current_snap");
-
- if (this_pool && strcmp(this_pool, pool) == 0 &&
- this_image && strcmp(this_image, image) == 0 &&
- this_snap && strcmp(this_snap, snap) == 0) {
- bus_dev = dev;
- continue;
+ if (fds[1].revents) {
+ for (;;) {
+ udev_device_uptr dev(udev_monitor_receive_device(mon));
+ if (!dev) {
+ if (errno != EINTR && errno != EAGAIN) {
+ udev_r = -errno;
+ if (sysfs_r != INT_MAX) {
+ ceph_assert(!sysfs_r);
+ return std::make_pair(udev_r, true);
+ }
+ fds[1].fd = -1;
+ }
+ break;
+ }
+ if (udev_device_handler(std::move(dev))) {
+ udev_r = 0;
+ if (sysfs_r != INT_MAX) {
+ ceph_assert(!sysfs_r);
+ return std::make_pair(udev_r, true);
+ }
+ fds[1].fd = -1;
+ break;
}
}
- } else {
- if (strcmp(udev_device_get_subsystem(dev), "block") == 0) {
- const char *major = udev_device_get_sysattr_value(bus_dev, "major");
- const char *minor = udev_device_get_sysattr_value(bus_dev, "minor");
- const char *this_major = udev_device_get_property_value(dev, "MAJOR");
- const char *this_minor = udev_device_get_property_value(dev, "MINOR");
-
- assert(!minor ^ have_minor_attr());
-
- if (strcmp(this_major, major) == 0 &&
- (!minor || strcmp(this_minor, minor) == 0)) {
- string name = get_kernel_rbd_name(udev_device_get_sysname(bus_dev));
+ }
+ }
+}
- assert(strcmp(udev_device_get_devnode(dev), name.c_str()) == 0);
- *pname = name;
+class UdevMapHandler {
+public:
+ UdevMapHandler(const krbd_spec *spec, std::string *pdevnode) :
+ m_spec(spec), m_pdevnode(pdevnode) {}
- udev_device_unref(dev);
- udev_device_unref(bus_dev);
- break;
+ /*
+ * Catch /sys/devices/rbd/<id>/ and wait for the corresponding
+ * block device to show up. This is necessary because rbd devices
+ * and block devices aren't linked together in our sysfs layout.
+ *
+ * Note that our "block" event can come before the "rbd" event, so
+ * all potential "block" events are gathered in m_block_devs before
+ * m_bus_dev is caught.
+ */
+ bool operator()(udev_device_uptr dev) {
+ if (strcmp(udev_device_get_action(dev.get()), "add")) {
+ return false;
+ }
+ if (!strcmp(udev_device_get_subsystem(dev.get()), "rbd")) {
+ if (!m_bus_dev) {
+ auto spec = spec_from_dev(dev.get());
+ if (spec && *spec == *m_spec) {
+ m_bus_dev = std::move(dev);
+ m_devnode = get_devnode(m_bus_dev.get());
}
}
+ } else if (!strcmp(udev_device_get_subsystem(dev.get()), "block")) {
+ if (boost::starts_with(udev_device_get_devnode(dev.get()),
+ DEVNODE_PREFIX)) {
+ m_block_devs.push_back(std::move(dev));
+ }
}
- next:
- udev_device_unref(dev);
+ if (m_bus_dev && !m_block_devs.empty()) {
+ for (const auto& p : m_block_devs) {
+ if (udev_device_get_devnode(p.get()) == m_devnode) {
+ ceph_assert(!strcmp(
+ udev_device_get_sysattr_value(m_bus_dev.get(), "major"),
+ udev_device_get_property_value(p.get(), "MAJOR")));
+ ceph_assert(!have_minor_attr() || !strcmp(
+ udev_device_get_sysattr_value(m_bus_dev.get(), "minor"),
+ udev_device_get_property_value(p.get(), "MINOR")));
+ *m_pdevnode = std::move(m_devnode);
+ return true;
+ }
+ }
+ m_block_devs.clear();
+ }
+ return false;
}
- return 0;
-}
+private:
+ udev_device_uptr m_bus_dev;
+ std::vector<udev_device_uptr> m_block_devs;
+ std::string m_devnode;
+ const krbd_spec *m_spec;
+ std::string *m_pdevnode;
+};
-static int do_map(struct udev *udev, const char *pool, const char *image,
- const char *snap, const string& buf, string *pname)
+static int do_map(struct udev *udev, const krbd_spec& spec, const string& buf,
+ string *pname)
{
- struct udev_monitor *mon;
+ bool mapped;
+ int fds[2];
int r;
- mon = udev_monitor_new_from_netlink(udev, "udev");
+ udev_monitor_uptr mon(udev_monitor_new_from_netlink(udev, "udev"));
if (!mon)
return -ENOMEM;
- r = udev_monitor_filter_add_match_subsystem_devtype(mon, "rbd", NULL);
- if (r < 0)
- goto out_mon;
-
- r = udev_monitor_filter_add_match_subsystem_devtype(mon, "block", "disk");
+ r = udev_monitor_filter_add_match_subsystem_devtype(mon.get(), "rbd",
+ nullptr);
if (r < 0)
- goto out_mon;
+ return r;
- r = udev_monitor_enable_receiving(mon);
+ r = udev_monitor_filter_add_match_subsystem_devtype(mon.get(), "block",
+ "disk");
if (r < 0)
- goto out_mon;
+ return r;
- r = sysfs_write_rbd_add(buf);
+ r = udev_monitor_set_receive_buffer_size(mon.get(), UDEV_BUF_SIZE);
if (r < 0) {
- cerr << "rbd: sysfs write failed" << std::endl;
- goto out_mon;
+ std::cerr << "rbd: failed to set udev buffer size: " << cpp_strerror(r)
+ << std::endl;
+ /* not fatal */
}
- r = wait_for_udev_add(mon, pool, image, snap, pname);
+ r = udev_monitor_enable_receiving(mon.get());
+ if (r < 0)
+ return r;
+
+ if (pipe2(fds, O_NONBLOCK) < 0)
+ return -errno;
+
+ auto mapper = make_named_thread("mapper", [&buf, sysfs_r_fd = fds[1]]() {
+ int sysfs_r = sysfs_write_rbd_add(buf);
+ int r = safe_write(sysfs_r_fd, &sysfs_r, sizeof(sysfs_r));
+ if (r < 0) {
+ ceph_abort_msgf("safe_write failed: %d", r);
+ }
+ });
+
+ std::tie(r, mapped) = wait_for_mapping(fds[0], mon.get(),
+ UdevMapHandler(&spec, pname));
if (r < 0) {
- cerr << "rbd: wait failed" << std::endl;
- goto out_mon;
+ if (!mapped) {
+ std::cerr << "rbd: sysfs write failed" << std::endl;
+ } else {
+ std::cerr << "rbd: udev wait failed" << std::endl;
+ /* TODO: fall back to enumeration */
+ }
}
-out_mon:
- udev_monitor_unref(mon);
+ mapper.join();
+ close(fds[0]);
+ close(fds[1]);
return r;
}
-static int map_image(struct krbd_ctx *ctx, const char *pool, const char *image,
- const char *snap, const char *options, string *pname)
+static int map_image(struct krbd_ctx *ctx, const krbd_spec& spec,
+ const char *options, string *pname)
{
string buf;
int r;
- if (strcmp(snap, "") == 0)
- snap = "-";
-
- r = build_map_buf(ctx->cct, pool, image, snap, options, &buf);
- if (r < 0)
- return r;
-
/*
* Modprobe rbd kernel module. If it supports single-major device
* number allocation scheme, make sure it's turned on.
+ *
+ * Do this before calling build_map_buf() - it wants "ceph" key type
+ * registered.
*/
if (access("/sys/bus/rbd", F_OK) != 0) {
const char *module_options = NULL;
}
}
- return do_map(ctx->udev, pool, image, snap, buf, pname);
+ r = build_map_buf(ctx->cct, spec, options, &buf);
+ if (r < 0)
+ return r;
+
+ return do_map(ctx->udev, spec, buf, pname);
}
static int devno_to_krbd_id(struct udev *udev, dev_t devno, string *pid)
{
- struct udev_enumerate *enm;
+ udev_enumerate_uptr enm;
struct udev_list_entry *l;
- struct udev_device *dev;
int r;
- enm = udev_enumerate_new(udev);
+retry:
+ enm.reset(udev_enumerate_new(udev));
if (!enm)
return -ENOMEM;
- r = udev_enumerate_add_match_subsystem(enm, "rbd");
+ r = udev_enumerate_add_match_subsystem(enm.get(), "rbd");
if (r < 0)
- goto out_enm;
+ return r;
- r = udev_enumerate_add_match_sysattr(enm, "major",
+ r = udev_enumerate_add_match_sysattr(enm.get(), "major",
stringify(major(devno)).c_str());
if (r < 0)
- goto out_enm;
+ return r;
if (have_minor_attr()) {
- r = udev_enumerate_add_match_sysattr(enm, "minor",
+ r = udev_enumerate_add_match_sysattr(enm.get(), "minor",
stringify(minor(devno)).c_str());
if (r < 0)
- goto out_enm;
+ return r;
}
- r = udev_enumerate_scan_devices(enm);
- if (r < 0)
- goto out_enm;
-
- l = udev_enumerate_get_list_entry(enm);
- if (!l) {
- r = -ENOENT;
- goto out_enm;
+ r = udev_enumerate_scan_devices(enm.get());
+ if (r < 0) {
+ if (r == -ENOENT || r == -ENODEV) {
+ std::cerr << "rbd: udev enumerate failed, retrying" << std::endl;
+ goto retry;
+ }
+ return r;
}
- /* make sure there is only one match */
- assert(!udev_list_entry_get_next(l));
+ l = udev_enumerate_get_list_entry(enm.get());
+ if (!l)
+ return -ENOENT;
- dev = udev_device_new_from_syspath(udev, udev_list_entry_get_name(l));
- if (!dev) {
- r = -ENOMEM;
- goto out_enm;
- }
+ /* make sure there is only one match */
+ ceph_assert(!udev_list_entry_get_next(l));
- *pid = udev_device_get_sysname(dev);
+ auto dev = dev_from_list_entry(udev, l);
+ if (!dev)
+ return -ENOMEM;
- udev_device_unref(dev);
-out_enm:
- udev_enumerate_unref(enm);
- return r;
+ *pid = udev_device_get_sysname(dev.get());
+ return 0;
}
-static int spec_to_devno_and_krbd_id(struct udev *udev, const char *pool,
- const char *image, const char *snap,
- dev_t *pdevno, string *pid)
+static int __enumerate_devices(struct udev *udev, const krbd_spec& spec,
+ bool match_nspace, udev_enumerate_uptr *penm)
{
- struct udev_enumerate *enm;
- struct udev_list_entry *l;
- struct udev_device *dev;
- unsigned int maj, min = 0;
- string err;
+ udev_enumerate_uptr enm;
int r;
- enm = udev_enumerate_new(udev);
+retry:
+ enm.reset(udev_enumerate_new(udev));
if (!enm)
return -ENOMEM;
- r = udev_enumerate_add_match_subsystem(enm, "rbd");
+ r = udev_enumerate_add_match_subsystem(enm.get(), "rbd");
if (r < 0)
- goto out_enm;
+ return r;
- r = udev_enumerate_add_match_sysattr(enm, "pool", pool);
+ r = udev_enumerate_add_match_sysattr(enm.get(), "pool",
+ spec.pool_name.c_str());
if (r < 0)
- goto out_enm;
+ return r;
- r = udev_enumerate_add_match_sysattr(enm, "name", image);
+ if (match_nspace) {
+ r = udev_enumerate_add_match_sysattr(enm.get(), "pool_ns",
+ spec.nspace_name.c_str());
+ } else {
+ /*
+ * Match _only_ devices that don't have pool_ns attribute.
+ * If the kernel supports namespaces, the result will be empty.
+ */
+ r = udev_enumerate_add_nomatch_sysattr(enm.get(), "pool_ns", nullptr);
+ }
if (r < 0)
- goto out_enm;
+ return r;
- r = udev_enumerate_add_match_sysattr(enm, "current_snap", snap);
+ r = udev_enumerate_add_match_sysattr(enm.get(), "name",
+ spec.image_name.c_str());
if (r < 0)
- goto out_enm;
+ return r;
- r = udev_enumerate_scan_devices(enm);
+ r = udev_enumerate_add_match_sysattr(enm.get(), "current_snap",
+ spec.snap_name.c_str());
if (r < 0)
- goto out_enm;
+ return r;
- l = udev_enumerate_get_list_entry(enm);
- if (!l) {
- r = -ENOENT;
- goto out_enm;
+ r = udev_enumerate_scan_devices(enm.get());
+ if (r < 0) {
+ if (r == -ENOENT || r == -ENODEV) {
+ std::cerr << "rbd: udev enumerate failed, retrying" << std::endl;
+ goto retry;
+ }
+ return r;
}
- dev = udev_device_new_from_syspath(udev, udev_list_entry_get_name(l));
- if (!dev) {
- r = -ENOMEM;
- goto out_enm;
+ *penm = std::move(enm);
+ return 0;
+}
+
+static int enumerate_devices(struct udev *udev, const krbd_spec& spec,
+ udev_enumerate_uptr *penm)
+{
+ udev_enumerate_uptr enm;
+ int r;
+
+ r = __enumerate_devices(udev, spec, true, &enm);
+ if (r < 0)
+ return r;
+
+ /*
+ * If no namespace is set, try again with match_nspace=false to
+ * handle older kernels. On a newer kernel the result will remain
+ * the same (i.e. empty).
+ */
+ if (!udev_enumerate_get_list_entry(enm.get()) && spec.nspace_name.empty()) {
+ r = __enumerate_devices(udev, spec, false, &enm);
+ if (r < 0)
+ return r;
}
- maj = strict_strtoll(udev_device_get_sysattr_value(dev, "major"), 10, &err);
+ *penm = std::move(enm);
+ return 0;
+}
+
+static int spec_to_devno_and_krbd_id(struct udev *udev, const krbd_spec& spec,
+ dev_t *pdevno, string *pid)
+{
+ udev_enumerate_uptr enm;
+ struct udev_list_entry *l;
+ unsigned int maj, min = 0;
+ string err;
+ int r;
+
+ r = enumerate_devices(udev, spec, &enm);
+ if (r < 0)
+ return r;
+
+ l = udev_enumerate_get_list_entry(enm.get());
+ if (!l)
+ return -ENOENT;
+
+ auto dev = dev_from_list_entry(udev, l);
+ if (!dev)
+ return -ENOMEM;
+
+ maj = strict_strtoll(udev_device_get_sysattr_value(dev.get(), "major"), 10,
+ &err);
if (!err.empty()) {
cerr << "rbd: couldn't parse major: " << err << std::endl;
- r = -EINVAL;
- goto out_dev;
+ return -EINVAL;
}
if (have_minor_attr()) {
- min = strict_strtoll(udev_device_get_sysattr_value(dev, "minor"), 10, &err);
+ min = strict_strtoll(udev_device_get_sysattr_value(dev.get(), "minor"), 10,
+ &err);
if (!err.empty()) {
cerr << "rbd: couldn't parse minor: " << err << std::endl;
- r = -EINVAL;
- goto out_dev;
+ return -EINVAL;
}
}
* ran map.
*/
if (udev_list_entry_get_next(l))
- cerr << "rbd: " << pool << "/" << image << "@" << snap
- << ": mapped more than once, unmapping "
- << get_kernel_rbd_name(udev_device_get_sysname(dev))
- << " only" << std::endl;
+ cerr << "rbd: " << spec << ": mapped more than once, unmapping "
+ << get_devnode(dev.get()) << " only" << std::endl;
*pdevno = makedev(maj, min);
- *pid = udev_device_get_sysname(dev);
-
-out_dev:
- udev_device_unref(dev);
-out_enm:
- udev_enumerate_unref(enm);
- return r;
+ *pid = udev_device_get_sysname(dev.get());
+ return 0;
}
-static string build_unmap_buf(const string& id, const char *options)
+static void append_unmap_options(std::string *buf, const char *options)
{
- string buf(id);
if (strcmp(options, "") != 0) {
- buf += " ";
- buf += options;
+ *buf += " ";
+ *buf += options;
}
- return buf;
}
-static int wait_for_udev_remove(struct udev_monitor *mon, dev_t devno)
-{
- for (;;) {
- struct pollfd fds[1];
- struct udev_device *dev;
-
- fds[0].fd = udev_monitor_get_fd(mon);
- fds[0].events = POLLIN;
- if (poll(fds, 1, POLL_TIMEOUT) < 0)
- return -errno;
+class UdevUnmapHandler {
+public:
+ UdevUnmapHandler(dev_t devno) : m_devno(devno) {}
- dev = udev_monitor_receive_device(mon);
- if (!dev)
- continue;
-
- if (strcmp(udev_device_get_action(dev), "remove") == 0 &&
- udev_device_get_devnum(dev) == devno) {
- udev_device_unref(dev);
- break;
+ bool operator()(udev_device_uptr dev) {
+ if (strcmp(udev_device_get_action(dev.get()), "remove")) {
+ return false;
}
-
- udev_device_unref(dev);
+ return udev_device_get_devnum(dev.get()) == m_devno;
}
- return 0;
-}
+private:
+ dev_t m_devno;
+};
static int do_unmap(struct udev *udev, dev_t devno, const string& buf)
{
- struct udev_monitor *mon;
+ bool unmapped;
+ int fds[2];
int r;
- mon = udev_monitor_new_from_netlink(udev, "udev");
+ udev_monitor_uptr mon(udev_monitor_new_from_netlink(udev, "udev"));
if (!mon)
return -ENOMEM;
- r = udev_monitor_filter_add_match_subsystem_devtype(mon, "block", "disk");
+ r = udev_monitor_filter_add_match_subsystem_devtype(mon.get(), "block",
+ "disk");
if (r < 0)
- goto out_mon;
+ return r;
+
+ r = udev_monitor_set_receive_buffer_size(mon.get(), UDEV_BUF_SIZE);
+ if (r < 0) {
+ std::cerr << "rbd: failed to set udev buffer size: " << cpp_strerror(r)
+ << std::endl;
+ /* not fatal */
+ }
- r = udev_monitor_enable_receiving(mon);
+ r = udev_monitor_enable_receiving(mon.get());
if (r < 0)
- goto out_mon;
+ return r;
- /*
- * On final device close(), kernel sends a block change event, in
- * response to which udev apparently runs blkid on the device. This
- * makes unmap fail with EBUSY, if issued right after final close().
- * Try to circumvent this with a retry before turning to udev.
- */
- for (int tries = 0; ; tries++) {
- r = sysfs_write_rbd_remove(buf);
- if (r >= 0) {
- break;
- } else if (r == -EBUSY && tries < 2) {
- if (!tries) {
- usleep(250 * 1000);
+ if (pipe2(fds, O_NONBLOCK) < 0)
+ return -errno;
+
+ auto unmapper = make_named_thread("unmapper", [&buf, sysfs_r_fd = fds[1]]() {
+ /*
+ * On final device close(), kernel sends a block change event, in
+ * response to which udev apparently runs blkid on the device. This
+ * makes unmap fail with EBUSY, if issued right after final close().
+ * Try to circumvent this with a retry before turning to udev.
+ */
+ for (int tries = 0; ; tries++) {
+ int sysfs_r = sysfs_write_rbd_remove(buf);
+ if (sysfs_r == -EBUSY && tries < 2) {
+ if (!tries) {
+ usleep(250 * 1000);
+ } else {
+ /*
+ * libudev does not provide the "wait until the queue is empty"
+ * API or the sufficient amount of primitives to build it from.
+ */
+ std::string err = run_cmd("udevadm", "settle", "--timeout", "10",
+ (char *)NULL);
+ if (!err.empty())
+ std::cerr << "rbd: " << err << std::endl;
+ }
} else {
- /*
- * libudev does not provide the "wait until the queue is empty"
- * API or the sufficient amount of primitives to build it from.
- */
- string err = run_cmd("udevadm", "settle", "--timeout", "10", NULL);
- if (!err.empty())
- cerr << "rbd: " << err << std::endl;
+ int r = safe_write(sysfs_r_fd, &sysfs_r, sizeof(sysfs_r));
+ if (r < 0) {
+ ceph_abort_msgf("safe_write failed: %d", r);
+ }
+ break;
}
- } else {
- cerr << "rbd: sysfs write failed" << std::endl;
- goto out_mon;
}
- }
+ });
- r = wait_for_udev_remove(mon, devno);
+ std::tie(r, unmapped) = wait_for_mapping(fds[0], mon.get(),
+ UdevUnmapHandler(devno));
if (r < 0) {
- cerr << "rbd: wait failed" << std::endl;
- goto out_mon;
+ if (!unmapped) {
+ std::cerr << "rbd: sysfs write failed" << std::endl;
+ } else {
+ std::cerr << "rbd: udev wait failed: " << cpp_strerror(r) << std::endl;
+ r = 0;
+ }
}
-out_mon:
- udev_monitor_unref(mon);
+ unmapper.join();
+ close(fds[0]);
+ close(fds[1]);
return r;
}
{
struct stat sb;
dev_t wholedevno = 0;
- string id;
+ std::string buf;
int r;
if (stat(devnode, &sb) < 0 || !S_ISBLK(sb.st_mode)) {
wholedevno = sb.st_rdev;
}
- r = devno_to_krbd_id(ctx->udev, wholedevno, &id);
- if (r < 0) {
- if (r == -ENOENT) {
- cerr << "rbd: '" << devnode << "' is not an rbd device" << std::endl;
- r = -EINVAL;
+ for (int tries = 0; ; tries++) {
+ r = devno_to_krbd_id(ctx->udev, wholedevno, &buf);
+ if (r == -ENOENT && tries < 2) {
+ usleep(250 * 1000);
+ } else {
+ if (r < 0) {
+ if (r == -ENOENT) {
+ std::cerr << "rbd: '" << devnode << "' is not an rbd device"
+ << std::endl;
+ r = -EINVAL;
+ }
+ return r;
+ }
+ if (tries) {
+ std::cerr << "rbd: udev enumerate missed a device, tries = " << tries
+ << std::endl;
+ }
+ break;
}
- return r;
}
- return do_unmap(ctx->udev, wholedevno, build_unmap_buf(id, options));
+ append_unmap_options(&buf, options);
+ return do_unmap(ctx->udev, wholedevno, buf);
}
-static int unmap_image(struct krbd_ctx *ctx, const char *pool,
- const char *image, const char *snap,
+static int unmap_image(struct krbd_ctx *ctx, const krbd_spec& spec,
const char *options)
{
dev_t devno = 0;
- string id;
+ std::string buf;
int r;
- if (!snap)
- snap = "-";
-
- r = spec_to_devno_and_krbd_id(ctx->udev, pool, image, snap, &devno, &id);
- if (r < 0) {
- if (r == -ENOENT) {
- cerr << "rbd: " << pool << "/" << image << "@" << snap
- << ": not a mapped image or snapshot" << std::endl;
- r = -EINVAL;
+ for (int tries = 0; ; tries++) {
+ r = spec_to_devno_and_krbd_id(ctx->udev, spec, &devno, &buf);
+ if (r == -ENOENT && tries < 2) {
+ usleep(250 * 1000);
+ } else {
+ if (r < 0) {
+ if (r == -ENOENT) {
+ std::cerr << "rbd: " << spec << ": not a mapped image or snapshot"
+ << std::endl;
+ r = -EINVAL;
+ }
+ return r;
+ }
+ if (tries) {
+ std::cerr << "rbd: udev enumerate missed a device, tries = " << tries
+ << std::endl;
+ }
+ break;
}
- return r;
}
- return do_unmap(ctx->udev, devno, build_unmap_buf(id, options));
+ append_unmap_options(&buf, options);
+ return do_unmap(ctx->udev, devno, buf);
}
static bool dump_one_image(Formatter *f, TextTable *tbl,
struct udev_device *dev)
{
- const char *id = udev_device_get_sysname(dev);
- const char *pool = udev_device_get_sysattr_value(dev, "pool");
- const char *image = udev_device_get_sysattr_value(dev, "name");
- const char *snap = udev_device_get_sysattr_value(dev, "current_snap");
- string kname = get_kernel_rbd_name(id);
+ auto spec = spec_from_dev(dev);
+ std::string devnode = get_devnode(dev);
+ const char *id = devnode.c_str() + sizeof(DEVNODE_PREFIX) - 1;
- if (!pool || !image || !snap)
+ if (!spec)
return false;
if (f) {
- f->open_object_section(id);
- f->dump_string("pool", pool);
- f->dump_string("name", image);
- f->dump_string("snap", snap);
- f->dump_string("device", kname);
+ f->open_object_section("device");
+ f->dump_string("id", id);
+ f->dump_string("pool", spec->pool_name);
+ f->dump_string("namespace", spec->nspace_name);
+ f->dump_string("name", spec->image_name);
+ f->dump_string("snap", spec->snap_name);
+ f->dump_string("device", devnode);
f->close_section();
} else {
- *tbl << id << pool << image << snap << kname << TextTable::endrow;
+ *tbl << id << spec->pool_name << spec->nspace_name << spec->image_name
+ << spec->snap_name << devnode << TextTable::endrow;
}
return true;
static int do_dump(struct udev *udev, Formatter *f, TextTable *tbl)
{
- struct udev_enumerate *enm;
- struct udev_list_entry *l;
+ udev_enumerate_uptr enm;
+ struct udev_list_entry *l = NULL;
bool have_output = false;
int r;
- enm = udev_enumerate_new(udev);
+retry:
+ enm.reset(udev_enumerate_new(udev));
if (!enm)
return -ENOMEM;
- r = udev_enumerate_add_match_subsystem(enm, "rbd");
- if (r < 0)
- goto out_enm;
-
- r = udev_enumerate_scan_devices(enm);
+ r = udev_enumerate_add_match_subsystem(enm.get(), "rbd");
if (r < 0)
- goto out_enm;
+ return r;
- udev_list_entry_foreach(l, udev_enumerate_get_list_entry(enm)) {
- struct udev_device *dev;
+ r = udev_enumerate_scan_devices(enm.get());
+ if (r < 0) {
+ if (r == -ENOENT || r == -ENODEV) {
+ std::cerr << "rbd: udev enumerate failed, retrying" << std::endl;
+ goto retry;
+ }
+ return r;
+ }
- dev = udev_device_new_from_syspath(udev, udev_list_entry_get_name(l));
+ udev_list_entry_foreach(l, udev_enumerate_get_list_entry(enm.get())) {
+ auto dev = dev_from_list_entry(udev, l);
if (dev) {
- have_output |= dump_one_image(f, tbl, dev);
- udev_device_unref(dev);
+ have_output |= dump_one_image(f, tbl, dev.get());
}
}
- r = have_output;
-out_enm:
- udev_enumerate_unref(enm);
- return r;
+ return have_output;
}
-int dump_images(struct krbd_ctx *ctx, Formatter *f)
+static int dump_images(struct krbd_ctx *ctx, Formatter *f)
{
TextTable tbl;
int r;
if (f) {
- f->open_object_section("devices");
+ f->open_array_section("devices");
} else {
tbl.define_column("id", TextTable::LEFT, TextTable::LEFT);
tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT);
+ tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT);
tbl.define_column("image", TextTable::LEFT, TextTable::LEFT);
tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT);
tbl.define_column("device", TextTable::LEFT, TextTable::LEFT);
return r;
}
+static int is_mapped_image(struct udev *udev, const krbd_spec& spec,
+ string *pname)
+{
+ udev_enumerate_uptr enm;
+ struct udev_list_entry *l;
+ int r;
+
+ r = enumerate_devices(udev, spec, &enm);
+ if (r < 0)
+ return r;
+
+ l = udev_enumerate_get_list_entry(enm.get());
+ if (l) {
+ auto dev = dev_from_list_entry(udev, l);
+ if (!dev)
+ return -ENOMEM;
+
+ *pname = get_devnode(dev.get());
+ return 1;
+ }
+
+ return 0; /* not mapped */
+}
+
extern "C" int krbd_create_from_context(rados_config_t cct,
struct krbd_ctx **pctx)
{
delete ctx;
}
-extern "C" int krbd_map(struct krbd_ctx *ctx, const char *pool,
- const char *image, const char *snap,
- const char *options, char **pdevnode)
+extern "C" int krbd_map(struct krbd_ctx *ctx,
+ const char *pool_name,
+ const char *nspace_name,
+ const char *image_name,
+ const char *snap_name,
+ const char *options,
+ char **pdevnode)
{
+ krbd_spec spec(pool_name, nspace_name, image_name, snap_name);
string name;
char *devnode;
int r;
- r = map_image(ctx, pool, image, snap, options, &name);
+ r = map_image(ctx, spec, options, &name);
if (r < 0)
return r;
return unmap_image(ctx, devnode, options);
}
-extern "C" int krbd_unmap_by_spec(struct krbd_ctx *ctx, const char *pool,
- const char *image, const char *snap,
+extern "C" int krbd_unmap_by_spec(struct krbd_ctx *ctx,
+ const char *pool_name,
+ const char *nspace_name,
+ const char *image_name,
+ const char *snap_name,
const char *options)
{
- return unmap_image(ctx, pool, image, snap, options);
+ krbd_spec spec(pool_name, nspace_name, image_name, snap_name);
+ return unmap_image(ctx, spec, options);
}
int krbd_showmapped(struct krbd_ctx *ctx, Formatter *f)
{
return dump_images(ctx, f);
}
+
+extern "C" int krbd_is_mapped(struct krbd_ctx *ctx,
+ const char *pool_name,
+ const char *nspace_name,
+ const char *image_name,
+ const char *snap_name,
+ char **pdevnode)
+{
+ krbd_spec spec(pool_name, nspace_name, image_name, snap_name);
+ string name;
+ char *devnode;
+ int r;
+
+ r = is_mapped_image(ctx->udev, spec, &name);
+ if (r <= 0) /* error or not mapped */
+ return r;
+
+ devnode = strdup(name.c_str());
+ if (!devnode)
+ return -ENOMEM;
+
+ *pdevnode = devnode;
+ return r;
+}