]> git.proxmox.com Git - ceph.git/blob - ceph/src/krbd.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / krbd.cc
1 /*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2014 Inktank Storage, Inc.
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
10 *
11 */
12
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <iostream>
16 #include <memory>
17 #include <optional>
18 #include <poll.h>
19 #include <sstream>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <string>
24 #include <sys/stat.h>
25 #include <sys/sysmacros.h>
26 #include <sys/types.h>
27 #include <tuple>
28 #include <unistd.h>
29 #include <utility>
30
31 #include "auth/KeyRing.h"
32 #include "common/errno.h"
33 #include "common/Formatter.h"
34 #include "common/module.h"
35 #include "common/run_cmd.h"
36 #include "common/safe_io.h"
37 #include "common/secret.h"
38 #include "common/TextTable.h"
39 #include "common/Thread.h"
40 #include "include/ceph_assert.h"
41 #include "include/stringify.h"
42 #include "include/krbd.h"
43 #include "mon/MonMap.h"
44
45 #include <blkid/blkid.h>
46 #include <boost/algorithm/string/predicate.hpp>
47 #include <libudev.h>
48
/*
 * Receive buffer size requested for udev monitors
 * (doubled to 2M by the kernel, SO_RCVBUFFORCE).
 */
static constexpr int UDEV_BUF_SIZE = 1 << 20;

/* Device nodes are named DEVNODE_PREFIX followed by the rbd device id. */
static constexpr char DEVNODE_PREFIX[] = "/dev/rbd";

/* Snapshot name used in place of an empty one (i.e. no snapshot). */
static constexpr char SNAP_HEAD_NAME[] = "-";
52
53 #define DEFINE_UDEV_UPTR(what) \
54 struct udev_##what##_deleter { \
55 void operator()(udev_##what *p) { \
56 udev_##what##_unref(p); \
57 } \
58 }; \
59 using udev_##what##_uptr = \
60 std::unique_ptr<udev_##what, udev_##what##_deleter>;
61
62 DEFINE_UDEV_UPTR(monitor) /* udev_monitor_uptr */
63 DEFINE_UDEV_UPTR(enumerate) /* udev_enumerate_uptr */
64 DEFINE_UDEV_UPTR(device) /* udev_device_uptr */
65
66 struct krbd_ctx {
67 CephContext *cct;
68 struct udev *udev;
69 };
70
71 struct krbd_spec {
72 std::string pool_name;
73 std::string nspace_name;
74 std::string image_name;
75 std::string snap_name;
76
77 krbd_spec(const char *pool_name, const char *nspace_name,
78 const char *image_name, const char *snap_name)
79 : pool_name(pool_name),
80 nspace_name(nspace_name),
81 image_name(image_name),
82 snap_name(*snap_name ? snap_name : SNAP_HEAD_NAME) { }
83
84 bool operator==(const krbd_spec& rhs) const {
85 return pool_name == rhs.pool_name &&
86 nspace_name == rhs.nspace_name &&
87 image_name == rhs.image_name &&
88 snap_name == rhs.snap_name;
89 }
90 };
91
92 static std::ostream& operator<<(std::ostream& os, const krbd_spec& spec)
93 {
94 os << spec.pool_name << "/";
95 if (!spec.nspace_name.empty())
96 os << spec.nspace_name << "/";
97 os << spec.image_name;
98 if (spec.snap_name != SNAP_HEAD_NAME)
99 os << "@" << spec.snap_name;
100 return os;
101 }
102
103 static std::optional<krbd_spec> spec_from_dev(udev_device *dev)
104 {
105 const char *pool_name = udev_device_get_sysattr_value(dev, "pool");
106 const char *nspace_name = udev_device_get_sysattr_value(dev, "pool_ns");
107 const char *image_name = udev_device_get_sysattr_value(dev, "name");
108 const char *snap_name = udev_device_get_sysattr_value(dev, "current_snap");
109
110 if (!pool_name || !image_name || !snap_name)
111 return std::nullopt;
112
113 return std::make_optional<krbd_spec>(
114 pool_name, nspace_name ?: "", image_name, snap_name);
115 }
116
117 static udev_device_uptr dev_from_list_entry(udev *udev, udev_list_entry *l)
118 {
119 return udev_device_uptr(
120 udev_device_new_from_syspath(udev, udev_list_entry_get_name(l)));
121 }
122
123 static std::string get_devnode(udev_device *dev)
124 {
125 std::string devnode = DEVNODE_PREFIX;
126 devnode += udev_device_get_sysname(dev);
127 return devnode;
128 }
129
130 static int sysfs_write_rbd(const char *which, const string& buf)
131 {
132 const string s = string("/sys/bus/rbd/") + which;
133 const string t = s + "_single_major";
134 int fd;
135 int r;
136
137 /*
138 * 'add' and 'add_single_major' interfaces are identical, but if rbd
139 * kernel module is new enough and is configured to use single-major
140 * scheme, 'add' is disabled in order to prevent old userspace from
141 * doing weird things at unmap time.
142 *
143 * Same goes for 'remove' vs 'remove_single_major'.
144 */
145 fd = open(t.c_str(), O_WRONLY);
146 if (fd < 0) {
147 if (errno == ENOENT) {
148 fd = open(s.c_str(), O_WRONLY);
149 if (fd < 0)
150 return -errno;
151 } else {
152 return -errno;
153 }
154 }
155
156 r = safe_write(fd, buf.c_str(), buf.size());
157
158 close(fd);
159 return r;
160 }
161
162 static int sysfs_write_rbd_add(const string& buf)
163 {
164 return sysfs_write_rbd("add", buf);
165 }
166
167 static int sysfs_write_rbd_remove(const string& buf)
168 {
169 return sysfs_write_rbd("remove", buf);
170 }
171
/*
 * Returns 1 if rbd devices are expected to have a 'minor' attribute,
 * 0 otherwise.
 *
 * 'minor' attribute was added as part of single_major merge, which
 * exposed the 'single_major' parameter.  'minor' is always present,
 * regardless of whether single-major scheme is turned on or not, so
 * the existence of the parameter file is used as the indicator.
 *
 * (Something like ver >= KERNEL_VERSION(3, 14, 0) is a no-go because
 * this has to work with rbd.ko backported to various kernels.)
 */
static int have_minor_attr(void)
{
  static const char param_path[] = "/sys/module/rbd/parameters/single_major";

  if (access(param_path, F_OK) == 0) {
    return 1;
  }
  return 0;
}
184
185 static int build_map_buf(CephContext *cct, const krbd_spec& spec,
186 const char *options, string *pbuf)
187 {
188 ostringstream oss;
189 int r;
190
191 MonMap monmap;
192 r = monmap.build_initial(cct, false, cerr);
193 if (r < 0)
194 return r;
195
196 list<entity_addr_t> mon_addr;
197 monmap.list_addrs(mon_addr);
198
199 for (const auto &p : mon_addr) {
200 if (oss.tellp() > 0) {
201 oss << ",";
202 }
203 oss << p.get_sockaddr();
204 }
205
206 oss << " name=" << cct->_conf->name.get_id();
207
208 KeyRing keyring;
209 auto auth_client_required =
210 cct->_conf.get_val<std::string>("auth_client_required");
211 if (auth_client_required != "none") {
212 r = keyring.from_ceph_context(cct);
213 auto keyfile = cct->_conf.get_val<std::string>("keyfile");
214 auto key = cct->_conf.get_val<std::string>("key");
215 if (r == -ENOENT && keyfile.empty() && key.empty())
216 r = 0;
217 if (r < 0) {
218 cerr << "rbd: failed to get secret" << std::endl;
219 return r;
220 }
221 }
222
223 CryptoKey secret;
224 string key_name = string("client.") + cct->_conf->name.get_id();
225 if (keyring.get_secret(cct->_conf->name, secret)) {
226 string secret_str;
227 secret.encode_base64(secret_str);
228
229 r = set_kernel_secret(secret_str.c_str(), key_name.c_str());
230 if (r >= 0) {
231 if (r == 0)
232 cerr << "rbd: warning: secret has length 0" << std::endl;
233 oss << ",key=" << key_name;
234 } else if (r == -ENODEV || r == -ENOSYS) {
235 // running against older kernel; fall back to secret= in options
236 oss << ",secret=" << secret_str;
237 } else {
238 cerr << "rbd: failed to add secret '" << key_name << "' to kernel"
239 << std::endl;
240 return r;
241 }
242 } else if (is_kernel_secret(key_name.c_str())) {
243 oss << ",key=" << key_name;
244 }
245
246 if (strcmp(options, "") != 0)
247 oss << "," << options;
248 if (!spec.nspace_name.empty())
249 oss << ",_pool_ns=" << spec.nspace_name;
250
251 oss << " " << spec.pool_name << " " << spec.image_name << " "
252 << spec.snap_name;
253
254 *pbuf = oss.str();
255 return 0;
256 }
257
258 /*
259 * Return:
260 * <kernel error, false> - didn't map
261 * <0 or udev error, true> - mapped
262 */
263 template <typename F>
264 static std::pair<int, bool> wait_for_mapping(int sysfs_r_fd, udev_monitor *mon,
265 F udev_device_handler)
266 {
267 struct pollfd fds[2];
268 int sysfs_r = INT_MAX, udev_r = INT_MAX;
269 int r;
270
271 fds[0].fd = sysfs_r_fd;
272 fds[0].events = POLLIN;
273 fds[1].fd = udev_monitor_get_fd(mon);
274 fds[1].events = POLLIN;
275
276 for (;;) {
277 if (poll(fds, 2, -1) < 0) {
278 ceph_abort_msgf("poll failed: %d", -errno);
279 }
280
281 if (fds[0].revents) {
282 r = safe_read_exact(sysfs_r_fd, &sysfs_r, sizeof(sysfs_r));
283 if (r < 0) {
284 ceph_abort_msgf("safe_read_exact failed: %d", r);
285 }
286 if (sysfs_r < 0) {
287 return std::make_pair(sysfs_r, false);
288 }
289 if (udev_r != INT_MAX) {
290 ceph_assert(!sysfs_r);
291 return std::make_pair(udev_r, true);
292 }
293 fds[0].fd = -1;
294 }
295
296 if (fds[1].revents) {
297 for (;;) {
298 udev_device_uptr dev(udev_monitor_receive_device(mon));
299 if (!dev) {
300 if (errno != EINTR && errno != EAGAIN) {
301 udev_r = -errno;
302 if (sysfs_r != INT_MAX) {
303 ceph_assert(!sysfs_r);
304 return std::make_pair(udev_r, true);
305 }
306 fds[1].fd = -1;
307 }
308 break;
309 }
310 if (udev_device_handler(std::move(dev))) {
311 udev_r = 0;
312 if (sysfs_r != INT_MAX) {
313 ceph_assert(!sysfs_r);
314 return std::make_pair(udev_r, true);
315 }
316 fds[1].fd = -1;
317 break;
318 }
319 }
320 }
321 }
322 }
323
324 class UdevMapHandler {
325 public:
326 UdevMapHandler(const krbd_spec *spec, std::string *pdevnode) :
327 m_spec(spec), m_pdevnode(pdevnode) {}
328
329 /*
330 * Catch /sys/devices/rbd/<id>/ and wait for the corresponding
331 * block device to show up. This is necessary because rbd devices
332 * and block devices aren't linked together in our sysfs layout.
333 *
334 * Note that our "block" event can come before the "rbd" event, so
335 * all potential "block" events are gathered in m_block_devs before
336 * m_bus_dev is caught.
337 */
338 bool operator()(udev_device_uptr dev) {
339 if (strcmp(udev_device_get_action(dev.get()), "add")) {
340 return false;
341 }
342 if (!strcmp(udev_device_get_subsystem(dev.get()), "rbd")) {
343 if (!m_bus_dev) {
344 auto spec = spec_from_dev(dev.get());
345 if (spec && *spec == *m_spec) {
346 m_bus_dev = std::move(dev);
347 m_devnode = get_devnode(m_bus_dev.get());
348 }
349 }
350 } else if (!strcmp(udev_device_get_subsystem(dev.get()), "block")) {
351 if (boost::starts_with(udev_device_get_devnode(dev.get()),
352 DEVNODE_PREFIX)) {
353 m_block_devs.push_back(std::move(dev));
354 }
355 }
356
357 if (m_bus_dev && !m_block_devs.empty()) {
358 for (const auto& p : m_block_devs) {
359 if (udev_device_get_devnode(p.get()) == m_devnode) {
360 ceph_assert(!strcmp(
361 udev_device_get_sysattr_value(m_bus_dev.get(), "major"),
362 udev_device_get_property_value(p.get(), "MAJOR")));
363 ceph_assert(!have_minor_attr() || !strcmp(
364 udev_device_get_sysattr_value(m_bus_dev.get(), "minor"),
365 udev_device_get_property_value(p.get(), "MINOR")));
366 *m_pdevnode = std::move(m_devnode);
367 return true;
368 }
369 }
370 m_block_devs.clear();
371 }
372 return false;
373 }
374
375 private:
376 udev_device_uptr m_bus_dev;
377 std::vector<udev_device_uptr> m_block_devs;
378 std::string m_devnode;
379 const krbd_spec *m_spec;
380 std::string *m_pdevnode;
381 };
382
383 static int do_map(struct udev *udev, const krbd_spec& spec, const string& buf,
384 string *pname)
385 {
386 bool mapped;
387 int fds[2];
388 int r;
389
390 udev_monitor_uptr mon(udev_monitor_new_from_netlink(udev, "udev"));
391 if (!mon)
392 return -ENOMEM;
393
394 r = udev_monitor_filter_add_match_subsystem_devtype(mon.get(), "rbd",
395 nullptr);
396 if (r < 0)
397 return r;
398
399 r = udev_monitor_filter_add_match_subsystem_devtype(mon.get(), "block",
400 "disk");
401 if (r < 0)
402 return r;
403
404 r = udev_monitor_set_receive_buffer_size(mon.get(), UDEV_BUF_SIZE);
405 if (r < 0) {
406 std::cerr << "rbd: failed to set udev buffer size: " << cpp_strerror(r)
407 << std::endl;
408 /* not fatal */
409 }
410
411 r = udev_monitor_enable_receiving(mon.get());
412 if (r < 0)
413 return r;
414
415 if (pipe2(fds, O_NONBLOCK) < 0)
416 return -errno;
417
418 auto mapper = make_named_thread("mapper", [&buf, sysfs_r_fd = fds[1]]() {
419 int sysfs_r = sysfs_write_rbd_add(buf);
420 int r = safe_write(sysfs_r_fd, &sysfs_r, sizeof(sysfs_r));
421 if (r < 0) {
422 ceph_abort_msgf("safe_write failed: %d", r);
423 }
424 });
425
426 std::tie(r, mapped) = wait_for_mapping(fds[0], mon.get(),
427 UdevMapHandler(&spec, pname));
428 if (r < 0) {
429 if (!mapped) {
430 std::cerr << "rbd: sysfs write failed" << std::endl;
431 } else {
432 std::cerr << "rbd: udev wait failed" << std::endl;
433 /* TODO: fall back to enumeration */
434 }
435 }
436
437 mapper.join();
438 close(fds[0]);
439 close(fds[1]);
440 return r;
441 }
442
443 static int map_image(struct krbd_ctx *ctx, const krbd_spec& spec,
444 const char *options, string *pname)
445 {
446 string buf;
447 int r;
448
449 /*
450 * Modprobe rbd kernel module. If it supports single-major device
451 * number allocation scheme, make sure it's turned on.
452 *
453 * Do this before calling build_map_buf() - it wants "ceph" key type
454 * registered.
455 */
456 if (access("/sys/bus/rbd", F_OK) != 0) {
457 const char *module_options = NULL;
458 if (module_has_param("rbd", "single_major"))
459 module_options = "single_major=Y";
460
461 r = module_load("rbd", module_options);
462 if (r) {
463 cerr << "rbd: failed to load rbd kernel module (" << r << ")"
464 << std::endl;
465 /*
466 * Ignore the error: modprobe failing doesn't necessarily prevent
467 * from working.
468 */
469 }
470 }
471
472 r = build_map_buf(ctx->cct, spec, options, &buf);
473 if (r < 0)
474 return r;
475
476 return do_map(ctx->udev, spec, buf, pname);
477 }
478
479 static int devno_to_krbd_id(struct udev *udev, dev_t devno, string *pid)
480 {
481 udev_enumerate_uptr enm;
482 struct udev_list_entry *l;
483 int r;
484
485 retry:
486 enm.reset(udev_enumerate_new(udev));
487 if (!enm)
488 return -ENOMEM;
489
490 r = udev_enumerate_add_match_subsystem(enm.get(), "rbd");
491 if (r < 0)
492 return r;
493
494 r = udev_enumerate_add_match_sysattr(enm.get(), "major",
495 stringify(major(devno)).c_str());
496 if (r < 0)
497 return r;
498
499 if (have_minor_attr()) {
500 r = udev_enumerate_add_match_sysattr(enm.get(), "minor",
501 stringify(minor(devno)).c_str());
502 if (r < 0)
503 return r;
504 }
505
506 r = udev_enumerate_scan_devices(enm.get());
507 if (r < 0) {
508 if (r == -ENOENT || r == -ENODEV) {
509 std::cerr << "rbd: udev enumerate failed, retrying" << std::endl;
510 goto retry;
511 }
512 return r;
513 }
514
515 l = udev_enumerate_get_list_entry(enm.get());
516 if (!l)
517 return -ENOENT;
518
519 /* make sure there is only one match */
520 ceph_assert(!udev_list_entry_get_next(l));
521
522 auto dev = dev_from_list_entry(udev, l);
523 if (!dev)
524 return -ENOMEM;
525
526 *pid = udev_device_get_sysname(dev.get());
527 return 0;
528 }
529
530 static int __enumerate_devices(struct udev *udev, const krbd_spec& spec,
531 bool match_nspace, udev_enumerate_uptr *penm)
532 {
533 udev_enumerate_uptr enm;
534 int r;
535
536 retry:
537 enm.reset(udev_enumerate_new(udev));
538 if (!enm)
539 return -ENOMEM;
540
541 r = udev_enumerate_add_match_subsystem(enm.get(), "rbd");
542 if (r < 0)
543 return r;
544
545 r = udev_enumerate_add_match_sysattr(enm.get(), "pool",
546 spec.pool_name.c_str());
547 if (r < 0)
548 return r;
549
550 if (match_nspace) {
551 r = udev_enumerate_add_match_sysattr(enm.get(), "pool_ns",
552 spec.nspace_name.c_str());
553 } else {
554 /*
555 * Match _only_ devices that don't have pool_ns attribute.
556 * If the kernel supports namespaces, the result will be empty.
557 */
558 r = udev_enumerate_add_nomatch_sysattr(enm.get(), "pool_ns", nullptr);
559 }
560 if (r < 0)
561 return r;
562
563 r = udev_enumerate_add_match_sysattr(enm.get(), "name",
564 spec.image_name.c_str());
565 if (r < 0)
566 return r;
567
568 r = udev_enumerate_add_match_sysattr(enm.get(), "current_snap",
569 spec.snap_name.c_str());
570 if (r < 0)
571 return r;
572
573 r = udev_enumerate_scan_devices(enm.get());
574 if (r < 0) {
575 if (r == -ENOENT || r == -ENODEV) {
576 std::cerr << "rbd: udev enumerate failed, retrying" << std::endl;
577 goto retry;
578 }
579 return r;
580 }
581
582 *penm = std::move(enm);
583 return 0;
584 }
585
586 static int enumerate_devices(struct udev *udev, const krbd_spec& spec,
587 udev_enumerate_uptr *penm)
588 {
589 udev_enumerate_uptr enm;
590 int r;
591
592 r = __enumerate_devices(udev, spec, true, &enm);
593 if (r < 0)
594 return r;
595
596 /*
597 * If no namespace is set, try again with match_nspace=false to
598 * handle older kernels. On a newer kernel the result will remain
599 * the same (i.e. empty).
600 */
601 if (!udev_enumerate_get_list_entry(enm.get()) && spec.nspace_name.empty()) {
602 r = __enumerate_devices(udev, spec, false, &enm);
603 if (r < 0)
604 return r;
605 }
606
607 *penm = std::move(enm);
608 return 0;
609 }
610
611 static int spec_to_devno_and_krbd_id(struct udev *udev, const krbd_spec& spec,
612 dev_t *pdevno, string *pid)
613 {
614 udev_enumerate_uptr enm;
615 struct udev_list_entry *l;
616 unsigned int maj, min = 0;
617 string err;
618 int r;
619
620 r = enumerate_devices(udev, spec, &enm);
621 if (r < 0)
622 return r;
623
624 l = udev_enumerate_get_list_entry(enm.get());
625 if (!l)
626 return -ENOENT;
627
628 auto dev = dev_from_list_entry(udev, l);
629 if (!dev)
630 return -ENOMEM;
631
632 maj = strict_strtoll(udev_device_get_sysattr_value(dev.get(), "major"), 10,
633 &err);
634 if (!err.empty()) {
635 cerr << "rbd: couldn't parse major: " << err << std::endl;
636 return -EINVAL;
637 }
638 if (have_minor_attr()) {
639 min = strict_strtoll(udev_device_get_sysattr_value(dev.get(), "minor"), 10,
640 &err);
641 if (!err.empty()) {
642 cerr << "rbd: couldn't parse minor: " << err << std::endl;
643 return -EINVAL;
644 }
645 }
646
647 /*
648 * If an image is mapped more than once don't bother trying to unmap
649 * all devices - let users run unmap the same number of times they
650 * ran map.
651 */
652 if (udev_list_entry_get_next(l))
653 cerr << "rbd: " << spec << ": mapped more than once, unmapping "
654 << get_devnode(dev.get()) << " only" << std::endl;
655
656 *pdevno = makedev(maj, min);
657 *pid = udev_device_get_sysname(dev.get());
658 return 0;
659 }
660
/* Append " <options>" to *buf unless options is the empty string. */
static void append_unmap_options(std::string *buf, const char *options)
{
  if (options[0] == '\0') {
    return;
  }
  buf->append(" ").append(options);
}
668
669 class UdevUnmapHandler {
670 public:
671 UdevUnmapHandler(dev_t devno) : m_devno(devno) {}
672
673 bool operator()(udev_device_uptr dev) {
674 if (strcmp(udev_device_get_action(dev.get()), "remove")) {
675 return false;
676 }
677 return udev_device_get_devnum(dev.get()) == m_devno;
678 }
679
680 private:
681 dev_t m_devno;
682 };
683
684 static int do_unmap(struct udev *udev, dev_t devno, const string& buf)
685 {
686 bool unmapped;
687 int fds[2];
688 int r;
689
690 udev_monitor_uptr mon(udev_monitor_new_from_netlink(udev, "udev"));
691 if (!mon)
692 return -ENOMEM;
693
694 r = udev_monitor_filter_add_match_subsystem_devtype(mon.get(), "block",
695 "disk");
696 if (r < 0)
697 return r;
698
699 r = udev_monitor_set_receive_buffer_size(mon.get(), UDEV_BUF_SIZE);
700 if (r < 0) {
701 std::cerr << "rbd: failed to set udev buffer size: " << cpp_strerror(r)
702 << std::endl;
703 /* not fatal */
704 }
705
706 r = udev_monitor_enable_receiving(mon.get());
707 if (r < 0)
708 return r;
709
710 if (pipe2(fds, O_NONBLOCK) < 0)
711 return -errno;
712
713 auto unmapper = make_named_thread("unmapper", [&buf, sysfs_r_fd = fds[1]]() {
714 /*
715 * On final device close(), kernel sends a block change event, in
716 * response to which udev apparently runs blkid on the device. This
717 * makes unmap fail with EBUSY, if issued right after final close().
718 * Try to circumvent this with a retry before turning to udev.
719 */
720 for (int tries = 0; ; tries++) {
721 int sysfs_r = sysfs_write_rbd_remove(buf);
722 if (sysfs_r == -EBUSY && tries < 2) {
723 if (!tries) {
724 usleep(250 * 1000);
725 } else {
726 /*
727 * libudev does not provide the "wait until the queue is empty"
728 * API or the sufficient amount of primitives to build it from.
729 */
730 std::string err = run_cmd("udevadm", "settle", "--timeout", "10",
731 (char *)NULL);
732 if (!err.empty())
733 std::cerr << "rbd: " << err << std::endl;
734 }
735 } else {
736 int r = safe_write(sysfs_r_fd, &sysfs_r, sizeof(sysfs_r));
737 if (r < 0) {
738 ceph_abort_msgf("safe_write failed: %d", r);
739 }
740 break;
741 }
742 }
743 });
744
745 std::tie(r, unmapped) = wait_for_mapping(fds[0], mon.get(),
746 UdevUnmapHandler(devno));
747 if (r < 0) {
748 if (!unmapped) {
749 std::cerr << "rbd: sysfs write failed" << std::endl;
750 } else {
751 std::cerr << "rbd: udev wait failed: " << cpp_strerror(r) << std::endl;
752 r = 0;
753 }
754 }
755
756 unmapper.join();
757 close(fds[0]);
758 close(fds[1]);
759 return r;
760 }
761
762 static int unmap_image(struct krbd_ctx *ctx, const char *devnode,
763 const char *options)
764 {
765 struct stat sb;
766 dev_t wholedevno = 0;
767 std::string buf;
768 int r;
769
770 if (stat(devnode, &sb) < 0 || !S_ISBLK(sb.st_mode)) {
771 cerr << "rbd: '" << devnode << "' is not a block device" << std::endl;
772 return -EINVAL;
773 }
774
775 r = blkid_devno_to_wholedisk(sb.st_rdev, NULL, 0, &wholedevno);
776 if (r < 0) {
777 cerr << "rbd: couldn't compute wholedevno: " << cpp_strerror(r)
778 << std::endl;
779 /*
780 * Ignore the error: we are given whole disks most of the time, and
781 * if it turns out this is a partition we will fail later anyway.
782 */
783 wholedevno = sb.st_rdev;
784 }
785
786 for (int tries = 0; ; tries++) {
787 r = devno_to_krbd_id(ctx->udev, wholedevno, &buf);
788 if (r == -ENOENT && tries < 2) {
789 usleep(250 * 1000);
790 } else {
791 if (r < 0) {
792 if (r == -ENOENT) {
793 std::cerr << "rbd: '" << devnode << "' is not an rbd device"
794 << std::endl;
795 r = -EINVAL;
796 }
797 return r;
798 }
799 if (tries) {
800 std::cerr << "rbd: udev enumerate missed a device, tries = " << tries
801 << std::endl;
802 }
803 break;
804 }
805 }
806
807 append_unmap_options(&buf, options);
808 return do_unmap(ctx->udev, wholedevno, buf);
809 }
810
811 static int unmap_image(struct krbd_ctx *ctx, const krbd_spec& spec,
812 const char *options)
813 {
814 dev_t devno = 0;
815 std::string buf;
816 int r;
817
818 for (int tries = 0; ; tries++) {
819 r = spec_to_devno_and_krbd_id(ctx->udev, spec, &devno, &buf);
820 if (r == -ENOENT && tries < 2) {
821 usleep(250 * 1000);
822 } else {
823 if (r < 0) {
824 if (r == -ENOENT) {
825 std::cerr << "rbd: " << spec << ": not a mapped image or snapshot"
826 << std::endl;
827 r = -EINVAL;
828 }
829 return r;
830 }
831 if (tries) {
832 std::cerr << "rbd: udev enumerate missed a device, tries = " << tries
833 << std::endl;
834 }
835 break;
836 }
837 }
838
839 append_unmap_options(&buf, options);
840 return do_unmap(ctx->udev, devno, buf);
841 }
842
843 static bool dump_one_image(Formatter *f, TextTable *tbl,
844 struct udev_device *dev)
845 {
846 auto spec = spec_from_dev(dev);
847 std::string devnode = get_devnode(dev);
848 const char *id = devnode.c_str() + sizeof(DEVNODE_PREFIX) - 1;
849
850 if (!spec)
851 return false;
852
853 if (f) {
854 f->open_object_section("device");
855 f->dump_string("id", id);
856 f->dump_string("pool", spec->pool_name);
857 f->dump_string("namespace", spec->nspace_name);
858 f->dump_string("name", spec->image_name);
859 f->dump_string("snap", spec->snap_name);
860 f->dump_string("device", devnode);
861 f->close_section();
862 } else {
863 *tbl << id << spec->pool_name << spec->nspace_name << spec->image_name
864 << spec->snap_name << devnode << TextTable::endrow;
865 }
866
867 return true;
868 }
869
870 static int do_dump(struct udev *udev, Formatter *f, TextTable *tbl)
871 {
872 udev_enumerate_uptr enm;
873 struct udev_list_entry *l = NULL;
874 bool have_output = false;
875 int r;
876
877 retry:
878 enm.reset(udev_enumerate_new(udev));
879 if (!enm)
880 return -ENOMEM;
881
882 r = udev_enumerate_add_match_subsystem(enm.get(), "rbd");
883 if (r < 0)
884 return r;
885
886 r = udev_enumerate_scan_devices(enm.get());
887 if (r < 0) {
888 if (r == -ENOENT || r == -ENODEV) {
889 std::cerr << "rbd: udev enumerate failed, retrying" << std::endl;
890 goto retry;
891 }
892 return r;
893 }
894
895 udev_list_entry_foreach(l, udev_enumerate_get_list_entry(enm.get())) {
896 auto dev = dev_from_list_entry(udev, l);
897 if (dev) {
898 have_output |= dump_one_image(f, tbl, dev.get());
899 }
900 }
901
902 return have_output;
903 }
904
905 static int dump_images(struct krbd_ctx *ctx, Formatter *f)
906 {
907 TextTable tbl;
908 int r;
909
910 if (f) {
911 f->open_array_section("devices");
912 } else {
913 tbl.define_column("id", TextTable::LEFT, TextTable::LEFT);
914 tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT);
915 tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT);
916 tbl.define_column("image", TextTable::LEFT, TextTable::LEFT);
917 tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT);
918 tbl.define_column("device", TextTable::LEFT, TextTable::LEFT);
919 }
920
921 r = do_dump(ctx->udev, f, &tbl);
922
923 if (f) {
924 f->close_section();
925 f->flush(cout);
926 } else {
927 if (r > 0)
928 cout << tbl;
929 }
930
931 return r;
932 }
933
934 static int is_mapped_image(struct udev *udev, const krbd_spec& spec,
935 string *pname)
936 {
937 udev_enumerate_uptr enm;
938 struct udev_list_entry *l;
939 int r;
940
941 r = enumerate_devices(udev, spec, &enm);
942 if (r < 0)
943 return r;
944
945 l = udev_enumerate_get_list_entry(enm.get());
946 if (l) {
947 auto dev = dev_from_list_entry(udev, l);
948 if (!dev)
949 return -ENOMEM;
950
951 *pname = get_devnode(dev.get());
952 return 1;
953 }
954
955 return 0; /* not mapped */
956 }
957
958 extern "C" int krbd_create_from_context(rados_config_t cct,
959 struct krbd_ctx **pctx)
960 {
961 struct krbd_ctx *ctx = new struct krbd_ctx();
962
963 ctx->cct = reinterpret_cast<CephContext *>(cct);
964 ctx->udev = udev_new();
965 if (!ctx->udev) {
966 delete ctx;
967 return -ENOMEM;
968 }
969
970 *pctx = ctx;
971 return 0;
972 }
973
974 extern "C" void krbd_destroy(struct krbd_ctx *ctx)
975 {
976 if (!ctx)
977 return;
978
979 udev_unref(ctx->udev);
980
981 delete ctx;
982 }
983
984 extern "C" int krbd_map(struct krbd_ctx *ctx,
985 const char *pool_name,
986 const char *nspace_name,
987 const char *image_name,
988 const char *snap_name,
989 const char *options,
990 char **pdevnode)
991 {
992 krbd_spec spec(pool_name, nspace_name, image_name, snap_name);
993 string name;
994 char *devnode;
995 int r;
996
997 r = map_image(ctx, spec, options, &name);
998 if (r < 0)
999 return r;
1000
1001 devnode = strdup(name.c_str());
1002 if (!devnode)
1003 return -ENOMEM;
1004
1005 *pdevnode = devnode;
1006 return r;
1007 }
1008
1009 extern "C" int krbd_unmap(struct krbd_ctx *ctx, const char *devnode,
1010 const char *options)
1011 {
1012 return unmap_image(ctx, devnode, options);
1013 }
1014
1015 extern "C" int krbd_unmap_by_spec(struct krbd_ctx *ctx,
1016 const char *pool_name,
1017 const char *nspace_name,
1018 const char *image_name,
1019 const char *snap_name,
1020 const char *options)
1021 {
1022 krbd_spec spec(pool_name, nspace_name, image_name, snap_name);
1023 return unmap_image(ctx, spec, options);
1024 }
1025
1026 int krbd_showmapped(struct krbd_ctx *ctx, Formatter *f)
1027 {
1028 return dump_images(ctx, f);
1029 }
1030
1031 extern "C" int krbd_is_mapped(struct krbd_ctx *ctx,
1032 const char *pool_name,
1033 const char *nspace_name,
1034 const char *image_name,
1035 const char *snap_name,
1036 char **pdevnode)
1037 {
1038 krbd_spec spec(pool_name, nspace_name, image_name, snap_name);
1039 string name;
1040 char *devnode;
1041 int r;
1042
1043 r = is_mapped_image(ctx->udev, spec, &name);
1044 if (r <= 0) /* error or not mapped */
1045 return r;
1046
1047 devnode = strdup(name.c_str());
1048 if (!devnode)
1049 return -ENOMEM;
1050
1051 *pdevnode = devnode;
1052 return r;
1053 }