1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
5 * rbd-nbd - RBD in userspace
7 * Copyright (C) 2015 - 2016 Kylin Corporation
9 * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com>
10 * Li Wang <li.wang@kylin-cloud.com>
12 * This is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License version 2.1, as published by the Free Software
15 * Foundation. See file COPYING.
20 #include "include/int_types.h"
21 #include "include/scope_guard.h"
23 #include <boost/endian/conversion.hpp>
33 #include <sys/types.h>
36 #include <linux/nbd.h>
38 #include <sys/ioctl.h>
39 #include <sys/socket.h>
40 #include <sys/syscall.h>
42 #include "nbd-netlink.h"
43 #include <libnl3/netlink/genl/genl.h>
44 #include <libnl3/netlink/genl/ctrl.h>
45 #include <libnl3/netlink/genl/mngt.h>
52 #include <boost/algorithm/string/predicate.hpp>
53 #include <boost/lexical_cast.hpp>
55 #include "common/Formatter.h"
56 #include "common/Preforker.h"
57 #include "common/SubProcess.h"
58 #include "common/TextTable.h"
59 #include "common/ceph_argparse.h"
60 #include "common/config.h"
61 #include "common/dout.h"
62 #include "common/errno.h"
63 #include "common/event_socket.h"
64 #include "common/module.h"
65 #include "common/safe_io.h"
66 #include "common/version.h"
68 #include "global/global_init.h"
69 #include "global/signal_handler.h"
71 #include "include/rados/librados.hpp"
72 #include "include/rbd/librbd.hpp"
73 #include "include/stringify.h"
74 #include "include/xlist.h"
76 #include "mon/MonClient.h"
78 #define dout_context g_ceph_context
79 #define dout_subsys ceph_subsys_rbd
81 #define dout_prefix *_dout << "rbd-nbd: "
84 namespace fs
= std::filesystem
;
86 using boost::endian::big_to_native
;
87 using boost::endian::native_to_big
;
102 int reattach_timeout
= 30;
104 bool exclusive
= false;
106 bool quiesce
= false;
107 bool readonly
= false;
108 bool set_max_part
= false;
109 bool try_netlink
= false;
110 bool show_cookie
= false;
112 std::string poolname
;
115 std::string snapname
;
117 std::string quiesce_hook
= CMAKE_INSTALL_LIBEXECDIR
"/rbd-nbd/rbd-nbd_quiesce";
120 bool pretty_format
= false;
122 std::vector
<librbd::encryption_format_t
> encryption_formats
;
123 std::vector
<std::string
> encryption_passphrase_files
;
125 Command command
= None
;
128 uint64_t snapid
= CEPH_NOSNAP
;
130 std::string
image_spec() const {
131 std::string spec
= poolname
+ "/";
133 if (!nsname
.empty()) {
134 spec
+= nsname
+ "/";
138 if (!snapname
.empty()) {
139 spec
+= "@" + snapname
;
148 std::cout
<< "Usage: rbd-nbd [options] map <image-or-snap-spec> Map image to nbd device\n"
149 << " detach <device|image-or-snap-spec> Detach image from nbd device\n"
150 << " [options] attach <image-or-snap-spec> Attach image to nbd device\n"
151 << " unmap <device|image-or-snap-spec> Unmap nbd device\n"
152 << " [options] list-mapped List mapped nbd devices\n"
153 << "Map and attach options:\n"
154 << " --device <device path> Specify nbd device path (/dev/nbd{num})\n"
155 << " --encryption-format luks|luks1|luks2\n"
156 << " Image encryption format (default: luks)\n"
157 << " --encryption-passphrase-file Path of file containing passphrase for unlocking image encryption\n"
158 << " --exclusive Forbid writes by other clients\n"
159 << " --notrim Turn off trim/discard\n"
160 << " --io-timeout <sec> Set nbd IO timeout\n"
161 << " --max_part <limit> Override for module param max_part\n"
162 << " --nbds_max <limit> Override for module param nbds_max\n"
163 << " --quiesce Use quiesce callbacks\n"
164 << " --quiesce-hook <path> Specify quiesce hook path\n"
165 << " (default: " << Config().quiesce_hook
<< ")\n"
166 << " --read-only Map read-only\n"
167 << " --reattach-timeout <sec> Set nbd re-attach timeout\n"
168 << " (default: " << Config().reattach_timeout
<< ")\n"
169 << " --try-netlink Use the nbd netlink interface\n"
170 << " --show-cookie Show device cookie\n"
171 << " --cookie Specify device cookie\n"
172 << " --snap-id <snap-id> Specify snapshot by ID instead of by name\n"
174 << "Unmap and detach options:\n"
175 << " --device <device path> Specify nbd device path (/dev/nbd{num})\n"
176 << " --snap-id <snap-id> Specify snapshot by ID instead of by name\n"
179 << " --format plain|json|xml Output format (default: plain)\n"
180 << " --pretty-format Pretty formatting (json and xml)\n"
182 generic_server_usage();
186 static int nbd_index
= -1;
187 static EventSocket terminate_event_sock
;
189 #define RBD_NBD_BLKSIZE 512UL
192 #define VERSION_INFO 2
194 static int parse_args(vector
<const char*>& args
, std::ostream
*err_msg
,
196 static int netlink_disconnect(int index
);
197 static int netlink_resize(int nbd_index
, uint64_t size
);
199 static int run_quiesce_hook(const std::string
&quiesce_hook
,
200 const std::string
&devpath
,
201 const std::string
&command
);
203 static std::string
get_cookie(const std::string
&devpath
);
208 uint64_t quiesce_watch_handle
= 0;
212 librbd::Image
&image
;
216 NBDServer(int fd
, librbd::Image
& image
, Config
*cfg
)
220 , reader_thread(*this, &NBDServer::reader_entry
)
221 , writer_thread(*this, &NBDServer::writer_entry
)
222 , quiesce_thread(*this, &NBDServer::quiesce_entry
)
224 std::vector
<librbd::config_option_t
> options
;
225 image
.config_list(&options
);
226 for (auto &option
: options
) {
227 if ((option
.name
== std::string("rbd_cache") ||
228 option
.name
== std::string("rbd_cache_writethrough_until_flush")) &&
229 option
.value
== "false") {
230 allow_internal_flush
= true;
236 Config
*get_cfg() const {
241 int terminate_event_fd
= -1;
242 ceph::mutex disconnect_lock
=
243 ceph::make_mutex("NBDServer::DisconnectLocker");
244 ceph::condition_variable disconnect_cond
;
245 std::atomic
<bool> terminated
= { false };
246 std::atomic
<bool> allow_internal_flush
= { false };
250 xlist
<IOContext
*>::item item
;
251 NBDServer
*server
= nullptr;
252 struct nbd_request request
;
253 struct nbd_reply reply
;
262 friend std::ostream
&operator<<(std::ostream
&os
, const IOContext
&ctx
);
264 ceph::mutex lock
= ceph::make_mutex("NBDServer::Locker");
265 ceph::condition_variable cond
;
266 xlist
<IOContext
*> io_pending
;
267 xlist
<IOContext
*> io_finished
;
269 void io_start(IOContext
*ctx
)
271 std::lock_guard l
{lock
};
272 io_pending
.push_back(&ctx
->item
);
275 void io_finish(IOContext
*ctx
)
277 std::lock_guard l
{lock
};
278 ceph_assert(ctx
->item
.is_on_list());
279 ctx
->item
.remove_myself();
280 io_finished
.push_back(&ctx
->item
);
284 IOContext
*wait_io_finish()
286 std::unique_lock l
{lock
};
287 cond
.wait(l
, [this] {
288 return !io_finished
.empty() ||
289 (io_pending
.empty() && terminated
);
292 if (io_finished
.empty())
295 IOContext
*ret
= io_finished
.front();
296 io_finished
.pop_front();
303 std::unique_lock l
{lock
};
304 cond
.wait(l
, [this] { return io_pending
.empty(); });
306 while(!io_finished
.empty()) {
307 std::unique_ptr
<IOContext
> free_ctx(io_finished
.front());
308 io_finished
.pop_front();
314 std::unique_lock l
{lock
};
316 ceph_assert(!reader_thread
.is_started());
317 ceph_assert(!writer_thread
.is_started());
318 ceph_assert(io_pending
.empty());
319 ceph_assert(io_finished
.empty());
322 static void aio_callback(librbd::completion_t cb
, void *arg
)
324 librbd::RBD::AioCompletion
*aio_completion
=
325 reinterpret_cast<librbd::RBD::AioCompletion
*>(cb
);
327 IOContext
*ctx
= reinterpret_cast<IOContext
*>(arg
);
328 int ret
= aio_completion
->get_return_value();
330 dout(20) << __func__
<< ": " << *ctx
<< dendl
;
332 if (ret
== -EINVAL
) {
333 // if shrinking an image, a pagecache writeback might reference
334 // extents outside of the range of the new image extents
335 dout(0) << __func__
<< ": masking IO out-of-bounds error" << dendl
;
341 ctx
->reply
.error
= native_to_big
<uint32_t>(-ret
);
342 } else if ((ctx
->command
== NBD_CMD_READ
) &&
343 ret
< static_cast<int>(ctx
->request
.len
)) {
344 int pad_byte_count
= static_cast<int> (ctx
->request
.len
) - ret
;
345 ctx
->data
.append_zero(pad_byte_count
);
346 dout(20) << __func__
<< ": " << *ctx
<< ": Pad byte count: "
347 << pad_byte_count
<< dendl
;
348 ctx
->reply
.error
= native_to_big
<uint32_t>(0);
350 ctx
->reply
.error
= native_to_big
<uint32_t>(0);
352 ctx
->server
->io_finish(ctx
);
354 aio_completion
->release();
359 struct pollfd poll_fds
[2];
360 memset(poll_fds
, 0, sizeof(struct pollfd
) * 2);
362 poll_fds
[0].events
= POLLIN
;
363 poll_fds
[1].fd
= terminate_event_fd
;
364 poll_fds
[1].events
= POLLIN
;
367 std::unique_ptr
<IOContext
> ctx(new IOContext());
370 dout(20) << __func__
<< ": waiting for nbd request" << dendl
;
372 int r
= poll(poll_fds
, 2, -1);
374 if (errno
== EINTR
) {
378 derr
<< "failed to poll nbd: " << cpp_strerror(r
) << dendl
;
382 if ((poll_fds
[1].revents
& POLLIN
) != 0) {
383 dout(0) << __func__
<< ": terminate received" << dendl
;
387 if ((poll_fds
[0].revents
& POLLIN
) == 0) {
388 dout(20) << __func__
<< ": nothing to read" << dendl
;
392 r
= safe_read_exact(fd
, &ctx
->request
, sizeof(struct nbd_request
));
394 derr
<< "failed to read nbd request header: " << cpp_strerror(r
)
399 if (ctx
->request
.magic
!= htonl(NBD_REQUEST_MAGIC
)) {
400 derr
<< "invalid nbd request header" << dendl
;
404 ctx
->request
.from
= big_to_native(ctx
->request
.from
);
405 ctx
->request
.type
= big_to_native(ctx
->request
.type
);
406 ctx
->request
.len
= big_to_native(ctx
->request
.len
);
408 ctx
->reply
.magic
= native_to_big
<uint32_t>(NBD_REPLY_MAGIC
);
409 memcpy(ctx
->reply
.handle
, ctx
->request
.handle
, sizeof(ctx
->reply
.handle
));
411 ctx
->command
= ctx
->request
.type
& 0x0000ffff;
413 dout(20) << *ctx
<< ": start" << dendl
;
415 switch (ctx
->command
)
418 // NBD_DO_IT will return when pipe is closed
419 dout(0) << "disconnect request received" << dendl
;
422 bufferptr
ptr(ctx
->request
.len
);
423 r
= safe_read_exact(fd
, ptr
.c_str(), ctx
->request
.len
);
425 derr
<< *ctx
<< ": failed to read nbd request data: "
426 << cpp_strerror(r
) << dendl
;
429 ctx
->data
.push_back(ptr
);
433 IOContext
*pctx
= ctx
.release();
435 librbd::RBD::AioCompletion
*c
= new librbd::RBD::AioCompletion(pctx
, aio_callback
);
436 switch (pctx
->command
)
439 image
.aio_write(pctx
->request
.from
, pctx
->request
.len
, pctx
->data
, c
);
442 image
.aio_read(pctx
->request
.from
, pctx
->request
.len
, pctx
->data
, c
);
446 allow_internal_flush
= true;
449 image
.aio_discard(pctx
->request
.from
, pctx
->request
.len
, c
);
452 derr
<< *pctx
<< ": invalid request command" << dendl
;
459 int r
= netlink_disconnect(nbd_index
);
461 ioctl(nbd
, NBD_DISCONNECT
);
465 std::lock_guard l
{lock
};
469 std::lock_guard disconnect_l
{disconnect_lock
};
470 disconnect_cond
.notify_all();
472 dout(20) << __func__
<< ": terminated" << dendl
;
478 dout(20) << __func__
<< ": waiting for io request" << dendl
;
479 std::unique_ptr
<IOContext
> ctx(wait_io_finish());
481 dout(20) << __func__
<< ": no io requests, terminating" << dendl
;
485 dout(20) << __func__
<< ": got: " << *ctx
<< dendl
;
487 int r
= safe_write(fd
, &ctx
->reply
, sizeof(struct nbd_reply
));
489 derr
<< *ctx
<< ": failed to write reply header: " << cpp_strerror(r
)
493 if (ctx
->command
== NBD_CMD_READ
&& ctx
->reply
.error
== htonl(0)) {
494 r
= ctx
->data
.write_fd(fd
);
496 derr
<< *ctx
<< ": failed to write replay data: " << cpp_strerror(r
)
501 dout(20) << *ctx
<< ": finish" << dendl
;
506 ::shutdown(fd
, SHUT_RDWR
);
508 dout(20) << __func__
<< ": terminated" << dendl
;
511 bool wait_quiesce() {
512 dout(20) << __func__
<< dendl
;
514 std::unique_lock locker
{lock
};
515 cond
.wait(locker
, [this] { return quiesce
|| terminated
; });
521 dout(20) << __func__
<< ": got quiesce request" << dendl
;
525 void wait_unquiesce(std::unique_lock
<ceph::mutex
> &locker
) {
526 dout(20) << __func__
<< dendl
;
528 cond
.wait(locker
, [this] { return !quiesce
|| terminated
; });
530 dout(20) << __func__
<< ": got unquiesce request" << dendl
;
533 void wait_inflight_io() {
534 if (!allow_internal_flush
) {
538 uint64_t features
= 0;
539 image
.features(&features
);
540 if ((features
& RBD_FEATURE_EXCLUSIVE_LOCK
) != 0) {
541 bool is_owner
= false;
542 image
.is_exclusive_lock_owner(&is_owner
);
548 dout(20) << __func__
<< dendl
;
550 int r
= image
.flush();
552 derr
<< "flush failed: " << cpp_strerror(r
) << dendl
;
558 ceph_assert(cfg
->quiesce
);
560 while (wait_quiesce()) {
562 int r
= run_quiesce_hook(cfg
->quiesce_hook
, cfg
->devpath
, "quiesce");
567 std::unique_lock locker
{lock
};
568 ceph_assert(quiesce
== true);
570 image
.quiesce_complete(quiesce_watch_handle
, r
);
577 wait_unquiesce(locker
);
580 run_quiesce_hook(cfg
->quiesce_hook
, cfg
->devpath
, "unquiesce");
583 dout(20) << __func__
<< ": terminated" << dendl
;
586 class ThreadHelper
: public Thread
589 typedef void (NBDServer::*entry_func
)();
594 ThreadHelper(NBDServer
&_server
, entry_func _func
)
599 void* entry() override
604 } reader_thread
, writer_thread
, quiesce_thread
;
606 bool started
= false;
607 bool quiesce
= false;
613 dout(10) << __func__
<< ": starting" << dendl
;
617 terminate_event_fd
= eventfd(0, EFD_NONBLOCK
);
618 ceph_assert(terminate_event_fd
> 0);
619 int r
= terminate_event_sock
.init(terminate_event_fd
,
620 EVENT_SOCKET_TYPE_EVENTFD
);
623 reader_thread
.create("rbd_reader");
624 writer_thread
.create("rbd_writer");
626 quiesce_thread
.create("rbd_quiesce");
631 void wait_for_disconnect()
636 std::unique_lock l
{disconnect_lock
};
637 disconnect_cond
.wait(l
);
640 void notify_quiesce() {
641 dout(10) << __func__
<< dendl
;
643 ceph_assert(cfg
->quiesce
);
645 std::unique_lock locker
{lock
};
646 ceph_assert(quiesce
== false);
651 void notify_unquiesce() {
652 dout(10) << __func__
<< dendl
;
654 ceph_assert(cfg
->quiesce
);
656 std::unique_lock locker
{lock
};
657 ceph_assert(quiesce
== true);
665 dout(10) << __func__
<< ": terminating" << dendl
;
667 terminate_event_sock
.notify();
669 reader_thread
.join();
670 writer_thread
.join();
672 quiesce_thread
.join();
677 close(terminate_event_fd
);
683 std::ostream
&operator<<(std::ostream
&os
, const NBDServer::IOContext
&ctx
) {
685 os
<< "[" << std::hex
<< big_to_native(*((uint64_t *)ctx
.request
.handle
));
705 os
<< " UNKNOWN(" << ctx
.command
<< ") ";
709 os
<< ctx
.request
.from
<< "~" << ctx
.request
.len
<< " "
710 << std::dec
<< big_to_native(ctx
.reply
.error
) << "]";
715 class NBDQuiesceWatchCtx
: public librbd::QuiesceWatchCtx
718 NBDQuiesceWatchCtx(NBDServer
*server
) : server(server
) {
721 void handle_quiesce() override
{
722 server
->notify_quiesce();
725 void handle_unquiesce() override
{
726 server
->notify_unquiesce();
733 class NBDWatchCtx
: public librbd::UpdateWatchCtx
739 librados::IoCtx
&io_ctx
;
740 librbd::Image
&image
;
742 std::thread handle_notify_thread
;
743 ceph::condition_variable cond
;
744 ceph::mutex lock
= ceph::make_mutex("NBDWatchCtx::Locker");
746 bool terminated
= false;
749 dout(10) << __func__
<< dendl
;
751 std::unique_lock locker
{lock
};
752 cond
.wait(locker
, [this] { return notify
|| terminated
; });
758 dout(10) << __func__
<< ": got notify request" << dendl
;
763 void handle_notify_entry() {
764 dout(10) << __func__
<< dendl
;
766 while (wait_notify()) {
768 int ret
= image
.size(&new_size
);
770 derr
<< "getting image size failed: " << cpp_strerror(ret
) << dendl
;
773 if (new_size
== size
) {
776 dout(5) << "resize detected" << dendl
;
777 if (ioctl(fd
, BLKFLSBUF
, NULL
) < 0) {
778 derr
<< "invalidate page cache failed: " << cpp_strerror(errno
)
782 ret
= netlink_resize(nbd_index
, new_size
);
784 ret
= ioctl(fd
, NBD_SET_SIZE
, new_size
);
786 derr
<< "resize failed: " << cpp_strerror(errno
) << dendl
;
792 if (ioctl(fd
, BLKRRPART
, NULL
) < 0) {
793 derr
<< "rescan of partition table failed: " << cpp_strerror(errno
)
796 if (image
.invalidate_cache() < 0) {
797 derr
<< "invalidate rbd cache failed" << dendl
;
806 librados::IoCtx
&_io_ctx
,
807 librbd::Image
&_image
,
810 , nbd_index(_nbd_index
)
811 , use_netlink(_use_netlink
)
816 handle_notify_thread
= make_named_thread("rbd_handle_notify",
817 &NBDWatchCtx::handle_notify_entry
,
821 ~NBDWatchCtx() override
823 dout(10) << __func__
<< ": terminating" << dendl
;
824 std::unique_lock locker
{lock
};
829 handle_notify_thread
.join();
830 dout(10) << __func__
<< ": finish" << dendl
;
833 void handle_notify() override
835 dout(10) << __func__
<< dendl
;
837 std::unique_lock locker
{lock
};
843 class NBDListIterator
{
845 bool get(Config
*cfg
) {
847 std::string nbd_path
= "/sys/block/nbd" + stringify(m_index
);
848 if(access(nbd_path
.c_str(), F_OK
) != 0) {
853 cfg
->devpath
= "/dev/nbd" + stringify(m_index
++);
857 ifs
.open(nbd_path
+ "/pid", std::ifstream::in
);
858 if (!ifs
.is_open()) {
864 // If the rbd-nbd is re-attached the pid may store garbage
865 // here. We are sure this is the case when it is negative or
866 // zero. Then we just try to find the attached process scanning
867 // /proc fs. If it is positive we check the process with this
868 // pid first and if it is not rbd-nbd fallback to searching the
872 pid
= find_attached(cfg
->devpath
);
878 if (get_mapped_info(pid
, cfg
) >= 0) {
888 std::map
<int, Config
> m_mapped_info_cache
;
890 int get_mapped_info(int pid
, Config
*cfg
) {
891 ceph_assert(!cfg
->devpath
.empty());
893 auto it
= m_mapped_info_cache
.find(pid
);
894 if (it
!= m_mapped_info_cache
.end()) {
895 if (it
->second
.devpath
!= cfg
->devpath
) {
902 m_mapped_info_cache
[pid
] = {};
905 std::string path
= "/proc/" + stringify(pid
) + "/comm";
908 ifs
.open(path
.c_str(), std::ifstream::in
);
912 if (comm
!= "rbd-nbd") {
917 path
= "/proc/" + stringify(pid
) + "/cmdline";
919 std::vector
<const char*> args
;
921 ifs
.open(path
.c_str(), std::ifstream::in
);
926 if (cmdline
.empty()) {
930 for (unsigned i
= 0; i
< cmdline
.size(); i
++) {
931 char *arg
= &cmdline
[i
];
933 if (strcmp(basename(arg
) , "rbd-nbd") != 0) {
940 while (cmdline
[i
] != '\0') {
945 std::ostringstream err_msg
;
947 r
= parse_args(args
, &err_msg
, &c
);
952 if (c
.command
!= Map
&& c
.command
!= Attach
) {
957 m_mapped_info_cache
.erase(pid
);
958 if (!c
.devpath
.empty()) {
959 m_mapped_info_cache
[pid
] = c
;
960 if (c
.devpath
!= cfg
->devpath
) {
964 c
.devpath
= cfg
->devpath
;
967 c
.cookie
= get_cookie(cfg
->devpath
);
972 int find_attached(const std::string
&devpath
) {
973 for (auto &entry
: fs::directory_iterator("/proc")) {
974 if (!fs::is_directory(entry
.status())) {
980 pid
= boost::lexical_cast
<uint64_t>(entry
.path().filename().c_str());
981 } catch (boost::bad_lexical_cast
&) {
986 cfg
.devpath
= devpath
;
987 if (get_mapped_info(pid
, &cfg
) >=0 && cfg
.command
== Attach
) {
996 struct EncryptionOptions
{
997 std::vector
<librbd::encryption_spec_t
> specs
;
999 ~EncryptionOptions() {
1000 for (auto& spec
: specs
) {
1001 switch (spec
.format
) {
1002 case RBD_ENCRYPTION_FORMAT_LUKS
: {
1004 static_cast<librbd::encryption_luks_format_options_t
*>(spec
.opts
);
1005 ceph_memzero_s(opts
->passphrase
.data(), opts
->passphrase
.size(),
1006 opts
->passphrase
.size());
1010 case RBD_ENCRYPTION_FORMAT_LUKS1
: {
1012 static_cast<librbd::encryption_luks1_format_options_t
*>(spec
.opts
);
1013 ceph_memzero_s(opts
->passphrase
.data(), opts
->passphrase
.size(),
1014 opts
->passphrase
.size());
1018 case RBD_ENCRYPTION_FORMAT_LUKS2
: {
1020 static_cast<librbd::encryption_luks2_format_options_t
*>(spec
.opts
);
1021 ceph_memzero_s(opts
->passphrase
.data(), opts
->passphrase
.size(),
1022 opts
->passphrase
.size());
1033 static std::string
get_cookie(const std::string
&devpath
)
1037 std::string path
= "/sys/block/" + devpath
.substr(sizeof("/dev/") - 1) + "/backend";
1039 ifs
.open(path
, std::ifstream::in
);
1040 if (ifs
.is_open()) {
1041 std::getline(ifs
, cookie
);
1047 static int load_module(Config
*cfg
)
1049 ostringstream param
;
1053 param
<< "nbds_max=" << cfg
->nbds_max
;
1056 param
<< " max_part=" << cfg
->max_part
;
1058 if (!access("/sys/module/nbd", F_OK
)) {
1059 if (cfg
->nbds_max
|| cfg
->set_max_part
)
1060 cerr
<< "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded"
1065 ret
= module_load("nbd", param
.str().c_str());
1067 cerr
<< "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-ret
)
1073 static int check_device_size(int nbd_index
, unsigned long expected_size
)
1075 // There are bugs with some older kernel versions that result in an
1076 // overflow for large image sizes. This check is to ensure we are
1079 unsigned long size
= 0;
1080 std::string path
= "/sys/block/nbd" + stringify(nbd_index
) + "/size";
1082 ifs
.open(path
.c_str(), std::ifstream::in
);
1083 if (!ifs
.is_open()) {
1084 cerr
<< "rbd-nbd: failed to open " << path
<< std::endl
;
1088 size
*= RBD_NBD_BLKSIZE
;
1091 // Newer kernel versions will report real size only after nbd
1092 // connect. Assume this is the case and return success.
1096 if (size
!= expected_size
) {
1097 cerr
<< "rbd-nbd: kernel reported invalid device size (" << size
1098 << ", expected " << expected_size
<< ")" << std::endl
;
1105 static int parse_nbd_index(const std::string
& devpath
)
1109 ret
= sscanf(devpath
.c_str(), "/dev/nbd%d", &index
);
1111 // mean an early matching failure. But some cases need a negative value.
1114 cerr
<< "rbd-nbd: invalid device path: " << devpath
1115 << " (expected /dev/nbd{num})" << std::endl
;
1122 static int try_ioctl_setup(Config
*cfg
, int fd
, uint64_t size
,
1123 uint64_t blksize
, uint64_t flags
)
1127 if (cfg
->devpath
.empty()) {
1129 const char *path
= "/sys/module/nbd/parameters/nbds_max";
1131 if (access(path
, F_OK
) == 0) {
1133 ifs
.open(path
, std::ifstream::in
);
1134 if (ifs
.is_open()) {
1141 snprintf(dev
, sizeof(dev
), "/dev/nbd%d", index
);
1143 nbd
= open(dev
, O_RDWR
);
1145 if (nbd
== -EPERM
&& nbds_max
!= -1 && index
< (nbds_max
-1)) {
1150 cerr
<< "rbd-nbd: failed to find unused device" << std::endl
;
1154 r
= ioctl(nbd
, NBD_SET_SOCK
, fd
);
1165 r
= parse_nbd_index(cfg
->devpath
);
1170 nbd
= open(cfg
->devpath
.c_str(), O_RDWR
);
1173 cerr
<< "rbd-nbd: failed to open device: " << cfg
->devpath
<< std::endl
;
1177 r
= ioctl(nbd
, NBD_SET_SOCK
, fd
);
1180 cerr
<< "rbd-nbd: the device " << cfg
->devpath
<< " is busy" << std::endl
;
1186 r
= ioctl(nbd
, NBD_SET_BLKSIZE
, blksize
);
1189 cerr
<< "rbd-nbd: NBD_SET_BLKSIZE failed" << std::endl
;
1193 r
= ioctl(nbd
, NBD_SET_SIZE
, size
);
1195 cerr
<< "rbd-nbd: NBD_SET_SIZE failed" << std::endl
;
1200 ioctl(nbd
, NBD_SET_FLAGS
, flags
);
1202 if (cfg
->io_timeout
>= 0) {
1203 r
= ioctl(nbd
, NBD_SET_TIMEOUT
, (unsigned long)cfg
->io_timeout
);
1206 cerr
<< "rbd-nbd: failed to set IO timeout: " << cpp_strerror(r
)
1212 dout(10) << "ioctl setup complete for " << cfg
->devpath
<< dendl
;
1218 ioctl(nbd
, NBD_CLEAR_SOCK
);
1219 cerr
<< "rbd-nbd: failed to map, status: " << cpp_strerror(-r
) << std::endl
;
1226 static void netlink_cleanup(struct nl_sock
*sock
)
1232 nl_socket_free(sock
);
1235 static struct nl_sock
*netlink_init(int *id
)
1237 struct nl_sock
*sock
;
1240 sock
= nl_socket_alloc();
1242 cerr
<< "rbd-nbd: Could not allocate netlink socket." << std::endl
;
1246 ret
= genl_connect(sock
);
1248 cerr
<< "rbd-nbd: Could not connect netlink socket. Error " << ret
1253 *id
= genl_ctrl_resolve(sock
, "nbd");
1255 // nbd netlink interface not supported.
1263 nl_socket_free(sock
);
1267 static int netlink_disconnect(int index
)
1269 struct nl_sock
*sock
;
1273 sock
= netlink_init(&nl_id
);
1278 nl_socket_modify_cb(sock
, NL_CB_VALID
, NL_CB_CUSTOM
, genl_handle_msg
, NULL
);
1280 msg
= nlmsg_alloc();
1282 cerr
<< "rbd-nbd: Could not allocate netlink message." << std::endl
;
1286 if (!genlmsg_put(msg
, NL_AUTO_PORT
, NL_AUTO_SEQ
, nl_id
, 0, 0,
1287 NBD_CMD_DISCONNECT
, 0)) {
1288 cerr
<< "rbd-nbd: Could not setup message." << std::endl
;
1289 goto nla_put_failure
;
1292 NLA_PUT_U32(msg
, NBD_ATTR_INDEX
, index
);
1294 ret
= nl_send_sync(sock
, msg
);
1295 netlink_cleanup(sock
);
1297 cerr
<< "rbd-nbd: netlink disconnect failed: " << nl_geterror(-ret
)
1307 netlink_cleanup(sock
);
1311 static int netlink_disconnect_by_path(const std::string
& devpath
)
1315 index
= parse_nbd_index(devpath
);
1319 return netlink_disconnect(index
);
1322 static int netlink_resize(int nbd_index
, uint64_t size
)
1324 struct nl_sock
*sock
;
1328 sock
= netlink_init(&nl_id
);
1330 cerr
<< "rbd-nbd: Netlink interface not supported." << std::endl
;
1334 nl_socket_modify_cb(sock
, NL_CB_VALID
, NL_CB_CUSTOM
, genl_handle_msg
, NULL
);
1336 msg
= nlmsg_alloc();
1338 cerr
<< "rbd-nbd: Could not allocate netlink message." << std::endl
;
1342 if (!genlmsg_put(msg
, NL_AUTO_PORT
, NL_AUTO_SEQ
, nl_id
, 0, 0,
1343 NBD_CMD_RECONFIGURE
, 0)) {
1344 cerr
<< "rbd-nbd: Could not setup message." << std::endl
;
1348 NLA_PUT_U32(msg
, NBD_ATTR_INDEX
, nbd_index
);
1349 NLA_PUT_U64(msg
, NBD_ATTR_SIZE_BYTES
, size
);
1351 ret
= nl_send_sync(sock
, msg
);
1353 cerr
<< "rbd-nbd: netlink resize failed: " << nl_geterror(ret
) << std::endl
;
1357 netlink_cleanup(sock
);
1358 dout(10) << "netlink resize complete for nbd" << nbd_index
<< dendl
;
1365 netlink_cleanup(sock
);
1369 static int netlink_connect_cb(struct nl_msg
*msg
, void *arg
)
1371 struct genlmsghdr
*gnlh
= (struct genlmsghdr
*)nlmsg_data(nlmsg_hdr(msg
));
1372 Config
*cfg
= (Config
*)arg
;
1373 struct nlattr
*msg_attr
[NBD_ATTR_MAX
+ 1];
1377 ret
= nla_parse(msg_attr
, NBD_ATTR_MAX
, genlmsg_attrdata(gnlh
, 0),
1378 genlmsg_attrlen(gnlh
, 0), NULL
);
1380 cerr
<< "rbd-nbd: Unsupported netlink reply" << std::endl
;
1381 return -NLE_MSGTYPE_NOSUPPORT
;
1384 if (!msg_attr
[NBD_ATTR_INDEX
]) {
1385 cerr
<< "rbd-nbd: netlink connect reply missing device index." << std::endl
;
1386 return -NLE_MSGTYPE_NOSUPPORT
;
1389 index
= nla_get_u32(msg_attr
[NBD_ATTR_INDEX
]);
1390 cfg
->devpath
= "/dev/nbd" + stringify(index
);
1396 static int netlink_connect(Config
*cfg
, struct nl_sock
*sock
, int nl_id
, int fd
,
1397 uint64_t size
, uint64_t flags
, bool reconnect
)
1399 struct nlattr
*sock_attr
;
1400 struct nlattr
*sock_opt
;
1405 dout(10) << "netlink try reconnect for " << cfg
->devpath
<< dendl
;
1407 nl_socket_modify_cb(sock
, NL_CB_VALID
, NL_CB_CUSTOM
, genl_handle_msg
, NULL
);
1409 nl_socket_modify_cb(sock
, NL_CB_VALID
, NL_CB_CUSTOM
, netlink_connect_cb
,
1413 msg
= nlmsg_alloc();
1415 cerr
<< "rbd-nbd: Could not allocate netlink message." << std::endl
;
1419 if (!genlmsg_put(msg
, NL_AUTO_PORT
, NL_AUTO_SEQ
, nl_id
, 0, 0,
1420 reconnect
? NBD_CMD_RECONFIGURE
: NBD_CMD_CONNECT
, 0)) {
1421 cerr
<< "rbd-nbd: Could not setup message." << std::endl
;
1425 if (!cfg
->devpath
.empty()) {
1426 ret
= parse_nbd_index(cfg
->devpath
);
1430 NLA_PUT_U32(msg
, NBD_ATTR_INDEX
, ret
);
1436 if (cfg
->io_timeout
>= 0)
1437 NLA_PUT_U64(msg
, NBD_ATTR_TIMEOUT
, cfg
->io_timeout
);
1439 NLA_PUT_U64(msg
, NBD_ATTR_SIZE_BYTES
, size
);
1440 NLA_PUT_U64(msg
, NBD_ATTR_BLOCK_SIZE_BYTES
, RBD_NBD_BLKSIZE
);
1441 NLA_PUT_U64(msg
, NBD_ATTR_SERVER_FLAGS
, flags
);
1442 NLA_PUT_U64(msg
, NBD_ATTR_DEAD_CONN_TIMEOUT
, cfg
->reattach_timeout
);
1443 if (!cfg
->cookie
.empty())
1444 NLA_PUT_STRING(msg
, NBD_ATTR_BACKEND_IDENTIFIER
, cfg
->cookie
.c_str());
1446 sock_attr
= nla_nest_start(msg
, NBD_ATTR_SOCKETS
);
1448 cerr
<< "rbd-nbd: Could not init sockets in netlink message." << std::endl
;
1452 sock_opt
= nla_nest_start(msg
, NBD_SOCK_ITEM
);
1454 cerr
<< "rbd-nbd: Could not init sock in netlink message." << std::endl
;
1458 NLA_PUT_U32(msg
, NBD_SOCK_FD
, fd
);
1459 nla_nest_end(msg
, sock_opt
);
1460 nla_nest_end(msg
, sock_attr
);
1462 ret
= nl_send_sync(sock
, msg
);
1464 cerr
<< "rbd-nbd: netlink connect failed: " << nl_geterror(ret
)
1469 dout(10) << "netlink connect complete for " << cfg
->devpath
<< dendl
;
1478 static int try_netlink_setup(Config
*cfg
, int fd
, uint64_t size
, uint64_t flags
,
1481 struct nl_sock
*sock
;
1484 sock
= netlink_init(&nl_id
);
1486 cerr
<< "rbd-nbd: Netlink interface not supported. Using ioctl interface."
1491 dout(10) << "netlink interface supported." << dendl
;
1493 ret
= netlink_connect(cfg
, sock
, nl_id
, fd
, size
, flags
, reconnect
);
1494 netlink_cleanup(sock
);
1499 nbd
= open(cfg
->devpath
.c_str(), O_RDWR
);
1501 cerr
<< "rbd-nbd: failed to open device: " << cfg
->devpath
<< std::endl
;
1508 static int run_quiesce_hook(const std::string
&quiesce_hook
,
1509 const std::string
&devpath
,
1510 const std::string
&command
) {
1511 dout(10) << __func__
<< ": " << quiesce_hook
<< " " << devpath
<< " "
1512 << command
<< dendl
;
1514 SubProcess
hook(quiesce_hook
.c_str(), SubProcess::CLOSE
, SubProcess::PIPE
,
1516 hook
.add_cmd_args(devpath
.c_str(), command
.c_str(), NULL
);
1518 int r
= hook
.spawn();
1520 err
.append("subprocess spawn failed");
1522 err
.read_fd(hook
.get_stderr(), 16384);
1529 derr
<< __func__
<< ": " << quiesce_hook
<< " " << devpath
<< " "
1530 << command
<< " failed: " << err
.to_str() << dendl
;
1532 dout(10) << " succeeded: " << err
.to_str() << dendl
;
1538 static void handle_signal(int signum
)
1540 ceph_assert(signum
== SIGINT
|| signum
== SIGTERM
);
1541 derr
<< "*** Got signal " << sig_str(signum
) << " ***" << dendl
;
1543 dout(20) << __func__
<< ": " << "notifying terminate" << dendl
;
1545 ceph_assert(terminate_event_sock
.is_valid());
1546 terminate_event_sock
.notify();
1549 static NBDServer
*start_server(int fd
, librbd::Image
& image
, Config
*cfg
)
1553 server
= new NBDServer(fd
, image
, cfg
);
1556 init_async_signal_handler();
1557 register_async_signal_handler(SIGHUP
, sighup_handler
);
1558 register_async_signal_handler_oneshot(SIGINT
, handle_signal
);
1559 register_async_signal_handler_oneshot(SIGTERM
, handle_signal
);
1564 static void run_server(Preforker
& forker
, NBDServer
*server
, bool netlink_used
)
1566 if (g_conf()->daemonize
) {
1567 global_init_postfork_finish(g_ceph_context
);
1572 server
->wait_for_disconnect();
1574 ioctl(nbd
, NBD_DO_IT
);
1576 unregister_async_signal_handler(SIGHUP
, sighup_handler
);
1577 unregister_async_signal_handler(SIGINT
, handle_signal
);
1578 unregister_async_signal_handler(SIGTERM
, handle_signal
);
1579 shutdown_async_signal_handler();
1582 // Eventually it should be removed when pidfd_open is widely supported.
1584 static int wait_for_terminate_legacy(int pid
, int timeout
)
1586 for (int i
= 0; ; i
++) {
1587 if (kill(pid
, 0) == -1) {
1588 if (errno
== ESRCH
) {
1592 cerr
<< "rbd-nbd: kill(" << pid
<< ", 0) failed: "
1593 << cpp_strerror(r
) << std::endl
;
1596 if (i
>= timeout
* 2) {
1602 cerr
<< "rbd-nbd: waiting for process exit timed out" << std::endl
;
1606 // Eventually it should be replaced with glibc' pidfd_open
1607 // when it is widely available.
1609 #ifdef __NR_pidfd_open
1610 static int pidfd_open(pid_t pid
, unsigned int flags
)
1612 return syscall(__NR_pidfd_open
, pid
, flags
);
1615 static int pidfd_open(pid_t pid
, unsigned int flags
)
1622 static int wait_for_terminate(int pid
, int timeout
)
1624 int fd
= pidfd_open(pid
, 0);
1626 if (errno
== ENOSYS
) {
1627 return wait_for_terminate_legacy(pid
, timeout
);
1629 if (errno
== ESRCH
) {
1633 cerr
<< "rbd-nbd: pidfd_open(" << pid
<< ") failed: "
1634 << cpp_strerror(r
) << std::endl
;
1638 struct pollfd poll_fds
[1];
1639 memset(poll_fds
, 0, sizeof(struct pollfd
));
1640 poll_fds
[0].fd
= fd
;
1641 poll_fds
[0].events
= POLLIN
;
1643 int r
= poll(poll_fds
, 1, timeout
* 1000);
1646 cerr
<< "rbd-nbd: failed to poll rbd-nbd process: " << cpp_strerror(r
)
1653 if ((poll_fds
[0].revents
& POLLIN
) == 0) {
1654 cerr
<< "rbd-nbd: waiting for process exit timed out" << std::endl
;
// do_map(): map an RBD image (or snapshot) to an NBD device and run the
// request server until the device is disconnected.
//
// NOTE(review): this chunk is a lossy extraction -- many original lines
// (error checks, braces, returns, cleanup paths) are missing, and the
// surviving lines are split mid-statement with the original line numbers
// embedded.  The text below is preserved byte-identical; reconcile
// against the complete source before making code changes here.
1664 static int do_map(int argc
, const char *argv
[], Config
*cfg
, bool reconnect
)
// local state: cluster handle, image, NBD flags and block size
1668 librados::Rados rados
;
1670 librados::IoCtx io_ctx
;
1671 librbd::Image image
;
1674 unsigned long flags
;
1676 unsigned long blksize
= RBD_NBD_BLKSIZE
;
1681 librbd::image_info_t info
;
// argument vector and usage handling
1686 auto args
= argv_to_vec(argc
, argv
);
1688 cerr
<< argv
[0] << ": -h or --help for usage" << std::endl
;
1691 if (ceph_argparse_need_usage(args
)) {
// global ceph context init, then daemonize via Preforker
1696 auto cct
= global_init(NULL
, args
, CEPH_ENTITY_TYPE_CLIENT
,
1697 CODE_ENVIRONMENT_DAEMON
,
1698 CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS
);
1699 g_ceph_context
->_conf
.set_val_or_die("pid_file", "");
1701 if (global_init_prefork(g_ceph_context
) >= 0) {
1703 r
= forker
.prefork(err
);
1705 cerr
<< err
<< std::endl
;
1708 if (forker
.is_parent()) {
1709 if (forker
.parent_wait(err
) != 0) {
1714 global_init_postfork_start(g_ceph_context
);
1717 common_init_finish(g_ceph_context
);
1718 global_init_chdir(g_ceph_context
);
// socketpair: one end is handed to the kernel nbd driver, the other to
// the local request server
1720 if (socketpair(AF_UNIX
, SOCK_STREAM
, 0, fd
) == -1) {
// connect to the cluster and open the image
1725 r
= rados
.init_with_context(g_ceph_context
);
1729 r
= rados
.connect();
1733 r
= rados
.ioctx_create(cfg
->poolname
.c_str(), io_ctx
);
1737 io_ctx
.set_namespace(cfg
->nsname
);
1739 r
= rbd
.open(io_ctx
, image
, cfg
->imgname
.c_str());
// optional exclusive lock on the image
1743 if (cfg
->exclusive
) {
1744 r
= image
.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE
);
1746 cerr
<< "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r
)
// select a snapshot by id or by name, if requested
1752 if (cfg
->snapid
!= CEPH_NOSNAP
) {
1753 r
= image
.snap_set_by_id(cfg
->snapid
);
1755 cerr
<< "rbd-nbd: failed to set snap id: " << cpp_strerror(r
)
1759 } else if (!cfg
->snapname
.empty()) {
1760 r
= image
.snap_set(cfg
->snapname
.c_str());
1762 cerr
<< "rbd-nbd: failed to set snap name: " << cpp_strerror(r
)
// encryption: read one passphrase file per requested format and build
// the spec list for encryption_load2()
1768 if (!cfg
->encryption_formats
.empty()) {
1769 EncryptionOptions encryption_options
;
1770 encryption_options
.specs
.reserve(cfg
->encryption_formats
.size());
1772 for (size_t i
= 0; i
< cfg
->encryption_formats
.size(); ++i
) {
1773 std::ifstream
file(cfg
->encryption_passphrase_files
[i
],
1774 std::ios::in
| std::ios::binary
);
1777 std::cerr
<< "rbd-nbd: unable to open passphrase file '"
1778 << cfg
->encryption_passphrase_files
[i
] << "': "
1779 << cpp_strerror(r
) << std::endl
;
1782 std::string
passphrase((std::istreambuf_iterator
<char>(file
)),
1783 std::istreambuf_iterator
<char>());
// NOTE(review): opts are heap-allocated here; the matching cleanup is
// not visible in this extraction -- confirm ownership before editing
1786 switch (cfg
->encryption_formats
[i
]) {
1787 case RBD_ENCRYPTION_FORMAT_LUKS
: {
1788 auto opts
= new librbd::encryption_luks_format_options_t
{
1789 std::move(passphrase
)};
1790 encryption_options
.specs
.push_back(
1791 {RBD_ENCRYPTION_FORMAT_LUKS
, opts
, sizeof(*opts
)});
1794 case RBD_ENCRYPTION_FORMAT_LUKS1
: {
1795 auto opts
= new librbd::encryption_luks1_format_options_t
{
1796 .passphrase
= std::move(passphrase
)};
1797 encryption_options
.specs
.push_back(
1798 {RBD_ENCRYPTION_FORMAT_LUKS1
, opts
, sizeof(*opts
)});
1801 case RBD_ENCRYPTION_FORMAT_LUKS2
: {
1802 auto opts
= new librbd::encryption_luks2_format_options_t
{
1803 .passphrase
= std::move(passphrase
)};
1804 encryption_options
.specs
.push_back(
1805 {RBD_ENCRYPTION_FORMAT_LUKS2
, opts
, sizeof(*opts
)});
// load all encryption specs at once
1813 r
= image
.encryption_load2(encryption_options
.specs
.data(),
1814 encryption_options
.specs
.size());
1816 cerr
<< "rbd-nbd: failed to load encryption: " << cpp_strerror(r
)
1821 // luks2 block size can vary up to 4096, while luks1 always uses 512
1822 // currently we don't have an rbd API for querying the loaded encryption
// query image size and compute the NBD feature flags
1826 r
= image
.stat(info
, sizeof(info
));
1830 flags
= NBD_FLAG_SEND_FLUSH
| NBD_FLAG_HAS_FLAGS
;
1832 flags
|= NBD_FLAG_SEND_TRIM
;
1834 if (!cfg
->snapname
.empty() || cfg
->readonly
) {
1835 flags
|= NBD_FLAG_READ_ONLY
;
// the kernel device size is an 'unsigned long'; reject larger images
1839 if (info
.size
> ULONG_MAX
) {
1841 cerr
<< "rbd-nbd: image is too large (" << byte_u_t(info
.size
)
1842 << ", max is " << byte_u_t(ULONG_MAX
) << ")" << std::endl
;
// ensure the nbd kernel module is loaded, then start the local server
1848 r
= load_module(cfg
);
1852 server
= start_server(fd
[1], image
, cfg
);
1854 use_netlink
= cfg
->try_netlink
|| reconnect
;
1856 // generate when the cookie is not supplied at CLI
1857 if (!reconnect
&& cfg
->cookie
.empty()) {
1859 uuid_gen
.generate_random();
1860 cfg
->cookie
= uuid_gen
.to_string();
// device setup: netlink first (required for reconnect), ioctl fallback
1862 r
= try_netlink_setup(cfg
, fd
[0], size
, flags
, reconnect
);
1865 } else if (r
== 1) {
1866 use_netlink
= false;
1871 r
= try_ioctl_setup(cfg
, fd
[0], size
, blksize
, flags
);
1876 r
= check_device_size(nbd_index
, size
);
1880 r
= ioctl(nbd
, BLKROSET
, (unsigned long) &read_only
);
// register quiesce and image-update watches
1887 NBDQuiesceWatchCtx
quiesce_watch_ctx(server
);
1889 r
= image
.quiesce_watch(&quiesce_watch_ctx
,
1890 &server
->quiesce_watch_handle
);
1898 NBDWatchCtx
watch_ctx(nbd
, nbd_index
, use_netlink
, io_ctx
, image
,
1900 r
= image
.update_watch(&watch_ctx
, &handle
);
1906 cookie
= get_cookie(cfg
->devpath
);
1907 ceph_assert(cookie
== cfg
->cookie
|| cookie
.empty());
1909 if (cfg
->show_cookie
&& !cookie
.empty()) {
1910 cout
<< cfg
->devpath
<< " " << cookie
<< std::endl
;
1912 cout
<< cfg
->devpath
<< std::endl
;
// serve requests until disconnect/unmap
1915 run_server(forker
, server
, use_netlink
);
// teardown: unregister watches
1918 r
= image
.quiesce_unwatch(server
->quiesce_watch_handle
);
1919 ceph_assert(r
== 0);
1922 r
= image
.update_unwatch(handle
);
1923 ceph_assert(r
== 0);
// error path: disconnect whatever was set up
1929 netlink_disconnect(nbd_index
);
1931 ioctl(nbd
, NBD_CLEAR_SOCK
);
1932 cerr
<< "rbd-nbd: failed to map, status: " << cpp_strerror(-r
)
1947 forker
.exit(r
< 0 ? EXIT_FAILURE
: 0);
1952 static int do_detach(Config
*cfg
)
1954 int r
= kill(cfg
->pid
, SIGTERM
);
1957 cerr
<< "rbd-nbd: failed to terminate " << cfg
->pid
<< ": "
1958 << cpp_strerror(r
) << std::endl
;
1962 return wait_for_terminate(cfg
->pid
, cfg
->reattach_timeout
);
1965 static int do_unmap(Config
*cfg
)
1968 * The netlink disconnect call supports devices setup with netlink or ioctl,
1969 * so we always try that first.
1971 int r
= netlink_disconnect_by_path(cfg
->devpath
);
1977 int nbd
= open(cfg
->devpath
.c_str(), O_RDWR
);
1979 cerr
<< "rbd-nbd: failed to open device: " << cfg
->devpath
<< std::endl
;
1983 r
= ioctl(nbd
, NBD_DISCONNECT
);
1985 cerr
<< "rbd-nbd: the device is not used" << std::endl
;
1996 r
= wait_for_terminate(cfg
->pid
, cfg
->reattach_timeout
);
2002 static int parse_imgpath(const std::string
&imgpath
, Config
*cfg
,
2003 std::ostream
*err_msg
) {
2004 std::regex
pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$");
2006 if (!std::regex_match(imgpath
, match
, pattern
)) {
2007 std::cerr
<< "rbd-nbd: invalid spec '" << imgpath
<< "'" << std::endl
;
2011 if (match
[1].matched
) {
2012 cfg
->poolname
= match
[1];
2015 if (match
[2].matched
) {
2016 cfg
->nsname
= match
[2];
2019 cfg
->imgname
= match
[3];
2021 if (match
[4].matched
)
2022 cfg
->snapname
= match
[4];
2027 static int do_list_mapped_devices(const std::string
&format
, bool pretty_format
)
2029 bool should_print
= false;
2030 std::unique_ptr
<ceph::Formatter
> f
;
2033 if (format
== "json") {
2034 f
.reset(new JSONFormatter(pretty_format
));
2035 } else if (format
== "xml") {
2036 f
.reset(new XMLFormatter(pretty_format
));
2037 } else if (!format
.empty() && format
!= "plain") {
2038 std::cerr
<< "rbd-nbd: invalid output format: " << format
<< std::endl
;
2043 f
->open_array_section("devices");
2045 tbl
.define_column("id", TextTable::LEFT
, TextTable::LEFT
);
2046 tbl
.define_column("pool", TextTable::LEFT
, TextTable::LEFT
);
2047 tbl
.define_column("namespace", TextTable::LEFT
, TextTable::LEFT
);
2048 tbl
.define_column("image", TextTable::LEFT
, TextTable::LEFT
);
2049 tbl
.define_column("snap", TextTable::LEFT
, TextTable::LEFT
);
2050 tbl
.define_column("device", TextTable::LEFT
, TextTable::LEFT
);
2051 tbl
.define_column("cookie", TextTable::LEFT
, TextTable::LEFT
);
2056 while (it
.get(&cfg
)) {
2057 std::string snap
= (cfg
.snapid
!= CEPH_NOSNAP
?
2058 "@" + std::to_string(cfg
.snapid
) : cfg
.snapname
);
2060 f
->open_object_section("device");
2061 f
->dump_int("id", cfg
.pid
);
2062 f
->dump_string("pool", cfg
.poolname
);
2063 f
->dump_string("namespace", cfg
.nsname
);
2064 f
->dump_string("image", cfg
.imgname
);
2065 f
->dump_string("snap", snap
);
2066 f
->dump_string("device", cfg
.devpath
);
2067 f
->dump_string("cookie", cfg
.cookie
);
2070 should_print
= true;
2071 tbl
<< cfg
.pid
<< cfg
.poolname
<< cfg
.nsname
<< cfg
.imgname
2072 << (snap
.empty() ? "-" : snap
) << cfg
.devpath
<< cfg
.cookie
2073 << TextTable::endrow
;
2078 f
->close_section(); // devices
2079 f
->flush(std::cout
);
2087 static bool find_mapped_dev_by_spec(Config
*cfg
, int skip_pid
=-1) {
2090 while (it
.get(&c
)) {
2091 if (c
.pid
!= skip_pid
&&
2092 c
.poolname
== cfg
->poolname
&& c
.nsname
== cfg
->nsname
&&
2093 c
.imgname
== cfg
->imgname
&& c
.snapname
== cfg
->snapname
&&
2094 (cfg
->devpath
.empty() || c
.devpath
== cfg
->devpath
) &&
2095 c
.snapid
== cfg
->snapid
) {
2103 static int find_proc_by_dev(Config
*cfg
) {
2106 while (it
.get(&c
)) {
2107 if (c
.devpath
== cfg
->devpath
) {
// parse_args(): split the command line into ceph-global options,
// rbd-nbd options and the positional command (map / unmap / attach /
// detach / list-mapped), filling *cfg.  Returns VERSION_INFO for -v
// (and presumably HELP_INFO for -h -- confirm), with errors reported
// through *err_msg.
//
// NOTE(review): lossy extraction -- many original lines (returns,
// braces, branch closings) are missing, and the surviving lines are
// split mid-statement with the original line numbers embedded.  The
// text below is preserved byte-identical; reconcile against the
// complete source before making code changes here.
2115 static int parse_args(vector
<const char*>& args
, std::ostream
*err_msg
,
2117 std::string conf_file_list
;
2118 std::string cluster
;
2119 CephInitParameters iparams
= ceph_argparse_early_args(
2120 args
, CEPH_ENTITY_TYPE_CLIENT
, &cluster
, &conf_file_list
);
2122 ConfigProxy config
{false};
2123 config
->name
= iparams
.name
;
2124 config
->cluster
= cluster
;
2126 if (!conf_file_list
.empty()) {
2127 config
.parse_config_files(conf_file_list
.c_str(), nullptr, 0);
2129 config
.parse_config_files(nullptr, nullptr, 0);
2131 config
.parse_env(CEPH_ENTITY_TYPE_CLIENT
);
2132 config
.parse_argv(args
);
2133 cfg
->poolname
= config
.get_val
<std::string
>("rbd_default_pool");
2135 std::vector
<const char*>::iterator i
;
2136 std::ostringstream err
;
2137 std::string arg_value
;
// per-option parsing loop
2140 for (i
= args
.begin(); i
!= args
.end(); ) {
2141 if (ceph_argparse_flag(args
, i
, "-h", "--help", (char*)NULL
)) {
2143 } else if (ceph_argparse_flag(args
, i
, "-v", "--version", (char*)NULL
)) {
2144 return VERSION_INFO
;
2145 } else if (ceph_argparse_witharg(args
, i
, &cfg
->devpath
, "--device", (char *)NULL
)) {
2146 } else if (ceph_argparse_witharg(args
, i
, &cfg
->io_timeout
, err
,
2147 "--io-timeout", (char *)NULL
)) {
2148 if (!err
.str().empty()) {
2149 *err_msg
<< "rbd-nbd: " << err
.str();
2152 if (cfg
->io_timeout
< 0) {
2153 *err_msg
<< "rbd-nbd: Invalid argument for io-timeout!";
2156 } else if (ceph_argparse_witharg(args
, i
, &cfg
->nbds_max
, err
, "--nbds_max", (char *)NULL
)) {
2157 if (!err
.str().empty()) {
2158 *err_msg
<< "rbd-nbd: " << err
.str();
2161 if (cfg
->nbds_max
< 0) {
2162 *err_msg
<< "rbd-nbd: Invalid argument for nbds_max!";
2165 } else if (ceph_argparse_witharg(args
, i
, &cfg
->max_part
, err
, "--max_part", (char *)NULL
)) {
2166 if (!err
.str().empty()) {
2167 *err_msg
<< "rbd-nbd: " << err
.str();
2170 if ((cfg
->max_part
< 0) || (cfg
->max_part
> 255)) {
2171 *err_msg
<< "rbd-nbd: Invalid argument for max_part(0~255)!";
2174 cfg
->set_max_part
= true;
2175 } else if (ceph_argparse_flag(args
, i
, "--quiesce", (char *)NULL
)) {
2176 cfg
->quiesce
= true;
2177 } else if (ceph_argparse_witharg(args
, i
, &cfg
->quiesce_hook
,
2178 "--quiesce-hook", (char *)NULL
)) {
2179 } else if (ceph_argparse_flag(args
, i
, "--read-only", (char *)NULL
)) {
2180 cfg
->readonly
= true;
2181 } else if (ceph_argparse_witharg(args
, i
, &cfg
->reattach_timeout
, err
,
2182 "--reattach-timeout", (char *)NULL
)) {
2183 if (!err
.str().empty()) {
2184 *err_msg
<< "rbd-nbd: " << err
.str();
2187 if (cfg
->reattach_timeout
< 0) {
2188 *err_msg
<< "rbd-nbd: Invalid argument for reattach-timeout!";
2191 } else if (ceph_argparse_flag(args
, i
, "--exclusive", (char *)NULL
)) {
2192 cfg
->exclusive
= true;
2193 } else if (ceph_argparse_flag(args
, i
, "--notrim", (char *)NULL
)) {
2195 } else if (ceph_argparse_witharg(args
, i
, &cfg
->io_timeout
, err
,
2196 "--timeout", (char *)NULL
)) {
2197 if (!err
.str().empty()) {
2198 *err_msg
<< "rbd-nbd: " << err
.str();
2201 if (cfg
->io_timeout
< 0) {
2202 *err_msg
<< "rbd-nbd: Invalid argument for timeout!";
2205 *err_msg
<< "rbd-nbd: --timeout is deprecated (use --io-timeout)";
2206 } else if (ceph_argparse_witharg(args
, i
, &cfg
->format
, err
, "--format",
2208 } else if (ceph_argparse_flag(args
, i
, "--pretty-format", (char *)NULL
)) {
2209 cfg
->pretty_format
= true;
2210 } else if (ceph_argparse_flag(args
, i
, "--try-netlink", (char *)NULL
)) {
2211 cfg
->try_netlink
= true;
2212 } else if (ceph_argparse_flag(args
, i
, "--show-cookie", (char *)NULL
)) {
2213 cfg
->show_cookie
= true;
2214 } else if (ceph_argparse_witharg(args
, i
, &cfg
->cookie
, "--cookie", (char *)NULL
)) {
2215 } else if (ceph_argparse_witharg(args
, i
, &snapid
, err
,
2216 "--snap-id", (char *)NULL
)) {
2217 if (!err
.str().empty()) {
2218 *err_msg
<< "rbd-nbd: " << err
.str();
2222 *err_msg
<< "rbd-nbd: Invalid argument for snap-id!";
2225 cfg
->snapid
= snapid
;
2226 } else if (ceph_argparse_witharg(args
, i
, &arg_value
,
2227 "--encryption-format", (char *)NULL
)) {
2228 if (arg_value
== "luks1") {
2229 cfg
->encryption_formats
.push_back(RBD_ENCRYPTION_FORMAT_LUKS1
);
2230 } else if (arg_value
== "luks2") {
2231 cfg
->encryption_formats
.push_back(RBD_ENCRYPTION_FORMAT_LUKS2
);
2232 } else if (arg_value
== "luks") {
2233 cfg
->encryption_formats
.push_back(RBD_ENCRYPTION_FORMAT_LUKS
);
2235 *err_msg
<< "rbd-nbd: Invalid encryption format";
2238 } else if (ceph_argparse_witharg(args
, i
, &arg_value
,
2239 "--encryption-passphrase-file",
2241 cfg
->encryption_passphrase_files
.push_back(arg_value
);
// bare passphrase files default to the generic "luks" format
2247 if (cfg
->encryption_formats
.empty() &&
2248 !cfg
->encryption_passphrase_files
.empty()) {
2249 cfg
->encryption_formats
.resize(cfg
->encryption_passphrase_files
.size(),
2250 RBD_ENCRYPTION_FORMAT_LUKS
);
2253 if (cfg
->encryption_formats
.size() != cfg
->encryption_passphrase_files
.size()) {
2254 *err_msg
<< "rbd-nbd: Encryption formats count does not match "
2255 << "passphrase files count";
// first positional argument: the command
2260 if (args
.begin() != args
.end()) {
2261 if (strcmp(*args
.begin(), "map") == 0) {
2263 } else if (strcmp(*args
.begin(), "unmap") == 0) {
2265 } else if (strcmp(*args
.begin(), "attach") == 0) {
2267 } else if (strcmp(*args
.begin(), "detach") == 0) {
2269 } else if (strcmp(*args
.begin(), "list-mapped") == 0) {
2272 *err_msg
<< "rbd-nbd: unknown command: " << *args
.begin();
2275 args
.erase(args
.begin());
2279 *err_msg
<< "rbd-nbd: must specify command";
// attach: require a device path and a matching cookie where supported
2286 if (cfg
->devpath
.empty()) {
2287 *err_msg
<< "rbd-nbd: must specify device to attach";
2290 // Allowing attach without --cookie option for kernel without
2291 // NBD_ATTR_BACKEND_IDENTIFIER support for compatibility
2292 cookie
= get_cookie(cfg
->devpath
);
2293 if (!cookie
.empty()) {
2294 if (cfg
->cookie
.empty()) {
2295 *err_msg
<< "rbd-nbd: must specify cookie to attach";
2297 } else if (cookie
!= cfg
->cookie
) {
2298 *err_msg
<< "rbd-nbd: cookie mismatch";
2301 } else if (!cfg
->cookie
.empty()) {
2302 *err_msg
<< "rbd-nbd: kernel does not have cookie support";
// map/attach: positional image-or-snap spec
2307 if (args
.begin() == args
.end()) {
2308 *err_msg
<< "rbd-nbd: must specify image-or-snap-spec";
2311 if (parse_imgpath(*args
.begin(), cfg
, err_msg
) < 0) {
2314 args
.erase(args
.begin());
// unmap/detach: positional device path or image spec
2318 if (args
.begin() == args
.end()) {
2319 *err_msg
<< "rbd-nbd: must specify nbd device or image-or-snap-spec";
2322 if (boost::starts_with(*args
.begin(), "/dev/")) {
2323 cfg
->devpath
= *args
.begin();
2325 if (parse_imgpath(*args
.begin(), cfg
, err_msg
) < 0) {
2329 args
.erase(args
.begin());
// final validation
2336 if (cfg
->snapid
!= CEPH_NOSNAP
&& !cfg
->snapname
.empty()) {
2337 *err_msg
<< "rbd-nbd: use either snapname or snapid, not both";
2341 if (args
.begin() != args
.end()) {
2342 *err_msg
<< "rbd-nbd: unknown args: " << *args
.begin();
// rbd_nbd(): top-level driver -- parse arguments, then dispatch to
// do_map / do_detach / do_unmap / do_list_mapped_devices based on the
// parsed cfg.command.
//
// NOTE(review): lossy extraction -- many original lines (case labels,
// returns, braces) are missing, and the surviving lines are split
// mid-statement with the original line numbers embedded.  The text
// below is preserved byte-identical; reconcile against the complete
// source before making code changes here.
2350 static int rbd_nbd(int argc
, const char *argv
[])
2354 auto args
= argv_to_vec(argc
, argv
);
2355 std::ostringstream err_msg
;
2356 r
= parse_args(args
, &err_msg
, &cfg
);
2357 if (r
== HELP_INFO
) {
2360 } else if (r
== VERSION_INFO
) {
2361 std::cout
<< pretty_version_to_str() << std::endl
;
2364 cerr
<< err_msg
.str() << std::endl
;
// warnings from parse_args (e.g. deprecations) are printed even on success
2368 if (!err_msg
.str().empty()) {
2369 cerr
<< err_msg
.str() << std::endl
;
// dispatch on the parsed command
2372 switch (cfg
.command
) {
2374 ceph_assert(!cfg
.devpath
.empty());
2375 if (find_mapped_dev_by_spec(&cfg
, getpid())) {
2376 cerr
<< "rbd-nbd: " << cfg
.devpath
<< " has process " << cfg
.pid
2377 << " connected" << std::endl
;
2382 if (cfg
.imgname
.empty()) {
2383 cerr
<< "rbd-nbd: image name was not specified" << std::endl
;
2387 r
= do_map(argc
, argv
, &cfg
, cfg
.command
== Attach
);
2392 if (cfg
.devpath
.empty()) {
2393 if (!find_mapped_dev_by_spec(&cfg
)) {
2394 cerr
<< "rbd-nbd: " << cfg
.image_spec() << " is not mapped"
2398 } else if (!find_proc_by_dev(&cfg
)) {
2399 cerr
<< "rbd-nbd: no process attached to " << cfg
.devpath
<< " found"
2403 r
= do_detach(&cfg
);
2408 if (cfg
.devpath
.empty()) {
2409 if (!find_mapped_dev_by_spec(&cfg
)) {
2410 cerr
<< "rbd-nbd: " << cfg
.image_spec() << " is not mapped"
2414 } else if (!find_proc_by_dev(&cfg
)) {
2415 // still try to send disconnect to the device
2422 r
= do_list_mapped_devices(cfg
.format
, cfg
.pretty_format
);
2434 int main(int argc
, const char *argv
[])
2436 int r
= rbd_nbd(argc
, argv
);
2438 return EXIT_FAILURE
;