1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
5 * rbd-nbd - RBD in userspace
7 * Copyright (C) 2015 - 2016 Kylin Corporation
9 * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com>
10 * Li Wang <li.wang@kylin-cloud.com>
12 * This is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License version 2.1, as published by the Free Software
15 * Foundation. See file COPYING.
19 #include "include/int_types.h"
27 #include <sys/types.h>
30 #include <linux/nbd.h>
32 #include <sys/ioctl.h>
33 #include <sys/socket.h>
37 #include <boost/regex.hpp>
39 #include "mon/MonClient.h"
40 #include "common/config.h"
41 #include "common/dout.h"
43 #include "common/errno.h"
44 #include "common/module.h"
45 #include "common/safe_io.h"
46 #include "common/ceph_argparse.h"
47 #include "common/Preforker.h"
48 #include "global/global_init.h"
49 #include "global/signal_handler.h"
51 #include "include/rados/librados.hpp"
52 #include "include/rbd/librbd.hpp"
53 #include "include/stringify.h"
54 #include "include/xlist.h"
56 #define dout_context g_ceph_context
57 #define dout_subsys ceph_subsys_rbd
59 #define dout_prefix *_dout << "rbd-nbd: "
// Command-line help text: streams the usage summary for the map, unmap and
// list-mapped commands and their options to std::cout, then calls
// generic_server_usage().
63 std::cout
<< "Usage: rbd-nbd [options] map <image-or-snap-spec> Map an image to nbd device\n"
64 << " unmap <device path> Unmap nbd device\n"
65 << " list-mapped List mapped nbd devices\n"
67 << " --device <device path> Specify nbd device path\n"
68 << " --read-only Map read-only\n"
69 << " --nbds_max <limit> Override for module param nbds_max\n"
70 << " --max_part <limit> Override for module param max_part\n"
71 << " --exclusive Forbid writes by other clients\n"
73 generic_server_usage();
// File-scope settings shared by the command implementations below.
// devpath is the nbd device path (filled from --device or the unmap argument);
// poolname starts out as "rbd"; imgname and snapname start out empty.
76 static std::string devpath
, poolname("rbd"), imgname
, snapname
;
// When set (or when snapname is non-empty) do_map adds NBD_FLAG_READ_ONLY
// to the device flags.
77 static bool readonly
= false;
// Parsed from --nbds_max; forwarded as the "nbds_max=" module parameter when
// open_device loads the nbd module.
78 static int nbds_max
= 0;
// Parsed from --max_part (accepted range 0..255); forwarded as the
// " max_part=" module parameter.
79 static int max_part
= 255;
// Checked together with nbds_max when open_device warns that module
// parameter options are ignored. Presumably set when --max_part is given;
// the assignment is not shown here.
80 static bool set_max_part
= false;
// Presumably set by --exclusive and consulted before acquiring the exclusive
// lock in do_map -- confirm against the full file.
81 static bool exclusive
= false;
// Block size handed to the NBD_SET_BLKSIZE ioctl; check_device_size also
// multiplies the size it obtains by this value.
84 #define RBD_NBD_BLKSIZE 512UL
// 64-bit network/host byte-order helpers, selected by the endianness macros.
86 #ifdef CEPH_BIG_ENDIAN
88 #elif defined(CEPH_LITTLE_ENDIAN)
// Little-endian builds map ntohll onto swab().
89 #define ntohll(a) swab(a)
// NOTE(review): "endianess" in the message below is a misspelling of
// "endianness".
91 #error "Could not determine endianess"
// htonll is defined in terms of ntohll: the same conversion serves both
// directions.
93 #define htonll(a) ntohll(a)
// Signal handler that do_map registers (one-shot) for SIGINT and SIGTERM.
// Asserts that signum is one of those two, logs the signal, and asks the
// kernel to disconnect the nbd device through the NBD_DISCONNECT ioctl on
// the file-scope `nbd` descriptor.
95 static void handle_signal(int signum
)
97 assert(signum
== SIGINT
|| signum
== SIGTERM
);
98 derr
<< "*** Got signal " << sig_str(signum
) << " ***" << dendl
;
99 dout(20) << __func__
<< ": " << "sending NBD_DISCONNECT" << dendl
;
// A negative ioctl result is reported together with the errno text.
100 if (ioctl(nbd
, NBD_DISCONNECT
) < 0) {
101 derr
<< "rbd-nbd: disconnect failed: " << cpp_strerror(errno
) << dendl
;
103 dout(20) << __func__
<< ": " << "disconnected" << dendl
;
// Image that the server issues aio_write / aio_read / aio_discard against.
111 librbd::Image
&image
;
// Builds a server from a file descriptor and the image to serve. The part of
// the initializer list shown here names the lock "NBDServer::Locker" and
// binds the reader and writer thread helpers to their entry member functions.
114 NBDServer(int _fd
, librbd::Image
& _image
)
117 , lock("NBDServer::Locker")
118 , reader_thread(*this, &NBDServer::reader_entry
)
119 , writer_thread(*this, &NBDServer::writer_entry
)
// Termination flag; the request and reply loops run while it is false.
124 std::atomic
<bool> terminated
= { false };
// Shutdown logic: compare_exchange_strong flips terminated from false to
// true, so only the first caller performs the socket shutdown.
128 bool expected
= false;
129 if (terminated
.compare_exchange_strong(expected
, true)) {
// Close the socket for both reading and writing.
130 ::shutdown(fd
, SHUT_RDWR
);
// The server lock is taken here; presumably to wake a waiter -- the
// statement that follows is not shown.
132 Mutex::Locker
l(lock
);
// Per-request context members: the intrusive list hook used to queue the
// context on io_pending / io_finished, plus the raw nbd request and reply
// headers.
139 xlist
<IOContext
*>::item item
;
141 struct nbd_request request
;
142 struct nbd_reply reply
;
// Stream formatter for IOContext, used by the log statements.
151 friend std::ostream
&operator<<(std::ostream
&os
, const IOContext
&ctx
);
// io_pending holds contexts added by io_start(); io_finished holds contexts
// moved over by io_finish() until wait_io_finish() pops them.
155 xlist
<IOContext
*> io_pending
;
156 xlist
<IOContext
*> io_finished
;
// Records a newly started request: appends ctx's list item to io_pending
// while holding the server lock.
158 void io_start(IOContext
*ctx
)
160 Mutex::Locker
l(lock
);
161 io_pending
.push_back(&ctx
->item
);
// Marks a request as completed: under the server lock, asserts that ctx is
// currently on a list, unlinks it from that list and appends it to
// io_finished.
164 void io_finish(IOContext
*ctx
)
166 Mutex::Locker
l(lock
);
167 assert(ctx
->item
.is_on_list());
168 ctx
->item
.remove_myself();
169 io_finished
.push_back(&ctx
->item
);
// Obtains the next completed request. Holds the server lock and loops while
// io_finished is empty and the server has not been terminated. If the list
// is still empty after the loop a separate branch is taken (presumably
// returning null, since the reply loop logs "no io requests" -- confirm);
// otherwise the front entry is taken and popped.
173 IOContext
*wait_io_finish()
175 Mutex::Locker
l(lock
);
176 while(io_finished
.empty() && !terminated
)
179 if (io_finished
.empty())
182 IOContext
*ret
= io_finished
.front();
183 io_finished
.pop_front();
// Cleanup path (function header not shown): requires that the reader thread
// is not running, loops until io_pending is empty, then drains io_finished.
190 assert(!reader_thread
.is_started());
191 Mutex::Locker
l(lock
);
192 while(!io_pending
.empty())
// Each leftover context is adopted by a unique_ptr, so it is destroyed when
// free_ctx goes out of scope.
195 while(!io_finished
.empty()) {
196 ceph::unique_ptr
<IOContext
> free_ctx(io_finished
.front());
197 io_finished
.pop_front();
// librbd completion callback. `cb` is the AioCompletion handle and `arg` is
// the IOContext that was passed when the completion was created. Reads the
// completion's return value, fills in ctx->reply.error, hands the context to
// the owning server's io_finish(), and releases the completion.
201 static void aio_callback(librbd::completion_t cb
, void *arg
)
203 librbd::RBD::AioCompletion
*aio_completion
=
204 reinterpret_cast<librbd::RBD::AioCompletion
*>(cb
);
206 IOContext
*ctx
= reinterpret_cast<IOContext
*>(arg
);
207 int ret
= aio_completion
->get_return_value();
209 dout(20) << __func__
<< ": " << *ctx
<< dendl
;
211 if (ret
== -EINVAL
) {
212 // if shrinking an image, a pagecache writeback might reference
213 // extents outside of the range of the new image extents
214 dout(5) << __func__
<< ": masking IO out-of-bounds error" << dendl
;
// The error code is stored negated (positive) and in network byte order.
220 ctx
->reply
.error
= htonl(-ret
);
// Short read: fewer bytes came back than were requested, so the data buffer
// is zero-padded up to the requested length and the reply reports success.
221 } else if ((ctx
->command
== NBD_CMD_READ
) &&
222 ret
< static_cast<int>(ctx
->request
.len
)) {
223 int pad_byte_count
= static_cast<int> (ctx
->request
.len
) - ret
;
224 ctx
->data
.append_zero(pad_byte_count
);
225 dout(20) << __func__
<< ": " << *ctx
<< ": Pad byte count: "
226 << pad_byte_count
<< dendl
;
// NOTE(review): plain 0 here versus htonl(0) just below -- same value,
// inconsistent spelling.
227 ctx
->reply
.error
= 0;
229 ctx
->reply
.error
= htonl(0);
// Queue the context as finished on the server that owns it.
231 ctx
->server
->io_finish(ctx
);
233 aio_completion
->release();
// Request loop (presumably the body of reader_entry -- its header is not
// shown). Until terminated, it reads one nbd request header from fd,
// validates the magic, converts the header fields to host byte order,
// prepares the reply header, reads request data where the command carries
// any, and submits the operation to the image with an AioCompletion that
// calls aio_callback.
238 while (!terminated
) {
239 ceph::unique_ptr
<IOContext
> ctx(new IOContext());
242 dout(20) << __func__
<< ": waiting for nbd request" << dendl
;
// Read exactly one request header.
244 int r
= safe_read_exact(fd
, &ctx
->request
, sizeof(struct nbd_request
));
246 derr
<< "failed to read nbd request header: " << cpp_strerror(r
)
// The magic is compared in network byte order, before any field conversion.
251 if (ctx
->request
.magic
!= htonl(NBD_REQUEST_MAGIC
)) {
252 derr
<< "invalid nbd request header" << dendl
;
// Convert offset, type and length to host byte order in place.
256 ctx
->request
.from
= ntohll(ctx
->request
.from
);
257 ctx
->request
.type
= ntohl(ctx
->request
.type
);
258 ctx
->request
.len
= ntohl(ctx
->request
.len
);
// Pre-fill the reply: magic plus the handle copied verbatim from the request.
260 ctx
->reply
.magic
= htonl(NBD_REPLY_MAGIC
);
261 memcpy(ctx
->reply
.handle
, ctx
->request
.handle
, sizeof(ctx
->reply
.handle
));
// Only the low 16 bits of the type field are kept as the command.
263 ctx
->command
= ctx
->request
.type
& 0x0000ffff;
265 dout(20) << *ctx
<< ": start" << dendl
;
267 switch (ctx
->command
)
270 // NBD_DO_IT will return when pipe is closed
271 dout(0) << "disconnect request received" << dendl
;
// Read request.len bytes of request data into a buffer and attach it to the
// context's data.
274 bufferptr
ptr(ctx
->request
.len
);
275 r
= safe_read_exact(fd
, ptr
.c_str(), ctx
->request
.len
);
277 derr
<< *ctx
<< ": failed to read nbd request data: "
278 << cpp_strerror(r
) << dendl
;
281 ctx
->data
.push_back(ptr
);
// The unique_ptr gives up ownership; the raw pointer travels with the
// completion and is handed to aio_callback.
285 IOContext
*pctx
= ctx
.release();
287 librbd::RBD::AioCompletion
*c
= new librbd::RBD::AioCompletion(pctx
, aio_callback
);
// Dispatch on the command to the matching asynchronous image operation.
288 switch (pctx
->command
)
291 image
.aio_write(pctx
->request
.from
, pctx
->request
.len
, pctx
->data
, c
);
294 image
.aio_read(pctx
->request
.from
, pctx
->request
.len
, pctx
->data
, c
);
300 image
.aio_discard(pctx
->request
.from
, pctx
->request
.len
, c
);
303 derr
<< *pctx
<< ": invalid request command" << dendl
;
308 dout(20) << __func__
<< ": terminated" << dendl
;
// Reply loop (presumably the body of writer_entry -- its header is not
// shown). Until terminated, it waits for a finished IOContext, writes the
// reply header to fd and, for a successful read command, writes the data
// buffer as well. The unique_ptr owns the context for the iteration.
313 while (!terminated
) {
314 dout(20) << __func__
<< ": waiting for io request" << dendl
;
315 ceph::unique_ptr
<IOContext
> ctx(wait_io_finish());
317 dout(20) << __func__
<< ": no io requests, terminating" << dendl
;
321 dout(20) << __func__
<< ": got: " << *ctx
<< dendl
;
// Send the reply header first.
323 int r
= safe_write(fd
, &ctx
->reply
, sizeof(struct nbd_reply
));
325 derr
<< *ctx
<< ": failed to write reply header: " << cpp_strerror(r
)
// Data follows the header only for reads whose reply carries no error.
329 if (ctx
->command
== NBD_CMD_READ
&& ctx
->reply
.error
== htonl(0)) {
330 r
= ctx
->data
.write_fd(fd
);
// NOTE(review): "replay" in the message below looks like a typo for "reply".
332 derr
<< *ctx
<< ": failed to write replay data: " << cpp_strerror(r
)
337 dout(20) << *ctx
<< ": finish" << dendl
;
339 dout(20) << __func__
<< ": terminated" << dendl
;
// Thread subclass constructed from a server reference and a pointer to an
// NBDServer member function taking no arguments; entry() is overridden
// (presumably to invoke that member function on the server -- the body is
// not shown).
342 class ThreadHelper
: public Thread
345 typedef void (NBDServer::*entry_func
)();
350 ThreadHelper(NBDServer
&_server
, entry_func _func
)
355 void* entry() override
// Two helper instances: one for the request loop, one for the reply loop.
361 } reader_thread
, writer_thread
;
// Start-up code: logs and creates both threads under the names "rbd_reader"
// and "rbd_writer".
368 dout(10) << __func__
<< ": starting" << dendl
;
372 reader_thread
.create("rbd_reader");
373 writer_thread
.create("rbd_writer");
// Stop code: logs and joins both threads.
380 dout(10) << __func__
<< ": terminating" << dendl
;
384 reader_thread
.join();
385 writer_thread
.join();
// Log formatter for an IOContext. Writes "[", the request handle as a hex
// 64-bit value, a label for the command, then "offset~length" and the reply
// error converted back to host byte order, followed by "]".
399 std::ostream
&operator<<(std::ostream
&os
, const NBDServer::IOContext
&ctx
) {
// NOTE(review): the handle bytes are read through a uint64_t* obtained with
// a C-style cast that also drops const; this assumes suitable alignment.
401 os
<< "[" << std::hex
<< ntohll(*((uint64_t *)ctx
.request
.handle
));
// Label used for a command value that has no dedicated label.
// NOTE(review): "UNKNOW" is probably meant to read "UNKNOWN".
418 os
<< " UNKNOW(" << ctx
.command
<< ") ";
422 os
<< ctx
.request
.from
<< "~" << ctx
.request
.len
<< " "
423 << ntohl(ctx
.reply
.error
) << "]";
// Image update watcher. Holds references to the IoCtx and the image;
// handle_notify() re-stats the image and reacts to a size change.
428 class NBDWatchCtx
: public librbd::UpdateWatchCtx
432 librados::IoCtx
&io_ctx
;
433 librbd::Image
&image
;
// Constructor parameters (partial view): the IoCtx and image references.
437 librados::IoCtx
&_io_ctx
,
438 librbd::Image
&_image
,
446 ~NBDWatchCtx() override
{}
// Runs on an image update notification. Nothing below happens if stat fails.
448 void handle_notify() override
450 librbd::image_info_t info
;
451 if (image
.stat(info
, sizeof(info
)) == 0) {
452 unsigned long new_size
= info
.size
;
// Only act when the image size differs from the remembered size.
454 if (new_size
!= size
) {
// Flush the block device buffers (BLKFLSBUF) before changing the size; a
// failure is logged.
455 if (ioctl(fd
, BLKFLSBUF
, NULL
) < 0)
456 derr
<< "invalidate page cache failed: " << cpp_strerror(errno
) << dendl
;
// Tell the nbd driver the new device size.
457 if (ioctl(fd
, NBD_SET_SIZE
, new_size
) < 0) {
458 derr
<< "resize failed: " << cpp_strerror(errno
) << dendl
;
// Drop the librbd cache as well; failure is only logged.
462 if (image
.invalidate_cache() < 0)
463 derr
<< "invalidate rbd cache failed" << dendl
;
// Opens the nbd device node at `path` read-write.
//
// If the first open fails, try_load_module is true and /sys/module/nbd does
// not exist, the nbd kernel module is loaded with parameters built from
// nbds_max / max_part and the open is retried. If module parameter options
// were requested but this call did not load the module, a warning is printed
// because they cannot take effect.
//
// path:            device node to open.
// try_load_module: allow loading the nbd module on failure (default false).
// Presumably returns the descriptor (negative on failure) -- the return
// statements are not shown; callers use the result as a descriptor.
469 static int open_device(const char* path
, bool try_load_module
= false)
471 int nbd
= open(path
, O_RDWR
);
472 bool loaded_module
= false;
474 if (nbd
< 0 && try_load_module
&& access("/sys/module/nbd", F_OK
) != 0) {
// Assemble the module parameter string.
478 param
<< "nbds_max=" << nbds_max
;
481 param
<< " max_part=" << max_part
;
483 r
= module_load("nbd", param
.str().c_str());
485 cerr
<< "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-r
) << std::endl
;
488 loaded_module
= true;
// Retry the open.
490 nbd
= open(path
, O_RDWR
);
493 if ((nbds_max
|| set_max_part
) &&
494 try_load_module
&& !loaded_module
) {
495 cerr
<< "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded"
// Verifies the device size the kernel reports for nbd<nbd_index> against
// expected_size. The value is taken from /sys/block/nbd<index>/size and
// multiplied by RBD_NBD_BLKSIZE before the comparison; a mismatch is
// reported on cerr.
//
// nbd_index:     numeric suffix of the nbd device.
// expected_size: size the caller expects the device to have.
// Callers treat the result as an int status (return statements not shown).
502 static int check_device_size(int nbd_index
, unsigned long expected_size
)
504 // There are bugs with some older kernel versions that result in an
505 // overflow for large image sizes. This check is to ensure we are
508 unsigned long size
= 0;
509 std::string path
= "/sys/block/nbd" + stringify(nbd_index
) + "/size";
511 ifs
.open(path
.c_str(), std::ifstream::in
);
512 if (!ifs
.is_open()) {
513 cerr
<< "rbd-nbd: failed to open " << path
<< std::endl
;
// Scale the value by the block size.
517 size
*= RBD_NBD_BLKSIZE
;
520 // Newer kernel versions will report real size only after nbd
521 // connect. Assume this is the case and return success.
525 if (size
!= expected_size
) {
526 cerr
<< "rbd-nbd: kernel reported invalid device size (" << size
527 << ", expected " << expected_size
<< ")" << std::endl
;
// Implements the "map" command: initializes the Ceph context (optionally
// forking), attaches one end of a socket pair to an nbd device, opens the
// RBD image, configures the device, registers an update watcher, and then
// blocks in NBD_DO_IT while an NBDServer services the other end of the
// socket pair.
//
// argc/argv: the original command line, re-parsed by global_init.
// The process exits through forker.exit() with EXIT_FAILURE when r < 0.
534 static int do_map(int argc
, const char *argv
[])
538 librados::Rados rados
;
540 librados::IoCtx io_ctx
;
550 librbd::image_info_t info
;
554 vector
<const char*> args
;
555 argv_to_vec(argc
, argv
, args
);
// Global Ceph initialization as a client daemon.
558 auto cct
= global_init(NULL
, args
, CEPH_ENTITY_TYPE_CLIENT
,
559 CODE_ENVIRONMENT_DAEMON
,
560 CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS
);
// The pid_file option is forced to the empty string.
561 g_ceph_context
->_conf
->set_val_or_die("pid_file", "");
// Pre-fork handling: the parent waits on the child via forker.parent_wait().
563 if (global_init_prefork(g_ceph_context
) >= 0) {
565 r
= forker
.prefork(err
);
567 cerr
<< err
<< std::endl
;
571 if (forker
.is_parent()) {
572 global_init_postfork_start(g_ceph_context
);
573 if (forker
.parent_wait(err
) != 0) {
580 common_init_finish(g_ceph_context
);
581 global_init_chdir(g_ceph_context
);
// fd[0] is handed to the kernel with NBD_SET_SOCK; fd[1] goes to NBDServer.
583 if (socketpair(AF_UNIX
, SOCK_STREAM
, 0, fd
) == -1) {
// No device given: try /dev/nbd<index> candidates. Module loading is allowed
// only for the first open_device call.
588 if (devpath
.empty()) {
590 bool try_load_module
= true;
592 snprintf(dev
, sizeof(dev
), "/dev/nbd%d", index
);
594 nbd
= open_device(dev
, try_load_module
);
595 try_load_module
= false;
598 cerr
<< "rbd-nbd: failed to find unused device" << std::endl
;
602 r
= ioctl(nbd
, NBD_SET_SOCK
, fd
[0]);
// Explicit device: extract the index from the path, open it and attach.
613 r
= sscanf(devpath
.c_str(), "/dev/nbd%d", &index
);
615 cerr
<< "rbd-nbd: invalid device path: " << devpath
616 << " (expected /dev/nbd{num})" << std::endl
;
619 nbd
= open_device(devpath
.c_str(), true);
622 cerr
<< "rbd-nbd: failed to open device: " << devpath
<< std::endl
;
626 r
= ioctl(nbd
, NBD_SET_SOCK
, fd
[0]);
629 cerr
<< "rbd-nbd: the device " << devpath
<< " is busy" << std::endl
;
// Device flags: flush and trim support; read-only for snapshots or when
// readonly is set.
635 flags
= NBD_FLAG_SEND_FLUSH
| NBD_FLAG_SEND_TRIM
| NBD_FLAG_HAS_FLAGS
;
636 if (!snapname
.empty() || readonly
) {
637 flags
|= NBD_FLAG_READ_ONLY
;
// Set up RADOS, create the pool IoCtx and open the image.
641 r
= rados
.init_with_context(g_ceph_context
);
649 r
= rados
.ioctx_create(poolname
.c_str(), io_ctx
);
653 r
= rbd
.open(io_ctx
, image
, imgname
.c_str());
// Exclusive lock acquisition.
658 r
= image
.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE
);
660 cerr
<< "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r
)
// Switch the image to the requested snapshot, if one was named.
666 if (!snapname
.empty()) {
667 r
= image
.snap_set(snapname
.c_str());
672 r
= image
.stat(info
, sizeof(info
));
// Configure the block size and device size.
676 r
= ioctl(nbd
, NBD_SET_BLKSIZE
, RBD_NBD_BLKSIZE
);
// Images larger than ULONG_MAX are reported as too large.
682 if (info
.size
> ULONG_MAX
) {
684 cerr
<< "rbd-nbd: image is too large (" << prettybyte_t(info
.size
)
685 << ", max is " << prettybyte_t(ULONG_MAX
) << ")" << std::endl
;
691 r
= ioctl(nbd
, NBD_SET_SIZE
, size
);
// Cross-check the size the kernel reports for the device.
697 r
= check_device_size(index
, size
);
// NOTE(review): the result of this ioctl is not captured on this line.
702 ioctl(nbd
, NBD_SET_FLAGS
, flags
);
704 r
= ioctl(nbd
, BLKROSET
, (unsigned long) &read_only
);
// Watch for image updates; the handle is passed to update_unwatch during
// cleanup.
713 NBDWatchCtx
watch_ctx(nbd
, io_ctx
, image
, info
.size
);
714 r
= image
.update_watch(&watch_ctx
, &handle
);
// Print the device path on stdout.
718 cout
<< devpath
<< std::endl
;
720 if (g_conf
->daemonize
) {
722 global_init_postfork_start(g_ceph_context
);
723 global_init_postfork_finish(g_ceph_context
);
// The server is built on the user-space end of the socket pair.
727 NBDServer
server(fd
[1], image
);
// SIGINT/SIGTERM are handled once by handle_signal; SIGHUP by
// sighup_handler.
731 init_async_signal_handler();
732 register_async_signal_handler(SIGHUP
, sighup_handler
);
733 register_async_signal_handler_oneshot(SIGINT
, handle_signal
);
734 register_async_signal_handler_oneshot(SIGTERM
, handle_signal
);
// NOTE(review): the return value of NBD_DO_IT is not captured on this line.
736 ioctl(nbd
, NBD_DO_IT
);
738 unregister_async_signal_handler(SIGHUP
, sighup_handler
);
739 unregister_async_signal_handler(SIGINT
, handle_signal
);
740 unregister_async_signal_handler(SIGTERM
, handle_signal
);
741 shutdown_async_signal_handler();
// Teardown: remove the watcher, detach the socket, report any error.
746 r
= image
.update_unwatch(handle
);
752 ioctl(nbd
, NBD_CLEAR_SOCK
);
753 cerr
<< "rbd-nbd: failed to map, status: " << cpp_strerror(-r
) << std::endl
;
764 forker
.exit(r
< 0 ? EXIT_FAILURE
: 0);
// Implements the "unmap" command: opens the device named by devpath (without
// attempting to load the module) and issues NBD_DISCONNECT on it. A failed
// disconnect is reported as the device not being in use.
769 static int do_unmap()
771 int nbd
= open_device(devpath
.c_str());
773 cerr
<< "rbd-nbd: failed to open device: " << devpath
<< std::endl
;
777 if (ioctl(nbd
, NBD_DISCONNECT
) < 0) {
778 cerr
<< "rbd-nbd: the device is not used" << std::endl
;
// Parses an image spec of the form [pool/]image[@snap].
//
// The regex has three capture groups: an optional pool (group 1), the image
// name (group 2) and an optional snapshot (group 3); none of the parts may
// contain '/' or '@'. A spec that does not match is reported on std::cerr.
// The caller treats a negative return value as failure.
786 static int parse_imgpath(const std::string
&imgpath
)
788 boost::regex
pattern("^(?:([^/@]+)/)?([^/@]+)(?:@([^/@]+))?$");
790 if (!boost::regex_match(imgpath
, match
, pattern
)) {
791 std::cerr
<< "rbd-nbd: invalid spec '" << imgpath
<< "'" << std::endl
;
// The optional groups are acted on only when they took part in the match.
795 if (match
[1].matched
)
800 if (match
[3].matched
)
// Implements the "list-mapped" command. Creates a scratch socket pair and
// probes /dev/nbd<m> devices: a device on which NBD_SET_SOCK fails is
// printed as mapped; NBD_CLEAR_SOCK is issued to undo a probe (the exact
// branch structure is not shown here).
806 static int do_list_mapped_devices()
812 if (socketpair(AF_UNIX
, SOCK_STREAM
, 0, fd
) == -1) {
// NOTE(review): the assignment of r is not shown here; confirm it holds
// -errno before being negated below.
814 cerr
<< "rbd-nbd: socketpair failed: " << cpp_strerror(-r
) << std::endl
;
819 snprintf(path
, sizeof(path
), "/dev/nbd%d", m
);
820 int nbd
= open_device(path
);
// A non-zero result from NBD_SET_SOCK leads to the path being printed.
823 if (ioctl(nbd
, NBD_SET_SOCK
, fd
[0]) != 0)
824 cout
<< path
<< std::endl
;
826 ioctl(nbd
, NBD_CLEAR_SOCK
);
// Command-line front end: parses the options, determines the command (map,
// unmap or list-mapped), validates its positional arguments and dispatches
// to the matching do_* function.
//
// argc/argv: the process command line.
837 static int rbd_nbd(int argc
, const char *argv
[])
847 vector
<const char*> args
;
849 argv_to_vec(argc
, argv
, args
);
// The arguments are first run through parse_argv on a temporary
// md_config_t (presumably to consume generic options -- confirm).
850 md_config_t().parse_argv(args
);
852 std::vector
<const char*>::iterator i
;
853 std::ostringstream err
;
// Option loop; the iterator is advanced by the ceph_argparse_* helpers or in
// lines not shown here, not in the for header.
855 for (i
= args
.begin(); i
!= args
.end(); ) {
856 if (ceph_argparse_flag(args
, i
, "-h", "--help", (char*)NULL
)) {
859 } else if (ceph_argparse_witharg(args
, i
, &devpath
, "--device", (char *)NULL
)) {
// --nbds_max: integer option; parse errors arrive through `err`.
860 } else if (ceph_argparse_witharg(args
, i
, &nbds_max
, err
, "--nbds_max", (char *)NULL
)) {
861 if (!err
.str().empty()) {
862 cerr
<< err
.str() << std::endl
;
866 cerr
<< "rbd-nbd: Invalid argument for nbds_max!" << std::endl
;
// --max_part: integer option, accepted range 0..255.
869 } else if (ceph_argparse_witharg(args
, i
, &max_part
, err
, "--max_part", (char *)NULL
)) {
870 if (!err
.str().empty()) {
871 cerr
<< err
.str() << std::endl
;
874 if ((max_part
< 0) || (max_part
> 255)) {
875 cerr
<< "rbd-nbd: Invalid argument for max_part(0~255)!" << std::endl
;
879 } else if (ceph_argparse_flag(args
, i
, "--read-only", (char *)NULL
)) {
881 } else if (ceph_argparse_flag(args
, i
, "--exclusive", (char *)NULL
)) {
// The first remaining argument selects the command and is then removed.
888 if (args
.begin() != args
.end()) {
889 if (strcmp(*args
.begin(), "map") == 0) {
891 } else if (strcmp(*args
.begin(), "unmap") == 0) {
893 } else if (strcmp(*args
.begin(), "list-mapped") == 0) {
896 cerr
<< "rbd-nbd: unknown command: " << *args
.begin() << std::endl
;
899 args
.erase(args
.begin());
903 cerr
<< "rbd-nbd: must specify command" << std::endl
;
// An image-or-snap-spec argument is required here and parsed with
// parse_imgpath.
909 if (args
.begin() == args
.end()) {
910 cerr
<< "rbd-nbd: must specify image-or-snap-spec" << std::endl
;
913 if (parse_imgpath(string(*args
.begin())) < 0)
915 args
.erase(args
.begin());
// A device path argument is required here and stored in devpath.
918 if (args
.begin() == args
.end()) {
919 cerr
<< "rbd-nbd: must specify nbd device path" << std::endl
;
922 devpath
= *args
.begin();
923 args
.erase(args
.begin());
// Anything still left over is rejected.
930 if (args
.begin() != args
.end()) {
931 cerr
<< "rbd-nbd: unknown args: " << *args
.begin() << std::endl
;
// Dispatch; an empty image name is reported before do_map is called.
937 if (imgname
.empty()) {
938 cerr
<< "rbd-nbd: image name was not specified" << std::endl
;
942 r
= do_map(argc
, argv
);
952 r
= do_list_mapped_devices();
// Program entry point: forwards the command line to rbd_nbd() and returns
// its result as the process exit status.
964 int main(int argc
, const char *argv
[])
966 return rbd_nbd(argc
, argv
);