]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rbd_nbd/rbd-nbd.cc
update ceph source to reef 18.2.1
[ceph.git] / ceph / src / tools / rbd_nbd / rbd-nbd.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 /*
5 * rbd-nbd - RBD in userspace
6 *
7 * Copyright (C) 2015 - 2016 Kylin Corporation
8 *
9 * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com>
10 * Li Wang <li.wang@kylin-cloud.com>
11 *
12 * This is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License version 2.1, as published by the Free Software
15 * Foundation. See file COPYING.
16 *
17 */
18
19 #include "acconfig.h"
20 #include "include/int_types.h"
21 #include "include/scope_guard.h"
22
23 #include <boost/endian/conversion.hpp>
24
25 #include <libgen.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <stddef.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <poll.h>
32 #include <string.h>
33 #include <sys/types.h>
34 #include <unistd.h>
35
36 #include <linux/nbd.h>
37 #include <linux/fs.h>
38 #include <sys/ioctl.h>
39 #include <sys/socket.h>
40 #include <sys/syscall.h>
41
42 #include "nbd-netlink.h"
43 #include <libnl3/netlink/genl/genl.h>
44 #include <libnl3/netlink/genl/ctrl.h>
45 #include <libnl3/netlink/genl/mngt.h>
46
47 #include <filesystem>
48 #include <fstream>
49 #include <iostream>
50 #include <memory>
51 #include <regex>
52 #include <boost/algorithm/string/predicate.hpp>
53 #include <boost/lexical_cast.hpp>
54
55 #include "common/Formatter.h"
56 #include "common/Preforker.h"
57 #include "common/SubProcess.h"
58 #include "common/TextTable.h"
59 #include "common/ceph_argparse.h"
60 #include "common/config.h"
61 #include "common/dout.h"
62 #include "common/errno.h"
63 #include "common/event_socket.h"
64 #include "common/module.h"
65 #include "common/safe_io.h"
66 #include "common/version.h"
67
68 #include "global/global_init.h"
69 #include "global/signal_handler.h"
70
71 #include "include/rados/librados.hpp"
72 #include "include/rbd/librbd.hpp"
73 #include "include/stringify.h"
74 #include "include/xlist.h"
75
76 #include "mon/MonClient.h"
77
78 #define dout_context g_ceph_context
79 #define dout_subsys ceph_subsys_rbd
80 #undef dout_prefix
81 #define dout_prefix *_dout << "rbd-nbd: "
82
83 using namespace std;
84 namespace fs = std::filesystem;
85
86 using boost::endian::big_to_native;
87 using boost::endian::native_to_big;
88
// Top-level operation selected on the rbd-nbd command line.
enum Command {
  None,    // no (or an unrecognized) command parsed yet
  Map,     // map an image to an nbd device and serve IO
  Unmap,   // unmap an nbd device
  Attach,  // attach an image to an already-configured nbd device
  Detach,  // detach an image from an nbd device (device stays configured)
  List     // list mapped nbd devices
};
97
// Aggregated configuration for one rbd-nbd invocation.  Filled in by
// parse_args() from the command line; the "list" path also reconstructs
// a Config from a running process's /proc/<pid>/cmdline (NBDListIterator).
struct Config {
  int nbds_max = 0;           // nbd kernel module nbds_max override (0 = none)
  int max_part = 255;         // nbd kernel module max_part override
  int io_timeout = -1;        // nbd IO timeout in seconds (-1 = kernel default)
  int reattach_timeout = 30;  // nbd re-attach timeout in seconds

  bool exclusive = false;     // forbid writes by other clients
  bool notrim = false;        // turn off trim/discard
  bool quiesce = false;       // run quiesce hook callbacks
  bool readonly = false;      // map the device read-only
  bool set_max_part = false;  // true when --max_part was given explicitly
  bool try_netlink = false;   // prefer the nbd netlink interface over ioctl
  bool show_cookie = false;   // print the device cookie after mapping

  std::string poolname;
  std::string nsname;
  std::string imgname;
  std::string snapname;
  std::string devpath;        // /dev/nbd{num}; empty means pick a free device
  std::string quiesce_hook = CMAKE_INSTALL_LIBEXECDIR "/rbd-nbd/rbd-nbd_quiesce";

  std::string format;         // list output format: plain|json|xml
  bool pretty_format = false; // pretty-print json/xml list output

  std::vector<librbd::encryption_format_t> encryption_formats;
  std::vector<std::string> encryption_passphrase_files;

  Command command = None;
  int pid = 0;                // pid of the serving rbd-nbd process (list path)
  std::string cookie;         // device cookie (netlink attach identification)
  uint64_t snapid = CEPH_NOSNAP;  // snapshot id when given via --snap-id

  // Render "pool/[ns/]image[@snap]" for logs and list output.
  std::string image_spec() const {
    std::string spec = poolname + "/";

    if (!nsname.empty()) {
      spec += nsname + "/";
    }
    spec += imgname;

    if (!snapname.empty()) {
      spec += "@" + snapname;
    }

    return spec;
  }
};
145
// Print command-line help to stdout, then the generic daemon options.
// Defaults shown in the text are taken from a default-constructed Config
// so the help never drifts from the actual defaults.
static void usage()
{
  std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map image to nbd device\n"
            << " detach <device|image-or-snap-spec> Detach image from nbd device\n"
            << " [options] attach <image-or-snap-spec> Attach image to nbd device\n"
            << " unmap <device|image-or-snap-spec> Unmap nbd device\n"
            << " [options] list-mapped List mapped nbd devices\n"
            << "Map and attach options:\n"
            << " --device <device path> Specify nbd device path (/dev/nbd{num})\n"
            << " --encryption-format luks|luks1|luks2\n"
            << " Image encryption format (default: luks)\n"
            << " --encryption-passphrase-file Path of file containing passphrase for unlocking image encryption\n"
            << " --exclusive Forbid writes by other clients\n"
            << " --notrim Turn off trim/discard\n"
            << " --io-timeout <sec> Set nbd IO timeout\n"
            << " --max_part <limit> Override for module param max_part\n"
            << " --nbds_max <limit> Override for module param nbds_max\n"
            << " --quiesce Use quiesce callbacks\n"
            << " --quiesce-hook <path> Specify quiesce hook path\n"
            << " (default: " << Config().quiesce_hook << ")\n"
            << " --read-only Map read-only\n"
            << " --reattach-timeout <sec> Set nbd re-attach timeout\n"
            << " (default: " << Config().reattach_timeout << ")\n"
            << " --try-netlink Use the nbd netlink interface\n"
            << " --show-cookie Show device cookie\n"
            << " --cookie Specify device cookie\n"
            << " --snap-id <snap-id> Specify snapshot by ID instead of by name\n"
            << "\n"
            << "Unmap and detach options:\n"
            << " --device <device path> Specify nbd device path (/dev/nbd{num})\n"
            << " --snap-id <snap-id> Specify snapshot by ID instead of by name\n"
            << "\n"
            << "List options:\n"
            << " --format plain|json|xml Output format (default: plain)\n"
            << " --pretty-format Pretty formatting (json and xml)\n"
            << std::endl;
  generic_server_usage();
}
184
185 static int nbd = -1;
186 static int nbd_index = -1;
187 static EventSocket terminate_event_sock;
188
189 #define RBD_NBD_BLKSIZE 512UL
190
191 #define HELP_INFO 1
192 #define VERSION_INFO 2
193
194 static int parse_args(vector<const char*>& args, std::ostream *err_msg,
195 Config *cfg);
196 static int netlink_disconnect(int index);
197 static int netlink_resize(int nbd_index, uint64_t size);
198
199 static int run_quiesce_hook(const std::string &quiesce_hook,
200 const std::string &devpath,
201 const std::string &command);
202
203 static std::string get_cookie(const std::string &devpath);
204
// Serves the kernel's NBD protocol requests arriving on a socket fd and
// translates them into librbd asynchronous image operations.
//
// Three threads cooperate:
//   - reader_thread: reads nbd requests from the socket and issues
//     librbd aio ops (aio_callback completes them);
//   - writer_thread: writes nbd replies for completed ops back to the
//     socket;
//   - quiesce_thread (only with cfg->quiesce): runs the quiesce hook in
//     response to librbd quiesce/unquiesce notifications.
// Termination is signalled either via terminate_event_sock (destructor)
// or by an NBD disconnect/error on the socket.
class NBDServer
{
public:
  // Handle returned by librbd::Image::quiesce_watch(); set by the caller
  // after registering the quiesce watcher.
  uint64_t quiesce_watch_handle = 0;

private:
  int fd;                 // socket connected to the kernel nbd device
  librbd::Image &image;
  Config *cfg;

public:
  NBDServer(int fd, librbd::Image& image, Config *cfg)
    : fd(fd)
    , image(image)
    , cfg(cfg)
    , reader_thread(*this, &NBDServer::reader_entry)
    , writer_thread(*this, &NBDServer::writer_entry)
    , quiesce_thread(*this, &NBDServer::quiesce_entry)
  {
    // If the librbd cache is disabled (or still in writethrough mode),
    // allow internal flushes right away; otherwise wait until the client
    // has issued at least one NBD_CMD_FLUSH (see reader_entry).
    std::vector<librbd::config_option_t> options;
    image.config_list(&options);
    for (auto &option : options) {
      if ((option.name == std::string("rbd_cache") ||
           option.name == std::string("rbd_cache_writethrough_until_flush")) &&
          option.value == "false") {
        allow_internal_flush = true;
        break;
      }
    }
  }

  Config *get_cfg() const {
    return cfg;
  }

private:
  int terminate_event_fd = -1;  // eventfd used to wake the reader thread
  ceph::mutex disconnect_lock =
    ceph::make_mutex("NBDServer::DisconnectLocker");
  ceph::condition_variable disconnect_cond;  // signalled on disconnect
  std::atomic<bool> terminated = { false };
  std::atomic<bool> allow_internal_flush = { false };

  // One in-flight NBD request: wire headers, payload, and the list item
  // linking it into io_pending/io_finished.
  struct IOContext
  {
    xlist<IOContext*>::item item;
    NBDServer *server = nullptr;
    struct nbd_request request;  // request header (host byte order after decode)
    struct nbd_reply reply;      // reply header (wire/big-endian byte order)
    bufferlist data;             // write payload in, read payload out
    int command = 0;             // low 16 bits of request.type

    IOContext()
      : item(this)
    {}
  };

  friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx);

  // `lock`/`cond` protect the two queues and the quiesce/terminated flags.
  ceph::mutex lock = ceph::make_mutex("NBDServer::Locker");
  ceph::condition_variable cond;
  xlist<IOContext*> io_pending;   // issued to librbd, not yet completed
  xlist<IOContext*> io_finished;  // completed, waiting for writer_thread

  // Register a request as in-flight.
  void io_start(IOContext *ctx)
  {
    std::lock_guard l{lock};
    io_pending.push_back(&ctx->item);
  }

  // Move a completed request from pending to finished and wake the writer.
  void io_finish(IOContext *ctx)
  {
    std::lock_guard l{lock};
    ceph_assert(ctx->item.is_on_list());
    ctx->item.remove_myself();
    io_finished.push_back(&ctx->item);
    cond.notify_all();
  }

  // Block until a completed request is available; returns NULL once the
  // server is terminated and no IO remains in flight.
  IOContext *wait_io_finish()
  {
    std::unique_lock l{lock};
    cond.wait(l, [this] {
      return !io_finished.empty() ||
             (io_pending.empty() && terminated);
    });

    if (io_finished.empty())
      return NULL;

    IOContext *ret = io_finished.front();
    io_finished.pop_front();

    return ret;
  }

  // Drain: wait for all pending IO to complete, then free the completed
  // contexts (their replies will never be written).
  void wait_clean()
  {
    std::unique_lock l{lock};
    cond.wait(l, [this] { return io_pending.empty(); });

    while(!io_finished.empty()) {
      std::unique_ptr<IOContext> free_ctx(io_finished.front());
      io_finished.pop_front();
    }
  }

  // Invariants checked at destruction time: threads joined, queues empty.
  void assert_clean()
  {
    std::unique_lock l{lock};

    ceph_assert(!reader_thread.is_started());
    ceph_assert(!writer_thread.is_started());
    ceph_assert(io_pending.empty());
    ceph_assert(io_finished.empty());
  }

  // librbd aio completion callback: fill in the reply header and hand the
  // context to the writer thread via io_finish().
  static void aio_callback(librbd::completion_t cb, void *arg)
  {
    librbd::RBD::AioCompletion *aio_completion =
      reinterpret_cast<librbd::RBD::AioCompletion*>(cb);

    IOContext *ctx = reinterpret_cast<IOContext *>(arg);
    int ret = aio_completion->get_return_value();

    dout(20) << __func__ << ": " << *ctx << dendl;

    if (ret == -EINVAL) {
      // if shrinking an image, a pagecache writeback might reference
      // extents outside of the range of the new image extents
      dout(0) << __func__ << ": masking IO out-of-bounds error" << dendl;
      ctx->data.clear();
      ret = 0;
    }

    if (ret < 0) {
      ctx->reply.error = native_to_big<uint32_t>(-ret);
    } else if ((ctx->command == NBD_CMD_READ) &&
               ret < static_cast<int>(ctx->request.len)) {
      // short read (e.g. past a shrunken end of image): pad with zeros so
      // the kernel always gets exactly request.len bytes back
      int pad_byte_count = static_cast<int> (ctx->request.len) - ret;
      ctx->data.append_zero(pad_byte_count);
      dout(20) << __func__ << ": " << *ctx << ": Pad byte count: "
               << pad_byte_count << dendl;
      ctx->reply.error = native_to_big<uint32_t>(0);
    } else {
      ctx->reply.error = native_to_big<uint32_t>(0);
    }
    ctx->server->io_finish(ctx);

    aio_completion->release();
  }

  // Reader thread: poll the nbd socket and the terminate eventfd, decode
  // requests, and dispatch them to librbd.  Exits via `error` (force a
  // device disconnect first) or `signal` (just flag termination).
  void reader_entry()
  {
    struct pollfd poll_fds[2];
    memset(poll_fds, 0, sizeof(struct pollfd) * 2);
    poll_fds[0].fd = fd;
    poll_fds[0].events = POLLIN;
    poll_fds[1].fd = terminate_event_fd;
    poll_fds[1].events = POLLIN;

    while (true) {
      std::unique_ptr<IOContext> ctx(new IOContext());
      ctx->server = this;

      dout(20) << __func__ << ": waiting for nbd request" << dendl;

      int r = poll(poll_fds, 2, -1);
      if (r == -1) {
        if (errno == EINTR) {
          continue;
        }
        r = -errno;
        derr << "failed to poll nbd: " << cpp_strerror(r) << dendl;
        goto error;
      }

      if ((poll_fds[1].revents & POLLIN) != 0) {
        dout(0) << __func__ << ": terminate received" << dendl;
        goto signal;
      }

      if ((poll_fds[0].revents & POLLIN) == 0) {
        dout(20) << __func__ << ": nothing to read" << dendl;
        continue;
      }

      r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request));
      if (r < 0) {
        derr << "failed to read nbd request header: " << cpp_strerror(r)
             << dendl;
        goto error;
      }

      if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) {
        derr << "invalid nbd request header" << dendl;
        goto signal;
      }

      // wire format is big-endian; convert to host order for use below
      ctx->request.from = big_to_native(ctx->request.from);
      ctx->request.type = big_to_native(ctx->request.type);
      ctx->request.len = big_to_native(ctx->request.len);

      ctx->reply.magic = native_to_big<uint32_t>(NBD_REPLY_MAGIC);
      memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle));

      // mask off command flags; only the low 16 bits carry the command
      ctx->command = ctx->request.type & 0x0000ffff;

      dout(20) << *ctx << ": start" << dendl;

      switch (ctx->command)
      {
        case NBD_CMD_DISC:
          // NBD_DO_IT will return when pipe is closed
          dout(0) << "disconnect request received" << dendl;
          goto signal;
        case NBD_CMD_WRITE:
          // the write payload immediately follows the request header
          bufferptr ptr(ctx->request.len);
          r = safe_read_exact(fd, ptr.c_str(), ctx->request.len);
          if (r < 0) {
            derr << *ctx << ": failed to read nbd request data: "
                 << cpp_strerror(r) << dendl;
            goto error;
          }
          ctx->data.push_back(ptr);
          break;
      }

      // ownership passes to the aio path; freed by writer/wait_clean
      IOContext *pctx = ctx.release();
      io_start(pctx);
      librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback);
      switch (pctx->command)
      {
        case NBD_CMD_WRITE:
          image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c);
          break;
        case NBD_CMD_READ:
          image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c);
          break;
        case NBD_CMD_FLUSH:
          image.aio_flush(c);
          // the client flushes, so internal flushes are safe from now on
          allow_internal_flush = true;
          break;
        case NBD_CMD_TRIM:
          image.aio_discard(pctx->request.from, pctx->request.len, c);
          break;
        default:
          derr << *pctx << ": invalid request command" << dendl;
          c->release();
          goto signal;
      }
    }
error:
    {
      // force the kernel to drop the device; fall back to ioctl when the
      // netlink interface is unavailable (netlink_disconnect returns 1)
      int r = netlink_disconnect(nbd_index);
      if (r == 1) {
        ioctl(nbd, NBD_DISCONNECT);
      }
    }
signal:
    std::lock_guard l{lock};
    terminated = true;
    cond.notify_all();

    std::lock_guard disconnect_l{disconnect_lock};
    disconnect_cond.notify_all();

    dout(20) << __func__ << ": terminated" << dendl;
  }

  // Writer thread: stream reply headers (and read payloads) for completed
  // requests back to the kernel; on write failure, drain remaining IO.
  void writer_entry()
  {
    while (true) {
      dout(20) << __func__ << ": waiting for io request" << dendl;
      std::unique_ptr<IOContext> ctx(wait_io_finish());
      if (!ctx) {
        dout(20) << __func__ << ": no io requests, terminating" << dendl;
        goto done;
      }

      dout(20) << __func__ << ": got: " << *ctx << dendl;

      int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply));
      if (r < 0) {
        derr << *ctx << ": failed to write reply header: " << cpp_strerror(r)
             << dendl;
        goto error;
      }
      if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) {
        r = ctx->data.write_fd(fd);
        if (r < 0) {
          derr << *ctx << ": failed to write replay data: " << cpp_strerror(r)
               << dendl;
          goto error;
        }
      }
      dout(20) << *ctx << ": finish" << dendl;
    }
error:
    wait_clean();
done:
    ::shutdown(fd, SHUT_RDWR);

    dout(20) << __func__ << ": terminated" << dendl;
  }

  // Block until a quiesce request arrives; returns false on termination.
  bool wait_quiesce() {
    dout(20) << __func__ << dendl;

    std::unique_lock locker{lock};
    cond.wait(locker, [this] { return quiesce || terminated; });

    if (terminated) {
      return false;
    }

    dout(20) << __func__ << ": got quiesce request" << dendl;
    return true;
  }

  // Block (with `lock` held via `locker`) until unquiesce or termination.
  void wait_unquiesce(std::unique_lock<ceph::mutex> &locker) {
    dout(20) << __func__ << dendl;

    cond.wait(locker, [this] { return !quiesce || terminated; });

    dout(20) << __func__ << ": got unquiesce request" << dendl;
  }

  // Flush in-flight IO before completing a quiesce, but only when it is
  // safe: internal flushes enabled and (if exclusive lock is in play) we
  // actually own the lock.
  void wait_inflight_io() {
    if (!allow_internal_flush) {
        return;
    }

    // librbd might not be able to flush if the exclusive lock is held
    // elsewhere, so skip the flush when we are not the lock owner
    uint64_t features = 0;
    image.features(&features);
    if ((features & RBD_FEATURE_EXCLUSIVE_LOCK) != 0) {
      bool is_owner = false;
      image.is_exclusive_lock_owner(&is_owner);
      if (!is_owner) {
        return;
      }
    }

    dout(20) << __func__ << dendl;

    int r = image.flush();
    if (r < 0) {
      derr << "flush failed: " << cpp_strerror(r) << dendl;
    }
  }

  // Quiesce thread: for each quiesce notification run the quiesce hook,
  // flush, acknowledge to librbd, then wait for unquiesce and run the
  // unquiesce hook.
  void quiesce_entry()
  {
    ceph_assert(cfg->quiesce);

    while (wait_quiesce()) {

      int r = run_quiesce_hook(cfg->quiesce_hook, cfg->devpath, "quiesce");

      wait_inflight_io();

      {
        std::unique_lock locker{lock};
        ceph_assert(quiesce == true);

        image.quiesce_complete(quiesce_watch_handle, r);

        if (r < 0) {
          // hook failed: reset the flag ourselves and skip the unquiesce
          // hook, since the quiesce never took effect
          quiesce = false;
          continue;
        }

        wait_unquiesce(locker);
      }

      run_quiesce_hook(cfg->quiesce_hook, cfg->devpath, "unquiesce");
    }

    dout(20) << __func__ << ": terminated" << dendl;
  }

  // Small adapter so a member function can be run by common/Thread.
  class ThreadHelper : public Thread
  {
  public:
    typedef void (NBDServer::*entry_func)();
  private:
    NBDServer &server;
    entry_func func;
  public:
    ThreadHelper(NBDServer &_server, entry_func _func)
      :server(_server)
      ,func(_func)
    {}
  protected:
    void* entry() override
    {
      (server.*func)();
      return NULL;
    }
  } reader_thread, writer_thread, quiesce_thread;

  bool started = false;  // threads running; guards double start/stop
  bool quiesce = false;  // quiesce requested and not yet released

public:
  // Spawn the worker threads (idempotent) and set up the terminate eventfd.
  void start()
  {
    if (!started) {
      dout(10) << __func__ << ": starting" << dendl;

      started = true;

      terminate_event_fd = eventfd(0, EFD_NONBLOCK);
      ceph_assert(terminate_event_fd > 0);
      int r = terminate_event_sock.init(terminate_event_fd,
                                        EVENT_SOCKET_TYPE_EVENTFD);
      ceph_assert(r >= 0);

      reader_thread.create("rbd_reader");
      writer_thread.create("rbd_writer");
      if (cfg->quiesce) {
        quiesce_thread.create("rbd_quiesce");
      }
    }
  }

  // Block the caller until the reader thread signals disconnect.
  void wait_for_disconnect()
  {
    if (!started)
      return;

    std::unique_lock l{disconnect_lock};
    disconnect_cond.wait(l);
  }

  // Called from the librbd quiesce watcher (NBDQuiesceWatchCtx).
  void notify_quiesce() {
    dout(10) << __func__ << dendl;

    ceph_assert(cfg->quiesce);

    std::unique_lock locker{lock};
    ceph_assert(quiesce == false);
    quiesce = true;
    cond.notify_all();
  }

  // Called from the librbd quiesce watcher (NBDQuiesceWatchCtx).
  void notify_unquiesce() {
    dout(10) << __func__ << dendl;

    ceph_assert(cfg->quiesce);

    std::unique_lock locker{lock};
    ceph_assert(quiesce == true);
    quiesce = false;
    cond.notify_all();
  }

  ~NBDServer()
  {
    if (started) {
      dout(10) << __func__ << ": terminating" << dendl;

      // wake the reader thread's poll(); it will flag termination for the
      // writer and quiesce threads
      terminate_event_sock.notify();

      reader_thread.join();
      writer_thread.join();
      if (cfg->quiesce) {
        quiesce_thread.join();
      }

      assert_clean();

      close(terminate_event_fd);
      started = false;
    }
  }
};
682
683 std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) {
684
685 os << "[" << std::hex << big_to_native(*((uint64_t *)ctx.request.handle));
686
687 switch (ctx.command)
688 {
689 case NBD_CMD_WRITE:
690 os << " WRITE ";
691 break;
692 case NBD_CMD_READ:
693 os << " READ ";
694 break;
695 case NBD_CMD_FLUSH:
696 os << " FLUSH ";
697 break;
698 case NBD_CMD_TRIM:
699 os << " TRIM ";
700 break;
701 case NBD_CMD_DISC:
702 os << " DISC ";
703 break;
704 default:
705 os << " UNKNOWN(" << ctx.command << ") ";
706 break;
707 }
708
709 os << ctx.request.from << "~" << ctx.request.len << " "
710 << std::dec << big_to_native(ctx.reply.error) << "]";
711
712 return os;
713 }
714
715 class NBDQuiesceWatchCtx : public librbd::QuiesceWatchCtx
716 {
717 public:
718 NBDQuiesceWatchCtx(NBDServer *server) : server(server) {
719 }
720
721 void handle_quiesce() override {
722 server->notify_quiesce();
723 }
724
725 void handle_unquiesce() override {
726 server->notify_unquiesce();
727 }
728
729 private:
730 NBDServer *server;
731 };
732
// Watches for rbd image update notifications (e.g. resize) and, from a
// dedicated thread, pushes any size change to the kernel nbd device and
// invalidates stale caches.
class NBDWatchCtx : public librbd::UpdateWatchCtx
{
private:
  int fd;            // open fd of the /dev/nbdX device
  int nbd_index;     // numeric index of the nbd device
  bool use_netlink;  // resize via netlink instead of NBD_SET_SIZE ioctl
  librados::IoCtx &io_ctx;
  librbd::Image &image;
  uint64_t size;     // last size (bytes) communicated to the kernel
  std::thread handle_notify_thread;
  ceph::condition_variable cond;
  ceph::mutex lock = ceph::make_mutex("NBDWatchCtx::Locker");
  bool notify = false;      // set by handle_notify(), consumed by the thread
  bool terminated = false;  // set by the destructor to stop the thread

  // Block until a notification arrives or teardown starts; returns false
  // when terminating.  Multiple notifications may coalesce into one wake.
  bool wait_notify() {
    dout(10) << __func__ << dendl;

    std::unique_lock locker{lock};
    cond.wait(locker, [this] { return notify || terminated; });

    if (terminated) {
      return false;
    }

    dout(10) << __func__ << ": got notify request" << dendl;
    notify = false;
    return true;
  }

  // Thread body: on each notification, re-read the image size; when it
  // changed, flush the pagecache, resize the kernel device, rescan the
  // partition table and invalidate the rbd cache.
  void handle_notify_entry() {
    dout(10) << __func__ << dendl;

    while (wait_notify()) {
      uint64_t new_size;
      int ret = image.size(&new_size);
      if (ret < 0) {
        derr << "getting image size failed: " << cpp_strerror(ret) << dendl;
        continue;
      }
      if (new_size == size) {
        continue;
      }
      dout(5) << "resize detected" << dendl;
      if (ioctl(fd, BLKFLSBUF, NULL) < 0) {
        derr << "invalidate page cache failed: " << cpp_strerror(errno)
             << dendl;
      }
      if (use_netlink) {
        ret = netlink_resize(nbd_index, new_size);
      } else {
        ret = ioctl(fd, NBD_SET_SIZE, new_size);
        if (ret < 0) {
          derr << "resize failed: " << cpp_strerror(errno) << dendl;
        }
      }
      // only remember the new size if the kernel accepted it, so a failed
      // resize will be retried on the next notification
      if (!ret) {
        size = new_size;
      }
      if (ioctl(fd, BLKRRPART, NULL) < 0) {
        derr << "rescan of partition table failed: " << cpp_strerror(errno)
             << dendl;
      }
      if (image.invalidate_cache() < 0) {
        derr << "invalidate rbd cache failed" << dendl;
      }
    }
  }

public:
  NBDWatchCtx(int _fd,
              int _nbd_index,
              bool _use_netlink,
              librados::IoCtx &_io_ctx,
              librbd::Image &_image,
              unsigned long _size)
    : fd(_fd)
    , nbd_index(_nbd_index)
    , use_netlink(_use_netlink)
    , io_ctx(_io_ctx)
    , image(_image)
    , size(_size)
  {
    // start the worker immediately; it idles in wait_notify()
    handle_notify_thread = make_named_thread("rbd_handle_notify",
                                             &NBDWatchCtx::handle_notify_entry,
                                             this);
  }

  ~NBDWatchCtx() override
  {
    dout(10) << __func__ << ": terminating" << dendl;
    std::unique_lock locker{lock};
    terminated = true;
    cond.notify_all();
    locker.unlock();

    handle_notify_thread.join();
    dout(10) << __func__ << ": finish" << dendl;
  }

  // librbd::UpdateWatchCtx callback; runs in librbd's context, so only
  // flag the worker thread instead of doing the work inline.
  void handle_notify() override
  {
    dout(10) << __func__ << dendl;

    std::unique_lock locker{lock};
    notify = true;
    cond.notify_all();
  }
};
842
// Iterates over /sys/block/nbd* devices and reconstructs, for each one
// served by an rbd-nbd process, the Config it was mapped with (parsed
// from the process's /proc/<pid>/cmdline).
class NBDListIterator {
public:
  // Fill *cfg with the next mapped device's configuration.  Returns
  // false when there are no more nbd devices to examine.
  bool get(Config *cfg) {
    while (true) {
      std::string nbd_path = "/sys/block/nbd" + stringify(m_index);
      if(access(nbd_path.c_str(), F_OK) != 0) {
        return false;
      }

      *cfg = Config();
      cfg->devpath = "/dev/nbd" + stringify(m_index++);

      int pid;
      std::ifstream ifs;
      ifs.open(nbd_path + "/pid", std::ifstream::in);
      if (!ifs.is_open()) {
        // device exists but is not connected
        continue;
      }
      ifs >> pid;
      ifs.close();

      // If the rbd-nbd is re-attached the pid may store garbage
      // here. We are sure this is the case when it is negative or
      // zero. Then we just try to find the attached process scanning
      // /proc fs. If it is positive we check the process with this
      // pid first and if it is not rbd-nbd fallback to searching the
      // attached process.
      do {
        if (pid <= 0) {
          pid = find_attached(cfg->devpath);
          if (pid <= 0) {
            break;
          }
        }

        if (get_mapped_info(pid, cfg) >= 0) {
          return true;
        }
        // force the /proc scan on the next iteration
        pid = -1;
      } while (true);
    }
  }

private:
  int m_index = 0;  // next nbd device index to examine
  // pid -> parsed Config; a default-constructed entry marks a pid whose
  // parse already failed (its devpath is empty, so lookups mismatch)
  std::map<int, Config> m_mapped_info_cache;

  // Determine whether `pid` is an rbd-nbd process serving cfg->devpath;
  // on success (return >= 0) *cfg is replaced with the parsed config.
  int get_mapped_info(int pid, Config *cfg) {
    ceph_assert(!cfg->devpath.empty());

    auto it = m_mapped_info_cache.find(pid);
    if (it != m_mapped_info_cache.end()) {
      if (it->second.devpath != cfg->devpath) {
        return -EINVAL;
      }
      *cfg = it->second;
      return 0;
    }

    // negative-cache this pid up front; replaced below on success
    m_mapped_info_cache[pid] = {};

    int r;
    std::string path = "/proc/" + stringify(pid) + "/comm";
    std::ifstream ifs;
    std::string comm;
    ifs.open(path.c_str(), std::ifstream::in);
    if (!ifs.is_open())
      return -1;
    ifs >> comm;
    if (comm != "rbd-nbd") {
      return -EINVAL;
    }
    ifs.close();

    path = "/proc/" + stringify(pid) + "/cmdline";
    std::string cmdline;
    std::vector<const char*> args;

    ifs.open(path.c_str(), std::ifstream::in);
    if (!ifs.is_open())
      return -1;
    ifs >> cmdline;

    if (cmdline.empty()) {
      return -EINVAL;
    }

    // /proc cmdline arguments are NUL-separated; split in place
    for (unsigned i = 0; i < cmdline.size(); i++) {
      char *arg = &cmdline[i];
      if (i == 0) {
        if (strcmp(basename(arg) , "rbd-nbd") != 0) {
          return -EINVAL;
        }
      } else {
        args.push_back(arg);
      }

      while (cmdline[i] != '\0') {
        i++;
      }
    }

    std::ostringstream err_msg;
    Config c;
    r = parse_args(args, &err_msg, &c);
    if (r < 0) {
      return r;
    }

    if (c.command != Map && c.command != Attach) {
      return -ENOENT;
    }

    c.pid = pid;
    m_mapped_info_cache.erase(pid);
    if (!c.devpath.empty()) {
      m_mapped_info_cache[pid] = c;
      if (c.devpath != cfg->devpath) {
        return -ENOENT;
      }
    } else {
      // the process did not specify a device; assume the one we are
      // currently examining
      c.devpath = cfg->devpath;
    }

    c.cookie = get_cookie(cfg->devpath);
    *cfg = c;
    return 0;
  }

  // Scan /proc for an rbd-nbd process attached to `devpath`; returns its
  // pid or -1 if none is found.
  int find_attached(const std::string &devpath) {
    for (auto &entry : fs::directory_iterator("/proc")) {
      if (!fs::is_directory(entry.status())) {
        continue;
      }

      int pid;
      try {
        pid = boost::lexical_cast<uint64_t>(entry.path().filename().c_str());
      } catch (boost::bad_lexical_cast&) {
        // non-numeric /proc entry (e.g. "self"); not a process dir
        continue;
      }

      Config cfg;
      cfg.devpath = devpath;
      if (get_mapped_info(pid, &cfg) >=0 && cfg.command == Attach) {
        return cfg.pid;
      }
    }

    return -1;
  }
};
995
996 struct EncryptionOptions {
997 std::vector<librbd::encryption_spec_t> specs;
998
999 ~EncryptionOptions() {
1000 for (auto& spec : specs) {
1001 switch (spec.format) {
1002 case RBD_ENCRYPTION_FORMAT_LUKS: {
1003 auto opts =
1004 static_cast<librbd::encryption_luks_format_options_t*>(spec.opts);
1005 ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
1006 opts->passphrase.size());
1007 delete opts;
1008 break;
1009 }
1010 case RBD_ENCRYPTION_FORMAT_LUKS1: {
1011 auto opts =
1012 static_cast<librbd::encryption_luks1_format_options_t*>(spec.opts);
1013 ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
1014 opts->passphrase.size());
1015 delete opts;
1016 break;
1017 }
1018 case RBD_ENCRYPTION_FORMAT_LUKS2: {
1019 auto opts =
1020 static_cast<librbd::encryption_luks2_format_options_t*>(spec.opts);
1021 ceph_memzero_s(opts->passphrase.data(), opts->passphrase.size(),
1022 opts->passphrase.size());
1023 delete opts;
1024 break;
1025 }
1026 default:
1027 ceph_abort();
1028 }
1029 }
1030 }
1031 };
1032
// Read the device cookie from sysfs: "/dev/nbdX" -> /sys/block/nbdX/backend.
// Returns an empty string when the attribute does not exist (old kernels
// or device not configured via netlink with a cookie).
static std::string get_cookie(const std::string &devpath)
{
  std::string sysfs_path =
      "/sys/block/" + devpath.substr(sizeof("/dev/") - 1) + "/backend";

  std::string cookie;
  std::ifstream backend(sysfs_path, std::ifstream::in);
  if (backend.is_open()) {
    std::getline(backend, cookie);
    backend.close();
  }
  return cookie;
}
1046
1047 static int load_module(Config *cfg)
1048 {
1049 ostringstream param;
1050 int ret;
1051
1052 if (cfg->nbds_max)
1053 param << "nbds_max=" << cfg->nbds_max;
1054
1055 if (cfg->max_part)
1056 param << " max_part=" << cfg->max_part;
1057
1058 if (!access("/sys/module/nbd", F_OK)) {
1059 if (cfg->nbds_max || cfg->set_max_part)
1060 cerr << "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded"
1061 << std::endl;
1062 return 0;
1063 }
1064
1065 ret = module_load("nbd", param.str().c_str());
1066 if (ret < 0)
1067 cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-ret)
1068 << std::endl;
1069
1070 return ret;
1071 }
1072
1073 static int check_device_size(int nbd_index, unsigned long expected_size)
1074 {
1075 // There are bugs with some older kernel versions that result in an
1076 // overflow for large image sizes. This check is to ensure we are
1077 // not affected.
1078
1079 unsigned long size = 0;
1080 std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size";
1081 std::ifstream ifs;
1082 ifs.open(path.c_str(), std::ifstream::in);
1083 if (!ifs.is_open()) {
1084 cerr << "rbd-nbd: failed to open " << path << std::endl;
1085 return -EINVAL;
1086 }
1087 ifs >> size;
1088 size *= RBD_NBD_BLKSIZE;
1089
1090 if (size == 0) {
1091 // Newer kernel versions will report real size only after nbd
1092 // connect. Assume this is the case and return success.
1093 return 0;
1094 }
1095
1096 if (size != expected_size) {
1097 cerr << "rbd-nbd: kernel reported invalid device size (" << size
1098 << ", expected " << expected_size << ")" << std::endl;
1099 return -EINVAL;
1100 }
1101
1102 return 0;
1103 }
1104
// Extract the numeric index from an nbd device path ("/dev/nbd3" -> 3).
// Returns the index (>= 0) on success, -EINVAL when the path does not
// match the expected pattern, or sscanf's EOF result on an early
// matching failure.
static int parse_nbd_index(const std::string& devpath)
{
  int index;

  int matched = sscanf(devpath.c_str(), "/dev/nbd%d", &index);
  if (matched <= 0) {
    // sscanf returns 0 when the literal prefix did not match; normalize
    // that to -EINVAL.  A negative return (EOF) is passed through since
    // some callers need a distinct negative value.
    std::cerr << "rbd-nbd: invalid device path: " << devpath
              << " (expected /dev/nbd{num})" << std::endl;
    return matched == 0 ? -EINVAL : matched;
  }

  return index;
}
1121
1122 static int try_ioctl_setup(Config *cfg, int fd, uint64_t size,
1123 uint64_t blksize, uint64_t flags)
1124 {
1125 int index = 0, r;
1126
1127 if (cfg->devpath.empty()) {
1128 char dev[64];
1129 const char *path = "/sys/module/nbd/parameters/nbds_max";
1130 int nbds_max = -1;
1131 if (access(path, F_OK) == 0) {
1132 std::ifstream ifs;
1133 ifs.open(path, std::ifstream::in);
1134 if (ifs.is_open()) {
1135 ifs >> nbds_max;
1136 ifs.close();
1137 }
1138 }
1139
1140 while (true) {
1141 snprintf(dev, sizeof(dev), "/dev/nbd%d", index);
1142
1143 nbd = open(dev, O_RDWR);
1144 if (nbd < 0) {
1145 if (nbd == -EPERM && nbds_max != -1 && index < (nbds_max-1)) {
1146 ++index;
1147 continue;
1148 }
1149 r = nbd;
1150 cerr << "rbd-nbd: failed to find unused device" << std::endl;
1151 goto done;
1152 }
1153
1154 r = ioctl(nbd, NBD_SET_SOCK, fd);
1155 if (r < 0) {
1156 close(nbd);
1157 ++index;
1158 continue;
1159 }
1160
1161 cfg->devpath = dev;
1162 break;
1163 }
1164 } else {
1165 r = parse_nbd_index(cfg->devpath);
1166 if (r < 0)
1167 goto done;
1168 index = r;
1169
1170 nbd = open(cfg->devpath.c_str(), O_RDWR);
1171 if (nbd < 0) {
1172 r = nbd;
1173 cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
1174 goto done;
1175 }
1176
1177 r = ioctl(nbd, NBD_SET_SOCK, fd);
1178 if (r < 0) {
1179 r = -errno;
1180 cerr << "rbd-nbd: the device " << cfg->devpath << " is busy" << std::endl;
1181 close(nbd);
1182 goto done;
1183 }
1184 }
1185
1186 r = ioctl(nbd, NBD_SET_BLKSIZE, blksize);
1187 if (r < 0) {
1188 r = -errno;
1189 cerr << "rbd-nbd: NBD_SET_BLKSIZE failed" << std::endl;
1190 goto close_nbd;
1191 }
1192
1193 r = ioctl(nbd, NBD_SET_SIZE, size);
1194 if (r < 0) {
1195 cerr << "rbd-nbd: NBD_SET_SIZE failed" << std::endl;
1196 r = -errno;
1197 goto close_nbd;
1198 }
1199
1200 ioctl(nbd, NBD_SET_FLAGS, flags);
1201
1202 if (cfg->io_timeout >= 0) {
1203 r = ioctl(nbd, NBD_SET_TIMEOUT, (unsigned long)cfg->io_timeout);
1204 if (r < 0) {
1205 r = -errno;
1206 cerr << "rbd-nbd: failed to set IO timeout: " << cpp_strerror(r)
1207 << std::endl;
1208 goto close_nbd;
1209 }
1210 }
1211
1212 dout(10) << "ioctl setup complete for " << cfg->devpath << dendl;
1213 nbd_index = index;
1214 return 0;
1215
1216 close_nbd:
1217 if (r < 0) {
1218 ioctl(nbd, NBD_CLEAR_SOCK);
1219 cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl;
1220 }
1221 close(nbd);
1222 done:
1223 return r;
1224 }
1225
1226 static void netlink_cleanup(struct nl_sock *sock)
1227 {
1228 if (!sock)
1229 return;
1230
1231 nl_close(sock);
1232 nl_socket_free(sock);
1233 }
1234
1235 static struct nl_sock *netlink_init(int *id)
1236 {
1237 struct nl_sock *sock;
1238 int ret;
1239
1240 sock = nl_socket_alloc();
1241 if (!sock) {
1242 cerr << "rbd-nbd: Could not allocate netlink socket." << std::endl;
1243 return NULL;
1244 }
1245
1246 ret = genl_connect(sock);
1247 if (ret < 0) {
1248 cerr << "rbd-nbd: Could not connect netlink socket. Error " << ret
1249 << std::endl;
1250 goto free_sock;
1251 }
1252
1253 *id = genl_ctrl_resolve(sock, "nbd");
1254 if (*id < 0)
1255 // nbd netlink interface not supported.
1256 goto close_sock;
1257
1258 return sock;
1259
1260 close_sock:
1261 nl_close(sock);
1262 free_sock:
1263 nl_socket_free(sock);
1264 return NULL;
1265 }
1266
// Ask the kernel to disconnect nbd device `index` via the netlink
// interface.  Returns 0 on success, 1 when netlink is unavailable (the
// caller should fall back to the NBD_DISCONNECT ioctl), or -EIO on
// failure.
static int netlink_disconnect(int index)
{
  struct nl_sock *sock;
  struct nl_msg *msg;
  int ret, nl_id;

  sock = netlink_init(&nl_id);
  if (!sock)
    // Try ioctl
    return 1;

  nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);

  msg = nlmsg_alloc();
  if (!msg) {
    cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
    goto free_sock;
  }

  if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
                   NBD_CMD_DISCONNECT, 0)) {
    cerr << "rbd-nbd: Could not setup message." << std::endl;
    goto nla_put_failure;
  }

  // NOTE: the NLA_PUT_U32 macro jumps to the nla_put_failure label below
  // on error -- do not rename or remove that label.
  NLA_PUT_U32(msg, NBD_ATTR_INDEX, index);

  // nl_send_sync() consumes msg regardless of the outcome
  ret = nl_send_sync(sock, msg);
  netlink_cleanup(sock);
  if (ret < 0) {
    cerr << "rbd-nbd: netlink disconnect failed: " << nl_geterror(-ret)
         << std::endl;
    return -EIO;
  }

  return 0;

nla_put_failure:
  nlmsg_free(msg);
free_sock:
  netlink_cleanup(sock);
  return -EIO;
}
1310
1311 static int netlink_disconnect_by_path(const std::string& devpath)
1312 {
1313 int index;
1314
1315 index = parse_nbd_index(devpath);
1316 if (index < 0)
1317 return index;
1318
1319 return netlink_disconnect(index);
1320 }
1321
// Tell the kernel the new size of an already-mapped nbd device via the
// NBD_CMD_RECONFIGURE netlink command.
//
// Returns 0 on success, 1 when netlink is unavailable, or -EIO on failure.
static int netlink_resize(int nbd_index, uint64_t size)
{
  struct nl_sock *sock;
  struct nl_msg *msg;
  int nl_id, ret;

  sock = netlink_init(&nl_id);
  if (!sock) {
    cerr << "rbd-nbd: Netlink interface not supported." << std::endl;
    return 1;
  }

  nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);

  msg = nlmsg_alloc();
  if (!msg) {
    cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
    goto free_sock;
  }

  if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
                   NBD_CMD_RECONFIGURE, 0)) {
    cerr << "rbd-nbd: Could not setup message." << std::endl;
    goto free_msg;
  }

  // NLA_PUT_* macros jump to the nla_put_failure label on error.
  NLA_PUT_U32(msg, NBD_ATTR_INDEX, nbd_index);
  NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);

  // nl_send_sync() consumes (frees) msg even on failure, hence the jump to
  // free_sock — not free_msg — on error here.
  ret = nl_send_sync(sock, msg);
  if (ret < 0) {
    cerr << "rbd-nbd: netlink resize failed: " << nl_geterror(ret) << std::endl;
    goto free_sock;
  }

  netlink_cleanup(sock);
  dout(10) << "netlink resize complete for nbd" << nbd_index << dendl;
  return 0;

nla_put_failure:
free_msg:
  nlmsg_free(msg);
free_sock:
  netlink_cleanup(sock);
  return -EIO;
}
1368
1369 static int netlink_connect_cb(struct nl_msg *msg, void *arg)
1370 {
1371 struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlmsg_hdr(msg));
1372 Config *cfg = (Config *)arg;
1373 struct nlattr *msg_attr[NBD_ATTR_MAX + 1];
1374 uint32_t index;
1375 int ret;
1376
1377 ret = nla_parse(msg_attr, NBD_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
1378 genlmsg_attrlen(gnlh, 0), NULL);
1379 if (ret) {
1380 cerr << "rbd-nbd: Unsupported netlink reply" << std::endl;
1381 return -NLE_MSGTYPE_NOSUPPORT;
1382 }
1383
1384 if (!msg_attr[NBD_ATTR_INDEX]) {
1385 cerr << "rbd-nbd: netlink connect reply missing device index." << std::endl;
1386 return -NLE_MSGTYPE_NOSUPPORT;
1387 }
1388
1389 index = nla_get_u32(msg_attr[NBD_ATTR_INDEX]);
1390 cfg->devpath = "/dev/nbd" + stringify(index);
1391 nbd_index = index;
1392
1393 return NL_OK;
1394 }
1395
// Send NBD_CMD_CONNECT (or NBD_CMD_RECONFIGURE when re-attaching) for the
// device described by cfg, handing the kernel our end of the socketpair.
// On a fresh connect the kernel's reply — handled by netlink_connect_cb —
// records which /dev/nbdX was allocated.
//
// Returns 0 on success or a negative error code.
static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd,
                           uint64_t size, uint64_t flags, bool reconnect)
{
  struct nlattr *sock_attr;
  struct nlattr *sock_opt;
  struct nl_msg *msg;
  int ret;

  if (reconnect) {
    dout(10) << "netlink try reconnect for " << cfg->devpath << dendl;

    nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);
  } else {
    // The reply callback fills in cfg->devpath with the allocated device.
    nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, netlink_connect_cb,
                        cfg);
  }

  msg = nlmsg_alloc();
  if (!msg) {
    cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
    return -ENOMEM;
  }

  if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
                   reconnect ? NBD_CMD_RECONFIGURE : NBD_CMD_CONNECT, 0)) {
    cerr << "rbd-nbd: Could not setup message." << std::endl;
    goto free_msg;
  }

  // When the user picked a device explicitly, request that index.
  if (!cfg->devpath.empty()) {
    ret = parse_nbd_index(cfg->devpath);
    if (ret < 0)
      goto free_msg;

    // NLA_PUT_* macros jump to the nla_put_failure label on error.
    NLA_PUT_U32(msg, NBD_ATTR_INDEX, ret);
    if (reconnect) {
      nbd_index = ret;
    }
  }

  if (cfg->io_timeout >= 0)
    NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, cfg->io_timeout);

  NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);
  NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, RBD_NBD_BLKSIZE);
  NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags);
  NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, cfg->reattach_timeout);
  if (!cfg->cookie.empty())
    NLA_PUT_STRING(msg, NBD_ATTR_BACKEND_IDENTIFIER, cfg->cookie.c_str());

  // Nested attributes: NBD_ATTR_SOCKETS > NBD_SOCK_ITEM > NBD_SOCK_FD,
  // carrying our end of the socketpair.
  sock_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS);
  if (!sock_attr) {
    cerr << "rbd-nbd: Could not init sockets in netlink message." << std::endl;
    goto free_msg;
  }

  sock_opt = nla_nest_start(msg, NBD_SOCK_ITEM);
  if (!sock_opt) {
    cerr << "rbd-nbd: Could not init sock in netlink message." << std::endl;
    goto free_msg;
  }

  NLA_PUT_U32(msg, NBD_SOCK_FD, fd);
  nla_nest_end(msg, sock_opt);
  nla_nest_end(msg, sock_attr);

  // nl_send_sync() consumes (frees) msg even on failure, so no explicit
  // free is needed past this point.
  ret = nl_send_sync(sock, msg);
  if (ret < 0) {
    cerr << "rbd-nbd: netlink connect failed: " << nl_geterror(ret)
         << std::endl;
    return -EIO;
  }

  dout(10) << "netlink connect complete for " << cfg->devpath << dendl;
  return 0;

nla_put_failure:
free_msg:
  nlmsg_free(msg);
  return -EIO;
}
1477
1478 static int try_netlink_setup(Config *cfg, int fd, uint64_t size, uint64_t flags,
1479 bool reconnect)
1480 {
1481 struct nl_sock *sock;
1482 int nl_id, ret;
1483
1484 sock = netlink_init(&nl_id);
1485 if (!sock) {
1486 cerr << "rbd-nbd: Netlink interface not supported. Using ioctl interface."
1487 << std::endl;
1488 return 1;
1489 }
1490
1491 dout(10) << "netlink interface supported." << dendl;
1492
1493 ret = netlink_connect(cfg, sock, nl_id, fd, size, flags, reconnect);
1494 netlink_cleanup(sock);
1495
1496 if (ret != 0)
1497 return ret;
1498
1499 nbd = open(cfg->devpath.c_str(), O_RDWR);
1500 if (nbd < 0) {
1501 cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
1502 return nbd;
1503 }
1504
1505 return 0;
1506 }
1507
// Execute the user-supplied quiesce hook as:
//   <quiesce_hook> <devpath> <command>
// collecting its stderr output for logging.
//
// Returns 0 on success, or a negative error code (a positive exit status
// from the hook is negated to fit the errno-style convention).
static int run_quiesce_hook(const std::string &quiesce_hook,
                            const std::string &devpath,
                            const std::string &command) {
  dout(10) << __func__ << ": " << quiesce_hook << " " << devpath << " "
           << command << dendl;

  SubProcess hook(quiesce_hook.c_str(), SubProcess::CLOSE, SubProcess::PIPE,
                  SubProcess::PIPE);
  hook.add_cmd_args(devpath.c_str(), command.c_str(), NULL);
  bufferlist err;
  int r = hook.spawn();
  if (r < 0) {
    err.append("subprocess spawn failed");
  } else {
    // Drain up to 16K of the hook's stderr before reaping it, so the
    // diagnostics can be logged below.
    err.read_fd(hook.get_stderr(), 16384);
    r = hook.join();
    if (r > 0) {
      // Non-zero exit status -> negative error code.
      r = -r;
    }
  }
  if (r < 0) {
    derr << __func__ << ": " << quiesce_hook << " " << devpath << " "
         << command << " failed: " << err.to_str() << dendl;
  } else {
    dout(10) << " succeeded: " << err.to_str() << dendl;
  }

  return r;
}
1537
1538 static void handle_signal(int signum)
1539 {
1540 ceph_assert(signum == SIGINT || signum == SIGTERM);
1541 derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;
1542
1543 dout(20) << __func__ << ": " << "notifying terminate" << dendl;
1544
1545 ceph_assert(terminate_event_sock.is_valid());
1546 terminate_event_sock.notify();
1547 }
1548
1549 static NBDServer *start_server(int fd, librbd::Image& image, Config *cfg)
1550 {
1551 NBDServer *server;
1552
1553 server = new NBDServer(fd, image, cfg);
1554 server->start();
1555
1556 init_async_signal_handler();
1557 register_async_signal_handler(SIGHUP, sighup_handler);
1558 register_async_signal_handler_oneshot(SIGINT, handle_signal);
1559 register_async_signal_handler_oneshot(SIGTERM, handle_signal);
1560
1561 return server;
1562 }
1563
1564 static void run_server(Preforker& forker, NBDServer *server, bool netlink_used)
1565 {
1566 if (g_conf()->daemonize) {
1567 global_init_postfork_finish(g_ceph_context);
1568 forker.daemonize();
1569 }
1570
1571 if (netlink_used)
1572 server->wait_for_disconnect();
1573 else
1574 ioctl(nbd, NBD_DO_IT);
1575
1576 unregister_async_signal_handler(SIGHUP, sighup_handler);
1577 unregister_async_signal_handler(SIGINT, handle_signal);
1578 unregister_async_signal_handler(SIGTERM, handle_signal);
1579 shutdown_async_signal_handler();
1580 }
1581
1582 // Eventually it should be removed when pidfd_open is widely supported.
1583
1584 static int wait_for_terminate_legacy(int pid, int timeout)
1585 {
1586 for (int i = 0; ; i++) {
1587 if (kill(pid, 0) == -1) {
1588 if (errno == ESRCH) {
1589 return 0;
1590 }
1591 int r = -errno;
1592 cerr << "rbd-nbd: kill(" << pid << ", 0) failed: "
1593 << cpp_strerror(r) << std::endl;
1594 return r;
1595 }
1596 if (i >= timeout * 2) {
1597 break;
1598 }
1599 usleep(500000);
1600 }
1601
1602 cerr << "rbd-nbd: waiting for process exit timed out" << std::endl;
1603 return -ETIMEDOUT;
1604 }
1605
1606 // Eventually it should be replaced with glibc' pidfd_open
1607 // when it is widely available.
1608
#ifdef __NR_pidfd_open
// Thin wrapper over the pidfd_open(2) syscall; invoked directly via
// syscall(2) because older glibc versions provide no wrapper.
static int pidfd_open(pid_t pid, unsigned int flags)
{
  return syscall(__NR_pidfd_open, pid, flags);
}
#else
// Syscall number unknown at build time: fail with ENOSYS so callers fall
// back to the legacy polling implementation.
static int pidfd_open(pid_t pid, unsigned int flags)
{
  errno = ENOSYS;
  return -1;
}
#endif
1621
1622 static int wait_for_terminate(int pid, int timeout)
1623 {
1624 int fd = pidfd_open(pid, 0);
1625 if (fd == -1) {
1626 if (errno == ENOSYS) {
1627 return wait_for_terminate_legacy(pid, timeout);
1628 }
1629 if (errno == ESRCH) {
1630 return 0;
1631 }
1632 int r = -errno;
1633 cerr << "rbd-nbd: pidfd_open(" << pid << ") failed: "
1634 << cpp_strerror(r) << std::endl;
1635 return r;
1636 }
1637
1638 struct pollfd poll_fds[1];
1639 memset(poll_fds, 0, sizeof(struct pollfd));
1640 poll_fds[0].fd = fd;
1641 poll_fds[0].events = POLLIN;
1642
1643 int r = poll(poll_fds, 1, timeout * 1000);
1644 if (r == -1) {
1645 r = -errno;
1646 cerr << "rbd-nbd: failed to poll rbd-nbd process: " << cpp_strerror(r)
1647 << std::endl;
1648 goto done;
1649 } else {
1650 r = 0;
1651 }
1652
1653 if ((poll_fds[0].revents & POLLIN) == 0) {
1654 cerr << "rbd-nbd: waiting for process exit timed out" << std::endl;
1655 r = -ETIMEDOUT;
1656 }
1657
1658 done:
1659 close(fd);
1660
1661 return r;
1662 }
1663
// Map an rbd image to a local /dev/nbd device and serve its IO until the
// device is disconnected. 'reconnect' indicates re-attaching to an
// already-configured device (Attach command) rather than a fresh map.
//
// Forks via Preforker (unless daemonize is disabled): the parent waits for
// the child to report success, the child runs the server loop and leaves
// through forker.exit().
static int do_map(int argc, const char *argv[], Config *cfg, bool reconnect)
{
  int r;

  librados::Rados rados;
  librbd::RBD rbd;
  librados::IoCtx io_ctx;
  librbd::Image image;

  int read_only = 0;
  unsigned long flags;
  unsigned long size;
  unsigned long blksize = RBD_NBD_BLKSIZE;
  bool use_netlink;

  // socketpair: fd[0] goes to the kernel nbd driver, fd[1] to NBDServer.
  int fd[2];

  librbd::image_info_t info;

  Preforker forker;
  NBDServer *server;

  auto args = argv_to_vec(argc, argv);
  if (args.empty()) {
    cerr << argv[0] << ": -h or --help for usage" << std::endl;
    exit(1);
  }
  if (ceph_argparse_need_usage(args)) {
    usage();
    exit(0);
  }

  auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
                         CODE_ENVIRONMENT_DAEMON,
                         CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
  g_ceph_context->_conf.set_val_or_die("pid_file", "");

  // Fork before doing any real work; the parent blocks until the child
  // signals map success/failure.
  if (global_init_prefork(g_ceph_context) >= 0) {
    std::string err;
    r = forker.prefork(err);
    if (r < 0) {
      cerr << err << std::endl;
      return r;
    }
    if (forker.is_parent()) {
      if (forker.parent_wait(err) != 0) {
        return -ENXIO;
      }
      return 0;
    }
    global_init_postfork_start(g_ceph_context);
  }

  common_init_finish(g_ceph_context);
  global_init_chdir(g_ceph_context);

  if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) {
    r = -errno;
    goto close_ret;
  }

  // Connect to the cluster and open the image (optionally at a snapshot).
  r = rados.init_with_context(g_ceph_context);
  if (r < 0)
    goto close_fd;

  r = rados.connect();
  if (r < 0)
    goto close_fd;

  r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx);
  if (r < 0)
    goto close_fd;

  io_ctx.set_namespace(cfg->nsname);

  r = rbd.open(io_ctx, image, cfg->imgname.c_str());
  if (r < 0)
    goto close_fd;

  if (cfg->exclusive) {
    r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE);
    if (r < 0) {
      cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r)
           << std::endl;
      goto close_fd;
    }
  }

  // A snapshot may be selected by id or by name, never both (enforced by
  // parse_args()).
  if (cfg->snapid != CEPH_NOSNAP) {
    r = image.snap_set_by_id(cfg->snapid);
    if (r < 0) {
      cerr << "rbd-nbd: failed to set snap id: " << cpp_strerror(r)
           << std::endl;
      goto close_fd;
    }
  } else if (!cfg->snapname.empty()) {
    r = image.snap_set(cfg->snapname.c_str());
    if (r < 0) {
      cerr << "rbd-nbd: failed to set snap name: " << cpp_strerror(r)
           << std::endl;
      goto close_fd;
    }
  }

  // Load any requested encryption layers; each format is paired 1:1 with
  // a passphrase file (parse_args() guarantees equal counts).
  if (!cfg->encryption_formats.empty()) {
    EncryptionOptions encryption_options;
    encryption_options.specs.reserve(cfg->encryption_formats.size());

    for (size_t i = 0; i < cfg->encryption_formats.size(); ++i) {
      std::ifstream file(cfg->encryption_passphrase_files[i],
                         std::ios::in | std::ios::binary);
      if (file.fail()) {
        // NOTE(review): ifstream is not guaranteed to set errno; the
        // reported error code may be stale — confirm.
        r = -errno;
        std::cerr << "rbd-nbd: unable to open passphrase file '"
                  << cfg->encryption_passphrase_files[i] << "': "
                  << cpp_strerror(r) << std::endl;
        goto close_fd;
      }
      std::string passphrase((std::istreambuf_iterator<char>(file)),
                             std::istreambuf_iterator<char>());
      file.close();

      switch (cfg->encryption_formats[i]) {
      case RBD_ENCRYPTION_FORMAT_LUKS: {
        auto opts = new librbd::encryption_luks_format_options_t{
            std::move(passphrase)};
        encryption_options.specs.push_back(
            {RBD_ENCRYPTION_FORMAT_LUKS, opts, sizeof(*opts)});
        break;
      }
      case RBD_ENCRYPTION_FORMAT_LUKS1: {
        auto opts = new librbd::encryption_luks1_format_options_t{
            .passphrase = std::move(passphrase)};
        encryption_options.specs.push_back(
            {RBD_ENCRYPTION_FORMAT_LUKS1, opts, sizeof(*opts)});
        break;
      }
      case RBD_ENCRYPTION_FORMAT_LUKS2: {
        auto opts = new librbd::encryption_luks2_format_options_t{
            .passphrase = std::move(passphrase)};
        encryption_options.specs.push_back(
            {RBD_ENCRYPTION_FORMAT_LUKS2, opts, sizeof(*opts)});
        break;
      }
      default:
        ceph_abort();
      }
    }

    r = image.encryption_load2(encryption_options.specs.data(),
                               encryption_options.specs.size());
    if (r != 0) {
      cerr << "rbd-nbd: failed to load encryption: " << cpp_strerror(r)
           << std::endl;
      goto close_fd;
    }

    // luks2 block size can vary upto 4096, while luks1 always uses 512
    // currently we don't have an rbd API for querying the loaded encryption
    blksize = 4096;
  }

  r = image.stat(info, sizeof(info));
  if (r < 0)
    goto close_fd;

  // Advertise device capabilities to the kernel.
  flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_HAS_FLAGS;
  if (!cfg->notrim) {
    flags |= NBD_FLAG_SEND_TRIM;
  }
  if (!cfg->snapname.empty() || cfg->readonly) {
    flags |= NBD_FLAG_READ_ONLY;
    read_only = 1;
  }

  // The nbd size interfaces take an unsigned long.
  if (info.size > ULONG_MAX) {
    r = -EFBIG;
    cerr << "rbd-nbd: image is too large (" << byte_u_t(info.size)
         << ", max is " << byte_u_t(ULONG_MAX) << ")" << std::endl;
    goto close_fd;
  }

  size = info.size;

  r = load_module(cfg);
  if (r < 0)
    goto close_fd;

  server = start_server(fd[1], image, cfg);

  // Prefer netlink when requested or when re-attaching; fall back to the
  // ioctl interface if the kernel lacks netlink support (return value 1).
  use_netlink = cfg->try_netlink || reconnect;
  if (use_netlink) {
    // generate when the cookie is not supplied at CLI
    if (!reconnect && cfg->cookie.empty()) {
      uuid_d uuid_gen;
      uuid_gen.generate_random();
      cfg->cookie = uuid_gen.to_string();
    }
    r = try_netlink_setup(cfg, fd[0], size, flags, reconnect);
    if (r < 0) {
      goto free_server;
    } else if (r == 1) {
      use_netlink = false;
    }
  }

  if (!use_netlink) {
    r = try_ioctl_setup(cfg, fd[0], size, blksize, flags);
    if (r < 0)
      goto free_server;
  }

  r = check_device_size(nbd_index, size);
  if (r < 0)
    goto close_nbd;

  r = ioctl(nbd, BLKROSET, (unsigned long) &read_only);
  if (r < 0) {
    r = -errno;
    goto close_nbd;
  }

  // Inner scope: the watch contexts must be destroyed before the device
  // teardown below, and are unregistered before run_server() returns us
  // here.
  {
    NBDQuiesceWatchCtx quiesce_watch_ctx(server);
    if (cfg->quiesce) {
      r = image.quiesce_watch(&quiesce_watch_ctx,
                              &server->quiesce_watch_handle);
      if (r < 0) {
        goto close_nbd;
      }
    }

    uint64_t handle;

    NBDWatchCtx watch_ctx(nbd, nbd_index, use_netlink, io_ctx, image,
                          info.size);
    r = image.update_watch(&watch_ctx, &handle);
    if (r < 0)
      goto close_nbd;

    // Print the mapped device path (and cookie, if requested); this is
    // the map command's user-visible output.
    std::string cookie;
    if (use_netlink) {
      cookie = get_cookie(cfg->devpath);
      ceph_assert(cookie == cfg->cookie || cookie.empty());
    }
    if (cfg->show_cookie && !cookie.empty()) {
      cout << cfg->devpath << " " << cookie << std::endl;
    } else {
      cout << cfg->devpath << std::endl;
    }

    // Blocks until the device is disconnected.
    run_server(forker, server, use_netlink);

    if (cfg->quiesce) {
      r = image.quiesce_unwatch(server->quiesce_watch_handle);
      ceph_assert(r == 0);
    }

    r = image.update_unwatch(handle);
    ceph_assert(r == 0);
  }

close_nbd:
  if (r < 0) {
    if (use_netlink) {
      netlink_disconnect(nbd_index);
    } else {
      ioctl(nbd, NBD_CLEAR_SOCK);
      cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r)
           << std::endl;
    }
  }
  close(nbd);
free_server:
  delete server;
close_fd:
  close(fd[0]);
  close(fd[1]);
close_ret:
  image.close();
  io_ctx.close();
  rados.shutdown();

  forker.exit(r < 0 ? EXIT_FAILURE : 0);
  // Unreachable;
  return r;
}
1951
1952 static int do_detach(Config *cfg)
1953 {
1954 int r = kill(cfg->pid, SIGTERM);
1955 if (r == -1) {
1956 r = -errno;
1957 cerr << "rbd-nbd: failed to terminate " << cfg->pid << ": "
1958 << cpp_strerror(r) << std::endl;
1959 return r;
1960 }
1961
1962 return wait_for_terminate(cfg->pid, cfg->reattach_timeout);
1963 }
1964
1965 static int do_unmap(Config *cfg)
1966 {
1967 /*
1968 * The netlink disconnect call supports devices setup with netlink or ioctl,
1969 * so we always try that first.
1970 */
1971 int r = netlink_disconnect_by_path(cfg->devpath);
1972 if (r < 0) {
1973 return r;
1974 }
1975
1976 if (r == 1) {
1977 int nbd = open(cfg->devpath.c_str(), O_RDWR);
1978 if (nbd < 0) {
1979 cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
1980 return nbd;
1981 }
1982
1983 r = ioctl(nbd, NBD_DISCONNECT);
1984 if (r < 0) {
1985 cerr << "rbd-nbd: the device is not used" << std::endl;
1986 }
1987
1988 close(nbd);
1989
1990 if (r < 0) {
1991 return r;
1992 }
1993 }
1994
1995 if (cfg->pid > 0) {
1996 r = wait_for_terminate(cfg->pid, cfg->reattach_timeout);
1997 }
1998
1999 return 0;
2000 }
2001
2002 static int parse_imgpath(const std::string &imgpath, Config *cfg,
2003 std::ostream *err_msg) {
2004 std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$");
2005 std::smatch match;
2006 if (!std::regex_match(imgpath, match, pattern)) {
2007 std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl;
2008 return -EINVAL;
2009 }
2010
2011 if (match[1].matched) {
2012 cfg->poolname = match[1];
2013 }
2014
2015 if (match[2].matched) {
2016 cfg->nsname = match[2];
2017 }
2018
2019 cfg->imgname = match[3];
2020
2021 if (match[4].matched)
2022 cfg->snapname = match[4];
2023
2024 return 0;
2025 }
2026
2027 static int do_list_mapped_devices(const std::string &format, bool pretty_format)
2028 {
2029 bool should_print = false;
2030 std::unique_ptr<ceph::Formatter> f;
2031 TextTable tbl;
2032
2033 if (format == "json") {
2034 f.reset(new JSONFormatter(pretty_format));
2035 } else if (format == "xml") {
2036 f.reset(new XMLFormatter(pretty_format));
2037 } else if (!format.empty() && format != "plain") {
2038 std::cerr << "rbd-nbd: invalid output format: " << format << std::endl;
2039 return -EINVAL;
2040 }
2041
2042 if (f) {
2043 f->open_array_section("devices");
2044 } else {
2045 tbl.define_column("id", TextTable::LEFT, TextTable::LEFT);
2046 tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT);
2047 tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT);
2048 tbl.define_column("image", TextTable::LEFT, TextTable::LEFT);
2049 tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT);
2050 tbl.define_column("device", TextTable::LEFT, TextTable::LEFT);
2051 tbl.define_column("cookie", TextTable::LEFT, TextTable::LEFT);
2052 }
2053
2054 Config cfg;
2055 NBDListIterator it;
2056 while (it.get(&cfg)) {
2057 std::string snap = (cfg.snapid != CEPH_NOSNAP ?
2058 "@" + std::to_string(cfg.snapid) : cfg.snapname);
2059 if (f) {
2060 f->open_object_section("device");
2061 f->dump_int("id", cfg.pid);
2062 f->dump_string("pool", cfg.poolname);
2063 f->dump_string("namespace", cfg.nsname);
2064 f->dump_string("image", cfg.imgname);
2065 f->dump_string("snap", snap);
2066 f->dump_string("device", cfg.devpath);
2067 f->dump_string("cookie", cfg.cookie);
2068 f->close_section();
2069 } else {
2070 should_print = true;
2071 tbl << cfg.pid << cfg.poolname << cfg.nsname << cfg.imgname
2072 << (snap.empty() ? "-" : snap) << cfg.devpath << cfg.cookie
2073 << TextTable::endrow;
2074 }
2075 }
2076
2077 if (f) {
2078 f->close_section(); // devices
2079 f->flush(std::cout);
2080 }
2081 if (should_print) {
2082 std::cout << tbl;
2083 }
2084 return 0;
2085 }
2086
2087 static bool find_mapped_dev_by_spec(Config *cfg, int skip_pid=-1) {
2088 Config c;
2089 NBDListIterator it;
2090 while (it.get(&c)) {
2091 if (c.pid != skip_pid &&
2092 c.poolname == cfg->poolname && c.nsname == cfg->nsname &&
2093 c.imgname == cfg->imgname && c.snapname == cfg->snapname &&
2094 (cfg->devpath.empty() || c.devpath == cfg->devpath) &&
2095 c.snapid == cfg->snapid) {
2096 *cfg = c;
2097 return true;
2098 }
2099 }
2100 return false;
2101 }
2102
2103 static int find_proc_by_dev(Config *cfg) {
2104 Config c;
2105 NBDListIterator it;
2106 while (it.get(&c)) {
2107 if (c.devpath == cfg->devpath) {
2108 *cfg = c;
2109 return true;
2110 }
2111 }
2112 return false;
2113 }
2114
// Parse the command line into *cfg.
//
// Returns 0 on success, HELP_INFO/VERSION_INFO for -h/-v, or -EINVAL on
// invalid input with details appended to *err_msg. Note that non-fatal
// warnings (e.g. deprecated options) may be appended to *err_msg even when
// 0 is returned.
static int parse_args(vector<const char*>& args, std::ostream *err_msg,
                      Config *cfg) {
  std::string conf_file_list;
  std::string cluster;
  CephInitParameters iparams = ceph_argparse_early_args(
    args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list);

  // Build a local config proxy only to resolve rbd_default_pool from the
  // usual config sources (files, environment, argv).
  ConfigProxy config{false};
  config->name = iparams.name;
  config->cluster = cluster;

  if (!conf_file_list.empty()) {
    config.parse_config_files(conf_file_list.c_str(), nullptr, 0);
  } else {
    config.parse_config_files(nullptr, nullptr, 0);
  }
  config.parse_env(CEPH_ENTITY_TYPE_CLIENT);
  config.parse_argv(args);
  cfg->poolname = config.get_val<std::string>("rbd_default_pool");

  std::vector<const char*>::iterator i;
  std::ostringstream err;
  std::string arg_value;
  long long snapid;

  // Consume recognized options in place; unrecognized tokens are left in
  // args for the positional-argument handling below.
  for (i = args.begin(); i != args.end(); ) {
    if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
      return HELP_INFO;
    } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) {
      return VERSION_INFO;
    } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) {
    } else if (ceph_argparse_witharg(args, i, &cfg->io_timeout, err,
                                     "--io-timeout", (char *)NULL)) {
      if (!err.str().empty()) {
        *err_msg << "rbd-nbd: " << err.str();
        return -EINVAL;
      }
      if (cfg->io_timeout < 0) {
        *err_msg << "rbd-nbd: Invalid argument for io-timeout!";
        return -EINVAL;
      }
    } else if (ceph_argparse_witharg(args, i, &cfg->nbds_max, err, "--nbds_max", (char *)NULL)) {
      if (!err.str().empty()) {
        *err_msg << "rbd-nbd: " << err.str();
        return -EINVAL;
      }
      if (cfg->nbds_max < 0) {
        *err_msg << "rbd-nbd: Invalid argument for nbds_max!";
        return -EINVAL;
      }
    } else if (ceph_argparse_witharg(args, i, &cfg->max_part, err, "--max_part", (char *)NULL)) {
      if (!err.str().empty()) {
        *err_msg << "rbd-nbd: " << err.str();
        return -EINVAL;
      }
      if ((cfg->max_part < 0) || (cfg->max_part > 255)) {
        *err_msg << "rbd-nbd: Invalid argument for max_part(0~255)!";
        return -EINVAL;
      }
      cfg->set_max_part = true;
    } else if (ceph_argparse_flag(args, i, "--quiesce", (char *)NULL)) {
      cfg->quiesce = true;
    } else if (ceph_argparse_witharg(args, i, &cfg->quiesce_hook,
                                     "--quiesce-hook", (char *)NULL)) {
    } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) {
      cfg->readonly = true;
    } else if (ceph_argparse_witharg(args, i, &cfg->reattach_timeout, err,
                                     "--reattach-timeout", (char *)NULL)) {
      if (!err.str().empty()) {
        *err_msg << "rbd-nbd: " << err.str();
        return -EINVAL;
      }
      if (cfg->reattach_timeout < 0) {
        *err_msg << "rbd-nbd: Invalid argument for reattach-timeout!";
        return -EINVAL;
      }
    } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) {
      cfg->exclusive = true;
    } else if (ceph_argparse_flag(args, i, "--notrim", (char *)NULL)) {
      cfg->notrim = true;
    } else if (ceph_argparse_witharg(args, i, &cfg->io_timeout, err,
                                     "--timeout", (char *)NULL)) {
      // Deprecated alias of --io-timeout; accepted but warned about below.
      if (!err.str().empty()) {
        *err_msg << "rbd-nbd: " << err.str();
        return -EINVAL;
      }
      if (cfg->io_timeout < 0) {
        *err_msg << "rbd-nbd: Invalid argument for timeout!";
        return -EINVAL;
      }
      *err_msg << "rbd-nbd: --timeout is deprecated (use --io-timeout)";
    } else if (ceph_argparse_witharg(args, i, &cfg->format, err, "--format",
                                     (char *)NULL)) {
    } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) {
      cfg->pretty_format = true;
    } else if (ceph_argparse_flag(args, i, "--try-netlink", (char *)NULL)) {
      cfg->try_netlink = true;
    } else if (ceph_argparse_flag(args, i, "--show-cookie", (char *)NULL)) {
      cfg->show_cookie = true;
    } else if (ceph_argparse_witharg(args, i, &cfg->cookie, "--cookie", (char *)NULL)) {
    } else if (ceph_argparse_witharg(args, i, &snapid, err,
                                     "--snap-id", (char *)NULL)) {
      if (!err.str().empty()) {
        *err_msg << "rbd-nbd: " << err.str();
        return -EINVAL;
      }
      if (snapid < 0) {
        *err_msg << "rbd-nbd: Invalid argument for snap-id!";
        return -EINVAL;
      }
      cfg->snapid = snapid;
    } else if (ceph_argparse_witharg(args, i, &arg_value,
                                     "--encryption-format", (char *)NULL)) {
      if (arg_value == "luks1") {
        cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS1);
      } else if (arg_value == "luks2") {
        cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS2);
      } else if (arg_value == "luks") {
        cfg->encryption_formats.push_back(RBD_ENCRYPTION_FORMAT_LUKS);
      } else {
        *err_msg << "rbd-nbd: Invalid encryption format";
        return -EINVAL;
      }
    } else if (ceph_argparse_witharg(args, i, &arg_value,
                                     "--encryption-passphrase-file",
                                     (char *)NULL)) {
      cfg->encryption_passphrase_files.push_back(arg_value);
    } else {
      ++i;
    }
  }

  // Passphrase files given without explicit formats default to LUKS
  // (version auto-detected).
  if (cfg->encryption_formats.empty() &&
      !cfg->encryption_passphrase_files.empty()) {
    cfg->encryption_formats.resize(cfg->encryption_passphrase_files.size(),
                                   RBD_ENCRYPTION_FORMAT_LUKS);
  }

  // Formats and passphrase files must pair up 1:1.
  if (cfg->encryption_formats.size() != cfg->encryption_passphrase_files.size()) {
    *err_msg << "rbd-nbd: Encryption formats count does not match "
             << "passphrase files count";
    return -EINVAL;
  }

  // First remaining positional argument selects the command.
  Command cmd = None;
  if (args.begin() != args.end()) {
    if (strcmp(*args.begin(), "map") == 0) {
      cmd = Map;
    } else if (strcmp(*args.begin(), "unmap") == 0) {
      cmd = Unmap;
    } else if (strcmp(*args.begin(), "attach") == 0) {
      cmd = Attach;
    } else if (strcmp(*args.begin(), "detach") == 0) {
      cmd = Detach;
    } else if (strcmp(*args.begin(), "list-mapped") == 0) {
      cmd = List;
    } else {
      *err_msg << "rbd-nbd: unknown command: " << *args.begin();
      return -EINVAL;
    }
    args.erase(args.begin());
  }

  if (cmd == None) {
    *err_msg << "rbd-nbd: must specify command";
    return -EINVAL;
  }

  // Per-command validation of the remaining positional arguments.
  std::string cookie;
  switch (cmd) {
    case Attach:
      if (cfg->devpath.empty()) {
        *err_msg << "rbd-nbd: must specify device to attach";
        return -EINVAL;
      }
      // Allowing attach without --cookie option for kernel without
      // NBD_ATTR_BACKEND_IDENTIFIER support for compatibility
      cookie = get_cookie(cfg->devpath);
      if (!cookie.empty()) {
        if (cfg->cookie.empty()) {
          *err_msg << "rbd-nbd: must specify cookie to attach";
          return -EINVAL;
        } else if (cookie != cfg->cookie) {
          *err_msg << "rbd-nbd: cookie mismatch";
          return -EINVAL;
        }
      } else if (!cfg->cookie.empty()) {
        *err_msg << "rbd-nbd: kernel does not have cookie support";
        return -EINVAL;
      }
      [[fallthrough]];
    case Map:
      if (args.begin() == args.end()) {
        *err_msg << "rbd-nbd: must specify image-or-snap-spec";
        return -EINVAL;
      }
      if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) {
        return -EINVAL;
      }
      args.erase(args.begin());
      break;
    case Detach:
    case Unmap:
      // Accept either a /dev/... path or an image/snap spec.
      if (args.begin() == args.end()) {
        *err_msg << "rbd-nbd: must specify nbd device or image-or-snap-spec";
        return -EINVAL;
      }
      if (boost::starts_with(*args.begin(), "/dev/")) {
        cfg->devpath = *args.begin();
      } else {
        if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) {
          return -EINVAL;
        }
      }
      args.erase(args.begin());
      break;
    default:
      //shut up gcc;
      break;
  }

  if (cfg->snapid != CEPH_NOSNAP && !cfg->snapname.empty()) {
    *err_msg << "rbd-nbd: use either snapname or snapid, not both";
    return -EINVAL;
  }

  if (args.begin() != args.end()) {
    *err_msg << "rbd-nbd: unknown args: " << *args.begin();
    return -EINVAL;
  }

  cfg->command = cmd;
  return 0;
}
2349
// Top-level driver: parse the command line and dispatch to the requested
// command. Returns 0 on success or a negative error code (main() maps any
// negative value to EXIT_FAILURE).
static int rbd_nbd(int argc, const char *argv[])
{
  int r;
  Config cfg;
  auto args = argv_to_vec(argc, argv);
  std::ostringstream err_msg;
  r = parse_args(args, &err_msg, &cfg);
  if (r == HELP_INFO) {
    usage();
    return 0;
  } else if (r == VERSION_INFO) {
    std::cout << pretty_version_to_str() << std::endl;
    return 0;
  } else if (r < 0) {
    cerr << err_msg.str() << std::endl;
    return r;
  }

  // parse_args() may leave non-fatal warnings (e.g. deprecated options)
  // even on success; surface them.
  if (!err_msg.str().empty()) {
    cerr << err_msg.str() << std::endl;
  }

  switch (cfg.command) {
  case Attach:
    ceph_assert(!cfg.devpath.empty());
    // Refuse to attach when some other process (skip our own pid) is
    // already serving this spec/device.
    if (find_mapped_dev_by_spec(&cfg, getpid())) {
      cerr << "rbd-nbd: " << cfg.devpath << " has process " << cfg.pid
           << " connected" << std::endl;
      return -EBUSY;
    }
    [[fallthrough]];
  case Map:
    if (cfg.imgname.empty()) {
      cerr << "rbd-nbd: image name was not specified" << std::endl;
      return -EINVAL;
    }

    // Attach is a map in "reconnect" mode.
    r = do_map(argc, argv, &cfg, cfg.command == Attach);
    if (r < 0)
      return -EINVAL;
    break;
  case Detach:
    // Resolve the target mapping either from the image spec or the device
    // path; detach requires a live serving process.
    if (cfg.devpath.empty()) {
      if (!find_mapped_dev_by_spec(&cfg)) {
        cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped"
             << std::endl;
        return -ENOENT;
      }
    } else if (!find_proc_by_dev(&cfg)) {
      cerr << "rbd-nbd: no process attached to " << cfg.devpath << " found"
           << std::endl;
      return -ENOENT;
    }
    r = do_detach(&cfg);
    if (r < 0)
      return -EINVAL;
    break;
  case Unmap:
    if (cfg.devpath.empty()) {
      if (!find_mapped_dev_by_spec(&cfg)) {
        cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped"
             << std::endl;
        return -ENOENT;
      }
    } else if (!find_proc_by_dev(&cfg)) {
      // still try to send disconnect to the device
    }
    r = do_unmap(&cfg);
    if (r < 0)
      return -EINVAL;
    break;
  case List:
    r = do_list_mapped_devices(cfg.format, cfg.pretty_format);
    if (r < 0)
      return -EINVAL;
    break;
  default:
    usage();
    break;
  }

  return 0;
}
2433
2434 int main(int argc, const char *argv[])
2435 {
2436 int r = rbd_nbd(argc, argv);
2437 if (r < 0) {
2438 return EXIT_FAILURE;
2439 }
2440 return 0;
2441 }