7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4/*
5 * rbd-nbd - RBD in userspace
6 *
7 * Copyright (C) 2015 - 2016 Kylin Corporation
8 *
9 * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com>
10 * Li Wang <li.wang@kylin-cloud.com>
11 *
12 * This is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License version 2.1, as published by the Free Software
15 * Foundation. See file COPYING.
16 *
17*/
18
f67539c2 19#include "acconfig.h"
7c673cae 20#include "include/int_types.h"
f67539c2 21#include "include/scope_guard.h"
7c673cae 22
9f95a23c 23#include <libgen.h>
7c673cae
FG
24#include <stdio.h>
25#include <stdlib.h>
26#include <stddef.h>
27#include <errno.h>
28#include <fcntl.h>
f67539c2 29#include <poll.h>
7c673cae
FG
30#include <string.h>
31#include <sys/types.h>
32#include <unistd.h>
33
34#include <linux/nbd.h>
35#include <linux/fs.h>
36#include <sys/ioctl.h>
37#include <sys/socket.h>
f67539c2 38#include <sys/syscall.h>
7c673cae 39
eafe8130
TL
40#include "nbd-netlink.h"
41#include <libnl3/netlink/genl/genl.h>
42#include <libnl3/netlink/genl/ctrl.h>
43#include <libnl3/netlink/genl/mngt.h>
44
f67539c2
TL
45#if __has_include(<filesystem>)
46#include <filesystem>
47namespace fs = std::filesystem;
48#else
49#include <experimental/filesystem>
50namespace fs = std::experimental::filesystem;
51#endif
7c673cae 52#include <fstream>
11fdf7f2
TL
53#include <iostream>
54#include <memory>
55#include <regex>
56#include <boost/algorithm/string/predicate.hpp>
f67539c2 57#include <boost/lexical_cast.hpp>
7c673cae 58
11fdf7f2
TL
59#include "common/Formatter.h"
60#include "common/Preforker.h"
f67539c2 61#include "common/SubProcess.h"
11fdf7f2
TL
62#include "common/TextTable.h"
63#include "common/ceph_argparse.h"
7c673cae
FG
64#include "common/config.h"
65#include "common/dout.h"
7c673cae 66#include "common/errno.h"
f67539c2 67#include "common/event_socket.h"
7c673cae
FG
68#include "common/module.h"
69#include "common/safe_io.h"
224ce89b 70#include "common/version.h"
11fdf7f2 71
7c673cae
FG
72#include "global/global_init.h"
73#include "global/signal_handler.h"
74
75#include "include/rados/librados.hpp"
76#include "include/rbd/librbd.hpp"
77#include "include/stringify.h"
78#include "include/xlist.h"
79
11fdf7f2
TL
80#include "mon/MonClient.h"
81
7c673cae
FG
82#define dout_context g_ceph_context
83#define dout_subsys ceph_subsys_rbd
84#undef dout_prefix
85#define dout_prefix *_dout << "rbd-nbd: "
86
f67539c2
TL
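// map/unmap create and tear down both the nbd device and the serving process;
// attach/detach only reconnect or disconnect the userspace server to/from an
// already mapped /dev/nbdX, so an rbd-nbd process can be restarted without
// unmapping the device (within the --reattach-timeout window).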
87enum Command {
88 None,
89 Map,
90 Unmap,
91 Attach,
92 Detach,
93 List
94};
95
31f18b77
FG
96struct Config {
97 int nbds_max = 0;
98 int max_part = 255;
f67539c2
TL
99 int io_timeout = -1;
100 int reattach_timeout = 30;
31f18b77
FG
101
102 bool exclusive = false;
f67539c2 103 bool quiesce = false;
31f18b77
FG
104 bool readonly = false;
105 bool set_max_part = false;
eafe8130 106 bool try_netlink = false;
31f18b77
FG
107
108 std::string poolname;
11fdf7f2 109 std::string nsname;
31f18b77
FG
110 std::string imgname;
111 std::string snapname;
112 std::string devpath;
f67539c2 113 std::string quiesce_hook = CMAKE_INSTALL_LIBEXECDIR "/rbd-nbd/rbd-nbd_quiesce";
11fdf7f2
TL
114
115 std::string format;
116 bool pretty_format = false;
f67539c2
TL
117
118 std::optional<librbd::encryption_format_t> encryption_format;
119 std::optional<std::string> encryption_passphrase_file;
120
121 Command command = None;
122 int pid = 0;
123
124 std::string image_spec() const {
125 std::string spec = poolname + "/";
126
127 if (!nsname.empty()) {
128 spec += nsname + "/";
129 }
130 spec += imgname;
131
132 if (!snapname.empty()) {
133 spec += "@" + snapname;
134 }
135
136 return spec;
137 }
31f18b77
FG
138};
139
7c673cae
FG
140static void usage()
141{
f67539c2
TL
142 std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map image to nbd device\n"
143 << " detach <device|image-or-snap-spec> Detach image from nbd device\n"
144 << " [options] attach <image-or-snap-spec> Attach image to nbd device\n"
145 << " unmap <device|image-or-snap-spec> Unmap nbd device\n"
146 << " [options] list-mapped List mapped nbd devices\n"
147 << "Map and attach options:\n"
148 << " --device <device path> Specify nbd device path (/dev/nbd{num})\n"
149 << " --encryption-format Image encryption format\n"
150 << " (possible values: luks1, luks2)\n"
151 << " --encryption-passphrase-file Path of file containing passphrase for unlocking image encryption\n"
152 << " --exclusive Forbid writes by other clients\n"
153 << " --io-timeout <sec> Set nbd IO timeout\n"
154 << " --max_part <limit> Override for module param max_part\n"
155 << " --nbds_max <limit> Override for module param nbds_max\n"
156 << " --quiesce Use quiesce callbacks\n"
157 << " --quiesce-hook <path> Specify quiesce hook path\n"
158 << " (default: " << Config().quiesce_hook << ")\n"
159 << " --read-only Map read-only\n"
160 << " --reattach-timeout <sec> Set nbd re-attach timeout\n"
161 << " (default: " << Config().reattach_timeout << ")\n"
162 << " --try-netlink Use the nbd netlink interface\n"
11fdf7f2
TL
163 << "\n"
164 << "List options:\n"
165 << " --format plain|json|xml Output format (default: plain)\n"
166 << " --pretty-format Pretty formatting (json and xml)\n"
7c673cae
FG
167 << std::endl;
168 generic_server_usage();
169}
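
// Typical invocations (names illustrative):
//   rbd-nbd map mypool/myimage                # prints the assigned /dev/nbdX
//   rbd-nbd map mypool/myns/myimage@snap --read-only
//   rbd-nbd unmap /dev/nbd0
//   rbd-nbd list-mapped --format json --pretty-format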
170
7c673cae 171static int nbd = -1;
eafe8130 172static int nbd_index = -1;
f67539c2 173static EventSocket terminate_event_sock;
31f18b77 174
7c673cae
FG
175#define RBD_NBD_BLKSIZE 512UL
176
224ce89b
WB
177#define HELP_INFO 1
178#define VERSION_INFO 2
179
7c673cae
FG
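// The kernel nbd protocol carries multi-byte header fields in network byte
// order; ntohll/htonll extend ntohl/htonl to the 64-bit request offset and
// handle.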
180#ifdef CEPH_BIG_ENDIAN
181#define ntohll(a) (a)
182#elif defined(CEPH_LITTLE_ENDIAN)
183#define ntohll(a) swab(a)
184#else
185#error "Could not determine endianness"
186#endif
187#define htonll(a) ntohll(a)
188
11fdf7f2 189static int parse_args(vector<const char*>& args, std::ostream *err_msg,
f67539c2
TL
190 Config *cfg);
191static int netlink_disconnect(int index);
eafe8130 192static int netlink_resize(int nbd_index, uint64_t size);
7c673cae 193
f67539c2
TL
194static int run_quiesce_hook(const std::string &quiesce_hook,
195 const std::string &devpath,
196 const std::string &command);
197
7c673cae
FG
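// NBDServer drives one mapped device: the reader thread polls the kernel
// socket, decodes nbd_request headers and submits them to librbd as
// asynchronous reads/writes/discards/flushes; completions are queued and the
// writer thread sends the matching nbd_reply (plus payload for reads) back to
// the kernel.  An optional quiesce thread runs the configured hook around
// librbd quiesce/unquiesce notifications.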
198class NBDServer
199{
f67539c2
TL
200public:
201 uint64_t quiesce_watch_handle = 0;
202
7c673cae
FG
203private:
204 int fd;
205 librbd::Image &image;
f67539c2 206 Config *cfg;
7c673cae
FG
207
208public:
f67539c2
TL
209 NBDServer(int fd, librbd::Image& image, Config *cfg)
210 : fd(fd)
211 , image(image)
212 , cfg(cfg)
7c673cae
FG
213 , reader_thread(*this, &NBDServer::reader_entry)
214 , writer_thread(*this, &NBDServer::writer_entry)
f67539c2
TL
215 , quiesce_thread(*this, &NBDServer::quiesce_entry)
216 {
217 std::vector<librbd::config_option_t> options;
218 image.config_list(&options);
219 for (auto &option : options) {
220 if ((option.name == std::string("rbd_cache") ||
221 option.name == std::string("rbd_cache_writethrough_until_flush")) &&
222 option.value == "false") {
223 allow_internal_flush = true;
224 break;
225 }
226 }
227 }
228
229 Config *get_cfg() const {
230 return cfg;
231 }
7c673cae
FG
232
233private:
f67539c2 234 int terminate_event_fd = -1;
9f95a23c
TL
235 ceph::mutex disconnect_lock =
236 ceph::make_mutex("NBDServer::DisconnectLocker");
237 ceph::condition_variable disconnect_cond;
7c673cae 238 std::atomic<bool> terminated = { false };
f67539c2 239 std::atomic<bool> allow_internal_flush = { false };
7c673cae
FG
240
241 struct IOContext
242 {
243 xlist<IOContext*>::item item;
11fdf7f2 244 NBDServer *server = nullptr;
7c673cae
FG
245 struct nbd_request request;
246 struct nbd_reply reply;
247 bufferlist data;
11fdf7f2 248 int command = 0;
7c673cae
FG
249
250 IOContext()
251 : item(this)
252 {}
253 };
254
255 friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx);
256
9f95a23c
TL
257 ceph::mutex lock = ceph::make_mutex("NBDServer::Locker");
258 ceph::condition_variable cond;
7c673cae
FG
259 xlist<IOContext*> io_pending;
260 xlist<IOContext*> io_finished;
261
262 void io_start(IOContext *ctx)
263 {
9f95a23c 264 std::lock_guard l{lock};
7c673cae
FG
265 io_pending.push_back(&ctx->item);
266 }
267
268 void io_finish(IOContext *ctx)
269 {
9f95a23c 270 std::lock_guard l{lock};
11fdf7f2 271 ceph_assert(ctx->item.is_on_list());
7c673cae
FG
272 ctx->item.remove_myself();
273 io_finished.push_back(&ctx->item);
9f95a23c 274 cond.notify_all();
7c673cae
FG
275 }
276
277 IOContext *wait_io_finish()
278 {
9f95a23c 279 std::unique_lock l{lock};
f67539c2
TL
280 cond.wait(l, [this] {
281 return !io_finished.empty() ||
282 (io_pending.empty() && terminated);
283 });
7c673cae
FG
284
285 if (io_finished.empty())
286 return NULL;
287
288 IOContext *ret = io_finished.front();
289 io_finished.pop_front();
290
291 return ret;
292 }
293
294 void wait_clean()
295 {
9f95a23c
TL
296 std::unique_lock l{lock};
297 cond.wait(l, [this] { return io_pending.empty(); });
7c673cae
FG
298
299 while(!io_finished.empty()) {
11fdf7f2 300 std::unique_ptr<IOContext> free_ctx(io_finished.front());
7c673cae
FG
301 io_finished.pop_front();
302 }
303 }
304
f67539c2
TL
305 void assert_clean()
306 {
307 std::unique_lock l{lock};
308
309 ceph_assert(!reader_thread.is_started());
310 ceph_assert(!writer_thread.is_started());
311 ceph_assert(io_pending.empty());
312 ceph_assert(io_finished.empty());
313 }
314
7c673cae
FG
315 static void aio_callback(librbd::completion_t cb, void *arg)
316 {
317 librbd::RBD::AioCompletion *aio_completion =
318 reinterpret_cast<librbd::RBD::AioCompletion*>(cb);
319
320 IOContext *ctx = reinterpret_cast<IOContext *>(arg);
321 int ret = aio_completion->get_return_value();
322
323 dout(20) << __func__ << ": " << *ctx << dendl;
324
325 if (ret == -EINVAL) {
326 // if shrinking an image, a pagecache writeback might reference
327 // extents outside of the range of the new image extents
181888fb 328 dout(0) << __func__ << ": masking IO out-of-bounds error" << dendl;
7c673cae
FG
329 ctx->data.clear();
330 ret = 0;
331 }
332
333 if (ret < 0) {
334 ctx->reply.error = htonl(-ret);
335 } else if ((ctx->command == NBD_CMD_READ) &&
336 ret < static_cast<int>(ctx->request.len)) {
337 int pad_byte_count = static_cast<int> (ctx->request.len) - ret;
338 ctx->data.append_zero(pad_byte_count);
339 dout(20) << __func__ << ": " << *ctx << ": Pad byte count: "
340 << pad_byte_count << dendl;
11fdf7f2 341 ctx->reply.error = htonl(0);
7c673cae
FG
342 } else {
343 ctx->reply.error = htonl(0);
344 }
345 ctx->server->io_finish(ctx);
346
347 aio_completion->release();
348 }
349
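  // Request loop: poll() on the nbd socket together with the terminate
  // eventfd so a signal or the destructor can interrupt a blocking read.
  // NBD_CMD_DISC and a bad magic end the loop cleanly; read errors also force
  // a kernel-side disconnect (netlink first, NBD_DISCONNECT ioctl fallback).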
350 void reader_entry()
351 {
f67539c2
TL
352 struct pollfd poll_fds[2];
353 memset(poll_fds, 0, sizeof(struct pollfd) * 2);
354 poll_fds[0].fd = fd;
355 poll_fds[0].events = POLLIN;
356 poll_fds[1].fd = terminate_event_fd;
357 poll_fds[1].events = POLLIN;
358
359 while (true) {
11fdf7f2 360 std::unique_ptr<IOContext> ctx(new IOContext());
7c673cae
FG
361 ctx->server = this;
362
363 dout(20) << __func__ << ": waiting for nbd request" << dendl;
364
f67539c2
TL
365 int r = poll(poll_fds, 2, -1);
366 if (r == -1) {
367 if (errno == EINTR) {
368 continue;
369 }
370 r = -errno;
371 derr << "failed to poll nbd: " << cpp_strerror(r) << dendl;
372 goto error;
373 }
374
375 if ((poll_fds[1].revents & POLLIN) != 0) {
376 dout(0) << __func__ << ": terminate received" << dendl;
377 goto signal;
378 }
379
380 if ((poll_fds[0].revents & POLLIN) == 0) {
381 dout(20) << __func__ << ": nothing to read" << dendl;
382 continue;
383 }
384
385 r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request));
7c673cae
FG
386 if (r < 0) {
387 derr << "failed to read nbd request header: " << cpp_strerror(r)
388 << dendl;
f67539c2 389 goto error;
7c673cae
FG
390 }
391
392 if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) {
393 derr << "invalid nbd request header" << dendl;
eafe8130 394 goto signal;
7c673cae
FG
395 }
396
397 ctx->request.from = ntohll(ctx->request.from);
398 ctx->request.type = ntohl(ctx->request.type);
399 ctx->request.len = ntohl(ctx->request.len);
400
401 ctx->reply.magic = htonl(NBD_REPLY_MAGIC);
402 memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle));
403
404 ctx->command = ctx->request.type & 0x0000ffff;
405
406 dout(20) << *ctx << ": start" << dendl;
407
408 switch (ctx->command)
409 {
410 case NBD_CMD_DISC:
411 // NBD_DO_IT will return when pipe is closed
412 dout(0) << "disconnect request received" << dendl;
eafe8130 413 goto signal;
7c673cae
FG
414 case NBD_CMD_WRITE:
415 bufferptr ptr(ctx->request.len);
416 r = safe_read_exact(fd, ptr.c_str(), ctx->request.len);
417 if (r < 0) {
418 derr << *ctx << ": failed to read nbd request data: "
419 << cpp_strerror(r) << dendl;
f67539c2 420 goto error;
7c673cae
FG
421 }
422 ctx->data.push_back(ptr);
423 break;
424 }
425
426 IOContext *pctx = ctx.release();
427 io_start(pctx);
428 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback);
429 switch (pctx->command)
430 {
431 case NBD_CMD_WRITE:
432 image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c);
433 break;
434 case NBD_CMD_READ:
435 image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c);
436 break;
437 case NBD_CMD_FLUSH:
438 image.aio_flush(c);
f67539c2 439 allow_internal_flush = true;
7c673cae
FG
440 break;
441 case NBD_CMD_TRIM:
442 image.aio_discard(pctx->request.from, pctx->request.len, c);
443 break;
444 default:
445 derr << *pctx << ": invalid request command" << dendl;
446 c->release();
eafe8130 447 goto signal;
7c673cae
FG
448 }
449 }
f67539c2
TL
450error:
451 {
452 int r = netlink_disconnect(nbd_index);
453 if (r == 1) {
454 ioctl(nbd, NBD_DISCONNECT);
455 }
456 }
eafe8130 457signal:
f67539c2
TL
458 std::lock_guard l{lock};
459 terminated = true;
460 cond.notify_all();
461
462 std::lock_guard disconnect_l{disconnect_lock};
9f95a23c 463 disconnect_cond.notify_all();
f67539c2
TL
464
465 dout(20) << __func__ << ": terminated" << dendl;
7c673cae
FG
466 }
467
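  // Reply loop: dequeue completed IOContexts and write the nbd_reply header
  // (plus the data payload for successful reads) back to the kernel, shutting
  // the socket down once the reader has terminated and in-flight requests
  // have drained.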
468 void writer_entry()
469 {
f67539c2 470 while (true) {
7c673cae 471 dout(20) << __func__ << ": waiting for io request" << dendl;
11fdf7f2 472 std::unique_ptr<IOContext> ctx(wait_io_finish());
7c673cae
FG
473 if (!ctx) {
474 dout(20) << __func__ << ": no io requests, terminating" << dendl;
f67539c2 475 goto done;
7c673cae
FG
476 }
477
478 dout(20) << __func__ << ": got: " << *ctx << dendl;
479
480 int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply));
481 if (r < 0) {
482 derr << *ctx << ": failed to write reply header: " << cpp_strerror(r)
483 << dendl;
f67539c2 484 goto error;
7c673cae
FG
485 }
486 if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) {
487 r = ctx->data.write_fd(fd);
488 if (r < 0) {
489 derr << *ctx << ": failed to write reply data: " << cpp_strerror(r)
490 << dendl;
f67539c2 491 goto error;
7c673cae
FG
492 }
493 }
494 dout(20) << *ctx << ": finish" << dendl;
495 }
f67539c2
TL
496 error:
497 wait_clean();
498 done:
499 ::shutdown(fd, SHUT_RDWR);
500
501 dout(20) << __func__ << ": terminated" << dendl;
502 }
503
504 bool wait_quiesce() {
505 dout(20) << __func__ << dendl;
506
507 std::unique_lock locker{lock};
508 cond.wait(locker, [this] { return quiesce || terminated; });
509
510 if (terminated) {
511 return false;
512 }
513
514 dout(20) << __func__ << ": got quiesce request" << dendl;
515 return true;
516 }
517
518 void wait_unquiesce(std::unique_lock<ceph::mutex> &locker) {
519 dout(20) << __func__ << dendl;
520
521 cond.wait(locker, [this] { return !quiesce || terminated; });
522
523 dout(20) << __func__ << ": got unquiesce request" << dendl;
524 }
525
526 void wait_inflight_io() {
527 if (!allow_internal_flush) {
528 return;
529 }
530
531 uint64_t features = 0;
532 image.features(&features);
533 if ((features & RBD_FEATURE_EXCLUSIVE_LOCK) != 0) {
534 bool is_owner = false;
535 image.is_exclusive_lock_owner(&is_owner);
536 if (!is_owner) {
537 return;
538 }
539 }
540
541 dout(20) << __func__ << dendl;
542
543 int r = image.flush();
544 if (r < 0) {
545 derr << "flush failed: " << cpp_strerror(r) << dendl;
546 }
547 }
548
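  // Quiesce cycle: on each quiesce notification run the external hook, flush
  // outstanding I/O where the image state allows it, report the hook result
  // via image.quiesce_complete(), and on success wait for the matching
  // unquiesce notification before invoking the hook again with "unquiesce".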
549 void quiesce_entry()
550 {
551 ceph_assert(cfg->quiesce);
552
553 while (wait_quiesce()) {
554
555 int r = run_quiesce_hook(cfg->quiesce_hook, cfg->devpath, "quiesce");
556
557 wait_inflight_io();
558
559 {
560 std::unique_lock locker{lock};
561 ceph_assert(quiesce == true);
562
563 image.quiesce_complete(quiesce_watch_handle, r);
564
565 if (r < 0) {
566 quiesce = false;
567 continue;
568 }
569
570 wait_unquiesce(locker);
571 }
572
573 run_quiesce_hook(cfg->quiesce_hook, cfg->devpath, "unquiesce");
574 }
575
7c673cae
FG
576 dout(20) << __func__ << ": terminated" << dendl;
577 }
578
579 class ThreadHelper : public Thread
580 {
581 public:
582 typedef void (NBDServer::*entry_func)();
583 private:
584 NBDServer &server;
585 entry_func func;
586 public:
587 ThreadHelper(NBDServer &_server, entry_func _func)
588 :server(_server)
589 ,func(_func)
590 {}
591 protected:
592 void* entry() override
593 {
594 (server.*func)();
7c673cae
FG
595 return NULL;
596 }
f67539c2
TL
597 } reader_thread, writer_thread, quiesce_thread;
598
599 bool started = false;
600 bool quiesce = false;
7c673cae 601
7c673cae
FG
602public:
603 void start()
604 {
605 if (!started) {
606 dout(10) << __func__ << ": starting" << dendl;
607
608 started = true;
609
f67539c2
TL
610 terminate_event_fd = eventfd(0, EFD_NONBLOCK);
611 ceph_assert(terminate_event_fd > 0);
612 int r = terminate_event_sock.init(terminate_event_fd,
613 EVENT_SOCKET_TYPE_EVENTFD);
614 ceph_assert(r >= 0);
615
7c673cae
FG
616 reader_thread.create("rbd_reader");
617 writer_thread.create("rbd_writer");
f67539c2
TL
618 if (cfg->quiesce) {
619 quiesce_thread.create("rbd_quiesce");
620 }
7c673cae
FG
621 }
622 }
623
eafe8130
TL
624 void wait_for_disconnect()
625 {
626 if (!started)
627 return;
628
9f95a23c
TL
629 std::unique_lock l{disconnect_lock};
630 disconnect_cond.wait(l);
eafe8130
TL
631 }
632
f67539c2
TL
633 void notify_quiesce() {
634 dout(10) << __func__ << dendl;
635
636 ceph_assert(cfg->quiesce);
637
638 std::unique_lock locker{lock};
639 ceph_assert(quiesce == false);
640 quiesce = true;
641 cond.notify_all();
642 }
643
644 void notify_unquiesce() {
645 dout(10) << __func__ << dendl;
646
647 ceph_assert(cfg->quiesce);
648
649 std::unique_lock locker{lock};
650 ceph_assert(quiesce == true);
651 quiesce = false;
652 cond.notify_all();
653 }
654
11fdf7f2 655 ~NBDServer()
7c673cae
FG
656 {
657 if (started) {
658 dout(10) << __func__ << ": terminating" << dendl;
659
f67539c2 660 terminate_event_sock.notify();
7c673cae
FG
661
662 reader_thread.join();
663 writer_thread.join();
f67539c2
TL
664 if (cfg->quiesce) {
665 quiesce_thread.join();
666 }
7c673cae 667
f67539c2 668 assert_clean();
7c673cae 669
f67539c2 670 close(terminate_event_fd);
7c673cae
FG
671 started = false;
672 }
673 }
7c673cae
FG
674};
675
676std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) {
677
678 os << "[" << std::hex << ntohll(*((uint64_t *)ctx.request.handle));
679
680 switch (ctx.command)
681 {
682 case NBD_CMD_WRITE:
683 os << " WRITE ";
684 break;
685 case NBD_CMD_READ:
686 os << " READ ";
687 break;
688 case NBD_CMD_FLUSH:
689 os << " FLUSH ";
690 break;
691 case NBD_CMD_TRIM:
692 os << " TRIM ";
693 break;
f67539c2
TL
694 case NBD_CMD_DISC:
695 os << " DISC ";
696 break;
7c673cae 697 default:
11fdf7f2 698 os << " UNKNOWN(" << ctx.command << ") ";
7c673cae
FG
699 break;
700 }
701
702 os << ctx.request.from << "~" << ctx.request.len << " "
11fdf7f2 703 << std::dec << ntohl(ctx.reply.error) << "]";
7c673cae
FG
704
705 return os;
706}
707
f67539c2
TL
708class NBDQuiesceWatchCtx : public librbd::QuiesceWatchCtx
709{
710public:
711 NBDQuiesceWatchCtx(NBDServer *server) : server(server) {
712 }
713
714 void handle_quiesce() override {
715 server->notify_quiesce();
716 }
717
718 void handle_unquiesce() override {
719 server->notify_unquiesce();
720 }
721
722private:
723 NBDServer *server;
724};
725
7c673cae
FG
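// Watches for image header updates so an online resize of the rbd image is
// propagated to the kernel: flush the page cache, push the new size via
// netlink NBD_CMD_RECONFIGURE or the NBD_SET_SIZE ioctl, rescan the partition
// table and invalidate the librbd cache.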
726class NBDWatchCtx : public librbd::UpdateWatchCtx
727{
728private:
729 int fd;
eafe8130
TL
730 int nbd_index;
731 bool use_netlink;
7c673cae
FG
732 librados::IoCtx &io_ctx;
733 librbd::Image &image;
734 unsigned long size;
735public:
736 NBDWatchCtx(int _fd,
eafe8130
TL
737 int _nbd_index,
738 bool _use_netlink,
7c673cae
FG
739 librados::IoCtx &_io_ctx,
740 librbd::Image &_image,
741 unsigned long _size)
742 : fd(_fd)
eafe8130
TL
743 , nbd_index(_nbd_index)
744 , use_netlink(_use_netlink)
7c673cae
FG
745 , io_ctx(_io_ctx)
746 , image(_image)
747 , size(_size)
748 { }
749
750 ~NBDWatchCtx() override {}
751
752 void handle_notify() override
753 {
754 librbd::image_info_t info;
755 if (image.stat(info, sizeof(info)) == 0) {
756 unsigned long new_size = info.size;
eafe8130 757 int ret;
7c673cae
FG
758
759 if (new_size != size) {
b32b8144 760 dout(5) << "resize detected" << dendl;
7c673cae 761 if (ioctl(fd, BLKFLSBUF, NULL) < 0)
eafe8130
TL
762 derr << "invalidate page cache failed: " << cpp_strerror(errno)
763 << dendl;
764 if (use_netlink) {
765 ret = netlink_resize(nbd_index, new_size);
766 } else {
767 ret = ioctl(fd, NBD_SET_SIZE, new_size);
768 if (ret < 0)
7c673cae 769 derr << "resize failed: " << cpp_strerror(errno) << dendl;
eafe8130
TL
770 }
771
772 if (!ret)
7c673cae 773 size = new_size;
eafe8130 774
b32b8144
FG
775 if (ioctl(fd, BLKRRPART, NULL) < 0) {
776 derr << "rescan of partition table failed: " << cpp_strerror(errno)
777 << dendl;
778 }
7c673cae 779 if (image.invalidate_cache() < 0)
eafe8130 780 derr << "invalidate rbd cache failed" << dendl;
7c673cae
FG
781 }
782 }
783 }
784};
785
11fdf7f2
TL
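// Enumerates mapped devices by walking /sys/block/nbd<N>.  The owning rbd-nbd
// process is located via the pid file exported by the kernel (or, after a
// re-attach, by scanning /proc), and its original command line is re-parsed
// to recover the Config it was mapped with.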
786class NBDListIterator {
787public:
f67539c2 788 bool get(Config *cfg) {
11fdf7f2
TL
789 while (true) {
790 std::string nbd_path = "/sys/block/nbd" + stringify(m_index);
791 if(access(nbd_path.c_str(), F_OK) != 0) {
792 return false;
793 }
794
795 *cfg = Config();
796 cfg->devpath = "/dev/nbd" + stringify(m_index++);
797
f67539c2 798 int pid;
11fdf7f2
TL
799 std::ifstream ifs;
800 ifs.open(nbd_path + "/pid", std::ifstream::in);
801 if (!ifs.is_open()) {
802 continue;
803 }
f67539c2
TL
804 ifs >> pid;
805
806 // If the rbd-nbd is re-attached the pid may store garbage
807 // here. We are sure this is the case when it is negative or
808 // zero. Then we just try to find the attached process scanning
809 // /proc fs. If it is positive we check the process with this
810 // pid first and if it is not rbd-nbd fallback to searching the
811 // attached process.
812 do {
813 if (pid <= 0) {
814 pid = find_attached(cfg->devpath);
815 if (pid <= 0) {
816 break;
817 }
818 }
11fdf7f2 819
f67539c2
TL
820 if (get_mapped_info(pid, cfg) >= 0) {
821 return true;
822 }
823 pid = -1;
824 } while (true);
11fdf7f2
TL
825 }
826 }
827
828private:
829 int m_index = 0;
f67539c2 830 std::map<int, Config> m_mapped_info_cache;
11fdf7f2
TL
831
832 int get_mapped_info(int pid, Config *cfg) {
f67539c2
TL
833 ceph_assert(!cfg->devpath.empty());
834
835 auto it = m_mapped_info_cache.find(pid);
836 if (it != m_mapped_info_cache.end()) {
837 if (it->second.devpath != cfg->devpath) {
838 return -EINVAL;
839 }
840 *cfg = it->second;
841 return 0;
842 }
843
844 m_mapped_info_cache[pid] = {};
845
11fdf7f2 846 int r;
f67539c2 847 std::string path = "/proc/" + stringify(pid) + "/comm";
11fdf7f2 848 std::ifstream ifs;
f67539c2
TL
849 std::string comm;
850 ifs.open(path.c_str(), std::ifstream::in);
851 if (!ifs.is_open())
852 return -1;
853 ifs >> comm;
854 if (comm != "rbd-nbd") {
855 return -EINVAL;
856 }
857 ifs.close();
858
859 path = "/proc/" + stringify(pid) + "/cmdline";
11fdf7f2
TL
860 std::string cmdline;
861 std::vector<const char*> args;
862
863 ifs.open(path.c_str(), std::ifstream::in);
864 if (!ifs.is_open())
865 return -1;
866 ifs >> cmdline;
867
f67539c2
TL
868 if (cmdline.empty()) {
869 return -EINVAL;
870 }
871
11fdf7f2 872 for (unsigned i = 0; i < cmdline.size(); i++) {
9f95a23c 873 char *arg = &cmdline[i];
11fdf7f2
TL
874 if (i == 0) {
875 if (strcmp(basename(arg) , "rbd-nbd") != 0) {
876 return -EINVAL;
877 }
878 } else {
879 args.push_back(arg);
880 }
881
882 while (cmdline[i] != '\0') {
883 i++;
884 }
885 }
886
887 std::ostringstream err_msg;
f67539c2
TL
888 Config c;
889 r = parse_args(args, &err_msg, &c);
11fdf7f2
TL
890 if (r < 0) {
891 return r;
892 }
893
f67539c2 894 if (c.command != Map && c.command != Attach) {
11fdf7f2
TL
895 return -ENOENT;
896 }
897
f67539c2
TL
898 c.pid = pid;
899 m_mapped_info_cache.erase(pid);
900 if (!c.devpath.empty()) {
901 m_mapped_info_cache[pid] = c;
902 if (c.devpath != cfg->devpath) {
903 return -ENOENT;
904 }
905 } else {
906 c.devpath = cfg->devpath;
907 }
908
909 *cfg = c;
11fdf7f2
TL
910 return 0;
911 }
f67539c2
TL
912
913 int find_attached(const std::string &devpath) {
914 for (auto &entry : fs::directory_iterator("/proc")) {
915 if (!fs::is_directory(entry.status())) {
916 continue;
917 }
918
919 int pid;
920 try {
921 pid = boost::lexical_cast<uint64_t>(entry.path().filename().c_str());
922 } catch (boost::bad_lexical_cast&) {
923 continue;
924 }
925
926 Config cfg;
927 cfg.devpath = devpath;
928 if (get_mapped_info(pid, &cfg) >=0 && cfg.command == Attach) {
929 return cfg.pid;
930 }
931 }
932
933 return -1;
934 }
11fdf7f2
TL
935};
936
eafe8130 937static int load_module(Config *cfg)
7c673cae 938{
eafe8130
TL
939 ostringstream param;
940 int ret;
11fdf7f2 941
eafe8130
TL
942 if (cfg->nbds_max)
943 param << "nbds_max=" << cfg->nbds_max;
944
945 if (cfg->max_part)
946 param << " max_part=" << cfg->max_part;
947
948 if (!access("/sys/module/nbd", F_OK)) {
949 if (cfg->nbds_max || cfg->set_max_part)
950 cerr << "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded"
951 << std::endl;
952 return 0;
7c673cae
FG
953 }
954
eafe8130
TL
955 ret = module_load("nbd", param.str().c_str());
956 if (ret < 0)
957 cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-ret)
7c673cae 958 << std::endl;
11fdf7f2 959
eafe8130 960 return ret;
7c673cae
FG
961}
962
963static int check_device_size(int nbd_index, unsigned long expected_size)
964{
965 // There are bugs with some older kernel versions that result in an
966 // overflow for large image sizes. This check is to ensure we are
967 // not affected.
968
969 unsigned long size = 0;
970 std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size";
971 std::ifstream ifs;
972 ifs.open(path.c_str(), std::ifstream::in);
973 if (!ifs.is_open()) {
974 cerr << "rbd-nbd: failed to open " << path << std::endl;
975 return -EINVAL;
976 }
977 ifs >> size;
978 size *= RBD_NBD_BLKSIZE;
979
980 if (size == 0) {
981 // Newer kernel versions will report real size only after nbd
982 // connect. Assume this is the case and return success.
983 return 0;
984 }
985
986 if (size != expected_size) {
987 cerr << "rbd-nbd: kernel reported invalid device size (" << size
988 << ", expected " << expected_size << ")" << std::endl;
989 return -EINVAL;
990 }
991
992 return 0;
993}
994
eafe8130
TL
995static int parse_nbd_index(const std::string& devpath)
996{
997 int index, ret;
998
999 ret = sscanf(devpath.c_str(), "/dev/nbd%d", &index);
1000 if (ret <= 0) {
1001 // ret == 0 means an early matching failure; map it to -EINVAL since callers expect a negative value.
1002 if (ret == 0)
1003 ret = -EINVAL;
1004 cerr << "rbd-nbd: invalid device path: " << devpath
1005 << " (expected /dev/nbd{num})" << std::endl;
1006 return ret;
1007 }
1008
1009 return index;
1010}
1011
f67539c2
TL
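// Legacy ioctl setup path: probe /dev/nbd0.. for a free device (or use the
// requested one), hand the socketpair fd to the kernel with NBD_SET_SOCK and
// program block size, size, flags and the optional IO timeout.  The caller
// later blocks in the NBD_DO_IT ioctl until the device is disconnected.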
1012static int try_ioctl_setup(Config *cfg, int fd, uint64_t size,
1013 uint64_t blksize, uint64_t flags)
eafe8130
TL
1014{
1015 int index = 0, r;
1016
1017 if (cfg->devpath.empty()) {
1018 char dev[64];
1019 const char *path = "/sys/module/nbd/parameters/nbds_max";
1020 int nbds_max = -1;
1021 if (access(path, F_OK) == 0) {
1022 std::ifstream ifs;
1023 ifs.open(path, std::ifstream::in);
1024 if (ifs.is_open()) {
1025 ifs >> nbds_max;
1026 ifs.close();
1027 }
1028 }
1029
1030 while (true) {
1031 snprintf(dev, sizeof(dev), "/dev/nbd%d", index);
1032
1033 nbd = open(dev, O_RDWR);
1034 if (nbd < 0) {
1035 if (errno == EPERM && nbds_max != -1 && index < (nbds_max-1)) {
1036 ++index;
1037 continue;
1038 }
1039 r = -errno;
1040 cerr << "rbd-nbd: failed to find unused device" << std::endl;
1041 goto done;
1042 }
1043
1044 r = ioctl(nbd, NBD_SET_SOCK, fd);
1045 if (r < 0) {
1046 close(nbd);
1047 ++index;
1048 continue;
1049 }
1050
1051 cfg->devpath = dev;
1052 break;
1053 }
1054 } else {
1055 r = parse_nbd_index(cfg->devpath);
1056 if (r < 0)
1057 goto done;
1058 index = r;
1059
1060 nbd = open(cfg->devpath.c_str(), O_RDWR);
1061 if (nbd < 0) {
1062 r = nbd;
1063 cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
1064 goto done;
1065 }
1066
1067 r = ioctl(nbd, NBD_SET_SOCK, fd);
1068 if (r < 0) {
1069 r = -errno;
1070 cerr << "rbd-nbd: the device " << cfg->devpath << " is busy" << std::endl;
1071 close(nbd);
1072 goto done;
1073 }
1074 }
1075
f67539c2 1076 r = ioctl(nbd, NBD_SET_BLKSIZE, blksize);
eafe8130
TL
1077 if (r < 0) {
1078 r = -errno;
f67539c2 1079 cerr << "rbd-nbd: NBD_SET_BLKSIZE failed" << std::endl;
eafe8130
TL
1080 goto close_nbd;
1081 }
1082
1083 r = ioctl(nbd, NBD_SET_SIZE, size);
1084 if (r < 0) {
f67539c2 1085 cerr << "rbd-nbd: NBD_SET_SIZE failed" << std::endl;
eafe8130
TL
1086 r = -errno;
1087 goto close_nbd;
1088 }
1089
1090 ioctl(nbd, NBD_SET_FLAGS, flags);
1091
f67539c2
TL
1092 if (cfg->io_timeout >= 0) {
1093 r = ioctl(nbd, NBD_SET_TIMEOUT, (unsigned long)cfg->io_timeout);
eafe8130
TL
1094 if (r < 0) {
1095 r = -errno;
f67539c2 1096 cerr << "rbd-nbd: failed to set IO timeout: " << cpp_strerror(r)
eafe8130
TL
1097 << std::endl;
1098 goto close_nbd;
1099 }
1100 }
1101
1102 dout(10) << "ioctl setup complete for " << cfg->devpath << dendl;
1103 nbd_index = index;
1104 return 0;
1105
1106close_nbd:
1107 if (r < 0) {
1108 ioctl(nbd, NBD_CLEAR_SOCK);
1109 cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl;
1110 }
1111 close(nbd);
1112done:
1113 return r;
1114}
1115
1116static void netlink_cleanup(struct nl_sock *sock)
1117{
1118 if (!sock)
1119 return;
1120
1121 nl_close(sock);
1122 nl_socket_free(sock);
1123}
1124
1125static struct nl_sock *netlink_init(int *id)
1126{
1127 struct nl_sock *sock;
1128 int ret;
1129
1130 sock = nl_socket_alloc();
1131 if (!sock) {
1132 cerr << "rbd-nbd: Could not allocate netlink socket." << std::endl;
1133 return NULL;
1134 }
1135
1136 ret = genl_connect(sock);
1137 if (ret < 0) {
1138 cerr << "rbd-nbd: Could not connect netlink socket. Error " << ret
1139 << std::endl;
1140 goto free_sock;
1141 }
1142
1143 *id = genl_ctrl_resolve(sock, "nbd");
1144 if (*id < 0)
1145 // nbd netlink interface not supported.
1146 goto close_sock;
1147
1148 return sock;
1149
1150close_sock:
1151 nl_close(sock);
1152free_sock:
1153 nl_socket_free(sock);
1154 return NULL;
1155}
1156
1157static int netlink_disconnect(int index)
1158{
1159 struct nl_sock *sock;
1160 struct nl_msg *msg;
1161 int ret, nl_id;
1162
1163 sock = netlink_init(&nl_id);
1164 if (!sock)
1165 // Try ioctl
1166 return 1;
1167
1168 nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);
1169
1170 msg = nlmsg_alloc();
1171 if (!msg) {
1172 cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
1173 goto free_sock;
1174 }
1175
1176 if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
1177 NBD_CMD_DISCONNECT, 0)) {
1178 cerr << "rbd-nbd: Could not setup message." << std::endl;
1179 goto nla_put_failure;
1180 }
1181
1182 NLA_PUT_U32(msg, NBD_ATTR_INDEX, index);
1183
1184 ret = nl_send_sync(sock, msg);
1185 netlink_cleanup(sock);
1186 if (ret < 0) {
1187 cerr << "rbd-nbd: netlink disconnect failed: " << nl_geterror(-ret)
1188 << std::endl;
1189 return -EIO;
1190 }
1191
1192 return 0;
1193
1194nla_put_failure:
1195 nlmsg_free(msg);
1196free_sock:
1197 netlink_cleanup(sock);
1198 return -EIO;
1199}
1200
1201static int netlink_disconnect_by_path(const std::string& devpath)
1202{
1203 int index;
1204
1205 index = parse_nbd_index(devpath);
1206 if (index < 0)
1207 return index;
1208
1209 return netlink_disconnect(index);
1210}
1211
1212static int netlink_resize(int nbd_index, uint64_t size)
1213{
1214 struct nl_sock *sock;
1215 struct nl_msg *msg;
1216 int nl_id, ret;
1217
1218 sock = netlink_init(&nl_id);
1219 if (!sock) {
1220 cerr << "rbd-nbd: Netlink interface not supported." << std::endl;
1221 return 1;
1222 }
1223
1224 nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);
1225
1226 msg = nlmsg_alloc();
1227 if (!msg) {
1228 cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
1229 goto free_sock;
1230 }
1231
1232 if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
1233 NBD_CMD_RECONFIGURE, 0)) {
1234 cerr << "rbd-nbd: Could not setup message." << std::endl;
1235 goto free_msg;
1236 }
1237
1238 NLA_PUT_U32(msg, NBD_ATTR_INDEX, nbd_index);
1239 NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);
1240
1241 ret = nl_send_sync(sock, msg);
1242 if (ret < 0) {
1243 cerr << "rbd-nbd: netlink resize failed: " << nl_geterror(ret) << std::endl;
1244 goto free_sock;
1245 }
1246
1247 netlink_cleanup(sock);
1248 dout(10) << "netlink resize complete for nbd" << nbd_index << dendl;
1249 return 0;
1250
1251nla_put_failure:
1252free_msg:
1253 nlmsg_free(msg);
1254free_sock:
1255 netlink_cleanup(sock);
1256 return -EIO;
1257}
1258
1259static int netlink_connect_cb(struct nl_msg *msg, void *arg)
1260{
1261 struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlmsg_hdr(msg));
1262 Config *cfg = (Config *)arg;
1263 struct nlattr *msg_attr[NBD_ATTR_MAX + 1];
1264 uint32_t index;
1265 int ret;
1266
1267 ret = nla_parse(msg_attr, NBD_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
1268 genlmsg_attrlen(gnlh, 0), NULL);
1269 if (ret) {
1270 cerr << "rbd-nbd: Unsupported netlink reply" << std::endl;
1271 return -NLE_MSGTYPE_NOSUPPORT;
1272 }
1273
1274 if (!msg_attr[NBD_ATTR_INDEX]) {
1275 cerr << "rbd-nbd: netlink connect reply missing device index." << std::endl;
1276 return -NLE_MSGTYPE_NOSUPPORT;
1277 }
1278
1279 index = nla_get_u32(msg_attr[NBD_ATTR_INDEX]);
1280 cfg->devpath = "/dev/nbd" + stringify(index);
1281 nbd_index = index;
1282
1283 return NL_OK;
1284}
1285
1286static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd,
f67539c2 1287 uint64_t size, uint64_t flags, bool reconnect)
eafe8130
TL
1288{
1289 struct nlattr *sock_attr;
1290 struct nlattr *sock_opt;
1291 struct nl_msg *msg;
1292 int ret;
1293
f67539c2
TL
1294 if (reconnect) {
1295 dout(10) << "netlink try reconnect for " << cfg->devpath << dendl;
1296
1297 nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL);
1298 } else {
1299 nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, netlink_connect_cb,
1300 cfg);
1301 }
eafe8130
TL
1302
1303 msg = nlmsg_alloc();
1304 if (!msg) {
1305 cerr << "rbd-nbd: Could not allocate netlink message." << std::endl;
1306 return -ENOMEM;
1307 }
1308
f67539c2
TL
1309 if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0,
1310 reconnect ? NBD_CMD_RECONFIGURE : NBD_CMD_CONNECT, 0)) {
eafe8130
TL
1311 cerr << "rbd-nbd: Could not setup message." << std::endl;
1312 goto free_msg;
1313 }
1314
1315 if (!cfg->devpath.empty()) {
1316 ret = parse_nbd_index(cfg->devpath);
1317 if (ret < 0)
1318 goto free_msg;
1319
1320 NLA_PUT_U32(msg, NBD_ATTR_INDEX, ret);
f67539c2
TL
1321 if (reconnect) {
1322 nbd_index = ret;
1323 }
eafe8130
TL
1324 }
1325
f67539c2
TL
1326 if (cfg->io_timeout >= 0)
1327 NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, cfg->io_timeout);
eafe8130
TL
1328
1329 NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);
1330 NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, RBD_NBD_BLKSIZE);
1331 NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags);
f67539c2 1332 NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, cfg->reattach_timeout);
eafe8130
TL
1333
1334 sock_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS);
1335 if (!sock_attr) {
1336 cerr << "rbd-nbd: Could not init sockets in netlink message." << std::endl;
1337 goto free_msg;
1338 }
1339
1340 sock_opt = nla_nest_start(msg, NBD_SOCK_ITEM);
1341 if (!sock_opt) {
1342 cerr << "rbd-nbd: Could not init sock in netlink message." << std::endl;
1343 goto free_msg;
1344 }
1345
1346 NLA_PUT_U32(msg, NBD_SOCK_FD, fd);
1347 nla_nest_end(msg, sock_opt);
1348 nla_nest_end(msg, sock_attr);
1349
1350 ret = nl_send_sync(sock, msg);
1351 if (ret < 0) {
1352 cerr << "rbd-nbd: netlink connect failed: " << nl_geterror(ret)
1353 << std::endl;
1354 return -EIO;
1355 }
1356
1357 dout(10) << "netlink connect complete for " << cfg->devpath << dendl;
1358 return 0;
1359
1360nla_put_failure:
1361free_msg:
1362 nlmsg_free(msg);
1363 return -EIO;
1364}
1365
f67539c2
TL
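// Netlink setup path, preferred when the kernel exposes the nbd genl family:
// NBD_CMD_CONNECT (or NBD_CMD_RECONFIGURE when re-attaching) configures the
// device without a blocking NBD_DO_IT ioctl, lets the kernel pick a free
// device index, and carries the dead-connection timeout used for re-attach.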
1366static int try_netlink_setup(Config *cfg, int fd, uint64_t size, uint64_t flags,
1367 bool reconnect)
eafe8130
TL
1368{
1369 struct nl_sock *sock;
1370 int nl_id, ret;
1371
1372 sock = netlink_init(&nl_id);
1373 if (!sock) {
1374 cerr << "rbd-nbd: Netlink interface not supported. Using ioctl interface."
1375 << std::endl;
1376 return 1;
1377 }
1378
1379 dout(10) << "netlink interface supported." << dendl;
1380
f67539c2 1381 ret = netlink_connect(cfg, sock, nl_id, fd, size, flags, reconnect);
eafe8130
TL
1382 netlink_cleanup(sock);
1383
1384 if (ret != 0)
1385 return ret;
1386
1387 nbd = open(cfg->devpath.c_str(), O_RDWR);
1388 if (nbd < 0) {
1389 cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
1390 return nbd;
1391 }
1392
1393 return 0;
1394}
1395
f67539c2
TL
1396static int run_quiesce_hook(const std::string &quiesce_hook,
1397 const std::string &devpath,
1398 const std::string &command) {
1399 dout(10) << __func__ << ": " << quiesce_hook << " " << devpath << " "
1400 << command << dendl;
1401
1402 SubProcess hook(quiesce_hook.c_str(), SubProcess::CLOSE, SubProcess::PIPE,
1403 SubProcess::PIPE);
1404 hook.add_cmd_args(devpath.c_str(), command.c_str(), NULL);
1405 bufferlist err;
1406 int r = hook.spawn();
1407 if (r < 0) {
1408 err.append("subprocess spawn failed");
1409 } else {
1410 err.read_fd(hook.get_stderr(), 16384);
1411 r = hook.join();
1412 if (r > 0) {
1413 r = -r;
1414 }
1415 }
1416 if (r < 0) {
1417 derr << __func__ << ": " << quiesce_hook << " " << devpath << " "
1418 << command << " failed: " << err.to_str() << dendl;
1419 } else {
1420 dout(10) << " succeeded: " << err.to_str() << dendl;
1421 }
1422
1423 return r;
1424}
1425
eafe8130
TL
1426static void handle_signal(int signum)
1427{
eafe8130
TL
1428 ceph_assert(signum == SIGINT || signum == SIGTERM);
1429 derr << "*** Got signal " << sig_str(signum) << " ***" << dendl;
1430
f67539c2 1431 dout(20) << __func__ << ": " << "notifying terminate" << dendl;
eafe8130 1432
f67539c2
TL
1433 ceph_assert(terminate_event_sock.is_valid());
1434 terminate_event_sock.notify();
eafe8130
TL
1435}
1436
f67539c2 1437static NBDServer *start_server(int fd, librbd::Image& image, Config *cfg)
eafe8130
TL
1438{
1439 NBDServer *server;
1440
f67539c2 1441 server = new NBDServer(fd, image, cfg);
eafe8130
TL
1442 server->start();
1443
1444 init_async_signal_handler();
1445 register_async_signal_handler(SIGHUP, sighup_handler);
1446 register_async_signal_handler_oneshot(SIGINT, handle_signal);
1447 register_async_signal_handler_oneshot(SIGTERM, handle_signal);
1448
1449 return server;
1450}
1451
1452static void run_server(Preforker& forker, NBDServer *server, bool netlink_used)
1453{
1454 if (g_conf()->daemonize) {
1455 global_init_postfork_finish(g_ceph_context);
1456 forker.daemonize();
1457 }
1458
1459 if (netlink_used)
1460 server->wait_for_disconnect();
1461 else
1462 ioctl(nbd, NBD_DO_IT);
1463
1464 unregister_async_signal_handler(SIGHUP, sighup_handler);
1465 unregister_async_signal_handler(SIGINT, handle_signal);
1466 unregister_async_signal_handler(SIGTERM, handle_signal);
1467 shutdown_async_signal_handler();
1468}
1469
f67539c2
TL
1470// Eventually it should be removed when pidfd_open is widely supported.
1471
1472static int wait_for_terminate_legacy(int pid, int timeout)
1473{
1474 for (int i = 0; ; i++) {
1475 if (kill(pid, 0) == -1) {
1476 if (errno == ESRCH) {
1477 return 0;
1478 }
1479 int r = -errno;
1480 cerr << "rbd-nbd: kill(" << pid << ", 0) failed: "
1481 << cpp_strerror(r) << std::endl;
1482 return r;
1483 }
1484 if (i >= timeout * 2) {
1485 break;
1486 }
1487 usleep(500000);
1488 }
1489
1490 cerr << "rbd-nbd: waiting for process exit timed out" << std::endl;
1491 return -ETIMEDOUT;
1492}
1493
1494// Eventually it should be replaced with glibc' pidfd_open
1495// when it is widely available.
1496
1497#ifdef __NR_pidfd_open
1498static int pidfd_open(pid_t pid, unsigned int flags)
1499{
1500 return syscall(__NR_pidfd_open, pid, flags);
1501}
1502#else
1503static int pidfd_open(pid_t pid, unsigned int flags)
1504{
1505 errno = ENOSYS;
1506 return -1;
1507}
1508#endif
1509
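// Wait up to `timeout` seconds for `pid` to exit: prefer a pollable pidfd
// (pidfd_open, Linux >= 5.3) and fall back to periodically probing with
// kill(pid, 0) on kernels without the syscall.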
1510static int wait_for_terminate(int pid, int timeout)
1511{
1512 int fd = pidfd_open(pid, 0);
1513 if (fd == -1) {
1514 if (errno == ENOSYS) {
1515 return wait_for_terminate_legacy(pid, timeout);
1516 }
1517 if (errno == ESRCH) {
1518 return 0;
1519 }
1520 int r = -errno;
1521 cerr << "rbd-nbd: pidfd_open(" << pid << ") failed: "
1522 << cpp_strerror(r) << std::endl;
1523 return r;
1524 }
1525
1526 struct pollfd poll_fds[1];
1527 memset(poll_fds, 0, sizeof(struct pollfd));
1528 poll_fds[0].fd = fd;
1529 poll_fds[0].events = POLLIN;
1530
1531 int r = poll(poll_fds, 1, timeout * 1000);
1532 if (r == -1) {
1533 r = -errno;
1534 cerr << "rbd-nbd: failed to poll rbd-nbd process: " << cpp_strerror(r)
1535 << std::endl;
1536 goto done;
1537 } else {
1538 r = 0;
1539 }
1540
1541 if ((poll_fds[0].revents & POLLIN) == 0) {
1542 cerr << "rbd-nbd: waiting for process exit timed out" << std::endl;
1543 r = -ETIMEDOUT;
1544 }
1545
1546done:
1547 close(fd);
1548
1549 return r;
1550}
1551
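// Map workflow: daemonize via Preforker, connect to the cluster and open the
// image (optionally loading LUKS encryption), create a socketpair, hand one
// end to the kernel through the netlink or ioctl path and serve the other end
// with an NBDServer until the device is disconnected or a signal arrives.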
1552static int do_map(int argc, const char *argv[], Config *cfg, bool reconnect)
7c673cae
FG
1553{
1554 int r;
1555
1556 librados::Rados rados;
1557 librbd::RBD rbd;
1558 librados::IoCtx io_ctx;
1559 librbd::Image image;
1560
1561 int read_only = 0;
1562 unsigned long flags;
1563 unsigned long size;
f67539c2 1564 unsigned long blksize = RBD_NBD_BLKSIZE;
eafe8130 1565 bool use_netlink;
7c673cae 1566
7c673cae
FG
1567 int fd[2];
1568
1569 librbd::image_info_t info;
1570
1571 Preforker forker;
eafe8130 1572 NBDServer *server;
7c673cae
FG
1573
1574 vector<const char*> args;
1575 argv_to_vec(argc, argv, args);
11fdf7f2
TL
1576 if (args.empty()) {
1577 cerr << argv[0] << ": -h or --help for usage" << std::endl;
1578 exit(1);
1579 }
1580 if (ceph_argparse_need_usage(args)) {
1581 usage();
1582 exit(0);
1583 }
7c673cae
FG
1584
1585 auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
1586 CODE_ENVIRONMENT_DAEMON,
1587 CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS);
11fdf7f2 1588 g_ceph_context->_conf.set_val_or_die("pid_file", "");
7c673cae
FG
1589
1590 if (global_init_prefork(g_ceph_context) >= 0) {
1591 std::string err;
1592 r = forker.prefork(err);
1593 if (r < 0) {
1594 cerr << err << std::endl;
1595 return r;
1596 }
7c673cae 1597 if (forker.is_parent()) {
7c673cae
FG
1598 if (forker.parent_wait(err) != 0) {
1599 return -ENXIO;
1600 }
1601 return 0;
1602 }
28e407b8 1603 global_init_postfork_start(g_ceph_context);
7c673cae
FG
1604 }
1605
1606 common_init_finish(g_ceph_context);
1607 global_init_chdir(g_ceph_context);
1608
1609 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) {
1610 r = -errno;
1611 goto close_ret;
1612 }
1613
94b18763
FG
1614 r = rados.init_with_context(g_ceph_context);
1615 if (r < 0)
1616 goto close_fd;
1617
1618 r = rados.connect();
1619 if (r < 0)
1620 goto close_fd;
1621
1622 r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx);
1623 if (r < 0)
1624 goto close_fd;
1625
11fdf7f2
TL
1626 io_ctx.set_namespace(cfg->nsname);
1627
94b18763
FG
1628 r = rbd.open(io_ctx, image, cfg->imgname.c_str());
1629 if (r < 0)
1630 goto close_fd;
1631
1632 if (cfg->exclusive) {
1633 r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE);
1634 if (r < 0) {
1635 cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r)
1636 << std::endl;
1637 goto close_fd;
1638 }
1639 }
1640
1641 if (!cfg->snapname.empty()) {
1642 r = image.snap_set(cfg->snapname.c_str());
1643 if (r < 0)
1644 goto close_fd;
1645 }
1646
f67539c2
TL
1647 if (cfg->encryption_format.has_value()) {
1648 if (!cfg->encryption_passphrase_file.has_value()) {
1649 r = -EINVAL;
1650 cerr << "rbd-nbd: missing encryption-passphrase-file" << std::endl;
1651 goto close_fd;
1652 }
1653 std::ifstream file(cfg->encryption_passphrase_file.value().c_str());
1654 if (file.fail()) {
1655 r = -errno;
1656 std::cerr << "rbd-nbd: unable to open passphrase file: "
1657 << cpp_strerror(errno) << std::endl;
1658 goto close_fd;
1659 }
1660 std::string passphrase((std::istreambuf_iterator<char>(file)),
1661 (std::istreambuf_iterator<char>()));
1662 auto sg = make_scope_guard([&] {
1663 ceph_memzero_s(&passphrase[0], passphrase.size(), passphrase.size()); });
1664 file.close();
1665 if (!passphrase.empty() && passphrase[passphrase.length() - 1] == '\n') {
1666 passphrase.erase(passphrase.length() - 1);
1667 }
1668
1669 switch (cfg->encryption_format.value()) {
1670 case RBD_ENCRYPTION_FORMAT_LUKS1: {
1671 librbd::encryption_luks1_format_options_t opts = {};
1672 opts.passphrase = passphrase;
1673 r = image.encryption_load(
1674 RBD_ENCRYPTION_FORMAT_LUKS1, &opts, sizeof(opts));
1675 break;
1676 }
1677 case RBD_ENCRYPTION_FORMAT_LUKS2: {
1678 librbd::encryption_luks2_format_options_t opts = {};
1679 opts.passphrase = passphrase;
1680 r = image.encryption_load(
1681 RBD_ENCRYPTION_FORMAT_LUKS2, &opts, sizeof(opts));
1682 blksize = 4096;
1683 break;
1684 }
1685 default:
1686 r = -ENOTSUP;
1687 cerr << "rbd-nbd: unsupported encryption format" << std::endl;
1688 goto close_fd;
1689 }
1690
1691 if (r != 0) {
1692 cerr << "rbd-nbd: failed to load encryption: " << cpp_strerror(r)
1693 << std::endl;
1694 goto close_fd;
1695 }
1696 }
1697
94b18763
FG
1698 r = image.stat(info, sizeof(info));
1699 if (r < 0)
1700 goto close_fd;
1701
7c673cae 1702 flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_TRIM | NBD_FLAG_HAS_FLAGS;
31f18b77 1703 if (!cfg->snapname.empty() || cfg->readonly) {
7c673cae
FG
1704 flags |= NBD_FLAG_READ_ONLY;
1705 read_only = 1;
1706 }
1707
7c673cae
FG
1708 if (info.size > ULONG_MAX) {
1709 r = -EFBIG;
1adf2230
AA
1710 cerr << "rbd-nbd: image is too large (" << byte_u_t(info.size)
1711 << ", max is " << byte_u_t(ULONG_MAX) << ")" << std::endl;
eafe8130 1712 goto close_fd;
7c673cae
FG
1713 }
1714
1715 size = info.size;
1716
eafe8130
TL
1717 r = load_module(cfg);
1718 if (r < 0)
1719 goto close_fd;
1720
f67539c2 1721 server = start_server(fd[1], image, cfg);
eafe8130 1722
f67539c2 1723 use_netlink = cfg->try_netlink || reconnect;
eafe8130 1724 if (use_netlink) {
f67539c2 1725 r = try_netlink_setup(cfg, fd[0], size, flags, reconnect);
eafe8130
TL
1726 if (r < 0) {
1727 goto free_server;
1728 } else if (r == 1) {
1729 use_netlink = false;
1730 }
7c673cae
FG
1731 }
1732
eafe8130 1733 if (!use_netlink) {
f67539c2 1734 r = try_ioctl_setup(cfg, fd[0], size, blksize, flags);
eafe8130
TL
1735 if (r < 0)
1736 goto free_server;
7c673cae
FG
1737 }
1738
eafe8130
TL
1739 r = check_device_size(nbd_index, size);
1740 if (r < 0)
1741 goto close_nbd;
7c673cae
FG
1742
1743 r = ioctl(nbd, BLKROSET, (unsigned long) &read_only);
1744 if (r < 0) {
1745 r = -errno;
1746 goto close_nbd;
1747 }
1748
1749 {
f67539c2
TL
1750 NBDQuiesceWatchCtx quiesce_watch_ctx(server);
1751 if (cfg->quiesce) {
1752 r = image.quiesce_watch(&quiesce_watch_ctx,
1753 &server->quiesce_watch_handle);
1754 if (r < 0) {
1755 goto close_nbd;
1756 }
1757 }
1758
7c673cae
FG
1759 uint64_t handle;
1760
eafe8130
TL
1761 NBDWatchCtx watch_ctx(nbd, nbd_index, use_netlink, io_ctx, image,
1762 info.size);
7c673cae
FG
1763 r = image.update_watch(&watch_ctx, &handle);
1764 if (r < 0)
1765 goto close_nbd;
1766
31f18b77 1767 cout << cfg->devpath << std::endl;
7c673cae 1768
eafe8130 1769 run_server(forker, server, use_netlink);
7c673cae 1770
f67539c2
TL
1771 if (cfg->quiesce) {
1772 r = image.quiesce_unwatch(server->quiesce_watch_handle);
1773 ceph_assert(r == 0);
1774 }
1775
7c673cae 1776 r = image.update_unwatch(handle);
11fdf7f2 1777 ceph_assert(r == 0);
7c673cae
FG
1778 }
1779
1780close_nbd:
1781 if (r < 0) {
eafe8130
TL
1782 if (use_netlink) {
1783 netlink_disconnect(nbd_index);
1784 } else {
1785 ioctl(nbd, NBD_CLEAR_SOCK);
1786 cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r)
1787 << std::endl;
1788 }
7c673cae
FG
1789 }
1790 close(nbd);
eafe8130
TL
1791free_server:
1792 delete server;
7c673cae
FG
1793close_fd:
1794 close(fd[0]);
1795 close(fd[1]);
1796close_ret:
1797 image.close();
1798 io_ctx.close();
1799 rados.shutdown();
1800
1801 forker.exit(r < 0 ? EXIT_FAILURE : 0);
1802 // Unreachable;
1803 return r;
1804}
1805
f67539c2 1806static int do_detach(Config *cfg)
7c673cae 1807{
f67539c2
TL
1808 int r = kill(cfg->pid, SIGTERM);
1809 if (r == -1) {
1810 r = -errno;
1811 cerr << "rbd-nbd: failed to terminate " << cfg->pid << ": "
1812 << cpp_strerror(r) << std::endl;
1813 return r;
1814 }
1815
1816 return wait_for_terminate(cfg->pid, cfg->reattach_timeout);
1817}
eafe8130 1818
f67539c2
TL
1819static int do_unmap(Config *cfg)
1820{
eafe8130
TL
1821 /*
1822 * The netlink disconnect call supports devices setup with netlink or ioctl,
1823 * so we always try that first.
1824 */
f67539c2
TL
1825 int r = netlink_disconnect_by_path(cfg->devpath);
1826 if (r < 0) {
eafe8130 1827 return r;
7c673cae
FG
1828 }
1829
f67539c2
TL
1830 if (r == 1) {
1831 int nbd = open(cfg->devpath.c_str(), O_RDWR);
1832 if (nbd < 0) {
1833 cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl;
1834 return nbd;
1835 }
1836
1837 r = ioctl(nbd, NBD_DISCONNECT);
1838 if (r < 0) {
eafe8130 1839 cerr << "rbd-nbd: the device is not used" << std::endl;
f67539c2
TL
1840 }
1841
1842 close(nbd);
1843
1844 if (r < 0) {
1845 return r;
1846 }
7c673cae
FG
1847 }
1848
f67539c2
TL
1849 if (cfg->pid > 0) {
1850 r = wait_for_terminate(cfg->pid, cfg->reattach_timeout);
1851 }
1852
1853 return 0;
7c673cae
FG
1854}
1855
11fdf7f2
TL
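// Accepts specs of the form [pool/[namespace/]]image[@snap], e.g. "myimage",
// "rbd/myimage@snap1" or "rbd/ns1/myimage" (names illustrative).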
1856static int parse_imgpath(const std::string &imgpath, Config *cfg,
1857 std::ostream *err_msg) {
1858 std::regex pattern("^(?:([^/]+)/(?:([^/@]+)/)?)?([^@]+)(?:@([^/@]+))?$");
1859 std::smatch match;
1860 if (!std::regex_match(imgpath, match, pattern)) {
7c673cae
FG
1861 std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl;
1862 return -EINVAL;
1863 }
1864
31f18b77
FG
1865 if (match[1].matched) {
1866 cfg->poolname = match[1];
1867 }
7c673cae 1868
11fdf7f2
TL
1869 if (match[2].matched) {
1870 cfg->nsname = match[2];
1871 }
1872
1873 cfg->imgname = match[3];
7c673cae 1874
11fdf7f2
TL
1875 if (match[4].matched)
1876 cfg->snapname = match[4];
7c673cae
FG
1877
1878 return 0;
1879}
1880
11fdf7f2 1881static int do_list_mapped_devices(const std::string &format, bool pretty_format)
7c673cae 1882{
11fdf7f2
TL
1883 bool should_print = false;
1884 std::unique_ptr<ceph::Formatter> f;
1885 TextTable tbl;
7c673cae 1886
11fdf7f2
TL
1887 if (format == "json") {
1888 f.reset(new JSONFormatter(pretty_format));
1889 } else if (format == "xml") {
1890 f.reset(new XMLFormatter(pretty_format));
1891 } else if (!format.empty() && format != "plain") {
1892 std::cerr << "rbd-nbd: invalid output format: " << format << std::endl;
1893 return -EINVAL;
7c673cae
FG
1894 }
1895
11fdf7f2
TL
1896 if (f) {
1897 f->open_array_section("devices");
1898 } else {
1899 tbl.define_column("id", TextTable::LEFT, TextTable::LEFT);
1900 tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT);
1901 tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT);
1902 tbl.define_column("image", TextTable::LEFT, TextTable::LEFT);
1903 tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT);
1904 tbl.define_column("device", TextTable::LEFT, TextTable::LEFT);
7c673cae 1905 }
7c673cae 1906
11fdf7f2
TL
1907 Config cfg;
1908 NBDListIterator it;
f67539c2 1909 while (it.get(&cfg)) {
11fdf7f2
TL
1910 if (f) {
1911 f->open_object_section("device");
f67539c2 1912 f->dump_int("id", cfg.pid);
11fdf7f2
TL
1913 f->dump_string("pool", cfg.poolname);
1914 f->dump_string("namespace", cfg.nsname);
1915 f->dump_string("image", cfg.imgname);
1916 f->dump_string("snap", cfg.snapname);
1917 f->dump_string("device", cfg.devpath);
1918 f->close_section();
1919 } else {
31f18b77
FG
1920 should_print = true;
1921 if (cfg.snapname.empty()) {
1922 cfg.snapname = "-";
1923 }
f67539c2
TL
1924 tbl << cfg.pid << cfg.poolname << cfg.nsname << cfg.imgname
1925 << cfg.snapname << cfg.devpath << TextTable::endrow;
31f18b77 1926 }
31f18b77
FG
1927 }
1928
11fdf7f2
TL
1929 if (f) {
1930 f->close_section(); // devices
1931 f->flush(std::cout);
1932 }
31f18b77 1933 if (should_print) {
11fdf7f2 1934 std::cout << tbl;
31f18b77
FG
1935 }
1936 return 0;
1937}
1938
f67539c2 1939static bool find_mapped_dev_by_spec(Config *cfg, int skip_pid=-1) {
11fdf7f2
TL
1940 Config c;
1941 NBDListIterator it;
f67539c2
TL
1942 while (it.get(&c)) {
1943 if (c.pid != skip_pid &&
1944 c.poolname == cfg->poolname && c.nsname == cfg->nsname &&
1945 c.imgname == cfg->imgname && c.snapname == cfg->snapname &&
1946 (cfg->devpath.empty() || c.devpath == cfg->devpath)) {
11fdf7f2
TL
1947 *cfg = c;
1948 return true;
1949 }
1950 }
1951 return false;
1952}
1953
f67539c2
TL
1954 static bool find_proc_by_dev(Config *cfg) {
1955 Config c;
1956 NBDListIterator it;
1957 while (it.get(&c)) {
1958 if (c.devpath == cfg->devpath) {
1959 *cfg = c;
1960 return true;
1961 }
1962 }
1963 return false;
1964}
11fdf7f2
TL
1965
1966static int parse_args(vector<const char*>& args, std::ostream *err_msg,
f67539c2 1967 Config *cfg) {
181888fb
FG
1968 std::string conf_file_list;
1969 std::string cluster;
1970 CephInitParameters iparams = ceph_argparse_early_args(
1971 args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list);
7c673cae 1972
11fdf7f2
TL
1973 ConfigProxy config{false};
1974 config->name = iparams.name;
1975 config->cluster = cluster;
181888fb
FG
1976
1977 if (!conf_file_list.empty()) {
1978 config.parse_config_files(conf_file_list.c_str(), nullptr, 0);
1979 } else {
1980 config.parse_config_files(nullptr, nullptr, 0);
1981 }
11fdf7f2 1982 config.parse_env(CEPH_ENTITY_TYPE_CLIENT);
31f18b77 1983 config.parse_argv(args);
181888fb
FG
1984 cfg->poolname = config.get_val<std::string>("rbd_default_pool");
1985
1986 std::vector<const char*>::iterator i;
1987 std::ostringstream err;
f67539c2 1988 std::string arg_value;
31f18b77 1989
7c673cae
FG
1990 for (i = args.begin(); i != args.end(); ) {
1991 if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
224ce89b
WB
1992 return HELP_INFO;
1993 } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) {
1994 return VERSION_INFO;
31f18b77 1995 } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) {
f67539c2
TL
1996 } else if (ceph_argparse_witharg(args, i, &cfg->io_timeout, err,
1997 "--io-timeout", (char *)NULL)) {
1998 if (!err.str().empty()) {
1999 *err_msg << "rbd-nbd: " << err.str();
2000 return -EINVAL;
2001 }
2002 if (cfg->io_timeout < 0) {
2003 *err_msg << "rbd-nbd: Invalid argument for io-timeout!";
2004 return -EINVAL;
2005 }
31f18b77 2006 } else if (ceph_argparse_witharg(args, i, &cfg->nbds_max, err, "--nbds_max", (char *)NULL)) {
7c673cae 2007 if (!err.str().empty()) {
31f18b77
FG
2008 *err_msg << "rbd-nbd: " << err.str();
2009 return -EINVAL;
7c673cae 2010 }
31f18b77
FG
2011 if (cfg->nbds_max < 0) {
2012 *err_msg << "rbd-nbd: Invalid argument for nbds_max!";
2013 return -EINVAL;
7c673cae 2014 }
31f18b77 2015 } else if (ceph_argparse_witharg(args, i, &cfg->max_part, err, "--max_part", (char *)NULL)) {
7c673cae 2016 if (!err.str().empty()) {
31f18b77
FG
2017 *err_msg << "rbd-nbd: " << err.str();
2018 return -EINVAL;
7c673cae 2019 }
31f18b77
FG
2020 if ((cfg->max_part < 0) || (cfg->max_part > 255)) {
2021 *err_msg << "rbd-nbd: Invalid argument for max_part(0~255)!";
2022 return -EINVAL;
7c673cae 2023 }
31f18b77 2024 cfg->set_max_part = true;
f67539c2
TL
2025 } else if (ceph_argparse_flag(args, i, "--quiesce", (char *)NULL)) {
2026 cfg->quiesce = true;
2027 } else if (ceph_argparse_witharg(args, i, &cfg->quiesce_hook,
2028 "--quiesce-hook", (char *)NULL)) {
7c673cae 2029 } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) {
31f18b77 2030 cfg->readonly = true;
f67539c2
TL
2031 } else if (ceph_argparse_witharg(args, i, &cfg->reattach_timeout, err,
2032 "--reattach-timeout", (char *)NULL)) {
2033 if (!err.str().empty()) {
2034 *err_msg << "rbd-nbd: " << err.str();
2035 return -EINVAL;
2036 }
2037 if (cfg->reattach_timeout < 0) {
2038 *err_msg << "rbd-nbd: Invalid argument for reattach-timeout!";
2039 return -EINVAL;
2040 }
7c673cae 2041 } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) {
31f18b77 2042 cfg->exclusive = true;
f67539c2
TL
2043 } else if (ceph_argparse_witharg(args, i, &cfg->io_timeout, err,
2044 "--timeout", (char *)NULL)) {
11fdf7f2
TL
2045 if (!err.str().empty()) {
2046 *err_msg << "rbd-nbd: " << err.str();
2047 return -EINVAL;
2048 }
f67539c2 2049 if (cfg->io_timeout < 0) {
11fdf7f2
TL
2050 *err_msg << "rbd-nbd: Invalid argument for timeout!";
2051 return -EINVAL;
2052 }
f67539c2 2053 *err_msg << "rbd-nbd: --timeout is deprecated (use --io-timeout)";
11fdf7f2
TL
2054 } else if (ceph_argparse_witharg(args, i, &cfg->format, err, "--format",
2055 (char *)NULL)) {
2056 } else if (ceph_argparse_flag(args, i, "--pretty-format", (char *)NULL)) {
2057 cfg->pretty_format = true;
eafe8130
TL
2058 } else if (ceph_argparse_flag(args, i, "--try-netlink", (char *)NULL)) {
2059 cfg->try_netlink = true;
f67539c2
TL
2060 } else if (ceph_argparse_witharg(args, i, &arg_value,
2061 "--encryption-format", (char *)NULL)) {
2062 if (arg_value == "luks1") {
2063 cfg->encryption_format =
2064 std::make_optional(RBD_ENCRYPTION_FORMAT_LUKS1);
2065 } else if (arg_value == "luks2") {
2066 cfg->encryption_format =
2067 std::make_optional(RBD_ENCRYPTION_FORMAT_LUKS2);
2068 } else {
2069 *err_msg << "rbd-nbd: Invalid encryption format";
2070 return -EINVAL;
2071 }
2072 } else if (ceph_argparse_witharg(args, i, &arg_value,
2073 "--encryption-passphrase-file",
2074 (char *)NULL)) {
2075 cfg->encryption_passphrase_file = std::make_optional(arg_value);
7c673cae
FG
2076 } else {
2077 ++i;
2078 }
2079 }
2080
11fdf7f2 2081 Command cmd = None;
7c673cae
FG
2082 if (args.begin() != args.end()) {
2083 if (strcmp(*args.begin(), "map") == 0) {
f67539c2 2084 cmd = Map;
7c673cae 2085 } else if (strcmp(*args.begin(), "unmap") == 0) {
f67539c2
TL
2086 cmd = Unmap;
2087 } else if (strcmp(*args.begin(), "attach") == 0) {
2088 cmd = Attach;
2089 } else if (strcmp(*args.begin(), "detach") == 0) {
2090 cmd = Detach;
7c673cae
FG
2091 } else if (strcmp(*args.begin(), "list-mapped") == 0) {
2092 cmd = List;
2093 } else {
31f18b77
FG
2094 *err_msg << "rbd-nbd: unknown command: " << *args.begin();
2095 return -EINVAL;
7c673cae
FG
2096 }
2097 args.erase(args.begin());
2098 }
2099
2100 if (cmd == None) {
31f18b77
FG
2101 *err_msg << "rbd-nbd: must specify command";
2102 return -EINVAL;
7c673cae
FG
2103 }
2104
2105 switch (cmd) {
f67539c2
TL
2106 case Attach:
2107 if (cfg->devpath.empty()) {
2108 *err_msg << "rbd-nbd: must specify device to attach";
2109 return -EINVAL;
2110 }
2111 [[fallthrough]];
2112 case Map:
7c673cae 2113 if (args.begin() == args.end()) {
31f18b77
FG
2114 *err_msg << "rbd-nbd: must specify image-or-snap-spec";
2115 return -EINVAL;
7c673cae 2116 }
11fdf7f2 2117 if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) {
31f18b77 2118 return -EINVAL;
11fdf7f2 2119 }
7c673cae
FG
2120 args.erase(args.begin());
2121 break;
f67539c2
TL
2122 case Detach:
2123 case Unmap:
7c673cae 2124 if (args.begin() == args.end()) {
11fdf7f2 2125 *err_msg << "rbd-nbd: must specify nbd device or image-or-snap-spec";
31f18b77 2126 return -EINVAL;
7c673cae 2127 }
11fdf7f2
TL
2128 if (boost::starts_with(*args.begin(), "/dev/")) {
2129 cfg->devpath = *args.begin();
2130 } else {
2131 if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) {
2132 return -EINVAL;
2133 }
11fdf7f2 2134 }
7c673cae
FG
2135 args.erase(args.begin());
2136 break;
2137 default:
2138 //shut up gcc;
2139 break;
2140 }
2141
2142 if (args.begin() != args.end()) {
31f18b77
FG
2143 *err_msg << "rbd-nbd: unknown args: " << *args.begin();
2144 return -EINVAL;
2145 }
2146
f67539c2 2147 cfg->command = cmd;
31f18b77
FG
2148 return 0;
2149}
2150
2151static int rbd_nbd(int argc, const char *argv[])
2152{
2153 int r;
2154 Config cfg;
2155 vector<const char*> args;
2156 argv_to_vec(argc, argv, args);
2157
2158 std::ostringstream err_msg;
f67539c2 2159 r = parse_args(args, &err_msg, &cfg);
224ce89b 2160 if (r == HELP_INFO) {
31f18b77 2161 usage();
11fdf7f2 2162 return 0;
224ce89b
WB
2163 } else if (r == VERSION_INFO) {
2164 std::cout << pretty_version_to_str() << std::endl;
2165 return 0;
11fdf7f2 2166 } else if (r < 0) {
31f18b77
FG
2167 cerr << err_msg.str() << std::endl;
2168 return r;
7c673cae
FG
2169 }
2170
f67539c2
TL
2171 if (!err_msg.str().empty()) {
2172 cerr << err_msg.str() << std::endl;
2173 }
2174
2175 switch (cfg.command) {
2176 case Attach:
2177 ceph_assert(!cfg.devpath.empty());
2178 if (find_mapped_dev_by_spec(&cfg, getpid())) {
2179 cerr << "rbd-nbd: " << cfg.devpath << " has process " << cfg.pid
2180 << " connected" << std::endl;
2181 return -EBUSY;
2182 }
2183 [[fallthrough]];
2184 case Map:
31f18b77 2185 if (cfg.imgname.empty()) {
7c673cae 2186 cerr << "rbd-nbd: image name was not specified" << std::endl;
31f18b77 2187 return -EINVAL;
7c673cae
FG
2188 }
2189
f67539c2
TL
2190 r = do_map(argc, argv, &cfg, cfg.command == Attach);
2191 if (r < 0)
2192 return -EINVAL;
2193 break;
2194 case Detach:
2195 if (cfg.devpath.empty()) {
2196 if (!find_mapped_dev_by_spec(&cfg)) {
2197 cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped"
2198 << std::endl;
2199 return -ENOENT;
2200 }
2201 } else if (!find_proc_by_dev(&cfg)) {
2202 cerr << "rbd-nbd: no process attached to " << cfg.devpath << " found"
2203 << std::endl;
2204 return -ENOENT;
2205 }
2206 r = do_detach(&cfg);
7c673cae 2207 if (r < 0)
31f18b77 2208 return -EINVAL;
7c673cae 2209 break;
f67539c2
TL
2210 case Unmap:
2211 if (cfg.devpath.empty()) {
2212 if (!find_mapped_dev_by_spec(&cfg)) {
2213 cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped"
2214 << std::endl;
2215 return -ENOENT;
2216 }
2217 } else if (!find_proc_by_dev(&cfg)) {
2218 // still try to send disconnect to the device
2219 }
eafe8130 2220 r = do_unmap(&cfg);
7c673cae 2221 if (r < 0)
31f18b77 2222 return -EINVAL;
7c673cae
FG
2223 break;
2224 case List:
11fdf7f2 2225 r = do_list_mapped_devices(cfg.format, cfg.pretty_format);
7c673cae 2226 if (r < 0)
31f18b77 2227 return -EINVAL;
7c673cae
FG
2228 break;
2229 default:
2230 usage();
d2e6a577 2231 break;
7c673cae
FG
2232 }
2233
2234 return 0;
2235}
2236
2237int main(int argc, const char *argv[])
2238{
31f18b77
FG
2239 int r = rbd_nbd(argc, argv);
2240 if (r < 0) {
2241 return EXIT_FAILURE;
2242 }
2243 return 0;
7c673cae 2244}