]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | /* | |
5 | * rbd-nbd - RBD in userspace | |
6 | * | |
7 | * Copyright (C) 2015 - 2016 Kylin Corporation | |
8 | * | |
9 | * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com> | |
10 | * Li Wang <li.wang@kylin-cloud.com> | |
11 | * | |
12 | * This is free software; you can redistribute it and/or | |
13 | * modify it under the terms of the GNU Lesser General Public | |
14 | * License version 2.1, as published by the Free Software | |
15 | * Foundation. See file COPYING. | |
16 | * | |
17 | */ | |
18 | ||
19 | #include "include/int_types.h" | |
20 | ||
21 | #include <stdio.h> | |
22 | #include <stdlib.h> | |
23 | #include <stddef.h> | |
24 | #include <errno.h> | |
25 | #include <fcntl.h> | |
26 | #include <string.h> | |
27 | #include <sys/types.h> | |
28 | #include <unistd.h> | |
29 | ||
30 | #include <linux/nbd.h> | |
31 | #include <linux/fs.h> | |
32 | #include <sys/ioctl.h> | |
33 | #include <sys/socket.h> | |
34 | ||
35 | #include <iostream> | |
36 | #include <fstream> | |
37 | #include <boost/regex.hpp> | |
38 | ||
39 | #include "mon/MonClient.h" | |
40 | #include "common/config.h" | |
41 | #include "common/dout.h" | |
42 | ||
43 | #include "common/errno.h" | |
44 | #include "common/module.h" | |
45 | #include "common/safe_io.h" | |
31f18b77 | 46 | #include "common/TextTable.h" |
7c673cae FG |
47 | #include "common/ceph_argparse.h" |
48 | #include "common/Preforker.h" | |
224ce89b | 49 | #include "common/version.h" |
7c673cae FG |
50 | #include "global/global_init.h" |
51 | #include "global/signal_handler.h" | |
52 | ||
53 | #include "include/rados/librados.hpp" | |
54 | #include "include/rbd/librbd.hpp" | |
55 | #include "include/stringify.h" | |
56 | #include "include/xlist.h" | |
57 | ||
58 | #define dout_context g_ceph_context | |
59 | #define dout_subsys ceph_subsys_rbd | |
60 | #undef dout_prefix | |
61 | #define dout_prefix *_dout << "rbd-nbd: " | |
62 | ||
31f18b77 FG |
// Settings for a single rbd-nbd invocation, filled in by parse_args().
struct Config {
  // Values forwarded as nbd kernel module parameters when open_device()
  // has to load the module itself.
  int nbds_max{0};
  int max_part{255};

  bool exclusive{false};     // --exclusive
  bool readonly{false};      // --read-only
  bool set_max_part{false};  // set once --max_part was given explicitly

  std::string poolname;
  std::string imgname;
  std::string snapname;
  std::string devpath;
};
76 | ||
7c673cae FG |
77 | static void usage() |
78 | { | |
79 | std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map an image to nbd device\n" | |
80 | << " unmap <device path> Unmap nbd device\n" | |
81 | << " list-mapped List mapped nbd devices\n" | |
82 | << "Options:\n" | |
83 | << " --device <device path> Specify nbd device path\n" | |
84 | << " --read-only Map read-only\n" | |
85 | << " --nbds_max <limit> Override for module param nbds_max\n" | |
86 | << " --max_part <limit> Override for module param max_part\n" | |
87 | << " --exclusive Forbid writes by other clients\n" | |
88 | << std::endl; | |
89 | generic_server_usage(); | |
90 | } | |
91 | ||
7c673cae FG |
92 | static int nbd = -1; |
93 | ||
31f18b77 FG |
94 | static enum { |
95 | None, | |
96 | Connect, | |
97 | Disconnect, | |
98 | List | |
99 | } cmd = None; | |
100 | ||
7c673cae FG |
101 | #define RBD_NBD_BLKSIZE 512UL |
102 | ||
224ce89b WB |
103 | #define HELP_INFO 1 |
104 | #define VERSION_INFO 2 | |
105 | ||
7c673cae FG |
106 | #ifdef CEPH_BIG_ENDIAN |
107 | #define ntohll(a) (a) | |
108 | #elif defined(CEPH_LITTLE_ENDIAN) | |
109 | #define ntohll(a) swab(a) | |
110 | #else | |
111 | #error "Could not determine endianess" | |
112 | #endif | |
113 | #define htonll(a) ntohll(a) | |
114 | ||
31f18b77 FG |
115 | static int parse_args(vector<const char*>& args, std::ostream *err_msg, Config *cfg); |
116 | ||
7c673cae FG |
117 | static void handle_signal(int signum) |
118 | { | |
119 | assert(signum == SIGINT || signum == SIGTERM); | |
120 | derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; | |
121 | dout(20) << __func__ << ": " << "sending NBD_DISCONNECT" << dendl; | |
122 | if (ioctl(nbd, NBD_DISCONNECT) < 0) { | |
123 | derr << "rbd-nbd: disconnect failed: " << cpp_strerror(errno) << dendl; | |
124 | } else { | |
125 | dout(20) << __func__ << ": " << "disconnected" << dendl; | |
126 | } | |
127 | } | |
128 | ||
129 | class NBDServer | |
130 | { | |
131 | private: | |
132 | int fd; | |
133 | librbd::Image ℑ | |
134 | ||
135 | public: | |
136 | NBDServer(int _fd, librbd::Image& _image) | |
137 | : fd(_fd) | |
138 | , image(_image) | |
139 | , lock("NBDServer::Locker") | |
140 | , reader_thread(*this, &NBDServer::reader_entry) | |
141 | , writer_thread(*this, &NBDServer::writer_entry) | |
142 | , started(false) | |
143 | {} | |
144 | ||
145 | private: | |
146 | std::atomic<bool> terminated = { false }; | |
147 | ||
148 | void shutdown() | |
149 | { | |
150 | bool expected = false; | |
151 | if (terminated.compare_exchange_strong(expected, true)) { | |
152 | ::shutdown(fd, SHUT_RDWR); | |
153 | ||
154 | Mutex::Locker l(lock); | |
155 | cond.Signal(); | |
156 | } | |
157 | } | |
158 | ||
159 | struct IOContext | |
160 | { | |
161 | xlist<IOContext*>::item item; | |
162 | NBDServer *server; | |
163 | struct nbd_request request; | |
164 | struct nbd_reply reply; | |
165 | bufferlist data; | |
166 | int command; | |
167 | ||
168 | IOContext() | |
169 | : item(this) | |
170 | {} | |
171 | }; | |
172 | ||
173 | friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); | |
174 | ||
175 | Mutex lock; | |
176 | Cond cond; | |
177 | xlist<IOContext*> io_pending; | |
178 | xlist<IOContext*> io_finished; | |
179 | ||
180 | void io_start(IOContext *ctx) | |
181 | { | |
182 | Mutex::Locker l(lock); | |
183 | io_pending.push_back(&ctx->item); | |
184 | } | |
185 | ||
186 | void io_finish(IOContext *ctx) | |
187 | { | |
188 | Mutex::Locker l(lock); | |
189 | assert(ctx->item.is_on_list()); | |
190 | ctx->item.remove_myself(); | |
191 | io_finished.push_back(&ctx->item); | |
192 | cond.Signal(); | |
193 | } | |
194 | ||
195 | IOContext *wait_io_finish() | |
196 | { | |
197 | Mutex::Locker l(lock); | |
198 | while(io_finished.empty() && !terminated) | |
199 | cond.Wait(lock); | |
200 | ||
201 | if (io_finished.empty()) | |
202 | return NULL; | |
203 | ||
204 | IOContext *ret = io_finished.front(); | |
205 | io_finished.pop_front(); | |
206 | ||
207 | return ret; | |
208 | } | |
209 | ||
210 | void wait_clean() | |
211 | { | |
212 | assert(!reader_thread.is_started()); | |
213 | Mutex::Locker l(lock); | |
214 | while(!io_pending.empty()) | |
215 | cond.Wait(lock); | |
216 | ||
217 | while(!io_finished.empty()) { | |
218 | ceph::unique_ptr<IOContext> free_ctx(io_finished.front()); | |
219 | io_finished.pop_front(); | |
220 | } | |
221 | } | |
222 | ||
223 | static void aio_callback(librbd::completion_t cb, void *arg) | |
224 | { | |
225 | librbd::RBD::AioCompletion *aio_completion = | |
226 | reinterpret_cast<librbd::RBD::AioCompletion*>(cb); | |
227 | ||
228 | IOContext *ctx = reinterpret_cast<IOContext *>(arg); | |
229 | int ret = aio_completion->get_return_value(); | |
230 | ||
231 | dout(20) << __func__ << ": " << *ctx << dendl; | |
232 | ||
233 | if (ret == -EINVAL) { | |
234 | // if shrinking an image, a pagecache writeback might reference | |
235 | // extents outside of the range of the new image extents | |
181888fb | 236 | dout(0) << __func__ << ": masking IO out-of-bounds error" << dendl; |
7c673cae FG |
237 | ctx->data.clear(); |
238 | ret = 0; | |
239 | } | |
240 | ||
241 | if (ret < 0) { | |
242 | ctx->reply.error = htonl(-ret); | |
243 | } else if ((ctx->command == NBD_CMD_READ) && | |
244 | ret < static_cast<int>(ctx->request.len)) { | |
245 | int pad_byte_count = static_cast<int> (ctx->request.len) - ret; | |
246 | ctx->data.append_zero(pad_byte_count); | |
247 | dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " | |
248 | << pad_byte_count << dendl; | |
249 | ctx->reply.error = 0; | |
250 | } else { | |
251 | ctx->reply.error = htonl(0); | |
252 | } | |
253 | ctx->server->io_finish(ctx); | |
254 | ||
255 | aio_completion->release(); | |
256 | } | |
257 | ||
258 | void reader_entry() | |
259 | { | |
260 | while (!terminated) { | |
261 | ceph::unique_ptr<IOContext> ctx(new IOContext()); | |
262 | ctx->server = this; | |
263 | ||
264 | dout(20) << __func__ << ": waiting for nbd request" << dendl; | |
265 | ||
266 | int r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)); | |
267 | if (r < 0) { | |
268 | derr << "failed to read nbd request header: " << cpp_strerror(r) | |
269 | << dendl; | |
270 | return; | |
271 | } | |
272 | ||
273 | if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) { | |
274 | derr << "invalid nbd request header" << dendl; | |
275 | return; | |
276 | } | |
277 | ||
278 | ctx->request.from = ntohll(ctx->request.from); | |
279 | ctx->request.type = ntohl(ctx->request.type); | |
280 | ctx->request.len = ntohl(ctx->request.len); | |
281 | ||
282 | ctx->reply.magic = htonl(NBD_REPLY_MAGIC); | |
283 | memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle)); | |
284 | ||
285 | ctx->command = ctx->request.type & 0x0000ffff; | |
286 | ||
287 | dout(20) << *ctx << ": start" << dendl; | |
288 | ||
289 | switch (ctx->command) | |
290 | { | |
291 | case NBD_CMD_DISC: | |
292 | // NBD_DO_IT will return when pipe is closed | |
293 | dout(0) << "disconnect request received" << dendl; | |
294 | return; | |
295 | case NBD_CMD_WRITE: | |
296 | bufferptr ptr(ctx->request.len); | |
297 | r = safe_read_exact(fd, ptr.c_str(), ctx->request.len); | |
298 | if (r < 0) { | |
299 | derr << *ctx << ": failed to read nbd request data: " | |
300 | << cpp_strerror(r) << dendl; | |
301 | return; | |
302 | } | |
303 | ctx->data.push_back(ptr); | |
304 | break; | |
305 | } | |
306 | ||
307 | IOContext *pctx = ctx.release(); | |
308 | io_start(pctx); | |
309 | librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback); | |
310 | switch (pctx->command) | |
311 | { | |
312 | case NBD_CMD_WRITE: | |
313 | image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c); | |
314 | break; | |
315 | case NBD_CMD_READ: | |
316 | image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c); | |
317 | break; | |
318 | case NBD_CMD_FLUSH: | |
319 | image.aio_flush(c); | |
320 | break; | |
321 | case NBD_CMD_TRIM: | |
322 | image.aio_discard(pctx->request.from, pctx->request.len, c); | |
323 | break; | |
324 | default: | |
325 | derr << *pctx << ": invalid request command" << dendl; | |
326 | c->release(); | |
327 | return; | |
328 | } | |
329 | } | |
330 | dout(20) << __func__ << ": terminated" << dendl; | |
331 | } | |
332 | ||
333 | void writer_entry() | |
334 | { | |
335 | while (!terminated) { | |
336 | dout(20) << __func__ << ": waiting for io request" << dendl; | |
337 | ceph::unique_ptr<IOContext> ctx(wait_io_finish()); | |
338 | if (!ctx) { | |
339 | dout(20) << __func__ << ": no io requests, terminating" << dendl; | |
340 | return; | |
341 | } | |
342 | ||
343 | dout(20) << __func__ << ": got: " << *ctx << dendl; | |
344 | ||
345 | int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply)); | |
346 | if (r < 0) { | |
347 | derr << *ctx << ": failed to write reply header: " << cpp_strerror(r) | |
348 | << dendl; | |
349 | return; | |
350 | } | |
351 | if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) { | |
352 | r = ctx->data.write_fd(fd); | |
353 | if (r < 0) { | |
354 | derr << *ctx << ": failed to write replay data: " << cpp_strerror(r) | |
355 | << dendl; | |
356 | return; | |
357 | } | |
358 | } | |
359 | dout(20) << *ctx << ": finish" << dendl; | |
360 | } | |
361 | dout(20) << __func__ << ": terminated" << dendl; | |
362 | } | |
363 | ||
364 | class ThreadHelper : public Thread | |
365 | { | |
366 | public: | |
367 | typedef void (NBDServer::*entry_func)(); | |
368 | private: | |
369 | NBDServer &server; | |
370 | entry_func func; | |
371 | public: | |
372 | ThreadHelper(NBDServer &_server, entry_func _func) | |
373 | :server(_server) | |
374 | ,func(_func) | |
375 | {} | |
376 | protected: | |
377 | void* entry() override | |
378 | { | |
379 | (server.*func)(); | |
380 | server.shutdown(); | |
381 | return NULL; | |
382 | } | |
383 | } reader_thread, writer_thread; | |
384 | ||
385 | bool started; | |
386 | public: | |
387 | void start() | |
388 | { | |
389 | if (!started) { | |
390 | dout(10) << __func__ << ": starting" << dendl; | |
391 | ||
392 | started = true; | |
393 | ||
394 | reader_thread.create("rbd_reader"); | |
395 | writer_thread.create("rbd_writer"); | |
396 | } | |
397 | } | |
398 | ||
399 | void stop() | |
400 | { | |
401 | if (started) { | |
402 | dout(10) << __func__ << ": terminating" << dendl; | |
403 | ||
404 | shutdown(); | |
405 | ||
406 | reader_thread.join(); | |
407 | writer_thread.join(); | |
408 | ||
409 | wait_clean(); | |
410 | ||
411 | started = false; | |
412 | } | |
413 | } | |
414 | ||
415 | ~NBDServer() | |
416 | { | |
417 | stop(); | |
418 | } | |
419 | }; | |
420 | ||
421 | std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) { | |
422 | ||
423 | os << "[" << std::hex << ntohll(*((uint64_t *)ctx.request.handle)); | |
424 | ||
425 | switch (ctx.command) | |
426 | { | |
427 | case NBD_CMD_WRITE: | |
428 | os << " WRITE "; | |
429 | break; | |
430 | case NBD_CMD_READ: | |
431 | os << " READ "; | |
432 | break; | |
433 | case NBD_CMD_FLUSH: | |
434 | os << " FLUSH "; | |
435 | break; | |
436 | case NBD_CMD_TRIM: | |
437 | os << " TRIM "; | |
438 | break; | |
439 | default: | |
440 | os << " UNKNOW(" << ctx.command << ") "; | |
441 | break; | |
442 | } | |
443 | ||
444 | os << ctx.request.from << "~" << ctx.request.len << " " | |
445 | << ntohl(ctx.reply.error) << "]"; | |
446 | ||
447 | return os; | |
448 | } | |
449 | ||
450 | class NBDWatchCtx : public librbd::UpdateWatchCtx | |
451 | { | |
452 | private: | |
453 | int fd; | |
454 | librados::IoCtx &io_ctx; | |
455 | librbd::Image ℑ | |
456 | unsigned long size; | |
457 | public: | |
458 | NBDWatchCtx(int _fd, | |
459 | librados::IoCtx &_io_ctx, | |
460 | librbd::Image &_image, | |
461 | unsigned long _size) | |
462 | : fd(_fd) | |
463 | , io_ctx(_io_ctx) | |
464 | , image(_image) | |
465 | , size(_size) | |
466 | { } | |
467 | ||
468 | ~NBDWatchCtx() override {} | |
469 | ||
470 | void handle_notify() override | |
471 | { | |
472 | librbd::image_info_t info; | |
473 | if (image.stat(info, sizeof(info)) == 0) { | |
474 | unsigned long new_size = info.size; | |
475 | ||
476 | if (new_size != size) { | |
b32b8144 | 477 | dout(5) << "resize detected" << dendl; |
7c673cae | 478 | if (ioctl(fd, BLKFLSBUF, NULL) < 0) |
b32b8144 FG |
479 | derr << "invalidate page cache failed: " << cpp_strerror(errno) |
480 | << dendl; | |
7c673cae FG |
481 | if (ioctl(fd, NBD_SET_SIZE, new_size) < 0) { |
482 | derr << "resize failed: " << cpp_strerror(errno) << dendl; | |
483 | } else { | |
484 | size = new_size; | |
485 | } | |
b32b8144 FG |
486 | if (ioctl(fd, BLKRRPART, NULL) < 0) { |
487 | derr << "rescan of partition table failed: " << cpp_strerror(errno) | |
488 | << dendl; | |
489 | } | |
7c673cae FG |
490 | if (image.invalidate_cache() < 0) |
491 | derr << "invalidate rbd cache failed" << dendl; | |
492 | } | |
493 | } | |
494 | } | |
495 | }; | |
496 | ||
31f18b77 | 497 | static int open_device(const char* path, Config *cfg = nullptr, bool try_load_module = false) |
7c673cae FG |
498 | { |
499 | int nbd = open(path, O_RDWR); | |
500 | bool loaded_module = false; | |
501 | ||
502 | if (nbd < 0 && try_load_module && access("/sys/module/nbd", F_OK) != 0) { | |
503 | ostringstream param; | |
504 | int r; | |
31f18b77 FG |
505 | if (cfg->nbds_max) { |
506 | param << "nbds_max=" << cfg->nbds_max; | |
7c673cae | 507 | } |
31f18b77 FG |
508 | if (cfg->max_part) { |
509 | param << " max_part=" << cfg->max_part; | |
7c673cae FG |
510 | } |
511 | r = module_load("nbd", param.str().c_str()); | |
512 | if (r < 0) { | |
513 | cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-r) << std::endl; | |
514 | return r; | |
515 | } else { | |
516 | loaded_module = true; | |
517 | } | |
518 | nbd = open(path, O_RDWR); | |
519 | } | |
520 | ||
31f18b77 FG |
521 | if (try_load_module && !loaded_module && |
522 | (cfg->nbds_max || cfg->set_max_part)) { | |
7c673cae FG |
523 | cerr << "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded" |
524 | << std::endl; | |
525 | } | |
526 | ||
527 | return nbd; | |
528 | } | |
529 | ||
530 | static int check_device_size(int nbd_index, unsigned long expected_size) | |
531 | { | |
532 | // There are bugs with some older kernel versions that result in an | |
533 | // overflow for large image sizes. This check is to ensure we are | |
534 | // not affected. | |
535 | ||
536 | unsigned long size = 0; | |
537 | std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size"; | |
538 | std::ifstream ifs; | |
539 | ifs.open(path.c_str(), std::ifstream::in); | |
540 | if (!ifs.is_open()) { | |
541 | cerr << "rbd-nbd: failed to open " << path << std::endl; | |
542 | return -EINVAL; | |
543 | } | |
544 | ifs >> size; | |
545 | size *= RBD_NBD_BLKSIZE; | |
546 | ||
547 | if (size == 0) { | |
548 | // Newer kernel versions will report real size only after nbd | |
549 | // connect. Assume this is the case and return success. | |
550 | return 0; | |
551 | } | |
552 | ||
553 | if (size != expected_size) { | |
554 | cerr << "rbd-nbd: kernel reported invalid device size (" << size | |
555 | << ", expected " << expected_size << ")" << std::endl; | |
556 | return -EINVAL; | |
557 | } | |
558 | ||
559 | return 0; | |
560 | } | |
561 | ||
31f18b77 | 562 | static int do_map(int argc, const char *argv[], Config *cfg) |
7c673cae FG |
563 | { |
564 | int r; | |
565 | ||
566 | librados::Rados rados; | |
567 | librbd::RBD rbd; | |
568 | librados::IoCtx io_ctx; | |
569 | librbd::Image image; | |
570 | ||
571 | int read_only = 0; | |
572 | unsigned long flags; | |
573 | unsigned long size; | |
574 | ||
575 | int index = 0; | |
576 | int fd[2]; | |
577 | ||
578 | librbd::image_info_t info; | |
579 | ||
580 | Preforker forker; | |
581 | ||
582 | vector<const char*> args; | |
583 | argv_to_vec(argc, argv, args); | |
584 | env_to_vec(args); | |
585 | ||
586 | auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, | |
587 | CODE_ENVIRONMENT_DAEMON, | |
588 | CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); | |
589 | g_ceph_context->_conf->set_val_or_die("pid_file", ""); | |
590 | ||
591 | if (global_init_prefork(g_ceph_context) >= 0) { | |
592 | std::string err; | |
593 | r = forker.prefork(err); | |
594 | if (r < 0) { | |
595 | cerr << err << std::endl; | |
596 | return r; | |
597 | } | |
598 | ||
599 | if (forker.is_parent()) { | |
600 | global_init_postfork_start(g_ceph_context); | |
601 | if (forker.parent_wait(err) != 0) { | |
602 | return -ENXIO; | |
603 | } | |
604 | return 0; | |
605 | } | |
606 | } | |
607 | ||
608 | common_init_finish(g_ceph_context); | |
609 | global_init_chdir(g_ceph_context); | |
610 | ||
611 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { | |
612 | r = -errno; | |
613 | goto close_ret; | |
614 | } | |
615 | ||
31f18b77 | 616 | if (cfg->devpath.empty()) { |
7c673cae FG |
617 | char dev[64]; |
618 | bool try_load_module = true; | |
b32b8144 FG |
619 | const char *path = "/sys/module/nbd/parameters/nbds_max"; |
620 | int nbds_max = -1; | |
621 | if (access(path, F_OK) == 0) { | |
622 | std::ifstream ifs; | |
623 | ifs.open(path, std::ifstream::in); | |
624 | if (ifs.is_open()) { | |
625 | ifs >> nbds_max; | |
626 | ifs.close(); | |
627 | } | |
628 | } | |
629 | ||
7c673cae FG |
630 | while (true) { |
631 | snprintf(dev, sizeof(dev), "/dev/nbd%d", index); | |
632 | ||
31f18b77 | 633 | nbd = open_device(dev, cfg, try_load_module); |
7c673cae FG |
634 | try_load_module = false; |
635 | if (nbd < 0) { | |
b32b8144 FG |
636 | if (nbd == -EPERM && nbds_max != -1 && index < (nbds_max-1)) { |
637 | ++index; | |
638 | continue; | |
639 | } | |
7c673cae FG |
640 | r = nbd; |
641 | cerr << "rbd-nbd: failed to find unused device" << std::endl; | |
642 | goto close_fd; | |
643 | } | |
644 | ||
645 | r = ioctl(nbd, NBD_SET_SOCK, fd[0]); | |
646 | if (r < 0) { | |
647 | close(nbd); | |
648 | ++index; | |
649 | continue; | |
650 | } | |
651 | ||
31f18b77 | 652 | cfg->devpath = dev; |
7c673cae FG |
653 | break; |
654 | } | |
655 | } else { | |
31f18b77 | 656 | r = sscanf(cfg->devpath.c_str(), "/dev/nbd%d", &index); |
7c673cae | 657 | if (r < 0) { |
31f18b77 | 658 | cerr << "rbd-nbd: invalid device path: " << cfg->devpath |
7c673cae FG |
659 | << " (expected /dev/nbd{num})" << std::endl; |
660 | goto close_fd; | |
661 | } | |
31f18b77 | 662 | nbd = open_device(cfg->devpath.c_str(), cfg, true); |
7c673cae FG |
663 | if (nbd < 0) { |
664 | r = nbd; | |
31f18b77 | 665 | cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl; |
7c673cae FG |
666 | goto close_fd; |
667 | } | |
668 | ||
669 | r = ioctl(nbd, NBD_SET_SOCK, fd[0]); | |
670 | if (r < 0) { | |
671 | r = -errno; | |
31f18b77 | 672 | cerr << "rbd-nbd: the device " << cfg->devpath << " is busy" << std::endl; |
7c673cae FG |
673 | close(nbd); |
674 | goto close_fd; | |
675 | } | |
676 | } | |
677 | ||
678 | flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_TRIM | NBD_FLAG_HAS_FLAGS; | |
31f18b77 | 679 | if (!cfg->snapname.empty() || cfg->readonly) { |
7c673cae FG |
680 | flags |= NBD_FLAG_READ_ONLY; |
681 | read_only = 1; | |
682 | } | |
683 | ||
684 | r = rados.init_with_context(g_ceph_context); | |
685 | if (r < 0) | |
686 | goto close_nbd; | |
687 | ||
688 | r = rados.connect(); | |
689 | if (r < 0) | |
690 | goto close_nbd; | |
691 | ||
31f18b77 | 692 | r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx); |
7c673cae FG |
693 | if (r < 0) |
694 | goto close_nbd; | |
695 | ||
31f18b77 | 696 | r = rbd.open(io_ctx, image, cfg->imgname.c_str()); |
7c673cae FG |
697 | if (r < 0) |
698 | goto close_nbd; | |
699 | ||
31f18b77 | 700 | if (cfg->exclusive) { |
7c673cae FG |
701 | r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); |
702 | if (r < 0) { | |
703 | cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r) | |
704 | << std::endl; | |
705 | goto close_nbd; | |
706 | } | |
707 | } | |
708 | ||
31f18b77 FG |
709 | if (!cfg->snapname.empty()) { |
710 | r = image.snap_set(cfg->snapname.c_str()); | |
7c673cae FG |
711 | if (r < 0) |
712 | goto close_nbd; | |
713 | } | |
714 | ||
715 | r = image.stat(info, sizeof(info)); | |
716 | if (r < 0) | |
717 | goto close_nbd; | |
718 | ||
719 | r = ioctl(nbd, NBD_SET_BLKSIZE, RBD_NBD_BLKSIZE); | |
720 | if (r < 0) { | |
721 | r = -errno; | |
722 | goto close_nbd; | |
723 | } | |
724 | ||
725 | if (info.size > ULONG_MAX) { | |
726 | r = -EFBIG; | |
727 | cerr << "rbd-nbd: image is too large (" << prettybyte_t(info.size) | |
728 | << ", max is " << prettybyte_t(ULONG_MAX) << ")" << std::endl; | |
729 | goto close_nbd; | |
730 | } | |
731 | ||
732 | size = info.size; | |
733 | ||
734 | r = ioctl(nbd, NBD_SET_SIZE, size); | |
735 | if (r < 0) { | |
736 | r = -errno; | |
737 | goto close_nbd; | |
738 | } | |
739 | ||
740 | r = check_device_size(index, size); | |
741 | if (r < 0) { | |
742 | goto close_nbd; | |
743 | } | |
744 | ||
745 | ioctl(nbd, NBD_SET_FLAGS, flags); | |
746 | ||
747 | r = ioctl(nbd, BLKROSET, (unsigned long) &read_only); | |
748 | if (r < 0) { | |
749 | r = -errno; | |
750 | goto close_nbd; | |
751 | } | |
752 | ||
753 | { | |
754 | uint64_t handle; | |
755 | ||
756 | NBDWatchCtx watch_ctx(nbd, io_ctx, image, info.size); | |
757 | r = image.update_watch(&watch_ctx, &handle); | |
758 | if (r < 0) | |
759 | goto close_nbd; | |
760 | ||
31f18b77 | 761 | cout << cfg->devpath << std::endl; |
7c673cae FG |
762 | |
763 | if (g_conf->daemonize) { | |
764 | forker.daemonize(); | |
765 | global_init_postfork_start(g_ceph_context); | |
766 | global_init_postfork_finish(g_ceph_context); | |
767 | } | |
768 | ||
769 | { | |
770 | NBDServer server(fd[1], image); | |
771 | ||
772 | server.start(); | |
773 | ||
774 | init_async_signal_handler(); | |
775 | register_async_signal_handler(SIGHUP, sighup_handler); | |
776 | register_async_signal_handler_oneshot(SIGINT, handle_signal); | |
777 | register_async_signal_handler_oneshot(SIGTERM, handle_signal); | |
778 | ||
779 | ioctl(nbd, NBD_DO_IT); | |
780 | ||
781 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
782 | unregister_async_signal_handler(SIGINT, handle_signal); | |
783 | unregister_async_signal_handler(SIGTERM, handle_signal); | |
784 | shutdown_async_signal_handler(); | |
785 | ||
786 | server.stop(); | |
787 | } | |
788 | ||
789 | r = image.update_unwatch(handle); | |
790 | assert(r == 0); | |
791 | } | |
792 | ||
793 | close_nbd: | |
794 | if (r < 0) { | |
795 | ioctl(nbd, NBD_CLEAR_SOCK); | |
796 | cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl; | |
797 | } | |
798 | close(nbd); | |
799 | close_fd: | |
800 | close(fd[0]); | |
801 | close(fd[1]); | |
802 | close_ret: | |
803 | image.close(); | |
804 | io_ctx.close(); | |
805 | rados.shutdown(); | |
806 | ||
807 | forker.exit(r < 0 ? EXIT_FAILURE : 0); | |
808 | // Unreachable; | |
809 | return r; | |
810 | } | |
811 | ||
31f18b77 | 812 | static int do_unmap(const std::string &devpath) |
7c673cae | 813 | { |
31f18b77 FG |
814 | int r = 0; |
815 | ||
7c673cae FG |
816 | int nbd = open_device(devpath.c_str()); |
817 | if (nbd < 0) { | |
818 | cerr << "rbd-nbd: failed to open device: " << devpath << std::endl; | |
819 | return nbd; | |
820 | } | |
821 | ||
31f18b77 FG |
822 | r = ioctl(nbd, NBD_DISCONNECT); |
823 | if (r < 0) { | |
824 | cerr << "rbd-nbd: the device is not used" << std::endl; | |
7c673cae FG |
825 | } |
826 | ||
827 | close(nbd); | |
828 | ||
31f18b77 | 829 | return r; |
7c673cae FG |
830 | } |
831 | ||
31f18b77 | 832 | static int parse_imgpath(const std::string &imgpath, Config *cfg) |
7c673cae FG |
833 | { |
834 | boost::regex pattern("^(?:([^/@]+)/)?([^/@]+)(?:@([^/@]+))?$"); | |
835 | boost::smatch match; | |
836 | if (!boost::regex_match(imgpath, match, pattern)) { | |
837 | std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl; | |
838 | return -EINVAL; | |
839 | } | |
840 | ||
31f18b77 FG |
841 | if (match[1].matched) { |
842 | cfg->poolname = match[1]; | |
843 | } | |
7c673cae | 844 | |
31f18b77 | 845 | cfg->imgname = match[2]; |
7c673cae FG |
846 | |
847 | if (match[3].matched) | |
31f18b77 | 848 | cfg->snapname = match[3]; |
7c673cae FG |
849 | |
850 | return 0; | |
851 | } | |
852 | ||
31f18b77 | 853 | static int get_mapped_info(int pid, Config *cfg) |
7c673cae | 854 | { |
31f18b77 FG |
855 | int r; |
856 | std::string path = "/proc/" + stringify(pid) + "/cmdline"; | |
857 | std::ifstream ifs; | |
858 | std::string cmdline; | |
859 | std::vector<const char*> args; | |
7c673cae | 860 | |
31f18b77 | 861 | ifs.open(path.c_str(), std::ifstream::in); |
b32b8144 FG |
862 | if (!ifs.is_open()) |
863 | return -1; | |
31f18b77 FG |
864 | ifs >> cmdline; |
865 | ||
866 | for (unsigned i = 0; i < cmdline.size(); i++) { | |
867 | const char *arg = &cmdline[i]; | |
868 | if (i == 0) { | |
869 | if (strcmp(basename(arg) , "rbd-nbd") != 0) { | |
870 | return -EINVAL; | |
871 | } | |
872 | } else { | |
873 | args.push_back(arg); | |
874 | } | |
875 | ||
876 | while (cmdline[i] != '\0') { | |
877 | i++; | |
878 | } | |
7c673cae | 879 | } |
31f18b77 FG |
880 | |
881 | std::ostringstream err_msg; | |
882 | r = parse_args(args, &err_msg, cfg); | |
883 | return r; | |
884 | } | |
7c673cae | 885 | |
31f18b77 FG |
// Read an integer pid from the file at `pid_path`.
// Returns 0 when the file cannot be opened or does not start with a number.
static int get_map_pid(const std::string& pid_path)
{
  std::ifstream pid_file(pid_path.c_str(), std::ifstream::in);
  if (!pid_file.is_open()) {
    return 0;
  }

  int value = 0;
  pid_file >> value;
  return value;
}
897 | ||
31f18b77 | 898 | static int do_list_mapped_devices() |
7c673cae FG |
899 | { |
900 | int r; | |
31f18b77 FG |
901 | bool should_print = false; |
902 | int index = 0; | |
903 | int pid = 0; | |
7c673cae | 904 | |
31f18b77 | 905 | std::string default_pool_name; |
7c673cae | 906 | |
31f18b77 | 907 | TextTable tbl; |
7c673cae | 908 | |
31f18b77 FG |
909 | tbl.define_column("pid", TextTable::LEFT, TextTable::LEFT); |
910 | tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); | |
911 | tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); | |
912 | tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); | |
913 | tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); | |
914 | ||
915 | while (true) { | |
916 | std::string nbd_path = "/sys/block/nbd" + stringify(index); | |
917 | if(access(nbd_path.c_str(), F_OK) != 0) { | |
918 | break; | |
919 | } | |
920 | std::string pid_path = nbd_path + "/pid"; | |
921 | pid = get_map_pid(pid_path); | |
922 | ||
923 | if(pid > 0) { | |
924 | Config cfg; | |
925 | r = get_mapped_info(pid, &cfg); | |
926 | if (r < 0) { | |
927 | index++; | |
928 | continue; | |
929 | } | |
930 | should_print = true; | |
931 | if (cfg.snapname.empty()) { | |
932 | cfg.snapname = "-"; | |
933 | } | |
934 | tbl << pid << cfg.poolname << cfg.imgname << cfg.snapname | |
935 | << "/dev/nbd" + stringify(index) << TextTable::endrow; | |
936 | } | |
937 | ||
938 | index++; | |
939 | } | |
940 | ||
941 | if (should_print) { | |
942 | cout << tbl; | |
943 | } | |
944 | return 0; | |
945 | } | |
946 | ||
947 | static int parse_args(vector<const char*>& args, std::ostream *err_msg, Config *cfg) | |
948 | { | |
181888fb FG |
949 | std::string conf_file_list; |
950 | std::string cluster; | |
951 | CephInitParameters iparams = ceph_argparse_early_args( | |
952 | args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list); | |
7c673cae | 953 | |
31f18b77 | 954 | md_config_t config; |
181888fb FG |
955 | config.name = iparams.name; |
956 | config.cluster = cluster; | |
957 | ||
958 | if (!conf_file_list.empty()) { | |
959 | config.parse_config_files(conf_file_list.c_str(), nullptr, 0); | |
960 | } else { | |
961 | config.parse_config_files(nullptr, nullptr, 0); | |
962 | } | |
31f18b77 FG |
963 | config.parse_env(); |
964 | config.parse_argv(args); | |
181888fb FG |
965 | cfg->poolname = config.get_val<std::string>("rbd_default_pool"); |
966 | ||
967 | std::vector<const char*>::iterator i; | |
968 | std::ostringstream err; | |
31f18b77 | 969 | |
7c673cae FG |
970 | for (i = args.begin(); i != args.end(); ) { |
971 | if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { | |
224ce89b WB |
972 | return HELP_INFO; |
973 | } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) { | |
974 | return VERSION_INFO; | |
31f18b77 FG |
975 | } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) { |
976 | } else if (ceph_argparse_witharg(args, i, &cfg->nbds_max, err, "--nbds_max", (char *)NULL)) { | |
7c673cae | 977 | if (!err.str().empty()) { |
31f18b77 FG |
978 | *err_msg << "rbd-nbd: " << err.str(); |
979 | return -EINVAL; | |
7c673cae | 980 | } |
31f18b77 FG |
981 | if (cfg->nbds_max < 0) { |
982 | *err_msg << "rbd-nbd: Invalid argument for nbds_max!"; | |
983 | return -EINVAL; | |
7c673cae | 984 | } |
31f18b77 | 985 | } else if (ceph_argparse_witharg(args, i, &cfg->max_part, err, "--max_part", (char *)NULL)) { |
7c673cae | 986 | if (!err.str().empty()) { |
31f18b77 FG |
987 | *err_msg << "rbd-nbd: " << err.str(); |
988 | return -EINVAL; | |
7c673cae | 989 | } |
31f18b77 FG |
990 | if ((cfg->max_part < 0) || (cfg->max_part > 255)) { |
991 | *err_msg << "rbd-nbd: Invalid argument for max_part(0~255)!"; | |
992 | return -EINVAL; | |
7c673cae | 993 | } |
31f18b77 | 994 | cfg->set_max_part = true; |
7c673cae | 995 | } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { |
31f18b77 | 996 | cfg->readonly = true; |
7c673cae | 997 | } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { |
31f18b77 | 998 | cfg->exclusive = true; |
7c673cae FG |
999 | } else { |
1000 | ++i; | |
1001 | } | |
1002 | } | |
1003 | ||
1004 | if (args.begin() != args.end()) { | |
1005 | if (strcmp(*args.begin(), "map") == 0) { | |
1006 | cmd = Connect; | |
1007 | } else if (strcmp(*args.begin(), "unmap") == 0) { | |
1008 | cmd = Disconnect; | |
1009 | } else if (strcmp(*args.begin(), "list-mapped") == 0) { | |
1010 | cmd = List; | |
1011 | } else { | |
31f18b77 FG |
1012 | *err_msg << "rbd-nbd: unknown command: " << *args.begin(); |
1013 | return -EINVAL; | |
7c673cae FG |
1014 | } |
1015 | args.erase(args.begin()); | |
1016 | } | |
1017 | ||
1018 | if (cmd == None) { | |
31f18b77 FG |
1019 | *err_msg << "rbd-nbd: must specify command"; |
1020 | return -EINVAL; | |
7c673cae FG |
1021 | } |
1022 | ||
1023 | switch (cmd) { | |
1024 | case Connect: | |
1025 | if (args.begin() == args.end()) { | |
31f18b77 FG |
1026 | *err_msg << "rbd-nbd: must specify image-or-snap-spec"; |
1027 | return -EINVAL; | |
7c673cae | 1028 | } |
31f18b77 FG |
1029 | if (parse_imgpath(string(*args.begin()), cfg) < 0) |
1030 | return -EINVAL; | |
7c673cae FG |
1031 | args.erase(args.begin()); |
1032 | break; | |
1033 | case Disconnect: | |
1034 | if (args.begin() == args.end()) { | |
31f18b77 FG |
1035 | *err_msg << "rbd-nbd: must specify nbd device path"; |
1036 | return -EINVAL; | |
7c673cae | 1037 | } |
31f18b77 | 1038 | cfg->devpath = *args.begin(); |
7c673cae FG |
1039 | args.erase(args.begin()); |
1040 | break; | |
1041 | default: | |
1042 | //shut up gcc; | |
1043 | break; | |
1044 | } | |
1045 | ||
1046 | if (args.begin() != args.end()) { | |
31f18b77 FG |
1047 | *err_msg << "rbd-nbd: unknown args: " << *args.begin(); |
1048 | return -EINVAL; | |
1049 | } | |
1050 | ||
1051 | return 0; | |
1052 | } | |
1053 | ||
1054 | static int rbd_nbd(int argc, const char *argv[]) | |
1055 | { | |
1056 | int r; | |
1057 | Config cfg; | |
1058 | vector<const char*> args; | |
1059 | argv_to_vec(argc, argv, args); | |
1060 | ||
1061 | std::ostringstream err_msg; | |
1062 | r = parse_args(args, &err_msg, &cfg); | |
224ce89b | 1063 | if (r == HELP_INFO) { |
31f18b77 | 1064 | usage(); |
d2e6a577 | 1065 | assert(false); |
224ce89b WB |
1066 | } else if (r == VERSION_INFO) { |
1067 | std::cout << pretty_version_to_str() << std::endl; | |
1068 | return 0; | |
1069 | } | |
1070 | else if (r < 0) { | |
31f18b77 FG |
1071 | cerr << err_msg.str() << std::endl; |
1072 | return r; | |
7c673cae FG |
1073 | } |
1074 | ||
1075 | switch (cmd) { | |
1076 | case Connect: | |
31f18b77 | 1077 | if (cfg.imgname.empty()) { |
7c673cae | 1078 | cerr << "rbd-nbd: image name was not specified" << std::endl; |
31f18b77 | 1079 | return -EINVAL; |
7c673cae FG |
1080 | } |
1081 | ||
31f18b77 | 1082 | r = do_map(argc, argv, &cfg); |
7c673cae | 1083 | if (r < 0) |
31f18b77 | 1084 | return -EINVAL; |
7c673cae FG |
1085 | break; |
1086 | case Disconnect: | |
31f18b77 | 1087 | r = do_unmap(cfg.devpath); |
7c673cae | 1088 | if (r < 0) |
31f18b77 | 1089 | return -EINVAL; |
7c673cae FG |
1090 | break; |
1091 | case List: | |
1092 | r = do_list_mapped_devices(); | |
1093 | if (r < 0) | |
31f18b77 | 1094 | return -EINVAL; |
7c673cae FG |
1095 | break; |
1096 | default: | |
1097 | usage(); | |
d2e6a577 FG |
1098 | assert(false); |
1099 | break; | |
7c673cae FG |
1100 | } |
1101 | ||
1102 | return 0; | |
1103 | } | |
1104 | ||
1105 | int main(int argc, const char *argv[]) | |
1106 | { | |
31f18b77 FG |
1107 | int r = rbd_nbd(argc, argv); |
1108 | if (r < 0) { | |
1109 | return EXIT_FAILURE; | |
1110 | } | |
1111 | return 0; | |
7c673cae | 1112 | } |