]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | /* | |
5 | * rbd-nbd - RBD in userspace | |
6 | * | |
7 | * Copyright (C) 2015 - 2016 Kylin Corporation | |
8 | * | |
9 | * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com> | |
10 | * Li Wang <li.wang@kylin-cloud.com> | |
11 | * | |
12 | * This is free software; you can redistribute it and/or | |
13 | * modify it under the terms of the GNU Lesser General Public | |
14 | * License version 2.1, as published by the Free Software | |
15 | * Foundation. See file COPYING. | |
16 | * | |
17 | */ | |
18 | ||
19 | #include "include/int_types.h" | |
20 | ||
21 | #include <stdio.h> | |
22 | #include <stdlib.h> | |
23 | #include <stddef.h> | |
24 | #include <errno.h> | |
25 | #include <fcntl.h> | |
26 | #include <string.h> | |
27 | #include <sys/types.h> | |
28 | #include <unistd.h> | |
29 | ||
30 | #include <linux/nbd.h> | |
31 | #include <linux/fs.h> | |
32 | #include <sys/ioctl.h> | |
33 | #include <sys/socket.h> | |
34 | ||
35 | #include <iostream> | |
36 | #include <fstream> | |
37 | #include <boost/regex.hpp> | |
38 | ||
39 | #include "mon/MonClient.h" | |
40 | #include "common/config.h" | |
41 | #include "common/dout.h" | |
42 | ||
43 | #include "common/errno.h" | |
44 | #include "common/module.h" | |
45 | #include "common/safe_io.h" | |
31f18b77 | 46 | #include "common/TextTable.h" |
7c673cae FG |
47 | #include "common/ceph_argparse.h" |
48 | #include "common/Preforker.h" | |
224ce89b | 49 | #include "common/version.h" |
7c673cae FG |
50 | #include "global/global_init.h" |
51 | #include "global/signal_handler.h" | |
52 | ||
53 | #include "include/rados/librados.hpp" | |
54 | #include "include/rbd/librbd.hpp" | |
55 | #include "include/stringify.h" | |
56 | #include "include/xlist.h" | |
57 | ||
58 | #define dout_context g_ceph_context | |
59 | #define dout_subsys ceph_subsys_rbd | |
60 | #undef dout_prefix | |
61 | #define dout_prefix *_dout << "rbd-nbd: " | |
62 | ||
31f18b77 FG |
// Options collected from the command line for one rbd-nbd invocation.
struct Config {
  // Parameters handed to the nbd kernel module when it gets loaded.
  int nbds_max{0};
  int max_part{255};

  // Behaviour switches.
  bool exclusive{false};
  bool readonly{false};
  bool set_max_part{false};   // records that max_part was given explicitly

  // Image spec components and the nbd device path.
  std::string poolname;
  std::string imgname;
  std::string snapname;
  std::string devpath;
};
76 | ||
7c673cae FG |
77 | static void usage() |
78 | { | |
79 | std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map an image to nbd device\n" | |
80 | << " unmap <device path> Unmap nbd device\n" | |
81 | << " list-mapped List mapped nbd devices\n" | |
82 | << "Options:\n" | |
83 | << " --device <device path> Specify nbd device path\n" | |
84 | << " --read-only Map read-only\n" | |
85 | << " --nbds_max <limit> Override for module param nbds_max\n" | |
86 | << " --max_part <limit> Override for module param max_part\n" | |
87 | << " --exclusive Forbid writes by other clients\n" | |
88 | << std::endl; | |
89 | generic_server_usage(); | |
90 | } | |
91 | ||
7c673cae FG |
92 | static int nbd = -1; |
93 | ||
31f18b77 FG |
94 | static enum { |
95 | None, | |
96 | Connect, | |
97 | Disconnect, | |
98 | List | |
99 | } cmd = None; | |
100 | ||
7c673cae FG |
101 | #define RBD_NBD_BLKSIZE 512UL |
102 | ||
224ce89b WB |
103 | #define HELP_INFO 1 |
104 | #define VERSION_INFO 2 | |
105 | ||
7c673cae FG |
106 | #ifdef CEPH_BIG_ENDIAN |
107 | #define ntohll(a) (a) | |
108 | #elif defined(CEPH_LITTLE_ENDIAN) | |
109 | #define ntohll(a) swab(a) | |
110 | #else | |
111 | #error "Could not determine endianess" | |
112 | #endif | |
113 | #define htonll(a) ntohll(a) | |
114 | ||
31f18b77 FG |
115 | static int parse_args(vector<const char*>& args, std::ostream *err_msg, Config *cfg); |
116 | ||
7c673cae FG |
117 | static void handle_signal(int signum) |
118 | { | |
119 | assert(signum == SIGINT || signum == SIGTERM); | |
120 | derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; | |
121 | dout(20) << __func__ << ": " << "sending NBD_DISCONNECT" << dendl; | |
122 | if (ioctl(nbd, NBD_DISCONNECT) < 0) { | |
123 | derr << "rbd-nbd: disconnect failed: " << cpp_strerror(errno) << dendl; | |
124 | } else { | |
125 | dout(20) << __func__ << ": " << "disconnected" << dendl; | |
126 | } | |
127 | } | |
128 | ||
129 | class NBDServer | |
130 | { | |
131 | private: | |
132 | int fd; | |
133 | librbd::Image ℑ | |
134 | ||
135 | public: | |
136 | NBDServer(int _fd, librbd::Image& _image) | |
137 | : fd(_fd) | |
138 | , image(_image) | |
139 | , lock("NBDServer::Locker") | |
140 | , reader_thread(*this, &NBDServer::reader_entry) | |
141 | , writer_thread(*this, &NBDServer::writer_entry) | |
142 | , started(false) | |
143 | {} | |
144 | ||
145 | private: | |
146 | std::atomic<bool> terminated = { false }; | |
147 | ||
148 | void shutdown() | |
149 | { | |
150 | bool expected = false; | |
151 | if (terminated.compare_exchange_strong(expected, true)) { | |
152 | ::shutdown(fd, SHUT_RDWR); | |
153 | ||
154 | Mutex::Locker l(lock); | |
155 | cond.Signal(); | |
156 | } | |
157 | } | |
158 | ||
159 | struct IOContext | |
160 | { | |
161 | xlist<IOContext*>::item item; | |
162 | NBDServer *server; | |
163 | struct nbd_request request; | |
164 | struct nbd_reply reply; | |
165 | bufferlist data; | |
166 | int command; | |
167 | ||
168 | IOContext() | |
169 | : item(this) | |
170 | {} | |
171 | }; | |
172 | ||
173 | friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); | |
174 | ||
175 | Mutex lock; | |
176 | Cond cond; | |
177 | xlist<IOContext*> io_pending; | |
178 | xlist<IOContext*> io_finished; | |
179 | ||
180 | void io_start(IOContext *ctx) | |
181 | { | |
182 | Mutex::Locker l(lock); | |
183 | io_pending.push_back(&ctx->item); | |
184 | } | |
185 | ||
186 | void io_finish(IOContext *ctx) | |
187 | { | |
188 | Mutex::Locker l(lock); | |
189 | assert(ctx->item.is_on_list()); | |
190 | ctx->item.remove_myself(); | |
191 | io_finished.push_back(&ctx->item); | |
192 | cond.Signal(); | |
193 | } | |
194 | ||
195 | IOContext *wait_io_finish() | |
196 | { | |
197 | Mutex::Locker l(lock); | |
198 | while(io_finished.empty() && !terminated) | |
199 | cond.Wait(lock); | |
200 | ||
201 | if (io_finished.empty()) | |
202 | return NULL; | |
203 | ||
204 | IOContext *ret = io_finished.front(); | |
205 | io_finished.pop_front(); | |
206 | ||
207 | return ret; | |
208 | } | |
209 | ||
210 | void wait_clean() | |
211 | { | |
212 | assert(!reader_thread.is_started()); | |
213 | Mutex::Locker l(lock); | |
214 | while(!io_pending.empty()) | |
215 | cond.Wait(lock); | |
216 | ||
217 | while(!io_finished.empty()) { | |
218 | ceph::unique_ptr<IOContext> free_ctx(io_finished.front()); | |
219 | io_finished.pop_front(); | |
220 | } | |
221 | } | |
222 | ||
223 | static void aio_callback(librbd::completion_t cb, void *arg) | |
224 | { | |
225 | librbd::RBD::AioCompletion *aio_completion = | |
226 | reinterpret_cast<librbd::RBD::AioCompletion*>(cb); | |
227 | ||
228 | IOContext *ctx = reinterpret_cast<IOContext *>(arg); | |
229 | int ret = aio_completion->get_return_value(); | |
230 | ||
231 | dout(20) << __func__ << ": " << *ctx << dendl; | |
232 | ||
233 | if (ret == -EINVAL) { | |
234 | // if shrinking an image, a pagecache writeback might reference | |
235 | // extents outside of the range of the new image extents | |
181888fb | 236 | dout(0) << __func__ << ": masking IO out-of-bounds error" << dendl; |
7c673cae FG |
237 | ctx->data.clear(); |
238 | ret = 0; | |
239 | } | |
240 | ||
241 | if (ret < 0) { | |
242 | ctx->reply.error = htonl(-ret); | |
243 | } else if ((ctx->command == NBD_CMD_READ) && | |
244 | ret < static_cast<int>(ctx->request.len)) { | |
245 | int pad_byte_count = static_cast<int> (ctx->request.len) - ret; | |
246 | ctx->data.append_zero(pad_byte_count); | |
247 | dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " | |
248 | << pad_byte_count << dendl; | |
249 | ctx->reply.error = 0; | |
250 | } else { | |
251 | ctx->reply.error = htonl(0); | |
252 | } | |
253 | ctx->server->io_finish(ctx); | |
254 | ||
255 | aio_completion->release(); | |
256 | } | |
257 | ||
258 | void reader_entry() | |
259 | { | |
260 | while (!terminated) { | |
261 | ceph::unique_ptr<IOContext> ctx(new IOContext()); | |
262 | ctx->server = this; | |
263 | ||
264 | dout(20) << __func__ << ": waiting for nbd request" << dendl; | |
265 | ||
266 | int r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)); | |
267 | if (r < 0) { | |
268 | derr << "failed to read nbd request header: " << cpp_strerror(r) | |
269 | << dendl; | |
270 | return; | |
271 | } | |
272 | ||
273 | if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) { | |
274 | derr << "invalid nbd request header" << dendl; | |
275 | return; | |
276 | } | |
277 | ||
278 | ctx->request.from = ntohll(ctx->request.from); | |
279 | ctx->request.type = ntohl(ctx->request.type); | |
280 | ctx->request.len = ntohl(ctx->request.len); | |
281 | ||
282 | ctx->reply.magic = htonl(NBD_REPLY_MAGIC); | |
283 | memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle)); | |
284 | ||
285 | ctx->command = ctx->request.type & 0x0000ffff; | |
286 | ||
287 | dout(20) << *ctx << ": start" << dendl; | |
288 | ||
289 | switch (ctx->command) | |
290 | { | |
291 | case NBD_CMD_DISC: | |
292 | // NBD_DO_IT will return when pipe is closed | |
293 | dout(0) << "disconnect request received" << dendl; | |
294 | return; | |
295 | case NBD_CMD_WRITE: | |
296 | bufferptr ptr(ctx->request.len); | |
297 | r = safe_read_exact(fd, ptr.c_str(), ctx->request.len); | |
298 | if (r < 0) { | |
299 | derr << *ctx << ": failed to read nbd request data: " | |
300 | << cpp_strerror(r) << dendl; | |
301 | return; | |
302 | } | |
303 | ctx->data.push_back(ptr); | |
304 | break; | |
305 | } | |
306 | ||
307 | IOContext *pctx = ctx.release(); | |
308 | io_start(pctx); | |
309 | librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback); | |
310 | switch (pctx->command) | |
311 | { | |
312 | case NBD_CMD_WRITE: | |
313 | image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c); | |
314 | break; | |
315 | case NBD_CMD_READ: | |
316 | image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c); | |
317 | break; | |
318 | case NBD_CMD_FLUSH: | |
319 | image.aio_flush(c); | |
320 | break; | |
321 | case NBD_CMD_TRIM: | |
322 | image.aio_discard(pctx->request.from, pctx->request.len, c); | |
323 | break; | |
324 | default: | |
325 | derr << *pctx << ": invalid request command" << dendl; | |
326 | c->release(); | |
327 | return; | |
328 | } | |
329 | } | |
330 | dout(20) << __func__ << ": terminated" << dendl; | |
331 | } | |
332 | ||
333 | void writer_entry() | |
334 | { | |
335 | while (!terminated) { | |
336 | dout(20) << __func__ << ": waiting for io request" << dendl; | |
337 | ceph::unique_ptr<IOContext> ctx(wait_io_finish()); | |
338 | if (!ctx) { | |
339 | dout(20) << __func__ << ": no io requests, terminating" << dendl; | |
340 | return; | |
341 | } | |
342 | ||
343 | dout(20) << __func__ << ": got: " << *ctx << dendl; | |
344 | ||
345 | int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply)); | |
346 | if (r < 0) { | |
347 | derr << *ctx << ": failed to write reply header: " << cpp_strerror(r) | |
348 | << dendl; | |
349 | return; | |
350 | } | |
351 | if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) { | |
352 | r = ctx->data.write_fd(fd); | |
353 | if (r < 0) { | |
354 | derr << *ctx << ": failed to write replay data: " << cpp_strerror(r) | |
355 | << dendl; | |
356 | return; | |
357 | } | |
358 | } | |
359 | dout(20) << *ctx << ": finish" << dendl; | |
360 | } | |
361 | dout(20) << __func__ << ": terminated" << dendl; | |
362 | } | |
363 | ||
364 | class ThreadHelper : public Thread | |
365 | { | |
366 | public: | |
367 | typedef void (NBDServer::*entry_func)(); | |
368 | private: | |
369 | NBDServer &server; | |
370 | entry_func func; | |
371 | public: | |
372 | ThreadHelper(NBDServer &_server, entry_func _func) | |
373 | :server(_server) | |
374 | ,func(_func) | |
375 | {} | |
376 | protected: | |
377 | void* entry() override | |
378 | { | |
379 | (server.*func)(); | |
380 | server.shutdown(); | |
381 | return NULL; | |
382 | } | |
383 | } reader_thread, writer_thread; | |
384 | ||
385 | bool started; | |
386 | public: | |
387 | void start() | |
388 | { | |
389 | if (!started) { | |
390 | dout(10) << __func__ << ": starting" << dendl; | |
391 | ||
392 | started = true; | |
393 | ||
394 | reader_thread.create("rbd_reader"); | |
395 | writer_thread.create("rbd_writer"); | |
396 | } | |
397 | } | |
398 | ||
399 | void stop() | |
400 | { | |
401 | if (started) { | |
402 | dout(10) << __func__ << ": terminating" << dendl; | |
403 | ||
404 | shutdown(); | |
405 | ||
406 | reader_thread.join(); | |
407 | writer_thread.join(); | |
408 | ||
409 | wait_clean(); | |
410 | ||
411 | started = false; | |
412 | } | |
413 | } | |
414 | ||
415 | ~NBDServer() | |
416 | { | |
417 | stop(); | |
418 | } | |
419 | }; | |
420 | ||
421 | std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) { | |
422 | ||
423 | os << "[" << std::hex << ntohll(*((uint64_t *)ctx.request.handle)); | |
424 | ||
425 | switch (ctx.command) | |
426 | { | |
427 | case NBD_CMD_WRITE: | |
428 | os << " WRITE "; | |
429 | break; | |
430 | case NBD_CMD_READ: | |
431 | os << " READ "; | |
432 | break; | |
433 | case NBD_CMD_FLUSH: | |
434 | os << " FLUSH "; | |
435 | break; | |
436 | case NBD_CMD_TRIM: | |
437 | os << " TRIM "; | |
438 | break; | |
439 | default: | |
440 | os << " UNKNOW(" << ctx.command << ") "; | |
441 | break; | |
442 | } | |
443 | ||
444 | os << ctx.request.from << "~" << ctx.request.len << " " | |
445 | << ntohl(ctx.reply.error) << "]"; | |
446 | ||
447 | return os; | |
448 | } | |
449 | ||
450 | class NBDWatchCtx : public librbd::UpdateWatchCtx | |
451 | { | |
452 | private: | |
453 | int fd; | |
454 | librados::IoCtx &io_ctx; | |
455 | librbd::Image ℑ | |
456 | unsigned long size; | |
457 | public: | |
458 | NBDWatchCtx(int _fd, | |
459 | librados::IoCtx &_io_ctx, | |
460 | librbd::Image &_image, | |
461 | unsigned long _size) | |
462 | : fd(_fd) | |
463 | , io_ctx(_io_ctx) | |
464 | , image(_image) | |
465 | , size(_size) | |
466 | { } | |
467 | ||
468 | ~NBDWatchCtx() override {} | |
469 | ||
470 | void handle_notify() override | |
471 | { | |
472 | librbd::image_info_t info; | |
473 | if (image.stat(info, sizeof(info)) == 0) { | |
474 | unsigned long new_size = info.size; | |
475 | ||
476 | if (new_size != size) { | |
477 | if (ioctl(fd, BLKFLSBUF, NULL) < 0) | |
478 | derr << "invalidate page cache failed: " << cpp_strerror(errno) << dendl; | |
479 | if (ioctl(fd, NBD_SET_SIZE, new_size) < 0) { | |
480 | derr << "resize failed: " << cpp_strerror(errno) << dendl; | |
481 | } else { | |
482 | size = new_size; | |
483 | } | |
484 | if (image.invalidate_cache() < 0) | |
485 | derr << "invalidate rbd cache failed" << dendl; | |
486 | } | |
487 | } | |
488 | } | |
489 | }; | |
490 | ||
31f18b77 | 491 | static int open_device(const char* path, Config *cfg = nullptr, bool try_load_module = false) |
7c673cae FG |
492 | { |
493 | int nbd = open(path, O_RDWR); | |
494 | bool loaded_module = false; | |
495 | ||
496 | if (nbd < 0 && try_load_module && access("/sys/module/nbd", F_OK) != 0) { | |
497 | ostringstream param; | |
498 | int r; | |
31f18b77 FG |
499 | if (cfg->nbds_max) { |
500 | param << "nbds_max=" << cfg->nbds_max; | |
7c673cae | 501 | } |
31f18b77 FG |
502 | if (cfg->max_part) { |
503 | param << " max_part=" << cfg->max_part; | |
7c673cae FG |
504 | } |
505 | r = module_load("nbd", param.str().c_str()); | |
506 | if (r < 0) { | |
507 | cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-r) << std::endl; | |
508 | return r; | |
509 | } else { | |
510 | loaded_module = true; | |
511 | } | |
512 | nbd = open(path, O_RDWR); | |
513 | } | |
514 | ||
31f18b77 FG |
515 | if (try_load_module && !loaded_module && |
516 | (cfg->nbds_max || cfg->set_max_part)) { | |
7c673cae FG |
517 | cerr << "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded" |
518 | << std::endl; | |
519 | } | |
520 | ||
521 | return nbd; | |
522 | } | |
523 | ||
524 | static int check_device_size(int nbd_index, unsigned long expected_size) | |
525 | { | |
526 | // There are bugs with some older kernel versions that result in an | |
527 | // overflow for large image sizes. This check is to ensure we are | |
528 | // not affected. | |
529 | ||
530 | unsigned long size = 0; | |
531 | std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size"; | |
532 | std::ifstream ifs; | |
533 | ifs.open(path.c_str(), std::ifstream::in); | |
534 | if (!ifs.is_open()) { | |
535 | cerr << "rbd-nbd: failed to open " << path << std::endl; | |
536 | return -EINVAL; | |
537 | } | |
538 | ifs >> size; | |
539 | size *= RBD_NBD_BLKSIZE; | |
540 | ||
541 | if (size == 0) { | |
542 | // Newer kernel versions will report real size only after nbd | |
543 | // connect. Assume this is the case and return success. | |
544 | return 0; | |
545 | } | |
546 | ||
547 | if (size != expected_size) { | |
548 | cerr << "rbd-nbd: kernel reported invalid device size (" << size | |
549 | << ", expected " << expected_size << ")" << std::endl; | |
550 | return -EINVAL; | |
551 | } | |
552 | ||
553 | return 0; | |
554 | } | |
555 | ||
31f18b77 | 556 | static int do_map(int argc, const char *argv[], Config *cfg) |
7c673cae FG |
557 | { |
558 | int r; | |
559 | ||
560 | librados::Rados rados; | |
561 | librbd::RBD rbd; | |
562 | librados::IoCtx io_ctx; | |
563 | librbd::Image image; | |
564 | ||
565 | int read_only = 0; | |
566 | unsigned long flags; | |
567 | unsigned long size; | |
568 | ||
569 | int index = 0; | |
570 | int fd[2]; | |
571 | ||
572 | librbd::image_info_t info; | |
573 | ||
574 | Preforker forker; | |
575 | ||
576 | vector<const char*> args; | |
577 | argv_to_vec(argc, argv, args); | |
578 | env_to_vec(args); | |
579 | ||
580 | auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, | |
581 | CODE_ENVIRONMENT_DAEMON, | |
582 | CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); | |
583 | g_ceph_context->_conf->set_val_or_die("pid_file", ""); | |
584 | ||
585 | if (global_init_prefork(g_ceph_context) >= 0) { | |
586 | std::string err; | |
587 | r = forker.prefork(err); | |
588 | if (r < 0) { | |
589 | cerr << err << std::endl; | |
590 | return r; | |
591 | } | |
592 | ||
593 | if (forker.is_parent()) { | |
594 | global_init_postfork_start(g_ceph_context); | |
595 | if (forker.parent_wait(err) != 0) { | |
596 | return -ENXIO; | |
597 | } | |
598 | return 0; | |
599 | } | |
600 | } | |
601 | ||
602 | common_init_finish(g_ceph_context); | |
603 | global_init_chdir(g_ceph_context); | |
604 | ||
605 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { | |
606 | r = -errno; | |
607 | goto close_ret; | |
608 | } | |
609 | ||
31f18b77 | 610 | if (cfg->devpath.empty()) { |
7c673cae FG |
611 | char dev[64]; |
612 | bool try_load_module = true; | |
613 | while (true) { | |
614 | snprintf(dev, sizeof(dev), "/dev/nbd%d", index); | |
615 | ||
31f18b77 | 616 | nbd = open_device(dev, cfg, try_load_module); |
7c673cae FG |
617 | try_load_module = false; |
618 | if (nbd < 0) { | |
619 | r = nbd; | |
620 | cerr << "rbd-nbd: failed to find unused device" << std::endl; | |
621 | goto close_fd; | |
622 | } | |
623 | ||
624 | r = ioctl(nbd, NBD_SET_SOCK, fd[0]); | |
625 | if (r < 0) { | |
626 | close(nbd); | |
627 | ++index; | |
628 | continue; | |
629 | } | |
630 | ||
31f18b77 | 631 | cfg->devpath = dev; |
7c673cae FG |
632 | break; |
633 | } | |
634 | } else { | |
31f18b77 | 635 | r = sscanf(cfg->devpath.c_str(), "/dev/nbd%d", &index); |
7c673cae | 636 | if (r < 0) { |
31f18b77 | 637 | cerr << "rbd-nbd: invalid device path: " << cfg->devpath |
7c673cae FG |
638 | << " (expected /dev/nbd{num})" << std::endl; |
639 | goto close_fd; | |
640 | } | |
31f18b77 | 641 | nbd = open_device(cfg->devpath.c_str(), cfg, true); |
7c673cae FG |
642 | if (nbd < 0) { |
643 | r = nbd; | |
31f18b77 | 644 | cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl; |
7c673cae FG |
645 | goto close_fd; |
646 | } | |
647 | ||
648 | r = ioctl(nbd, NBD_SET_SOCK, fd[0]); | |
649 | if (r < 0) { | |
650 | r = -errno; | |
31f18b77 | 651 | cerr << "rbd-nbd: the device " << cfg->devpath << " is busy" << std::endl; |
7c673cae FG |
652 | close(nbd); |
653 | goto close_fd; | |
654 | } | |
655 | } | |
656 | ||
657 | flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_TRIM | NBD_FLAG_HAS_FLAGS; | |
31f18b77 | 658 | if (!cfg->snapname.empty() || cfg->readonly) { |
7c673cae FG |
659 | flags |= NBD_FLAG_READ_ONLY; |
660 | read_only = 1; | |
661 | } | |
662 | ||
663 | r = rados.init_with_context(g_ceph_context); | |
664 | if (r < 0) | |
665 | goto close_nbd; | |
666 | ||
667 | r = rados.connect(); | |
668 | if (r < 0) | |
669 | goto close_nbd; | |
670 | ||
31f18b77 | 671 | r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx); |
7c673cae FG |
672 | if (r < 0) |
673 | goto close_nbd; | |
674 | ||
31f18b77 | 675 | r = rbd.open(io_ctx, image, cfg->imgname.c_str()); |
7c673cae FG |
676 | if (r < 0) |
677 | goto close_nbd; | |
678 | ||
31f18b77 | 679 | if (cfg->exclusive) { |
7c673cae FG |
680 | r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); |
681 | if (r < 0) { | |
682 | cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r) | |
683 | << std::endl; | |
684 | goto close_nbd; | |
685 | } | |
686 | } | |
687 | ||
31f18b77 FG |
688 | if (!cfg->snapname.empty()) { |
689 | r = image.snap_set(cfg->snapname.c_str()); | |
7c673cae FG |
690 | if (r < 0) |
691 | goto close_nbd; | |
692 | } | |
693 | ||
694 | r = image.stat(info, sizeof(info)); | |
695 | if (r < 0) | |
696 | goto close_nbd; | |
697 | ||
698 | r = ioctl(nbd, NBD_SET_BLKSIZE, RBD_NBD_BLKSIZE); | |
699 | if (r < 0) { | |
700 | r = -errno; | |
701 | goto close_nbd; | |
702 | } | |
703 | ||
704 | if (info.size > ULONG_MAX) { | |
705 | r = -EFBIG; | |
706 | cerr << "rbd-nbd: image is too large (" << prettybyte_t(info.size) | |
707 | << ", max is " << prettybyte_t(ULONG_MAX) << ")" << std::endl; | |
708 | goto close_nbd; | |
709 | } | |
710 | ||
711 | size = info.size; | |
712 | ||
713 | r = ioctl(nbd, NBD_SET_SIZE, size); | |
714 | if (r < 0) { | |
715 | r = -errno; | |
716 | goto close_nbd; | |
717 | } | |
718 | ||
719 | r = check_device_size(index, size); | |
720 | if (r < 0) { | |
721 | goto close_nbd; | |
722 | } | |
723 | ||
724 | ioctl(nbd, NBD_SET_FLAGS, flags); | |
725 | ||
726 | r = ioctl(nbd, BLKROSET, (unsigned long) &read_only); | |
727 | if (r < 0) { | |
728 | r = -errno; | |
729 | goto close_nbd; | |
730 | } | |
731 | ||
732 | { | |
733 | uint64_t handle; | |
734 | ||
735 | NBDWatchCtx watch_ctx(nbd, io_ctx, image, info.size); | |
736 | r = image.update_watch(&watch_ctx, &handle); | |
737 | if (r < 0) | |
738 | goto close_nbd; | |
739 | ||
31f18b77 | 740 | cout << cfg->devpath << std::endl; |
7c673cae FG |
741 | |
742 | if (g_conf->daemonize) { | |
743 | forker.daemonize(); | |
744 | global_init_postfork_start(g_ceph_context); | |
745 | global_init_postfork_finish(g_ceph_context); | |
746 | } | |
747 | ||
748 | { | |
749 | NBDServer server(fd[1], image); | |
750 | ||
751 | server.start(); | |
752 | ||
753 | init_async_signal_handler(); | |
754 | register_async_signal_handler(SIGHUP, sighup_handler); | |
755 | register_async_signal_handler_oneshot(SIGINT, handle_signal); | |
756 | register_async_signal_handler_oneshot(SIGTERM, handle_signal); | |
757 | ||
758 | ioctl(nbd, NBD_DO_IT); | |
759 | ||
760 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
761 | unregister_async_signal_handler(SIGINT, handle_signal); | |
762 | unregister_async_signal_handler(SIGTERM, handle_signal); | |
763 | shutdown_async_signal_handler(); | |
764 | ||
765 | server.stop(); | |
766 | } | |
767 | ||
768 | r = image.update_unwatch(handle); | |
769 | assert(r == 0); | |
770 | } | |
771 | ||
772 | close_nbd: | |
773 | if (r < 0) { | |
774 | ioctl(nbd, NBD_CLEAR_SOCK); | |
775 | cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl; | |
776 | } | |
777 | close(nbd); | |
778 | close_fd: | |
779 | close(fd[0]); | |
780 | close(fd[1]); | |
781 | close_ret: | |
782 | image.close(); | |
783 | io_ctx.close(); | |
784 | rados.shutdown(); | |
785 | ||
786 | forker.exit(r < 0 ? EXIT_FAILURE : 0); | |
787 | // Unreachable; | |
788 | return r; | |
789 | } | |
790 | ||
31f18b77 | 791 | static int do_unmap(const std::string &devpath) |
7c673cae | 792 | { |
31f18b77 FG |
793 | int r = 0; |
794 | ||
7c673cae FG |
795 | int nbd = open_device(devpath.c_str()); |
796 | if (nbd < 0) { | |
797 | cerr << "rbd-nbd: failed to open device: " << devpath << std::endl; | |
798 | return nbd; | |
799 | } | |
800 | ||
31f18b77 FG |
801 | r = ioctl(nbd, NBD_DISCONNECT); |
802 | if (r < 0) { | |
803 | cerr << "rbd-nbd: the device is not used" << std::endl; | |
7c673cae FG |
804 | } |
805 | ||
806 | close(nbd); | |
807 | ||
31f18b77 | 808 | return r; |
7c673cae FG |
809 | } |
810 | ||
31f18b77 | 811 | static int parse_imgpath(const std::string &imgpath, Config *cfg) |
7c673cae FG |
812 | { |
813 | boost::regex pattern("^(?:([^/@]+)/)?([^/@]+)(?:@([^/@]+))?$"); | |
814 | boost::smatch match; | |
815 | if (!boost::regex_match(imgpath, match, pattern)) { | |
816 | std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl; | |
817 | return -EINVAL; | |
818 | } | |
819 | ||
31f18b77 FG |
820 | if (match[1].matched) { |
821 | cfg->poolname = match[1]; | |
822 | } | |
7c673cae | 823 | |
31f18b77 | 824 | cfg->imgname = match[2]; |
7c673cae FG |
825 | |
826 | if (match[3].matched) | |
31f18b77 | 827 | cfg->snapname = match[3]; |
7c673cae FG |
828 | |
829 | return 0; | |
830 | } | |
831 | ||
31f18b77 | 832 | static int get_mapped_info(int pid, Config *cfg) |
7c673cae | 833 | { |
31f18b77 FG |
834 | int r; |
835 | std::string path = "/proc/" + stringify(pid) + "/cmdline"; | |
836 | std::ifstream ifs; | |
837 | std::string cmdline; | |
838 | std::vector<const char*> args; | |
7c673cae | 839 | |
31f18b77 FG |
840 | ifs.open(path.c_str(), std::ifstream::in); |
841 | assert (ifs.is_open()); | |
842 | ifs >> cmdline; | |
843 | ||
844 | for (unsigned i = 0; i < cmdline.size(); i++) { | |
845 | const char *arg = &cmdline[i]; | |
846 | if (i == 0) { | |
847 | if (strcmp(basename(arg) , "rbd-nbd") != 0) { | |
848 | return -EINVAL; | |
849 | } | |
850 | } else { | |
851 | args.push_back(arg); | |
852 | } | |
853 | ||
854 | while (cmdline[i] != '\0') { | |
855 | i++; | |
856 | } | |
7c673cae | 857 | } |
31f18b77 FG |
858 | |
859 | std::ostringstream err_msg; | |
860 | r = parse_args(args, &err_msg, cfg); | |
861 | return r; | |
862 | } | |
7c673cae | 863 | |
31f18b77 FG |
// Read an integer pid from the file at `pid_path`.
// Returns 0 when the file cannot be opened.
static int get_map_pid(const std::string& pid_path)
{
  std::ifstream pid_file(pid_path.c_str(), std::ifstream::in);
  if (!pid_file.is_open()) {
    return 0;
  }

  int pid = 0;
  pid_file >> pid;
  return pid;
}
875 | ||
31f18b77 | 876 | static int do_list_mapped_devices() |
7c673cae FG |
877 | { |
878 | int r; | |
31f18b77 FG |
879 | bool should_print = false; |
880 | int index = 0; | |
881 | int pid = 0; | |
7c673cae | 882 | |
31f18b77 | 883 | std::string default_pool_name; |
7c673cae | 884 | |
31f18b77 | 885 | TextTable tbl; |
7c673cae | 886 | |
31f18b77 FG |
887 | tbl.define_column("pid", TextTable::LEFT, TextTable::LEFT); |
888 | tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); | |
889 | tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); | |
890 | tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); | |
891 | tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); | |
892 | ||
893 | while (true) { | |
894 | std::string nbd_path = "/sys/block/nbd" + stringify(index); | |
895 | if(access(nbd_path.c_str(), F_OK) != 0) { | |
896 | break; | |
897 | } | |
898 | std::string pid_path = nbd_path + "/pid"; | |
899 | pid = get_map_pid(pid_path); | |
900 | ||
901 | if(pid > 0) { | |
902 | Config cfg; | |
903 | r = get_mapped_info(pid, &cfg); | |
904 | if (r < 0) { | |
905 | index++; | |
906 | continue; | |
907 | } | |
908 | should_print = true; | |
909 | if (cfg.snapname.empty()) { | |
910 | cfg.snapname = "-"; | |
911 | } | |
912 | tbl << pid << cfg.poolname << cfg.imgname << cfg.snapname | |
913 | << "/dev/nbd" + stringify(index) << TextTable::endrow; | |
914 | } | |
915 | ||
916 | index++; | |
917 | } | |
918 | ||
919 | if (should_print) { | |
920 | cout << tbl; | |
921 | } | |
922 | return 0; | |
923 | } | |
924 | ||
925 | static int parse_args(vector<const char*>& args, std::ostream *err_msg, Config *cfg) | |
926 | { | |
181888fb FG |
927 | std::string conf_file_list; |
928 | std::string cluster; | |
929 | CephInitParameters iparams = ceph_argparse_early_args( | |
930 | args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list); | |
7c673cae | 931 | |
31f18b77 | 932 | md_config_t config; |
181888fb FG |
933 | config.name = iparams.name; |
934 | config.cluster = cluster; | |
935 | ||
936 | if (!conf_file_list.empty()) { | |
937 | config.parse_config_files(conf_file_list.c_str(), nullptr, 0); | |
938 | } else { | |
939 | config.parse_config_files(nullptr, nullptr, 0); | |
940 | } | |
31f18b77 FG |
941 | config.parse_env(); |
942 | config.parse_argv(args); | |
181888fb FG |
943 | cfg->poolname = config.get_val<std::string>("rbd_default_pool"); |
944 | ||
945 | std::vector<const char*>::iterator i; | |
946 | std::ostringstream err; | |
31f18b77 | 947 | |
7c673cae FG |
948 | for (i = args.begin(); i != args.end(); ) { |
949 | if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { | |
224ce89b WB |
950 | return HELP_INFO; |
951 | } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) { | |
952 | return VERSION_INFO; | |
31f18b77 FG |
953 | } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) { |
954 | } else if (ceph_argparse_witharg(args, i, &cfg->nbds_max, err, "--nbds_max", (char *)NULL)) { | |
7c673cae | 955 | if (!err.str().empty()) { |
31f18b77 FG |
956 | *err_msg << "rbd-nbd: " << err.str(); |
957 | return -EINVAL; | |
7c673cae | 958 | } |
31f18b77 FG |
959 | if (cfg->nbds_max < 0) { |
960 | *err_msg << "rbd-nbd: Invalid argument for nbds_max!"; | |
961 | return -EINVAL; | |
7c673cae | 962 | } |
31f18b77 | 963 | } else if (ceph_argparse_witharg(args, i, &cfg->max_part, err, "--max_part", (char *)NULL)) { |
7c673cae | 964 | if (!err.str().empty()) { |
31f18b77 FG |
965 | *err_msg << "rbd-nbd: " << err.str(); |
966 | return -EINVAL; | |
7c673cae | 967 | } |
31f18b77 FG |
968 | if ((cfg->max_part < 0) || (cfg->max_part > 255)) { |
969 | *err_msg << "rbd-nbd: Invalid argument for max_part(0~255)!"; | |
970 | return -EINVAL; | |
7c673cae | 971 | } |
31f18b77 | 972 | cfg->set_max_part = true; |
7c673cae | 973 | } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { |
31f18b77 | 974 | cfg->readonly = true; |
7c673cae | 975 | } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { |
31f18b77 | 976 | cfg->exclusive = true; |
7c673cae FG |
977 | } else { |
978 | ++i; | |
979 | } | |
980 | } | |
981 | ||
982 | if (args.begin() != args.end()) { | |
983 | if (strcmp(*args.begin(), "map") == 0) { | |
984 | cmd = Connect; | |
985 | } else if (strcmp(*args.begin(), "unmap") == 0) { | |
986 | cmd = Disconnect; | |
987 | } else if (strcmp(*args.begin(), "list-mapped") == 0) { | |
988 | cmd = List; | |
989 | } else { | |
31f18b77 FG |
990 | *err_msg << "rbd-nbd: unknown command: " << *args.begin(); |
991 | return -EINVAL; | |
7c673cae FG |
992 | } |
993 | args.erase(args.begin()); | |
994 | } | |
995 | ||
996 | if (cmd == None) { | |
31f18b77 FG |
997 | *err_msg << "rbd-nbd: must specify command"; |
998 | return -EINVAL; | |
7c673cae FG |
999 | } |
1000 | ||
1001 | switch (cmd) { | |
1002 | case Connect: | |
1003 | if (args.begin() == args.end()) { | |
31f18b77 FG |
1004 | *err_msg << "rbd-nbd: must specify image-or-snap-spec"; |
1005 | return -EINVAL; | |
7c673cae | 1006 | } |
31f18b77 FG |
1007 | if (parse_imgpath(string(*args.begin()), cfg) < 0) |
1008 | return -EINVAL; | |
7c673cae FG |
1009 | args.erase(args.begin()); |
1010 | break; | |
1011 | case Disconnect: | |
1012 | if (args.begin() == args.end()) { | |
31f18b77 FG |
1013 | *err_msg << "rbd-nbd: must specify nbd device path"; |
1014 | return -EINVAL; | |
7c673cae | 1015 | } |
31f18b77 | 1016 | cfg->devpath = *args.begin(); |
7c673cae FG |
1017 | args.erase(args.begin()); |
1018 | break; | |
1019 | default: | |
1020 | //shut up gcc; | |
1021 | break; | |
1022 | } | |
1023 | ||
1024 | if (args.begin() != args.end()) { | |
31f18b77 FG |
1025 | *err_msg << "rbd-nbd: unknown args: " << *args.begin(); |
1026 | return -EINVAL; | |
1027 | } | |
1028 | ||
1029 | return 0; | |
1030 | } | |
1031 | ||
1032 | static int rbd_nbd(int argc, const char *argv[]) | |
1033 | { | |
1034 | int r; | |
1035 | Config cfg; | |
1036 | vector<const char*> args; | |
1037 | argv_to_vec(argc, argv, args); | |
1038 | ||
1039 | std::ostringstream err_msg; | |
1040 | r = parse_args(args, &err_msg, &cfg); | |
224ce89b | 1041 | if (r == HELP_INFO) { |
31f18b77 | 1042 | usage(); |
d2e6a577 | 1043 | assert(false); |
224ce89b WB |
1044 | } else if (r == VERSION_INFO) { |
1045 | std::cout << pretty_version_to_str() << std::endl; | |
1046 | return 0; | |
1047 | } | |
1048 | else if (r < 0) { | |
31f18b77 FG |
1049 | cerr << err_msg.str() << std::endl; |
1050 | return r; | |
7c673cae FG |
1051 | } |
1052 | ||
1053 | switch (cmd) { | |
1054 | case Connect: | |
31f18b77 | 1055 | if (cfg.imgname.empty()) { |
7c673cae | 1056 | cerr << "rbd-nbd: image name was not specified" << std::endl; |
31f18b77 | 1057 | return -EINVAL; |
7c673cae FG |
1058 | } |
1059 | ||
31f18b77 | 1060 | r = do_map(argc, argv, &cfg); |
7c673cae | 1061 | if (r < 0) |
31f18b77 | 1062 | return -EINVAL; |
7c673cae FG |
1063 | break; |
1064 | case Disconnect: | |
31f18b77 | 1065 | r = do_unmap(cfg.devpath); |
7c673cae | 1066 | if (r < 0) |
31f18b77 | 1067 | return -EINVAL; |
7c673cae FG |
1068 | break; |
1069 | case List: | |
1070 | r = do_list_mapped_devices(); | |
1071 | if (r < 0) | |
31f18b77 | 1072 | return -EINVAL; |
7c673cae FG |
1073 | break; |
1074 | default: | |
1075 | usage(); | |
d2e6a577 FG |
1076 | assert(false); |
1077 | break; | |
7c673cae FG |
1078 | } |
1079 | ||
1080 | return 0; | |
1081 | } | |
1082 | ||
1083 | int main(int argc, const char *argv[]) | |
1084 | { | |
31f18b77 FG |
1085 | int r = rbd_nbd(argc, argv); |
1086 | if (r < 0) { | |
1087 | return EXIT_FAILURE; | |
1088 | } | |
1089 | return 0; | |
7c673cae | 1090 | } |