]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | /* | |
5 | * rbd-nbd - RBD in userspace | |
6 | * | |
7 | * Copyright (C) 2015 - 2016 Kylin Corporation | |
8 | * | |
9 | * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com> | |
10 | * Li Wang <li.wang@kylin-cloud.com> | |
11 | * | |
12 | * This is free software; you can redistribute it and/or | |
13 | * modify it under the terms of the GNU Lesser General Public | |
14 | * License version 2.1, as published by the Free Software | |
15 | * Foundation. See file COPYING. | |
16 | * | |
17 | */ | |
18 | ||
19 | #include "include/int_types.h" | |
20 | ||
21 | #include <stdio.h> | |
22 | #include <stdlib.h> | |
23 | #include <stddef.h> | |
24 | #include <errno.h> | |
25 | #include <fcntl.h> | |
26 | #include <string.h> | |
27 | #include <sys/types.h> | |
28 | #include <unistd.h> | |
29 | ||
30 | #include <linux/nbd.h> | |
31 | #include <linux/fs.h> | |
32 | #include <sys/ioctl.h> | |
33 | #include <sys/socket.h> | |
34 | ||
35 | #include <iostream> | |
36 | #include <fstream> | |
37 | #include <boost/regex.hpp> | |
38 | ||
39 | #include "mon/MonClient.h" | |
40 | #include "common/config.h" | |
41 | #include "common/dout.h" | |
42 | ||
43 | #include "common/errno.h" | |
44 | #include "common/module.h" | |
45 | #include "common/safe_io.h" | |
46 | #include "common/ceph_argparse.h" | |
47 | #include "common/Preforker.h" | |
48 | #include "global/global_init.h" | |
49 | #include "global/signal_handler.h" | |
50 | ||
51 | #include "include/rados/librados.hpp" | |
52 | #include "include/rbd/librbd.hpp" | |
53 | #include "include/stringify.h" | |
54 | #include "include/xlist.h" | |
55 | ||
// Logging configuration for the dout/derr macros used in this file: the
// subsystem and context they log under, and the "rbd-nbd: " line prefix.
#define dout_subsys ceph_subsys_rbd
#define dout_context g_ceph_context
#undef dout_prefix
#define dout_prefix *_dout << "rbd-nbd: "
60 | ||
61 | static void usage() | |
62 | { | |
63 | std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map an image to nbd device\n" | |
64 | << " unmap <device path> Unmap nbd device\n" | |
65 | << " list-mapped List mapped nbd devices\n" | |
66 | << "Options:\n" | |
67 | << " --device <device path> Specify nbd device path\n" | |
68 | << " --read-only Map read-only\n" | |
69 | << " --nbds_max <limit> Override for module param nbds_max\n" | |
70 | << " --max_part <limit> Override for module param max_part\n" | |
71 | << " --exclusive Forbid writes by other clients\n" | |
72 | << std::endl; | |
73 | generic_server_usage(); | |
74 | } | |
75 | ||
// Command-line state shared by the sub-commands below.
static std::string devpath;          // nbd device path (--device or unmap argument)
static std::string poolname("rbd");  // pool part of the image spec
static std::string imgname;          // image part of the image spec
static std::string snapname;         // snapshot part of the image spec, if any
static bool readonly = false;        // --read-only
static int nbds_max = 0;             // --nbds_max (0 means not given)
static int max_part = 255;           // --max_part
static bool set_max_part = false;    // true once --max_part was parsed
static bool exclusive = false;       // --exclusive
static int nbd = -1;                 // fd of the nbd device opened by do_map
83 | ||
84 | #define RBD_NBD_BLKSIZE 512UL | |
85 | ||
86 | #ifdef CEPH_BIG_ENDIAN | |
87 | #define ntohll(a) (a) | |
88 | #elif defined(CEPH_LITTLE_ENDIAN) | |
89 | #define ntohll(a) swab(a) | |
90 | #else | |
91 | #error "Could not determine endianess" | |
92 | #endif | |
93 | #define htonll(a) ntohll(a) | |
94 | ||
95 | static void handle_signal(int signum) | |
96 | { | |
97 | assert(signum == SIGINT || signum == SIGTERM); | |
98 | derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; | |
99 | dout(20) << __func__ << ": " << "sending NBD_DISCONNECT" << dendl; | |
100 | if (ioctl(nbd, NBD_DISCONNECT) < 0) { | |
101 | derr << "rbd-nbd: disconnect failed: " << cpp_strerror(errno) << dendl; | |
102 | } else { | |
103 | dout(20) << __func__ << ": " << "disconnected" << dendl; | |
104 | } | |
105 | } | |
106 | ||
107 | class NBDServer | |
108 | { | |
109 | private: | |
110 | int fd; | |
111 | librbd::Image ℑ | |
112 | ||
113 | public: | |
114 | NBDServer(int _fd, librbd::Image& _image) | |
115 | : fd(_fd) | |
116 | , image(_image) | |
117 | , lock("NBDServer::Locker") | |
118 | , reader_thread(*this, &NBDServer::reader_entry) | |
119 | , writer_thread(*this, &NBDServer::writer_entry) | |
120 | , started(false) | |
121 | {} | |
122 | ||
123 | private: | |
124 | std::atomic<bool> terminated = { false }; | |
125 | ||
126 | void shutdown() | |
127 | { | |
128 | bool expected = false; | |
129 | if (terminated.compare_exchange_strong(expected, true)) { | |
130 | ::shutdown(fd, SHUT_RDWR); | |
131 | ||
132 | Mutex::Locker l(lock); | |
133 | cond.Signal(); | |
134 | } | |
135 | } | |
136 | ||
137 | struct IOContext | |
138 | { | |
139 | xlist<IOContext*>::item item; | |
140 | NBDServer *server; | |
141 | struct nbd_request request; | |
142 | struct nbd_reply reply; | |
143 | bufferlist data; | |
144 | int command; | |
145 | ||
146 | IOContext() | |
147 | : item(this) | |
148 | {} | |
149 | }; | |
150 | ||
151 | friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); | |
152 | ||
153 | Mutex lock; | |
154 | Cond cond; | |
155 | xlist<IOContext*> io_pending; | |
156 | xlist<IOContext*> io_finished; | |
157 | ||
158 | void io_start(IOContext *ctx) | |
159 | { | |
160 | Mutex::Locker l(lock); | |
161 | io_pending.push_back(&ctx->item); | |
162 | } | |
163 | ||
164 | void io_finish(IOContext *ctx) | |
165 | { | |
166 | Mutex::Locker l(lock); | |
167 | assert(ctx->item.is_on_list()); | |
168 | ctx->item.remove_myself(); | |
169 | io_finished.push_back(&ctx->item); | |
170 | cond.Signal(); | |
171 | } | |
172 | ||
173 | IOContext *wait_io_finish() | |
174 | { | |
175 | Mutex::Locker l(lock); | |
176 | while(io_finished.empty() && !terminated) | |
177 | cond.Wait(lock); | |
178 | ||
179 | if (io_finished.empty()) | |
180 | return NULL; | |
181 | ||
182 | IOContext *ret = io_finished.front(); | |
183 | io_finished.pop_front(); | |
184 | ||
185 | return ret; | |
186 | } | |
187 | ||
188 | void wait_clean() | |
189 | { | |
190 | assert(!reader_thread.is_started()); | |
191 | Mutex::Locker l(lock); | |
192 | while(!io_pending.empty()) | |
193 | cond.Wait(lock); | |
194 | ||
195 | while(!io_finished.empty()) { | |
196 | ceph::unique_ptr<IOContext> free_ctx(io_finished.front()); | |
197 | io_finished.pop_front(); | |
198 | } | |
199 | } | |
200 | ||
201 | static void aio_callback(librbd::completion_t cb, void *arg) | |
202 | { | |
203 | librbd::RBD::AioCompletion *aio_completion = | |
204 | reinterpret_cast<librbd::RBD::AioCompletion*>(cb); | |
205 | ||
206 | IOContext *ctx = reinterpret_cast<IOContext *>(arg); | |
207 | int ret = aio_completion->get_return_value(); | |
208 | ||
209 | dout(20) << __func__ << ": " << *ctx << dendl; | |
210 | ||
211 | if (ret == -EINVAL) { | |
212 | // if shrinking an image, a pagecache writeback might reference | |
213 | // extents outside of the range of the new image extents | |
214 | dout(5) << __func__ << ": masking IO out-of-bounds error" << dendl; | |
215 | ctx->data.clear(); | |
216 | ret = 0; | |
217 | } | |
218 | ||
219 | if (ret < 0) { | |
220 | ctx->reply.error = htonl(-ret); | |
221 | } else if ((ctx->command == NBD_CMD_READ) && | |
222 | ret < static_cast<int>(ctx->request.len)) { | |
223 | int pad_byte_count = static_cast<int> (ctx->request.len) - ret; | |
224 | ctx->data.append_zero(pad_byte_count); | |
225 | dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " | |
226 | << pad_byte_count << dendl; | |
227 | ctx->reply.error = 0; | |
228 | } else { | |
229 | ctx->reply.error = htonl(0); | |
230 | } | |
231 | ctx->server->io_finish(ctx); | |
232 | ||
233 | aio_completion->release(); | |
234 | } | |
235 | ||
236 | void reader_entry() | |
237 | { | |
238 | while (!terminated) { | |
239 | ceph::unique_ptr<IOContext> ctx(new IOContext()); | |
240 | ctx->server = this; | |
241 | ||
242 | dout(20) << __func__ << ": waiting for nbd request" << dendl; | |
243 | ||
244 | int r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)); | |
245 | if (r < 0) { | |
246 | derr << "failed to read nbd request header: " << cpp_strerror(r) | |
247 | << dendl; | |
248 | return; | |
249 | } | |
250 | ||
251 | if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) { | |
252 | derr << "invalid nbd request header" << dendl; | |
253 | return; | |
254 | } | |
255 | ||
256 | ctx->request.from = ntohll(ctx->request.from); | |
257 | ctx->request.type = ntohl(ctx->request.type); | |
258 | ctx->request.len = ntohl(ctx->request.len); | |
259 | ||
260 | ctx->reply.magic = htonl(NBD_REPLY_MAGIC); | |
261 | memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle)); | |
262 | ||
263 | ctx->command = ctx->request.type & 0x0000ffff; | |
264 | ||
265 | dout(20) << *ctx << ": start" << dendl; | |
266 | ||
267 | switch (ctx->command) | |
268 | { | |
269 | case NBD_CMD_DISC: | |
270 | // NBD_DO_IT will return when pipe is closed | |
271 | dout(0) << "disconnect request received" << dendl; | |
272 | return; | |
273 | case NBD_CMD_WRITE: | |
274 | bufferptr ptr(ctx->request.len); | |
275 | r = safe_read_exact(fd, ptr.c_str(), ctx->request.len); | |
276 | if (r < 0) { | |
277 | derr << *ctx << ": failed to read nbd request data: " | |
278 | << cpp_strerror(r) << dendl; | |
279 | return; | |
280 | } | |
281 | ctx->data.push_back(ptr); | |
282 | break; | |
283 | } | |
284 | ||
285 | IOContext *pctx = ctx.release(); | |
286 | io_start(pctx); | |
287 | librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback); | |
288 | switch (pctx->command) | |
289 | { | |
290 | case NBD_CMD_WRITE: | |
291 | image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c); | |
292 | break; | |
293 | case NBD_CMD_READ: | |
294 | image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c); | |
295 | break; | |
296 | case NBD_CMD_FLUSH: | |
297 | image.aio_flush(c); | |
298 | break; | |
299 | case NBD_CMD_TRIM: | |
300 | image.aio_discard(pctx->request.from, pctx->request.len, c); | |
301 | break; | |
302 | default: | |
303 | derr << *pctx << ": invalid request command" << dendl; | |
304 | c->release(); | |
305 | return; | |
306 | } | |
307 | } | |
308 | dout(20) << __func__ << ": terminated" << dendl; | |
309 | } | |
310 | ||
311 | void writer_entry() | |
312 | { | |
313 | while (!terminated) { | |
314 | dout(20) << __func__ << ": waiting for io request" << dendl; | |
315 | ceph::unique_ptr<IOContext> ctx(wait_io_finish()); | |
316 | if (!ctx) { | |
317 | dout(20) << __func__ << ": no io requests, terminating" << dendl; | |
318 | return; | |
319 | } | |
320 | ||
321 | dout(20) << __func__ << ": got: " << *ctx << dendl; | |
322 | ||
323 | int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply)); | |
324 | if (r < 0) { | |
325 | derr << *ctx << ": failed to write reply header: " << cpp_strerror(r) | |
326 | << dendl; | |
327 | return; | |
328 | } | |
329 | if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) { | |
330 | r = ctx->data.write_fd(fd); | |
331 | if (r < 0) { | |
332 | derr << *ctx << ": failed to write replay data: " << cpp_strerror(r) | |
333 | << dendl; | |
334 | return; | |
335 | } | |
336 | } | |
337 | dout(20) << *ctx << ": finish" << dendl; | |
338 | } | |
339 | dout(20) << __func__ << ": terminated" << dendl; | |
340 | } | |
341 | ||
342 | class ThreadHelper : public Thread | |
343 | { | |
344 | public: | |
345 | typedef void (NBDServer::*entry_func)(); | |
346 | private: | |
347 | NBDServer &server; | |
348 | entry_func func; | |
349 | public: | |
350 | ThreadHelper(NBDServer &_server, entry_func _func) | |
351 | :server(_server) | |
352 | ,func(_func) | |
353 | {} | |
354 | protected: | |
355 | void* entry() override | |
356 | { | |
357 | (server.*func)(); | |
358 | server.shutdown(); | |
359 | return NULL; | |
360 | } | |
361 | } reader_thread, writer_thread; | |
362 | ||
363 | bool started; | |
364 | public: | |
365 | void start() | |
366 | { | |
367 | if (!started) { | |
368 | dout(10) << __func__ << ": starting" << dendl; | |
369 | ||
370 | started = true; | |
371 | ||
372 | reader_thread.create("rbd_reader"); | |
373 | writer_thread.create("rbd_writer"); | |
374 | } | |
375 | } | |
376 | ||
377 | void stop() | |
378 | { | |
379 | if (started) { | |
380 | dout(10) << __func__ << ": terminating" << dendl; | |
381 | ||
382 | shutdown(); | |
383 | ||
384 | reader_thread.join(); | |
385 | writer_thread.join(); | |
386 | ||
387 | wait_clean(); | |
388 | ||
389 | started = false; | |
390 | } | |
391 | } | |
392 | ||
393 | ~NBDServer() | |
394 | { | |
395 | stop(); | |
396 | } | |
397 | }; | |
398 | ||
399 | std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) { | |
400 | ||
401 | os << "[" << std::hex << ntohll(*((uint64_t *)ctx.request.handle)); | |
402 | ||
403 | switch (ctx.command) | |
404 | { | |
405 | case NBD_CMD_WRITE: | |
406 | os << " WRITE "; | |
407 | break; | |
408 | case NBD_CMD_READ: | |
409 | os << " READ "; | |
410 | break; | |
411 | case NBD_CMD_FLUSH: | |
412 | os << " FLUSH "; | |
413 | break; | |
414 | case NBD_CMD_TRIM: | |
415 | os << " TRIM "; | |
416 | break; | |
417 | default: | |
418 | os << " UNKNOW(" << ctx.command << ") "; | |
419 | break; | |
420 | } | |
421 | ||
422 | os << ctx.request.from << "~" << ctx.request.len << " " | |
423 | << ntohl(ctx.reply.error) << "]"; | |
424 | ||
425 | return os; | |
426 | } | |
427 | ||
428 | class NBDWatchCtx : public librbd::UpdateWatchCtx | |
429 | { | |
430 | private: | |
431 | int fd; | |
432 | librados::IoCtx &io_ctx; | |
433 | librbd::Image ℑ | |
434 | unsigned long size; | |
435 | public: | |
436 | NBDWatchCtx(int _fd, | |
437 | librados::IoCtx &_io_ctx, | |
438 | librbd::Image &_image, | |
439 | unsigned long _size) | |
440 | : fd(_fd) | |
441 | , io_ctx(_io_ctx) | |
442 | , image(_image) | |
443 | , size(_size) | |
444 | { } | |
445 | ||
446 | ~NBDWatchCtx() override {} | |
447 | ||
448 | void handle_notify() override | |
449 | { | |
450 | librbd::image_info_t info; | |
451 | if (image.stat(info, sizeof(info)) == 0) { | |
452 | unsigned long new_size = info.size; | |
453 | ||
454 | if (new_size != size) { | |
455 | if (ioctl(fd, BLKFLSBUF, NULL) < 0) | |
456 | derr << "invalidate page cache failed: " << cpp_strerror(errno) << dendl; | |
457 | if (ioctl(fd, NBD_SET_SIZE, new_size) < 0) { | |
458 | derr << "resize failed: " << cpp_strerror(errno) << dendl; | |
459 | } else { | |
460 | size = new_size; | |
461 | } | |
462 | if (image.invalidate_cache() < 0) | |
463 | derr << "invalidate rbd cache failed" << dendl; | |
464 | } | |
465 | } | |
466 | } | |
467 | }; | |
468 | ||
469 | static int open_device(const char* path, bool try_load_module = false) | |
470 | { | |
471 | int nbd = open(path, O_RDWR); | |
472 | bool loaded_module = false; | |
473 | ||
474 | if (nbd < 0 && try_load_module && access("/sys/module/nbd", F_OK) != 0) { | |
475 | ostringstream param; | |
476 | int r; | |
477 | if (nbds_max) { | |
478 | param << "nbds_max=" << nbds_max; | |
479 | } | |
480 | if (max_part) { | |
481 | param << " max_part=" << max_part; | |
482 | } | |
483 | r = module_load("nbd", param.str().c_str()); | |
484 | if (r < 0) { | |
485 | cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-r) << std::endl; | |
486 | return r; | |
487 | } else { | |
488 | loaded_module = true; | |
489 | } | |
490 | nbd = open(path, O_RDWR); | |
491 | } | |
492 | ||
493 | if ((nbds_max || set_max_part) && | |
494 | try_load_module && !loaded_module) { | |
495 | cerr << "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded" | |
496 | << std::endl; | |
497 | } | |
498 | ||
499 | return nbd; | |
500 | } | |
501 | ||
502 | static int check_device_size(int nbd_index, unsigned long expected_size) | |
503 | { | |
504 | // There are bugs with some older kernel versions that result in an | |
505 | // overflow for large image sizes. This check is to ensure we are | |
506 | // not affected. | |
507 | ||
508 | unsigned long size = 0; | |
509 | std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size"; | |
510 | std::ifstream ifs; | |
511 | ifs.open(path.c_str(), std::ifstream::in); | |
512 | if (!ifs.is_open()) { | |
513 | cerr << "rbd-nbd: failed to open " << path << std::endl; | |
514 | return -EINVAL; | |
515 | } | |
516 | ifs >> size; | |
517 | size *= RBD_NBD_BLKSIZE; | |
518 | ||
519 | if (size == 0) { | |
520 | // Newer kernel versions will report real size only after nbd | |
521 | // connect. Assume this is the case and return success. | |
522 | return 0; | |
523 | } | |
524 | ||
525 | if (size != expected_size) { | |
526 | cerr << "rbd-nbd: kernel reported invalid device size (" << size | |
527 | << ", expected " << expected_size << ")" << std::endl; | |
528 | return -EINVAL; | |
529 | } | |
530 | ||
531 | return 0; | |
532 | } | |
533 | ||
534 | static int do_map(int argc, const char *argv[]) | |
535 | { | |
536 | int r; | |
537 | ||
538 | librados::Rados rados; | |
539 | librbd::RBD rbd; | |
540 | librados::IoCtx io_ctx; | |
541 | librbd::Image image; | |
542 | ||
543 | int read_only = 0; | |
544 | unsigned long flags; | |
545 | unsigned long size; | |
546 | ||
547 | int index = 0; | |
548 | int fd[2]; | |
549 | ||
550 | librbd::image_info_t info; | |
551 | ||
552 | Preforker forker; | |
553 | ||
554 | vector<const char*> args; | |
555 | argv_to_vec(argc, argv, args); | |
556 | env_to_vec(args); | |
557 | ||
558 | auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, | |
559 | CODE_ENVIRONMENT_DAEMON, | |
560 | CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); | |
561 | g_ceph_context->_conf->set_val_or_die("pid_file", ""); | |
562 | ||
563 | if (global_init_prefork(g_ceph_context) >= 0) { | |
564 | std::string err; | |
565 | r = forker.prefork(err); | |
566 | if (r < 0) { | |
567 | cerr << err << std::endl; | |
568 | return r; | |
569 | } | |
570 | ||
571 | if (forker.is_parent()) { | |
572 | global_init_postfork_start(g_ceph_context); | |
573 | if (forker.parent_wait(err) != 0) { | |
574 | return -ENXIO; | |
575 | } | |
576 | return 0; | |
577 | } | |
578 | } | |
579 | ||
580 | common_init_finish(g_ceph_context); | |
581 | global_init_chdir(g_ceph_context); | |
582 | ||
583 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { | |
584 | r = -errno; | |
585 | goto close_ret; | |
586 | } | |
587 | ||
588 | if (devpath.empty()) { | |
589 | char dev[64]; | |
590 | bool try_load_module = true; | |
591 | while (true) { | |
592 | snprintf(dev, sizeof(dev), "/dev/nbd%d", index); | |
593 | ||
594 | nbd = open_device(dev, try_load_module); | |
595 | try_load_module = false; | |
596 | if (nbd < 0) { | |
597 | r = nbd; | |
598 | cerr << "rbd-nbd: failed to find unused device" << std::endl; | |
599 | goto close_fd; | |
600 | } | |
601 | ||
602 | r = ioctl(nbd, NBD_SET_SOCK, fd[0]); | |
603 | if (r < 0) { | |
604 | close(nbd); | |
605 | ++index; | |
606 | continue; | |
607 | } | |
608 | ||
609 | devpath = dev; | |
610 | break; | |
611 | } | |
612 | } else { | |
613 | r = sscanf(devpath.c_str(), "/dev/nbd%d", &index); | |
614 | if (r < 0) { | |
615 | cerr << "rbd-nbd: invalid device path: " << devpath | |
616 | << " (expected /dev/nbd{num})" << std::endl; | |
617 | goto close_fd; | |
618 | } | |
619 | nbd = open_device(devpath.c_str(), true); | |
620 | if (nbd < 0) { | |
621 | r = nbd; | |
622 | cerr << "rbd-nbd: failed to open device: " << devpath << std::endl; | |
623 | goto close_fd; | |
624 | } | |
625 | ||
626 | r = ioctl(nbd, NBD_SET_SOCK, fd[0]); | |
627 | if (r < 0) { | |
628 | r = -errno; | |
629 | cerr << "rbd-nbd: the device " << devpath << " is busy" << std::endl; | |
630 | close(nbd); | |
631 | goto close_fd; | |
632 | } | |
633 | } | |
634 | ||
635 | flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_TRIM | NBD_FLAG_HAS_FLAGS; | |
636 | if (!snapname.empty() || readonly) { | |
637 | flags |= NBD_FLAG_READ_ONLY; | |
638 | read_only = 1; | |
639 | } | |
640 | ||
641 | r = rados.init_with_context(g_ceph_context); | |
642 | if (r < 0) | |
643 | goto close_nbd; | |
644 | ||
645 | r = rados.connect(); | |
646 | if (r < 0) | |
647 | goto close_nbd; | |
648 | ||
649 | r = rados.ioctx_create(poolname.c_str(), io_ctx); | |
650 | if (r < 0) | |
651 | goto close_nbd; | |
652 | ||
653 | r = rbd.open(io_ctx, image, imgname.c_str()); | |
654 | if (r < 0) | |
655 | goto close_nbd; | |
656 | ||
657 | if (exclusive) { | |
658 | r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); | |
659 | if (r < 0) { | |
660 | cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r) | |
661 | << std::endl; | |
662 | goto close_nbd; | |
663 | } | |
664 | } | |
665 | ||
666 | if (!snapname.empty()) { | |
667 | r = image.snap_set(snapname.c_str()); | |
668 | if (r < 0) | |
669 | goto close_nbd; | |
670 | } | |
671 | ||
672 | r = image.stat(info, sizeof(info)); | |
673 | if (r < 0) | |
674 | goto close_nbd; | |
675 | ||
676 | r = ioctl(nbd, NBD_SET_BLKSIZE, RBD_NBD_BLKSIZE); | |
677 | if (r < 0) { | |
678 | r = -errno; | |
679 | goto close_nbd; | |
680 | } | |
681 | ||
682 | if (info.size > ULONG_MAX) { | |
683 | r = -EFBIG; | |
684 | cerr << "rbd-nbd: image is too large (" << prettybyte_t(info.size) | |
685 | << ", max is " << prettybyte_t(ULONG_MAX) << ")" << std::endl; | |
686 | goto close_nbd; | |
687 | } | |
688 | ||
689 | size = info.size; | |
690 | ||
691 | r = ioctl(nbd, NBD_SET_SIZE, size); | |
692 | if (r < 0) { | |
693 | r = -errno; | |
694 | goto close_nbd; | |
695 | } | |
696 | ||
697 | r = check_device_size(index, size); | |
698 | if (r < 0) { | |
699 | goto close_nbd; | |
700 | } | |
701 | ||
702 | ioctl(nbd, NBD_SET_FLAGS, flags); | |
703 | ||
704 | r = ioctl(nbd, BLKROSET, (unsigned long) &read_only); | |
705 | if (r < 0) { | |
706 | r = -errno; | |
707 | goto close_nbd; | |
708 | } | |
709 | ||
710 | { | |
711 | uint64_t handle; | |
712 | ||
713 | NBDWatchCtx watch_ctx(nbd, io_ctx, image, info.size); | |
714 | r = image.update_watch(&watch_ctx, &handle); | |
715 | if (r < 0) | |
716 | goto close_nbd; | |
717 | ||
718 | cout << devpath << std::endl; | |
719 | ||
720 | if (g_conf->daemonize) { | |
721 | forker.daemonize(); | |
722 | global_init_postfork_start(g_ceph_context); | |
723 | global_init_postfork_finish(g_ceph_context); | |
724 | } | |
725 | ||
726 | { | |
727 | NBDServer server(fd[1], image); | |
728 | ||
729 | server.start(); | |
730 | ||
731 | init_async_signal_handler(); | |
732 | register_async_signal_handler(SIGHUP, sighup_handler); | |
733 | register_async_signal_handler_oneshot(SIGINT, handle_signal); | |
734 | register_async_signal_handler_oneshot(SIGTERM, handle_signal); | |
735 | ||
736 | ioctl(nbd, NBD_DO_IT); | |
737 | ||
738 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
739 | unregister_async_signal_handler(SIGINT, handle_signal); | |
740 | unregister_async_signal_handler(SIGTERM, handle_signal); | |
741 | shutdown_async_signal_handler(); | |
742 | ||
743 | server.stop(); | |
744 | } | |
745 | ||
746 | r = image.update_unwatch(handle); | |
747 | assert(r == 0); | |
748 | } | |
749 | ||
750 | close_nbd: | |
751 | if (r < 0) { | |
752 | ioctl(nbd, NBD_CLEAR_SOCK); | |
753 | cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl; | |
754 | } | |
755 | close(nbd); | |
756 | close_fd: | |
757 | close(fd[0]); | |
758 | close(fd[1]); | |
759 | close_ret: | |
760 | image.close(); | |
761 | io_ctx.close(); | |
762 | rados.shutdown(); | |
763 | ||
764 | forker.exit(r < 0 ? EXIT_FAILURE : 0); | |
765 | // Unreachable; | |
766 | return r; | |
767 | } | |
768 | ||
769 | static int do_unmap() | |
770 | { | |
771 | int nbd = open_device(devpath.c_str()); | |
772 | if (nbd < 0) { | |
773 | cerr << "rbd-nbd: failed to open device: " << devpath << std::endl; | |
774 | return nbd; | |
775 | } | |
776 | ||
777 | if (ioctl(nbd, NBD_DISCONNECT) < 0) { | |
778 | cerr << "rbd-nbd: the device is not used" << std::endl; | |
779 | } | |
780 | ||
781 | close(nbd); | |
782 | ||
783 | return 0; | |
784 | } | |
785 | ||
786 | static int parse_imgpath(const std::string &imgpath) | |
787 | { | |
788 | boost::regex pattern("^(?:([^/@]+)/)?([^/@]+)(?:@([^/@]+))?$"); | |
789 | boost::smatch match; | |
790 | if (!boost::regex_match(imgpath, match, pattern)) { | |
791 | std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl; | |
792 | return -EINVAL; | |
793 | } | |
794 | ||
795 | if (match[1].matched) | |
796 | poolname = match[1]; | |
797 | ||
798 | imgname = match[2]; | |
799 | ||
800 | if (match[3].matched) | |
801 | snapname = match[3]; | |
802 | ||
803 | return 0; | |
804 | } | |
805 | ||
806 | static int do_list_mapped_devices() | |
807 | { | |
808 | char path[64]; | |
809 | int m = 0; | |
810 | int fd[2]; | |
811 | ||
812 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { | |
813 | int r = -errno; | |
814 | cerr << "rbd-nbd: socketpair failed: " << cpp_strerror(-r) << std::endl; | |
815 | return r; | |
816 | } | |
817 | ||
818 | while (true) { | |
819 | snprintf(path, sizeof(path), "/dev/nbd%d", m); | |
820 | int nbd = open_device(path); | |
821 | if (nbd < 0) | |
822 | break; | |
823 | if (ioctl(nbd, NBD_SET_SOCK, fd[0]) != 0) | |
824 | cout << path << std::endl; | |
825 | else | |
826 | ioctl(nbd, NBD_CLEAR_SOCK); | |
827 | close(nbd); | |
828 | m++; | |
829 | } | |
830 | ||
831 | close(fd[0]); | |
832 | close(fd[1]); | |
833 | ||
834 | return 0; | |
835 | } | |
836 | ||
837 | static int rbd_nbd(int argc, const char *argv[]) | |
838 | { | |
839 | int r; | |
840 | enum { | |
841 | None, | |
842 | Connect, | |
843 | Disconnect, | |
844 | List | |
845 | } cmd = None; | |
846 | ||
847 | vector<const char*> args; | |
848 | ||
849 | argv_to_vec(argc, argv, args); | |
850 | md_config_t().parse_argv(args); | |
851 | ||
852 | std::vector<const char*>::iterator i; | |
853 | std::ostringstream err; | |
854 | ||
855 | for (i = args.begin(); i != args.end(); ) { | |
856 | if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { | |
857 | usage(); | |
858 | return 0; | |
859 | } else if (ceph_argparse_witharg(args, i, &devpath, "--device", (char *)NULL)) { | |
860 | } else if (ceph_argparse_witharg(args, i, &nbds_max, err, "--nbds_max", (char *)NULL)) { | |
861 | if (!err.str().empty()) { | |
862 | cerr << err.str() << std::endl; | |
863 | return EXIT_FAILURE; | |
864 | } | |
865 | if (nbds_max < 0) { | |
866 | cerr << "rbd-nbd: Invalid argument for nbds_max!" << std::endl; | |
867 | return EXIT_FAILURE; | |
868 | } | |
869 | } else if (ceph_argparse_witharg(args, i, &max_part, err, "--max_part", (char *)NULL)) { | |
870 | if (!err.str().empty()) { | |
871 | cerr << err.str() << std::endl; | |
872 | return EXIT_FAILURE; | |
873 | } | |
874 | if ((max_part < 0) || (max_part > 255)) { | |
875 | cerr << "rbd-nbd: Invalid argument for max_part(0~255)!" << std::endl; | |
876 | return EXIT_FAILURE; | |
877 | } | |
878 | set_max_part = true; | |
879 | } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { | |
880 | readonly = true; | |
881 | } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { | |
882 | exclusive = true; | |
883 | } else { | |
884 | ++i; | |
885 | } | |
886 | } | |
887 | ||
888 | if (args.begin() != args.end()) { | |
889 | if (strcmp(*args.begin(), "map") == 0) { | |
890 | cmd = Connect; | |
891 | } else if (strcmp(*args.begin(), "unmap") == 0) { | |
892 | cmd = Disconnect; | |
893 | } else if (strcmp(*args.begin(), "list-mapped") == 0) { | |
894 | cmd = List; | |
895 | } else { | |
896 | cerr << "rbd-nbd: unknown command: " << *args.begin() << std::endl; | |
897 | return EXIT_FAILURE; | |
898 | } | |
899 | args.erase(args.begin()); | |
900 | } | |
901 | ||
902 | if (cmd == None) { | |
903 | cerr << "rbd-nbd: must specify command" << std::endl; | |
904 | return EXIT_FAILURE; | |
905 | } | |
906 | ||
907 | switch (cmd) { | |
908 | case Connect: | |
909 | if (args.begin() == args.end()) { | |
910 | cerr << "rbd-nbd: must specify image-or-snap-spec" << std::endl; | |
911 | return EXIT_FAILURE; | |
912 | } | |
913 | if (parse_imgpath(string(*args.begin())) < 0) | |
914 | return EXIT_FAILURE; | |
915 | args.erase(args.begin()); | |
916 | break; | |
917 | case Disconnect: | |
918 | if (args.begin() == args.end()) { | |
919 | cerr << "rbd-nbd: must specify nbd device path" << std::endl; | |
920 | return EXIT_FAILURE; | |
921 | } | |
922 | devpath = *args.begin(); | |
923 | args.erase(args.begin()); | |
924 | break; | |
925 | default: | |
926 | //shut up gcc; | |
927 | break; | |
928 | } | |
929 | ||
930 | if (args.begin() != args.end()) { | |
931 | cerr << "rbd-nbd: unknown args: " << *args.begin() << std::endl; | |
932 | return EXIT_FAILURE; | |
933 | } | |
934 | ||
935 | switch (cmd) { | |
936 | case Connect: | |
937 | if (imgname.empty()) { | |
938 | cerr << "rbd-nbd: image name was not specified" << std::endl; | |
939 | return EXIT_FAILURE; | |
940 | } | |
941 | ||
942 | r = do_map(argc, argv); | |
943 | if (r < 0) | |
944 | return EXIT_FAILURE; | |
945 | break; | |
946 | case Disconnect: | |
947 | r = do_unmap(); | |
948 | if (r < 0) | |
949 | return EXIT_FAILURE; | |
950 | break; | |
951 | case List: | |
952 | r = do_list_mapped_devices(); | |
953 | if (r < 0) | |
954 | return EXIT_FAILURE; | |
955 | break; | |
956 | default: | |
957 | usage(); | |
958 | return EXIT_FAILURE; | |
959 | } | |
960 | ||
961 | return 0; | |
962 | } | |
963 | ||
964 | int main(int argc, const char *argv[]) | |
965 | { | |
966 | return rbd_nbd(argc, argv); | |
967 | } |