]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | /* | |
5 | * rbd-nbd - RBD in userspace | |
6 | * | |
7 | * Copyright (C) 2015 - 2016 Kylin Corporation | |
8 | * | |
9 | * Author: Yunchuan Wen <yunchuan.wen@kylin-cloud.com> | |
10 | * Li Wang <li.wang@kylin-cloud.com> | |
11 | * | |
12 | * This is free software; you can redistribute it and/or | |
13 | * modify it under the terms of the GNU Lesser General Public | |
14 | * License version 2.1, as published by the Free Software | |
15 | * Foundation. See file COPYING. | |
16 | * | |
17 | */ | |
18 | ||
19 | #include "include/int_types.h" | |
20 | ||
21 | #include <stdio.h> | |
22 | #include <stdlib.h> | |
23 | #include <stddef.h> | |
24 | #include <errno.h> | |
25 | #include <fcntl.h> | |
26 | #include <string.h> | |
27 | #include <sys/types.h> | |
28 | #include <unistd.h> | |
29 | ||
30 | #include <linux/nbd.h> | |
31 | #include <linux/fs.h> | |
32 | #include <sys/ioctl.h> | |
33 | #include <sys/socket.h> | |
34 | ||
35 | #include <iostream> | |
36 | #include <fstream> | |
37 | #include <boost/regex.hpp> | |
38 | ||
39 | #include "mon/MonClient.h" | |
40 | #include "common/config.h" | |
41 | #include "common/dout.h" | |
42 | ||
43 | #include "common/errno.h" | |
44 | #include "common/module.h" | |
45 | #include "common/safe_io.h" | |
31f18b77 | 46 | #include "common/TextTable.h" |
7c673cae FG |
47 | #include "common/ceph_argparse.h" |
48 | #include "common/Preforker.h" | |
224ce89b | 49 | #include "common/version.h" |
7c673cae FG |
50 | #include "global/global_init.h" |
51 | #include "global/signal_handler.h" | |
52 | ||
53 | #include "include/rados/librados.hpp" | |
54 | #include "include/rbd/librbd.hpp" | |
55 | #include "include/stringify.h" | |
56 | #include "include/xlist.h" | |
57 | ||
58 | #define dout_context g_ceph_context | |
59 | #define dout_subsys ceph_subsys_rbd | |
60 | #undef dout_prefix | |
61 | #define dout_prefix *_dout << "rbd-nbd: " | |
62 | ||
31f18b77 FG |
/// Options describing a single rbd-nbd invocation.  parse_args() fills
/// this in; do_map() / do_unmap() / do_list_mapped_devices() consume it.
struct Config {
  // Parameters handed to the nbd kernel module when open_device() has to
  // load it; a value of 0 leaves the corresponding parameter out.
  int nbds_max{0};
  int max_part{255};

  // Acquire the exclusive lock on the image before mapping.
  bool exclusive{false};
  // Map the device read-only.
  bool readonly{false};
  // Set once --max_part has been given explicitly on the command line.
  bool set_max_part{false};

  // Pieces of the image-or-snap spec, plus the nbd device path.
  std::string poolname;
  std::string imgname;
  std::string snapname;
  std::string devpath;
};
76 | ||
7c673cae FG |
77 | static void usage() |
78 | { | |
79 | std::cout << "Usage: rbd-nbd [options] map <image-or-snap-spec> Map an image to nbd device\n" | |
80 | << " unmap <device path> Unmap nbd device\n" | |
81 | << " list-mapped List mapped nbd devices\n" | |
82 | << "Options:\n" | |
83 | << " --device <device path> Specify nbd device path\n" | |
84 | << " --read-only Map read-only\n" | |
85 | << " --nbds_max <limit> Override for module param nbds_max\n" | |
86 | << " --max_part <limit> Override for module param max_part\n" | |
87 | << " --exclusive Forbid writes by other clients\n" | |
88 | << std::endl; | |
89 | generic_server_usage(); | |
90 | } | |
91 | ||
7c673cae FG |
// File descriptor of the nbd device being served. do_map() assigns it from
// open_device() and handle_signal() issues NBD_DISCONNECT on it.
92 | static int nbd = -1; |
93 | ||
31f18b77 FG |
// Sub-command selected on the command line. parse_args() sets it and
// rbd_nbd() dispatches on it; None means no command has been recognised.
94 | static enum { |
95 | None, | |
96 | Connect, | |
97 | Disconnect, | |
98 | List | |
99 | } cmd = None; | |
100 | ||
7c673cae FG |
// Block size passed to NBD_SET_BLKSIZE in do_map(); check_device_size()
// also multiplies the value read from sysfs by it.
101 | #define RBD_NBD_BLKSIZE 512UL |
102 | ||
224ce89b WB |
// Positive return codes of parse_args() asking the caller to print the
// usage text or the version string instead of running a command.
103 | #define HELP_INFO 1 |
104 | #define VERSION_INFO 2 | |
105 | ||
7c673cae FG |
// 64-bit network/host byte-order conversion: the identity on big-endian
// builds, swab() on little-endian builds. htonll is defined as ntohll.
106 | #ifdef CEPH_BIG_ENDIAN |
107 | #define ntohll(a) (a) | |
108 | #elif defined(CEPH_LITTLE_ENDIAN) | |
109 | #define ntohll(a) swab(a) | |
110 | #else | |
111 | #error "Could not determine endianess" | |
112 | #endif | |
113 | #define htonll(a) ntohll(a) | |
114 | ||
31f18b77 FG |
// Forward declaration: get_mapped_info() calls parse_args() before its
// definition further down in this file.
115 | static int parse_args(vector<const char*>& args, std::ostream *err_msg, Config *cfg); |
116 | ||
7c673cae FG |
117 | static void handle_signal(int signum) |
118 | { | |
119 | assert(signum == SIGINT || signum == SIGTERM); | |
120 | derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; | |
121 | dout(20) << __func__ << ": " << "sending NBD_DISCONNECT" << dendl; | |
122 | if (ioctl(nbd, NBD_DISCONNECT) < 0) { | |
123 | derr << "rbd-nbd: disconnect failed: " << cpp_strerror(errno) << dendl; | |
124 | } else { | |
125 | dout(20) << __func__ << ": " << "disconnected" << dendl; | |
126 | } | |
127 | } | |
128 | ||
129 | class NBDServer | |
130 | { | |
131 | private: | |
132 | int fd; | |
133 | librbd::Image ℑ | |
134 | ||
135 | public: | |
136 | NBDServer(int _fd, librbd::Image& _image) | |
137 | : fd(_fd) | |
138 | , image(_image) | |
139 | , lock("NBDServer::Locker") | |
140 | , reader_thread(*this, &NBDServer::reader_entry) | |
141 | , writer_thread(*this, &NBDServer::writer_entry) | |
142 | , started(false) | |
143 | {} | |
144 | ||
145 | private: | |
146 | std::atomic<bool> terminated = { false }; | |
147 | ||
148 | void shutdown() | |
149 | { | |
150 | bool expected = false; | |
151 | if (terminated.compare_exchange_strong(expected, true)) { | |
152 | ::shutdown(fd, SHUT_RDWR); | |
153 | ||
154 | Mutex::Locker l(lock); | |
155 | cond.Signal(); | |
156 | } | |
157 | } | |
158 | ||
159 | struct IOContext | |
160 | { | |
161 | xlist<IOContext*>::item item; | |
162 | NBDServer *server; | |
163 | struct nbd_request request; | |
164 | struct nbd_reply reply; | |
165 | bufferlist data; | |
166 | int command; | |
167 | ||
168 | IOContext() | |
169 | : item(this) | |
170 | {} | |
171 | }; | |
172 | ||
173 | friend std::ostream &operator<<(std::ostream &os, const IOContext &ctx); | |
174 | ||
175 | Mutex lock; | |
176 | Cond cond; | |
177 | xlist<IOContext*> io_pending; | |
178 | xlist<IOContext*> io_finished; | |
179 | ||
180 | void io_start(IOContext *ctx) | |
181 | { | |
182 | Mutex::Locker l(lock); | |
183 | io_pending.push_back(&ctx->item); | |
184 | } | |
185 | ||
186 | void io_finish(IOContext *ctx) | |
187 | { | |
188 | Mutex::Locker l(lock); | |
189 | assert(ctx->item.is_on_list()); | |
190 | ctx->item.remove_myself(); | |
191 | io_finished.push_back(&ctx->item); | |
192 | cond.Signal(); | |
193 | } | |
194 | ||
195 | IOContext *wait_io_finish() | |
196 | { | |
197 | Mutex::Locker l(lock); | |
198 | while(io_finished.empty() && !terminated) | |
199 | cond.Wait(lock); | |
200 | ||
201 | if (io_finished.empty()) | |
202 | return NULL; | |
203 | ||
204 | IOContext *ret = io_finished.front(); | |
205 | io_finished.pop_front(); | |
206 | ||
207 | return ret; | |
208 | } | |
209 | ||
210 | void wait_clean() | |
211 | { | |
212 | assert(!reader_thread.is_started()); | |
213 | Mutex::Locker l(lock); | |
214 | while(!io_pending.empty()) | |
215 | cond.Wait(lock); | |
216 | ||
217 | while(!io_finished.empty()) { | |
218 | ceph::unique_ptr<IOContext> free_ctx(io_finished.front()); | |
219 | io_finished.pop_front(); | |
220 | } | |
221 | } | |
222 | ||
223 | static void aio_callback(librbd::completion_t cb, void *arg) | |
224 | { | |
225 | librbd::RBD::AioCompletion *aio_completion = | |
226 | reinterpret_cast<librbd::RBD::AioCompletion*>(cb); | |
227 | ||
228 | IOContext *ctx = reinterpret_cast<IOContext *>(arg); | |
229 | int ret = aio_completion->get_return_value(); | |
230 | ||
231 | dout(20) << __func__ << ": " << *ctx << dendl; | |
232 | ||
233 | if (ret == -EINVAL) { | |
234 | // if shrinking an image, a pagecache writeback might reference | |
235 | // extents outside of the range of the new image extents | |
181888fb | 236 | dout(0) << __func__ << ": masking IO out-of-bounds error" << dendl; |
7c673cae FG |
237 | ctx->data.clear(); |
238 | ret = 0; | |
239 | } | |
240 | ||
241 | if (ret < 0) { | |
242 | ctx->reply.error = htonl(-ret); | |
243 | } else if ((ctx->command == NBD_CMD_READ) && | |
244 | ret < static_cast<int>(ctx->request.len)) { | |
245 | int pad_byte_count = static_cast<int> (ctx->request.len) - ret; | |
246 | ctx->data.append_zero(pad_byte_count); | |
247 | dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " | |
248 | << pad_byte_count << dendl; | |
249 | ctx->reply.error = 0; | |
250 | } else { | |
251 | ctx->reply.error = htonl(0); | |
252 | } | |
253 | ctx->server->io_finish(ctx); | |
254 | ||
255 | aio_completion->release(); | |
256 | } | |
257 | ||
258 | void reader_entry() | |
259 | { | |
260 | while (!terminated) { | |
261 | ceph::unique_ptr<IOContext> ctx(new IOContext()); | |
262 | ctx->server = this; | |
263 | ||
264 | dout(20) << __func__ << ": waiting for nbd request" << dendl; | |
265 | ||
266 | int r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)); | |
267 | if (r < 0) { | |
268 | derr << "failed to read nbd request header: " << cpp_strerror(r) | |
269 | << dendl; | |
270 | return; | |
271 | } | |
272 | ||
273 | if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) { | |
274 | derr << "invalid nbd request header" << dendl; | |
275 | return; | |
276 | } | |
277 | ||
278 | ctx->request.from = ntohll(ctx->request.from); | |
279 | ctx->request.type = ntohl(ctx->request.type); | |
280 | ctx->request.len = ntohl(ctx->request.len); | |
281 | ||
282 | ctx->reply.magic = htonl(NBD_REPLY_MAGIC); | |
283 | memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle)); | |
284 | ||
285 | ctx->command = ctx->request.type & 0x0000ffff; | |
286 | ||
287 | dout(20) << *ctx << ": start" << dendl; | |
288 | ||
289 | switch (ctx->command) | |
290 | { | |
291 | case NBD_CMD_DISC: | |
292 | // NBD_DO_IT will return when pipe is closed | |
293 | dout(0) << "disconnect request received" << dendl; | |
294 | return; | |
295 | case NBD_CMD_WRITE: | |
296 | bufferptr ptr(ctx->request.len); | |
297 | r = safe_read_exact(fd, ptr.c_str(), ctx->request.len); | |
298 | if (r < 0) { | |
299 | derr << *ctx << ": failed to read nbd request data: " | |
300 | << cpp_strerror(r) << dendl; | |
301 | return; | |
302 | } | |
303 | ctx->data.push_back(ptr); | |
304 | break; | |
305 | } | |
306 | ||
307 | IOContext *pctx = ctx.release(); | |
308 | io_start(pctx); | |
309 | librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback); | |
310 | switch (pctx->command) | |
311 | { | |
312 | case NBD_CMD_WRITE: | |
313 | image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c); | |
314 | break; | |
315 | case NBD_CMD_READ: | |
316 | image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c); | |
317 | break; | |
318 | case NBD_CMD_FLUSH: | |
319 | image.aio_flush(c); | |
320 | break; | |
321 | case NBD_CMD_TRIM: | |
322 | image.aio_discard(pctx->request.from, pctx->request.len, c); | |
323 | break; | |
324 | default: | |
325 | derr << *pctx << ": invalid request command" << dendl; | |
326 | c->release(); | |
327 | return; | |
328 | } | |
329 | } | |
330 | dout(20) << __func__ << ": terminated" << dendl; | |
331 | } | |
332 | ||
333 | void writer_entry() | |
334 | { | |
335 | while (!terminated) { | |
336 | dout(20) << __func__ << ": waiting for io request" << dendl; | |
337 | ceph::unique_ptr<IOContext> ctx(wait_io_finish()); | |
338 | if (!ctx) { | |
339 | dout(20) << __func__ << ": no io requests, terminating" << dendl; | |
340 | return; | |
341 | } | |
342 | ||
343 | dout(20) << __func__ << ": got: " << *ctx << dendl; | |
344 | ||
345 | int r = safe_write(fd, &ctx->reply, sizeof(struct nbd_reply)); | |
346 | if (r < 0) { | |
347 | derr << *ctx << ": failed to write reply header: " << cpp_strerror(r) | |
348 | << dendl; | |
349 | return; | |
350 | } | |
351 | if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) { | |
352 | r = ctx->data.write_fd(fd); | |
353 | if (r < 0) { | |
354 | derr << *ctx << ": failed to write replay data: " << cpp_strerror(r) | |
355 | << dendl; | |
356 | return; | |
357 | } | |
358 | } | |
359 | dout(20) << *ctx << ": finish" << dendl; | |
360 | } | |
361 | dout(20) << __func__ << ": terminated" << dendl; | |
362 | } | |
363 | ||
364 | class ThreadHelper : public Thread | |
365 | { | |
366 | public: | |
367 | typedef void (NBDServer::*entry_func)(); | |
368 | private: | |
369 | NBDServer &server; | |
370 | entry_func func; | |
371 | public: | |
372 | ThreadHelper(NBDServer &_server, entry_func _func) | |
373 | :server(_server) | |
374 | ,func(_func) | |
375 | {} | |
376 | protected: | |
377 | void* entry() override | |
378 | { | |
379 | (server.*func)(); | |
380 | server.shutdown(); | |
381 | return NULL; | |
382 | } | |
383 | } reader_thread, writer_thread; | |
384 | ||
385 | bool started; | |
386 | public: | |
387 | void start() | |
388 | { | |
389 | if (!started) { | |
390 | dout(10) << __func__ << ": starting" << dendl; | |
391 | ||
392 | started = true; | |
393 | ||
394 | reader_thread.create("rbd_reader"); | |
395 | writer_thread.create("rbd_writer"); | |
396 | } | |
397 | } | |
398 | ||
399 | void stop() | |
400 | { | |
401 | if (started) { | |
402 | dout(10) << __func__ << ": terminating" << dendl; | |
403 | ||
404 | shutdown(); | |
405 | ||
406 | reader_thread.join(); | |
407 | writer_thread.join(); | |
408 | ||
409 | wait_clean(); | |
410 | ||
411 | started = false; | |
412 | } | |
413 | } | |
414 | ||
415 | ~NBDServer() | |
416 | { | |
417 | stop(); | |
418 | } | |
419 | }; | |
420 | ||
421 | std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) { | |
422 | ||
423 | os << "[" << std::hex << ntohll(*((uint64_t *)ctx.request.handle)); | |
424 | ||
425 | switch (ctx.command) | |
426 | { | |
427 | case NBD_CMD_WRITE: | |
428 | os << " WRITE "; | |
429 | break; | |
430 | case NBD_CMD_READ: | |
431 | os << " READ "; | |
432 | break; | |
433 | case NBD_CMD_FLUSH: | |
434 | os << " FLUSH "; | |
435 | break; | |
436 | case NBD_CMD_TRIM: | |
437 | os << " TRIM "; | |
438 | break; | |
439 | default: | |
440 | os << " UNKNOW(" << ctx.command << ") "; | |
441 | break; | |
442 | } | |
443 | ||
444 | os << ctx.request.from << "~" << ctx.request.len << " " | |
445 | << ntohl(ctx.reply.error) << "]"; | |
446 | ||
447 | return os; | |
448 | } | |
449 | ||
450 | class NBDWatchCtx : public librbd::UpdateWatchCtx | |
451 | { | |
452 | private: | |
453 | int fd; | |
454 | librados::IoCtx &io_ctx; | |
455 | librbd::Image ℑ | |
456 | unsigned long size; | |
457 | public: | |
458 | NBDWatchCtx(int _fd, | |
459 | librados::IoCtx &_io_ctx, | |
460 | librbd::Image &_image, | |
461 | unsigned long _size) | |
462 | : fd(_fd) | |
463 | , io_ctx(_io_ctx) | |
464 | , image(_image) | |
465 | , size(_size) | |
466 | { } | |
467 | ||
468 | ~NBDWatchCtx() override {} | |
469 | ||
470 | void handle_notify() override | |
471 | { | |
472 | librbd::image_info_t info; | |
473 | if (image.stat(info, sizeof(info)) == 0) { | |
474 | unsigned long new_size = info.size; | |
475 | ||
476 | if (new_size != size) { | |
b32b8144 | 477 | dout(5) << "resize detected" << dendl; |
7c673cae | 478 | if (ioctl(fd, BLKFLSBUF, NULL) < 0) |
b32b8144 FG |
479 | derr << "invalidate page cache failed: " << cpp_strerror(errno) |
480 | << dendl; | |
7c673cae FG |
481 | if (ioctl(fd, NBD_SET_SIZE, new_size) < 0) { |
482 | derr << "resize failed: " << cpp_strerror(errno) << dendl; | |
483 | } else { | |
484 | size = new_size; | |
485 | } | |
b32b8144 FG |
486 | if (ioctl(fd, BLKRRPART, NULL) < 0) { |
487 | derr << "rescan of partition table failed: " << cpp_strerror(errno) | |
488 | << dendl; | |
489 | } | |
7c673cae FG |
490 | if (image.invalidate_cache() < 0) |
491 | derr << "invalidate rbd cache failed" << dendl; | |
492 | } | |
493 | } | |
494 | } | |
495 | }; | |
496 | ||
31f18b77 | 497 | static int open_device(const char* path, Config *cfg = nullptr, bool try_load_module = false) |
7c673cae FG |
498 | { |
499 | int nbd = open(path, O_RDWR); | |
500 | bool loaded_module = false; | |
501 | ||
502 | if (nbd < 0 && try_load_module && access("/sys/module/nbd", F_OK) != 0) { | |
503 | ostringstream param; | |
504 | int r; | |
31f18b77 FG |
505 | if (cfg->nbds_max) { |
506 | param << "nbds_max=" << cfg->nbds_max; | |
7c673cae | 507 | } |
31f18b77 FG |
508 | if (cfg->max_part) { |
509 | param << " max_part=" << cfg->max_part; | |
7c673cae FG |
510 | } |
511 | r = module_load("nbd", param.str().c_str()); | |
512 | if (r < 0) { | |
513 | cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-r) << std::endl; | |
514 | return r; | |
515 | } else { | |
516 | loaded_module = true; | |
517 | } | |
518 | nbd = open(path, O_RDWR); | |
519 | } | |
520 | ||
31f18b77 FG |
521 | if (try_load_module && !loaded_module && |
522 | (cfg->nbds_max || cfg->set_max_part)) { | |
7c673cae FG |
523 | cerr << "rbd-nbd: ignoring kernel module parameter options: nbd module already loaded" |
524 | << std::endl; | |
525 | } | |
526 | ||
527 | return nbd; | |
528 | } | |
529 | ||
530 | static int check_device_size(int nbd_index, unsigned long expected_size) | |
531 | { | |
532 | // There are bugs with some older kernel versions that result in an | |
533 | // overflow for large image sizes. This check is to ensure we are | |
534 | // not affected. | |
535 | ||
536 | unsigned long size = 0; | |
537 | std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size"; | |
538 | std::ifstream ifs; | |
539 | ifs.open(path.c_str(), std::ifstream::in); | |
540 | if (!ifs.is_open()) { | |
541 | cerr << "rbd-nbd: failed to open " << path << std::endl; | |
542 | return -EINVAL; | |
543 | } | |
544 | ifs >> size; | |
545 | size *= RBD_NBD_BLKSIZE; | |
546 | ||
547 | if (size == 0) { | |
548 | // Newer kernel versions will report real size only after nbd | |
549 | // connect. Assume this is the case and return success. | |
550 | return 0; | |
551 | } | |
552 | ||
553 | if (size != expected_size) { | |
554 | cerr << "rbd-nbd: kernel reported invalid device size (" << size | |
555 | << ", expected " << expected_size << ")" << std::endl; | |
556 | return -EINVAL; | |
557 | } | |
558 | ||
559 | return 0; | |
560 | } | |
561 | ||
31f18b77 | 562 | static int do_map(int argc, const char *argv[], Config *cfg) |
7c673cae FG |
563 | { |
564 | int r; | |
565 | ||
566 | librados::Rados rados; | |
567 | librbd::RBD rbd; | |
568 | librados::IoCtx io_ctx; | |
569 | librbd::Image image; | |
570 | ||
571 | int read_only = 0; | |
572 | unsigned long flags; | |
573 | unsigned long size; | |
574 | ||
575 | int index = 0; | |
576 | int fd[2]; | |
577 | ||
578 | librbd::image_info_t info; | |
579 | ||
580 | Preforker forker; | |
581 | ||
582 | vector<const char*> args; | |
583 | argv_to_vec(argc, argv, args); | |
584 | env_to_vec(args); | |
585 | ||
586 | auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, | |
587 | CODE_ENVIRONMENT_DAEMON, | |
588 | CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); | |
589 | g_ceph_context->_conf->set_val_or_die("pid_file", ""); | |
590 | ||
591 | if (global_init_prefork(g_ceph_context) >= 0) { | |
592 | std::string err; | |
593 | r = forker.prefork(err); | |
594 | if (r < 0) { | |
595 | cerr << err << std::endl; | |
596 | return r; | |
597 | } | |
7c673cae | 598 | if (forker.is_parent()) { |
7c673cae FG |
599 | if (forker.parent_wait(err) != 0) { |
600 | return -ENXIO; | |
601 | } | |
602 | return 0; | |
603 | } | |
28e407b8 | 604 | global_init_postfork_start(g_ceph_context); |
7c673cae FG |
605 | } |
606 | ||
607 | common_init_finish(g_ceph_context); | |
608 | global_init_chdir(g_ceph_context); | |
609 | ||
610 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { | |
611 | r = -errno; | |
612 | goto close_ret; | |
613 | } | |
614 | ||
94b18763 FG |
615 | r = rados.init_with_context(g_ceph_context); |
616 | if (r < 0) | |
617 | goto close_fd; | |
618 | ||
619 | r = rados.connect(); | |
620 | if (r < 0) | |
621 | goto close_fd; | |
622 | ||
623 | r = rados.ioctx_create(cfg->poolname.c_str(), io_ctx); | |
624 | if (r < 0) | |
625 | goto close_fd; | |
626 | ||
627 | r = rbd.open(io_ctx, image, cfg->imgname.c_str()); | |
628 | if (r < 0) | |
629 | goto close_fd; | |
630 | ||
631 | if (cfg->exclusive) { | |
632 | r = image.lock_acquire(RBD_LOCK_MODE_EXCLUSIVE); | |
633 | if (r < 0) { | |
634 | cerr << "rbd-nbd: failed to acquire exclusive lock: " << cpp_strerror(r) | |
635 | << std::endl; | |
636 | goto close_fd; | |
637 | } | |
638 | } | |
639 | ||
640 | if (!cfg->snapname.empty()) { | |
641 | r = image.snap_set(cfg->snapname.c_str()); | |
642 | if (r < 0) | |
643 | goto close_fd; | |
644 | } | |
645 | ||
646 | r = image.stat(info, sizeof(info)); | |
647 | if (r < 0) | |
648 | goto close_fd; | |
649 | ||
31f18b77 | 650 | if (cfg->devpath.empty()) { |
7c673cae FG |
651 | char dev[64]; |
652 | bool try_load_module = true; | |
b32b8144 FG |
653 | const char *path = "/sys/module/nbd/parameters/nbds_max"; |
654 | int nbds_max = -1; | |
655 | if (access(path, F_OK) == 0) { | |
656 | std::ifstream ifs; | |
657 | ifs.open(path, std::ifstream::in); | |
658 | if (ifs.is_open()) { | |
659 | ifs >> nbds_max; | |
660 | ifs.close(); | |
661 | } | |
662 | } | |
663 | ||
7c673cae FG |
664 | while (true) { |
665 | snprintf(dev, sizeof(dev), "/dev/nbd%d", index); | |
666 | ||
31f18b77 | 667 | nbd = open_device(dev, cfg, try_load_module); |
7c673cae FG |
668 | try_load_module = false; |
669 | if (nbd < 0) { | |
b32b8144 FG |
670 | if (nbd == -EPERM && nbds_max != -1 && index < (nbds_max-1)) { |
671 | ++index; | |
672 | continue; | |
673 | } | |
7c673cae FG |
674 | r = nbd; |
675 | cerr << "rbd-nbd: failed to find unused device" << std::endl; | |
676 | goto close_fd; | |
677 | } | |
678 | ||
679 | r = ioctl(nbd, NBD_SET_SOCK, fd[0]); | |
680 | if (r < 0) { | |
681 | close(nbd); | |
682 | ++index; | |
683 | continue; | |
684 | } | |
685 | ||
31f18b77 | 686 | cfg->devpath = dev; |
7c673cae FG |
687 | break; |
688 | } | |
689 | } else { | |
31f18b77 | 690 | r = sscanf(cfg->devpath.c_str(), "/dev/nbd%d", &index); |
7c673cae | 691 | if (r < 0) { |
31f18b77 | 692 | cerr << "rbd-nbd: invalid device path: " << cfg->devpath |
7c673cae FG |
693 | << " (expected /dev/nbd{num})" << std::endl; |
694 | goto close_fd; | |
695 | } | |
31f18b77 | 696 | nbd = open_device(cfg->devpath.c_str(), cfg, true); |
7c673cae FG |
697 | if (nbd < 0) { |
698 | r = nbd; | |
31f18b77 | 699 | cerr << "rbd-nbd: failed to open device: " << cfg->devpath << std::endl; |
7c673cae FG |
700 | goto close_fd; |
701 | } | |
702 | ||
703 | r = ioctl(nbd, NBD_SET_SOCK, fd[0]); | |
704 | if (r < 0) { | |
705 | r = -errno; | |
31f18b77 | 706 | cerr << "rbd-nbd: the device " << cfg->devpath << " is busy" << std::endl; |
7c673cae FG |
707 | close(nbd); |
708 | goto close_fd; | |
709 | } | |
710 | } | |
711 | ||
712 | flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_TRIM | NBD_FLAG_HAS_FLAGS; | |
31f18b77 | 713 | if (!cfg->snapname.empty() || cfg->readonly) { |
7c673cae FG |
714 | flags |= NBD_FLAG_READ_ONLY; |
715 | read_only = 1; | |
716 | } | |
717 | ||
7c673cae FG |
718 | r = ioctl(nbd, NBD_SET_BLKSIZE, RBD_NBD_BLKSIZE); |
719 | if (r < 0) { | |
720 | r = -errno; | |
721 | goto close_nbd; | |
722 | } | |
723 | ||
724 | if (info.size > ULONG_MAX) { | |
725 | r = -EFBIG; | |
726 | cerr << "rbd-nbd: image is too large (" << prettybyte_t(info.size) | |
727 | << ", max is " << prettybyte_t(ULONG_MAX) << ")" << std::endl; | |
728 | goto close_nbd; | |
729 | } | |
730 | ||
731 | size = info.size; | |
732 | ||
733 | r = ioctl(nbd, NBD_SET_SIZE, size); | |
734 | if (r < 0) { | |
735 | r = -errno; | |
736 | goto close_nbd; | |
737 | } | |
738 | ||
739 | r = check_device_size(index, size); | |
740 | if (r < 0) { | |
741 | goto close_nbd; | |
742 | } | |
743 | ||
744 | ioctl(nbd, NBD_SET_FLAGS, flags); | |
745 | ||
746 | r = ioctl(nbd, BLKROSET, (unsigned long) &read_only); | |
747 | if (r < 0) { | |
748 | r = -errno; | |
749 | goto close_nbd; | |
750 | } | |
751 | ||
752 | { | |
753 | uint64_t handle; | |
754 | ||
755 | NBDWatchCtx watch_ctx(nbd, io_ctx, image, info.size); | |
756 | r = image.update_watch(&watch_ctx, &handle); | |
757 | if (r < 0) | |
758 | goto close_nbd; | |
759 | ||
31f18b77 | 760 | cout << cfg->devpath << std::endl; |
7c673cae FG |
761 | |
762 | if (g_conf->daemonize) { | |
7c673cae | 763 | global_init_postfork_finish(g_ceph_context); |
28e407b8 | 764 | forker.daemonize(); |
7c673cae FG |
765 | } |
766 | ||
767 | { | |
768 | NBDServer server(fd[1], image); | |
769 | ||
770 | server.start(); | |
771 | ||
772 | init_async_signal_handler(); | |
773 | register_async_signal_handler(SIGHUP, sighup_handler); | |
774 | register_async_signal_handler_oneshot(SIGINT, handle_signal); | |
775 | register_async_signal_handler_oneshot(SIGTERM, handle_signal); | |
776 | ||
777 | ioctl(nbd, NBD_DO_IT); | |
778 | ||
779 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
780 | unregister_async_signal_handler(SIGINT, handle_signal); | |
781 | unregister_async_signal_handler(SIGTERM, handle_signal); | |
782 | shutdown_async_signal_handler(); | |
783 | ||
784 | server.stop(); | |
785 | } | |
786 | ||
787 | r = image.update_unwatch(handle); | |
788 | assert(r == 0); | |
789 | } | |
790 | ||
791 | close_nbd: | |
792 | if (r < 0) { | |
793 | ioctl(nbd, NBD_CLEAR_SOCK); | |
794 | cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl; | |
795 | } | |
796 | close(nbd); | |
797 | close_fd: | |
798 | close(fd[0]); | |
799 | close(fd[1]); | |
800 | close_ret: | |
801 | image.close(); | |
802 | io_ctx.close(); | |
803 | rados.shutdown(); | |
804 | ||
805 | forker.exit(r < 0 ? EXIT_FAILURE : 0); | |
806 | // Unreachable; | |
807 | return r; | |
808 | } | |
809 | ||
31f18b77 | 810 | static int do_unmap(const std::string &devpath) |
7c673cae | 811 | { |
31f18b77 FG |
812 | int r = 0; |
813 | ||
7c673cae FG |
814 | int nbd = open_device(devpath.c_str()); |
815 | if (nbd < 0) { | |
816 | cerr << "rbd-nbd: failed to open device: " << devpath << std::endl; | |
817 | return nbd; | |
818 | } | |
819 | ||
31f18b77 FG |
820 | r = ioctl(nbd, NBD_DISCONNECT); |
821 | if (r < 0) { | |
822 | cerr << "rbd-nbd: the device is not used" << std::endl; | |
7c673cae FG |
823 | } |
824 | ||
825 | close(nbd); | |
826 | ||
31f18b77 | 827 | return r; |
7c673cae FG |
828 | } |
829 | ||
31f18b77 | 830 | static int parse_imgpath(const std::string &imgpath, Config *cfg) |
7c673cae FG |
831 | { |
832 | boost::regex pattern("^(?:([^/@]+)/)?([^/@]+)(?:@([^/@]+))?$"); | |
833 | boost::smatch match; | |
834 | if (!boost::regex_match(imgpath, match, pattern)) { | |
835 | std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl; | |
836 | return -EINVAL; | |
837 | } | |
838 | ||
31f18b77 FG |
839 | if (match[1].matched) { |
840 | cfg->poolname = match[1]; | |
841 | } | |
7c673cae | 842 | |
31f18b77 | 843 | cfg->imgname = match[2]; |
7c673cae FG |
844 | |
845 | if (match[3].matched) | |
31f18b77 | 846 | cfg->snapname = match[3]; |
7c673cae FG |
847 | |
848 | return 0; | |
849 | } | |
850 | ||
31f18b77 | 851 | static int get_mapped_info(int pid, Config *cfg) |
7c673cae | 852 | { |
31f18b77 FG |
853 | int r; |
854 | std::string path = "/proc/" + stringify(pid) + "/cmdline"; | |
855 | std::ifstream ifs; | |
856 | std::string cmdline; | |
857 | std::vector<const char*> args; | |
7c673cae | 858 | |
31f18b77 | 859 | ifs.open(path.c_str(), std::ifstream::in); |
b32b8144 FG |
860 | if (!ifs.is_open()) |
861 | return -1; | |
31f18b77 FG |
862 | ifs >> cmdline; |
863 | ||
864 | for (unsigned i = 0; i < cmdline.size(); i++) { | |
865 | const char *arg = &cmdline[i]; | |
866 | if (i == 0) { | |
867 | if (strcmp(basename(arg) , "rbd-nbd") != 0) { | |
868 | return -EINVAL; | |
869 | } | |
870 | } else { | |
871 | args.push_back(arg); | |
872 | } | |
873 | ||
874 | while (cmdline[i] != '\0') { | |
875 | i++; | |
876 | } | |
7c673cae | 877 | } |
31f18b77 FG |
878 | |
879 | std::ostringstream err_msg; | |
880 | r = parse_args(args, &err_msg, cfg); | |
881 | return r; | |
882 | } | |
7c673cae | 883 | |
31f18b77 FG |
/// Read a process id from the file at `pid_path`.
///
/// Returns 0 when the file cannot be opened or does not start with a
/// number.
static int get_map_pid(const std::string& pid_path)
{
  std::ifstream pid_file(pid_path.c_str(), std::ifstream::in);
  if (!pid_file.is_open()) {
    return 0;
  }

  int pid = 0;
  pid_file >> pid;
  return pid;
}
895 | ||
31f18b77 | 896 | static int do_list_mapped_devices() |
7c673cae FG |
897 | { |
898 | int r; | |
31f18b77 FG |
899 | bool should_print = false; |
900 | int index = 0; | |
901 | int pid = 0; | |
7c673cae | 902 | |
31f18b77 | 903 | std::string default_pool_name; |
7c673cae | 904 | |
31f18b77 | 905 | TextTable tbl; |
7c673cae | 906 | |
31f18b77 FG |
907 | tbl.define_column("pid", TextTable::LEFT, TextTable::LEFT); |
908 | tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); | |
909 | tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); | |
910 | tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); | |
911 | tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); | |
912 | ||
913 | while (true) { | |
914 | std::string nbd_path = "/sys/block/nbd" + stringify(index); | |
915 | if(access(nbd_path.c_str(), F_OK) != 0) { | |
916 | break; | |
917 | } | |
918 | std::string pid_path = nbd_path + "/pid"; | |
919 | pid = get_map_pid(pid_path); | |
920 | ||
921 | if(pid > 0) { | |
922 | Config cfg; | |
923 | r = get_mapped_info(pid, &cfg); | |
924 | if (r < 0) { | |
925 | index++; | |
926 | continue; | |
927 | } | |
928 | should_print = true; | |
929 | if (cfg.snapname.empty()) { | |
930 | cfg.snapname = "-"; | |
931 | } | |
932 | tbl << pid << cfg.poolname << cfg.imgname << cfg.snapname | |
933 | << "/dev/nbd" + stringify(index) << TextTable::endrow; | |
934 | } | |
935 | ||
936 | index++; | |
937 | } | |
938 | ||
939 | if (should_print) { | |
940 | cout << tbl; | |
941 | } | |
942 | return 0; | |
943 | } | |
944 | ||
945 | static int parse_args(vector<const char*>& args, std::ostream *err_msg, Config *cfg) | |
946 | { | |
181888fb FG |
947 | std::string conf_file_list; |
948 | std::string cluster; | |
949 | CephInitParameters iparams = ceph_argparse_early_args( | |
950 | args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list); | |
7c673cae | 951 | |
31f18b77 | 952 | md_config_t config; |
181888fb FG |
953 | config.name = iparams.name; |
954 | config.cluster = cluster; | |
955 | ||
956 | if (!conf_file_list.empty()) { | |
957 | config.parse_config_files(conf_file_list.c_str(), nullptr, 0); | |
958 | } else { | |
959 | config.parse_config_files(nullptr, nullptr, 0); | |
960 | } | |
31f18b77 FG |
961 | config.parse_env(); |
962 | config.parse_argv(args); | |
181888fb FG |
963 | cfg->poolname = config.get_val<std::string>("rbd_default_pool"); |
964 | ||
965 | std::vector<const char*>::iterator i; | |
966 | std::ostringstream err; | |
31f18b77 | 967 | |
7c673cae FG |
968 | for (i = args.begin(); i != args.end(); ) { |
969 | if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { | |
224ce89b WB |
970 | return HELP_INFO; |
971 | } else if (ceph_argparse_flag(args, i, "-v", "--version", (char*)NULL)) { | |
972 | return VERSION_INFO; | |
31f18b77 FG |
973 | } else if (ceph_argparse_witharg(args, i, &cfg->devpath, "--device", (char *)NULL)) { |
974 | } else if (ceph_argparse_witharg(args, i, &cfg->nbds_max, err, "--nbds_max", (char *)NULL)) { | |
7c673cae | 975 | if (!err.str().empty()) { |
31f18b77 FG |
976 | *err_msg << "rbd-nbd: " << err.str(); |
977 | return -EINVAL; | |
7c673cae | 978 | } |
31f18b77 FG |
979 | if (cfg->nbds_max < 0) { |
980 | *err_msg << "rbd-nbd: Invalid argument for nbds_max!"; | |
981 | return -EINVAL; | |
7c673cae | 982 | } |
31f18b77 | 983 | } else if (ceph_argparse_witharg(args, i, &cfg->max_part, err, "--max_part", (char *)NULL)) { |
7c673cae | 984 | if (!err.str().empty()) { |
31f18b77 FG |
985 | *err_msg << "rbd-nbd: " << err.str(); |
986 | return -EINVAL; | |
7c673cae | 987 | } |
31f18b77 FG |
988 | if ((cfg->max_part < 0) || (cfg->max_part > 255)) { |
989 | *err_msg << "rbd-nbd: Invalid argument for max_part(0~255)!"; | |
990 | return -EINVAL; | |
7c673cae | 991 | } |
31f18b77 | 992 | cfg->set_max_part = true; |
7c673cae | 993 | } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { |
31f18b77 | 994 | cfg->readonly = true; |
7c673cae | 995 | } else if (ceph_argparse_flag(args, i, "--exclusive", (char *)NULL)) { |
31f18b77 | 996 | cfg->exclusive = true; |
7c673cae FG |
997 | } else { |
998 | ++i; | |
999 | } | |
1000 | } | |
1001 | ||
1002 | if (args.begin() != args.end()) { | |
1003 | if (strcmp(*args.begin(), "map") == 0) { | |
1004 | cmd = Connect; | |
1005 | } else if (strcmp(*args.begin(), "unmap") == 0) { | |
1006 | cmd = Disconnect; | |
1007 | } else if (strcmp(*args.begin(), "list-mapped") == 0) { | |
1008 | cmd = List; | |
1009 | } else { | |
31f18b77 FG |
1010 | *err_msg << "rbd-nbd: unknown command: " << *args.begin(); |
1011 | return -EINVAL; | |
7c673cae FG |
1012 | } |
1013 | args.erase(args.begin()); | |
1014 | } | |
1015 | ||
1016 | if (cmd == None) { | |
31f18b77 FG |
1017 | *err_msg << "rbd-nbd: must specify command"; |
1018 | return -EINVAL; | |
7c673cae FG |
1019 | } |
1020 | ||
1021 | switch (cmd) { | |
1022 | case Connect: | |
1023 | if (args.begin() == args.end()) { | |
31f18b77 FG |
1024 | *err_msg << "rbd-nbd: must specify image-or-snap-spec"; |
1025 | return -EINVAL; | |
7c673cae | 1026 | } |
31f18b77 FG |
1027 | if (parse_imgpath(string(*args.begin()), cfg) < 0) |
1028 | return -EINVAL; | |
7c673cae FG |
1029 | args.erase(args.begin()); |
1030 | break; | |
1031 | case Disconnect: | |
1032 | if (args.begin() == args.end()) { | |
31f18b77 FG |
1033 | *err_msg << "rbd-nbd: must specify nbd device path"; |
1034 | return -EINVAL; | |
7c673cae | 1035 | } |
31f18b77 | 1036 | cfg->devpath = *args.begin(); |
7c673cae FG |
1037 | args.erase(args.begin()); |
1038 | break; | |
1039 | default: | |
1040 | //shut up gcc; | |
1041 | break; | |
1042 | } | |
1043 | ||
1044 | if (args.begin() != args.end()) { | |
31f18b77 FG |
1045 | *err_msg << "rbd-nbd: unknown args: " << *args.begin(); |
1046 | return -EINVAL; | |
1047 | } | |
1048 | ||
1049 | return 0; | |
1050 | } | |
1051 | ||
1052 | static int rbd_nbd(int argc, const char *argv[]) | |
1053 | { | |
1054 | int r; | |
1055 | Config cfg; | |
1056 | vector<const char*> args; | |
1057 | argv_to_vec(argc, argv, args); | |
1058 | ||
1059 | std::ostringstream err_msg; | |
1060 | r = parse_args(args, &err_msg, &cfg); | |
224ce89b | 1061 | if (r == HELP_INFO) { |
31f18b77 | 1062 | usage(); |
d2e6a577 | 1063 | assert(false); |
224ce89b WB |
1064 | } else if (r == VERSION_INFO) { |
1065 | std::cout << pretty_version_to_str() << std::endl; | |
1066 | return 0; | |
1067 | } | |
1068 | else if (r < 0) { | |
31f18b77 FG |
1069 | cerr << err_msg.str() << std::endl; |
1070 | return r; | |
7c673cae FG |
1071 | } |
1072 | ||
1073 | switch (cmd) { | |
1074 | case Connect: | |
31f18b77 | 1075 | if (cfg.imgname.empty()) { |
7c673cae | 1076 | cerr << "rbd-nbd: image name was not specified" << std::endl; |
31f18b77 | 1077 | return -EINVAL; |
7c673cae FG |
1078 | } |
1079 | ||
31f18b77 | 1080 | r = do_map(argc, argv, &cfg); |
7c673cae | 1081 | if (r < 0) |
31f18b77 | 1082 | return -EINVAL; |
7c673cae FG |
1083 | break; |
1084 | case Disconnect: | |
31f18b77 | 1085 | r = do_unmap(cfg.devpath); |
7c673cae | 1086 | if (r < 0) |
31f18b77 | 1087 | return -EINVAL; |
7c673cae FG |
1088 | break; |
1089 | case List: | |
1090 | r = do_list_mapped_devices(); | |
1091 | if (r < 0) | |
31f18b77 | 1092 | return -EINVAL; |
7c673cae FG |
1093 | break; |
1094 | default: | |
1095 | usage(); | |
d2e6a577 FG |
1096 | assert(false); |
1097 | break; | |
7c673cae FG |
1098 | } |
1099 | ||
1100 | return 0; | |
1101 | } | |
1102 | ||
1103 | int main(int argc, const char *argv[]) | |
1104 | { | |
31f18b77 FG |
1105 | int r = rbd_nbd(argc, argv); |
1106 | if (r < 0) { | |
1107 | return EXIT_FAILURE; | |
1108 | } | |
1109 | return 0; | |
7c673cae | 1110 | } |