]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Ceph - scalable distributed file system | |
3 | * | |
4 | * Copyright (C) 2014 Inktank Storage, Inc. | |
5 | * | |
6 | * This is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License version 2.1, as published by the Free Software | |
9 | * Foundation. See file COPYING. | |
10 | * | |
11 | */ | |
12 | ||
13 | #include <errno.h> | |
14 | #include <fcntl.h> | |
15 | #include <iostream> | |
11fdf7f2 | 16 | #include <optional> |
7c673cae FG |
17 | #include <poll.h> |
18 | #include <sstream> | |
19 | #include <stdio.h> | |
20 | #include <stdlib.h> | |
21 | #include <string.h> | |
22 | #include <string> | |
23 | #include <sys/stat.h> | |
11fdf7f2 | 24 | #include <sys/sysmacros.h> |
7c673cae FG |
25 | #include <sys/types.h> |
26 | #include <unistd.h> | |
27 | ||
28 | #include "auth/KeyRing.h" | |
29 | #include "common/errno.h" | |
30 | #include "common/Formatter.h" | |
31 | #include "common/module.h" | |
32 | #include "common/run_cmd.h" | |
33 | #include "common/safe_io.h" | |
34 | #include "common/secret.h" | |
35 | #include "common/TextTable.h" | |
11fdf7f2 | 36 | #include "include/ceph_assert.h" |
7c673cae FG |
37 | #include "include/stringify.h" |
38 | #include "include/krbd.h" | |
39 | #include "mon/MonMap.h" | |
40 | ||
41 | #include <blkid/blkid.h> | |
42 | #include <libudev.h> | |
43 | ||
7c673cae FG |
44 | |
/* Timeout handed to poll() while waiting for udev events (poll() takes milliseconds). */
static const int POLL_TIMEOUT = 120000;
46 | ||
47 | struct krbd_ctx { | |
48 | CephContext *cct; | |
49 | struct udev *udev; | |
50 | }; | |
51 | ||
11fdf7f2 TL |
/* Snapshot name stored in a krbd_spec when no snapshot was given. */
static const std::string SNAP_HEAD_NAME = "-";
53 | ||
54 | struct krbd_spec { | |
55 | std::string pool_name; | |
56 | std::string nspace_name; | |
57 | std::string image_name; | |
58 | std::string snap_name; | |
59 | ||
60 | krbd_spec(const char *pool_name, const char *nspace_name, | |
61 | const char *image_name, const char *snap_name) | |
62 | : pool_name(pool_name), | |
63 | nspace_name(nspace_name), | |
64 | image_name(image_name), | |
65 | snap_name(*snap_name ? snap_name : SNAP_HEAD_NAME) { } | |
66 | ||
67 | bool operator==(const krbd_spec& rhs) const { | |
68 | return pool_name == rhs.pool_name && | |
69 | nspace_name == rhs.nspace_name && | |
70 | image_name == rhs.image_name && | |
71 | snap_name == rhs.snap_name; | |
72 | } | |
73 | }; | |
74 | ||
75 | std::ostream& operator<<(std::ostream& os, const krbd_spec& spec) { | |
76 | os << spec.pool_name << "/"; | |
77 | if (!spec.nspace_name.empty()) | |
78 | os << spec.nspace_name << "/"; | |
79 | os << spec.image_name; | |
80 | if (spec.snap_name != SNAP_HEAD_NAME) | |
81 | os << "@" << spec.snap_name; | |
82 | return os; | |
83 | } | |
84 | ||
85 | std::optional<krbd_spec> spec_from_dev(udev_device *dev) { | |
86 | const char *pool_name = udev_device_get_sysattr_value(dev, "pool"); | |
87 | const char *nspace_name = udev_device_get_sysattr_value(dev, "pool_ns"); | |
88 | const char *image_name = udev_device_get_sysattr_value(dev, "name"); | |
89 | const char *snap_name = udev_device_get_sysattr_value(dev, "current_snap"); | |
90 | ||
91 | if (!pool_name || !image_name || !snap_name) | |
92 | return std::nullopt; | |
93 | ||
94 | return std::make_optional<krbd_spec>( | |
95 | pool_name, nspace_name ?: "", image_name, snap_name); | |
96 | } | |
97 | ||
7c673cae FG |
/* Return the device node path for a krbd id, i.e. "/dev/rbd" followed by id. */
static std::string get_kernel_rbd_name(const char *id)
{
  std::string devnode("/dev/rbd");
  devnode += id;
  return devnode;
}
102 | ||
103 | static int sysfs_write_rbd(const char *which, const string& buf) | |
104 | { | |
105 | const string s = string("/sys/bus/rbd/") + which; | |
106 | const string t = s + "_single_major"; | |
107 | int fd; | |
108 | int r; | |
109 | ||
110 | /* | |
111 | * 'add' and 'add_single_major' interfaces are identical, but if rbd | |
112 | * kernel module is new enough and is configured to use single-major | |
113 | * scheme, 'add' is disabled in order to prevent old userspace from | |
114 | * doing weird things at unmap time. | |
115 | * | |
116 | * Same goes for 'remove' vs 'remove_single_major'. | |
117 | */ | |
118 | fd = open(t.c_str(), O_WRONLY); | |
119 | if (fd < 0) { | |
120 | if (errno == ENOENT) { | |
121 | fd = open(s.c_str(), O_WRONLY); | |
122 | if (fd < 0) | |
123 | return -errno; | |
124 | } else { | |
125 | return -errno; | |
126 | } | |
127 | } | |
128 | ||
129 | r = safe_write(fd, buf.c_str(), buf.size()); | |
130 | ||
131 | close(fd); | |
132 | return r; | |
133 | } | |
134 | ||
135 | static int sysfs_write_rbd_add(const string& buf) | |
136 | { | |
137 | return sysfs_write_rbd("add", buf); | |
138 | } | |
139 | ||
140 | static int sysfs_write_rbd_remove(const string& buf) | |
141 | { | |
142 | return sysfs_write_rbd("remove", buf); | |
143 | } | |
144 | ||
/*
 * Report whether rbd devices expose a 'minor' sysfs attribute.
 * Returns 1 if they do, 0 otherwise.
 */
static int have_minor_attr(void)
{
  /*
   * 'minor' attribute was added as part of single_major merge, which
   * exposed the 'single_major' parameter.  'minor' is always present,
   * regardless of whether single-major scheme is turned on or not.
   *
   * (Something like ver >= KERNEL_VERSION(3, 14, 0) is a no-go because
   * this has to work with rbd.ko backported to various kernels.)
   */
  const char *const param_path = "/sys/module/rbd/parameters/single_major";

  if (access(param_path, F_OK) != 0) {
    return 0;
  }
  return 1;
}
157 | ||
11fdf7f2 TL |
158 | static int build_map_buf(CephContext *cct, const krbd_spec& spec, |
159 | const char *options, string *pbuf) | |
7c673cae FG |
160 | { |
161 | ostringstream oss; | |
162 | int r; | |
163 | ||
164 | MonMap monmap; | |
11fdf7f2 | 165 | r = monmap.build_initial(cct, false, cerr); |
7c673cae FG |
166 | if (r < 0) |
167 | return r; | |
168 | ||
169 | list<entity_addr_t> mon_addr; | |
170 | monmap.list_addrs(mon_addr); | |
171 | ||
172 | for (const auto &p : mon_addr) { | |
173 | if (oss.tellp() > 0) { | |
174 | oss << ","; | |
175 | } | |
176 | oss << p.get_sockaddr(); | |
177 | } | |
178 | ||
179 | oss << " name=" << cct->_conf->name.get_id(); | |
180 | ||
181 | KeyRing keyring; | |
11fdf7f2 TL |
182 | auto auth_client_required = |
183 | cct->_conf.get_val<std::string>("auth_client_required"); | |
184 | if (auth_client_required != "none") { | |
224ce89b | 185 | r = keyring.from_ceph_context(cct); |
11fdf7f2 TL |
186 | auto keyfile = cct->_conf.get_val<std::string>("keyfile"); |
187 | auto key = cct->_conf.get_val<std::string>("key"); | |
188 | if (r == -ENOENT && keyfile.empty() && key.empty()) | |
224ce89b WB |
189 | r = 0; |
190 | if (r < 0) { | |
191 | cerr << "rbd: failed to get secret" << std::endl; | |
192 | return r; | |
193 | } | |
7c673cae FG |
194 | } |
195 | ||
196 | CryptoKey secret; | |
197 | string key_name = string("client.") + cct->_conf->name.get_id(); | |
198 | if (keyring.get_secret(cct->_conf->name, secret)) { | |
199 | string secret_str; | |
200 | secret.encode_base64(secret_str); | |
201 | ||
202 | r = set_kernel_secret(secret_str.c_str(), key_name.c_str()); | |
203 | if (r >= 0) { | |
204 | if (r == 0) | |
205 | cerr << "rbd: warning: secret has length 0" << std::endl; | |
206 | oss << ",key=" << key_name; | |
207 | } else if (r == -ENODEV || r == -ENOSYS) { | |
208 | // running against older kernel; fall back to secret= in options | |
209 | oss << ",secret=" << secret_str; | |
210 | } else { | |
211 | cerr << "rbd: failed to add secret '" << key_name << "' to kernel" | |
212 | << std::endl; | |
213 | return r; | |
214 | } | |
215 | } else if (is_kernel_secret(key_name.c_str())) { | |
216 | oss << ",key=" << key_name; | |
217 | } | |
218 | ||
219 | if (strcmp(options, "") != 0) | |
220 | oss << "," << options; | |
11fdf7f2 TL |
221 | if (!spec.nspace_name.empty()) |
222 | oss << ",_pool_ns=" << spec.nspace_name; | |
7c673cae | 223 | |
11fdf7f2 TL |
224 | oss << " " << spec.pool_name << " " << spec.image_name << " " |
225 | << spec.snap_name; | |
7c673cae FG |
226 | |
227 | *pbuf = oss.str(); | |
228 | return 0; | |
229 | } | |
230 | ||
11fdf7f2 | 231 | static int wait_for_udev_add(struct udev_monitor *mon, const krbd_spec& spec, |
7c673cae FG |
232 | string *pname) |
233 | { | |
11fdf7f2 | 234 | struct udev_device *bus_dev = nullptr; |
7c673cae FG |
235 | |
236 | /* | |
237 | * Catch /sys/devices/rbd/<id>/ and wait for the corresponding | |
238 | * block device to show up. This is necessary because rbd devices | |
239 | * and block devices aren't linked together in our sysfs layout. | |
240 | */ | |
241 | for (;;) { | |
242 | struct pollfd fds[1]; | |
243 | struct udev_device *dev; | |
11fdf7f2 | 244 | int r; |
7c673cae FG |
245 | |
246 | fds[0].fd = udev_monitor_get_fd(mon); | |
247 | fds[0].events = POLLIN; | |
11fdf7f2 TL |
248 | r = poll(fds, 1, POLL_TIMEOUT); |
249 | if (r < 0) | |
7c673cae FG |
250 | return -errno; |
251 | ||
11fdf7f2 TL |
252 | if (r == 0) |
253 | return -ETIMEDOUT; | |
254 | ||
7c673cae FG |
255 | dev = udev_monitor_receive_device(mon); |
256 | if (!dev) | |
257 | continue; | |
258 | ||
259 | if (strcmp(udev_device_get_action(dev), "add") != 0) | |
260 | goto next; | |
261 | ||
262 | if (!bus_dev) { | |
263 | if (strcmp(udev_device_get_subsystem(dev), "rbd") == 0) { | |
11fdf7f2 TL |
264 | auto cur_spec = spec_from_dev(dev); |
265 | if (cur_spec && *cur_spec == spec) { | |
7c673cae FG |
266 | bus_dev = dev; |
267 | continue; | |
268 | } | |
269 | } | |
270 | } else { | |
271 | if (strcmp(udev_device_get_subsystem(dev), "block") == 0) { | |
272 | const char *major = udev_device_get_sysattr_value(bus_dev, "major"); | |
273 | const char *minor = udev_device_get_sysattr_value(bus_dev, "minor"); | |
274 | const char *this_major = udev_device_get_property_value(dev, "MAJOR"); | |
275 | const char *this_minor = udev_device_get_property_value(dev, "MINOR"); | |
276 | ||
11fdf7f2 | 277 | ceph_assert(!minor ^ have_minor_attr()); |
7c673cae FG |
278 | |
279 | if (strcmp(this_major, major) == 0 && | |
280 | (!minor || strcmp(this_minor, minor) == 0)) { | |
281 | string name = get_kernel_rbd_name(udev_device_get_sysname(bus_dev)); | |
282 | ||
11fdf7f2 | 283 | ceph_assert(strcmp(udev_device_get_devnode(dev), name.c_str()) == 0); |
7c673cae FG |
284 | *pname = name; |
285 | ||
286 | udev_device_unref(dev); | |
287 | udev_device_unref(bus_dev); | |
288 | break; | |
289 | } | |
290 | } | |
291 | } | |
292 | ||
293 | next: | |
294 | udev_device_unref(dev); | |
295 | } | |
296 | ||
297 | return 0; | |
298 | } | |
299 | ||
11fdf7f2 TL |
300 | static int do_map(struct udev *udev, const krbd_spec& spec, const string& buf, |
301 | string *pname) | |
7c673cae FG |
302 | { |
303 | struct udev_monitor *mon; | |
304 | int r; | |
305 | ||
306 | mon = udev_monitor_new_from_netlink(udev, "udev"); | |
307 | if (!mon) | |
308 | return -ENOMEM; | |
309 | ||
11fdf7f2 | 310 | r = udev_monitor_filter_add_match_subsystem_devtype(mon, "rbd", nullptr); |
7c673cae FG |
311 | if (r < 0) |
312 | goto out_mon; | |
313 | ||
314 | r = udev_monitor_filter_add_match_subsystem_devtype(mon, "block", "disk"); | |
315 | if (r < 0) | |
316 | goto out_mon; | |
317 | ||
318 | r = udev_monitor_enable_receiving(mon); | |
319 | if (r < 0) | |
320 | goto out_mon; | |
321 | ||
322 | r = sysfs_write_rbd_add(buf); | |
323 | if (r < 0) { | |
324 | cerr << "rbd: sysfs write failed" << std::endl; | |
325 | goto out_mon; | |
326 | } | |
327 | ||
11fdf7f2 | 328 | r = wait_for_udev_add(mon, spec, pname); |
7c673cae FG |
329 | if (r < 0) { |
330 | cerr << "rbd: wait failed" << std::endl; | |
331 | goto out_mon; | |
332 | } | |
333 | ||
334 | out_mon: | |
335 | udev_monitor_unref(mon); | |
336 | return r; | |
337 | } | |
338 | ||
11fdf7f2 TL |
339 | static int map_image(struct krbd_ctx *ctx, const krbd_spec& spec, |
340 | const char *options, string *pname) | |
7c673cae FG |
341 | { |
342 | string buf; | |
343 | int r; | |
344 | ||
11fdf7f2 | 345 | r = build_map_buf(ctx->cct, spec, options, &buf); |
7c673cae FG |
346 | if (r < 0) |
347 | return r; | |
348 | ||
349 | /* | |
350 | * Modprobe rbd kernel module. If it supports single-major device | |
351 | * number allocation scheme, make sure it's turned on. | |
352 | */ | |
353 | if (access("/sys/bus/rbd", F_OK) != 0) { | |
354 | const char *module_options = NULL; | |
355 | if (module_has_param("rbd", "single_major")) | |
356 | module_options = "single_major=Y"; | |
357 | ||
358 | r = module_load("rbd", module_options); | |
359 | if (r) { | |
360 | cerr << "rbd: failed to load rbd kernel module (" << r << ")" | |
361 | << std::endl; | |
362 | /* | |
363 | * Ignore the error: modprobe failing doesn't necessarily prevent | |
364 | * from working. | |
365 | */ | |
366 | } | |
367 | } | |
368 | ||
11fdf7f2 | 369 | return do_map(ctx->udev, spec, buf, pname); |
7c673cae FG |
370 | } |
371 | ||
372 | static int devno_to_krbd_id(struct udev *udev, dev_t devno, string *pid) | |
373 | { | |
374 | struct udev_enumerate *enm; | |
375 | struct udev_list_entry *l; | |
376 | struct udev_device *dev; | |
377 | int r; | |
378 | ||
379 | enm = udev_enumerate_new(udev); | |
380 | if (!enm) | |
381 | return -ENOMEM; | |
382 | ||
383 | r = udev_enumerate_add_match_subsystem(enm, "rbd"); | |
384 | if (r < 0) | |
385 | goto out_enm; | |
386 | ||
387 | r = udev_enumerate_add_match_sysattr(enm, "major", | |
388 | stringify(major(devno)).c_str()); | |
389 | if (r < 0) | |
390 | goto out_enm; | |
391 | ||
392 | if (have_minor_attr()) { | |
393 | r = udev_enumerate_add_match_sysattr(enm, "minor", | |
394 | stringify(minor(devno)).c_str()); | |
395 | if (r < 0) | |
396 | goto out_enm; | |
397 | } | |
398 | ||
399 | r = udev_enumerate_scan_devices(enm); | |
400 | if (r < 0) | |
401 | goto out_enm; | |
402 | ||
403 | l = udev_enumerate_get_list_entry(enm); | |
404 | if (!l) { | |
405 | r = -ENOENT; | |
406 | goto out_enm; | |
407 | } | |
408 | ||
409 | /* make sure there is only one match */ | |
11fdf7f2 | 410 | ceph_assert(!udev_list_entry_get_next(l)); |
7c673cae FG |
411 | |
412 | dev = udev_device_new_from_syspath(udev, udev_list_entry_get_name(l)); | |
413 | if (!dev) { | |
414 | r = -ENOMEM; | |
415 | goto out_enm; | |
416 | } | |
417 | ||
418 | *pid = udev_device_get_sysname(dev); | |
419 | ||
420 | udev_device_unref(dev); | |
421 | out_enm: | |
422 | udev_enumerate_unref(enm); | |
423 | return r; | |
424 | } | |
425 | ||
11fdf7f2 TL |
426 | static int __enumerate_devices(struct udev *udev, const krbd_spec& spec, |
427 | bool match_nspace, struct udev_enumerate **penm) | |
7c673cae FG |
428 | { |
429 | struct udev_enumerate *enm; | |
7c673cae FG |
430 | int r; |
431 | ||
432 | enm = udev_enumerate_new(udev); | |
433 | if (!enm) | |
434 | return -ENOMEM; | |
435 | ||
436 | r = udev_enumerate_add_match_subsystem(enm, "rbd"); | |
437 | if (r < 0) | |
438 | goto out_enm; | |
439 | ||
11fdf7f2 | 440 | r = udev_enumerate_add_match_sysattr(enm, "pool", spec.pool_name.c_str()); |
7c673cae FG |
441 | if (r < 0) |
442 | goto out_enm; | |
443 | ||
11fdf7f2 TL |
444 | if (match_nspace) { |
445 | r = udev_enumerate_add_match_sysattr(enm, "pool_ns", | |
446 | spec.nspace_name.c_str()); | |
447 | } else { | |
448 | /* | |
449 | * Match _only_ devices that don't have pool_ns attribute. | |
450 | * If the kernel supports namespaces, the result will be empty. | |
451 | */ | |
452 | r = udev_enumerate_add_nomatch_sysattr(enm, "pool_ns", nullptr); | |
453 | } | |
454 | if (r < 0) | |
455 | goto out_enm; | |
456 | ||
457 | r = udev_enumerate_add_match_sysattr(enm, "name", spec.image_name.c_str()); | |
7c673cae FG |
458 | if (r < 0) |
459 | goto out_enm; | |
460 | ||
11fdf7f2 TL |
461 | r = udev_enumerate_add_match_sysattr(enm, "current_snap", |
462 | spec.snap_name.c_str()); | |
7c673cae FG |
463 | if (r < 0) |
464 | goto out_enm; | |
465 | ||
466 | r = udev_enumerate_scan_devices(enm); | |
467 | if (r < 0) | |
468 | goto out_enm; | |
469 | ||
11fdf7f2 TL |
470 | *penm = enm; |
471 | return 0; | |
472 | ||
473 | out_enm: | |
474 | udev_enumerate_unref(enm); | |
475 | return r; | |
476 | } | |
477 | ||
478 | static int enumerate_devices(struct udev *udev, const krbd_spec& spec, | |
479 | struct udev_enumerate **penm) | |
480 | { | |
481 | struct udev_enumerate *enm; | |
482 | int r; | |
483 | ||
484 | r = __enumerate_devices(udev, spec, true, &enm); | |
485 | if (r < 0) | |
486 | return r; | |
487 | ||
488 | /* | |
489 | * If no namespace is set, try again with match_nspace=false to | |
490 | * handle older kernels. On a newer kernel the result will remain | |
491 | * the same (i.e. empty). | |
492 | */ | |
493 | if (!udev_enumerate_get_list_entry(enm) && spec.nspace_name.empty()) { | |
494 | udev_enumerate_unref(enm); | |
495 | r = __enumerate_devices(udev, spec, false, &enm); | |
496 | if (r < 0) | |
497 | return r; | |
498 | } | |
499 | ||
500 | *penm = enm; | |
501 | return 0; | |
502 | } | |
503 | ||
504 | static int spec_to_devno_and_krbd_id(struct udev *udev, const krbd_spec& spec, | |
505 | dev_t *pdevno, string *pid) | |
506 | { | |
507 | struct udev_enumerate *enm; | |
508 | struct udev_list_entry *l; | |
509 | struct udev_device *dev; | |
510 | unsigned int maj, min = 0; | |
511 | string err; | |
512 | int r; | |
513 | ||
514 | r = enumerate_devices(udev, spec, &enm); | |
515 | if (r < 0) | |
516 | return r; | |
517 | ||
7c673cae FG |
518 | l = udev_enumerate_get_list_entry(enm); |
519 | if (!l) { | |
520 | r = -ENOENT; | |
521 | goto out_enm; | |
522 | } | |
523 | ||
524 | dev = udev_device_new_from_syspath(udev, udev_list_entry_get_name(l)); | |
525 | if (!dev) { | |
526 | r = -ENOMEM; | |
527 | goto out_enm; | |
528 | } | |
529 | ||
530 | maj = strict_strtoll(udev_device_get_sysattr_value(dev, "major"), 10, &err); | |
531 | if (!err.empty()) { | |
532 | cerr << "rbd: couldn't parse major: " << err << std::endl; | |
533 | r = -EINVAL; | |
534 | goto out_dev; | |
535 | } | |
536 | if (have_minor_attr()) { | |
537 | min = strict_strtoll(udev_device_get_sysattr_value(dev, "minor"), 10, &err); | |
538 | if (!err.empty()) { | |
539 | cerr << "rbd: couldn't parse minor: " << err << std::endl; | |
540 | r = -EINVAL; | |
541 | goto out_dev; | |
542 | } | |
543 | } | |
544 | ||
545 | /* | |
546 | * If an image is mapped more than once don't bother trying to unmap | |
547 | * all devices - let users run unmap the same number of times they | |
548 | * ran map. | |
549 | */ | |
550 | if (udev_list_entry_get_next(l)) | |
11fdf7f2 | 551 | cerr << "rbd: " << spec << ": mapped more than once, unmapping " |
7c673cae FG |
552 | << get_kernel_rbd_name(udev_device_get_sysname(dev)) |
553 | << " only" << std::endl; | |
554 | ||
555 | *pdevno = makedev(maj, min); | |
556 | *pid = udev_device_get_sysname(dev); | |
557 | ||
558 | out_dev: | |
559 | udev_device_unref(dev); | |
560 | out_enm: | |
561 | udev_enumerate_unref(enm); | |
562 | return r; | |
563 | } | |
564 | ||
/*
 * Build the string written to the rbd 'remove' interface: the krbd id,
 * followed by a space and options when options is non-empty.
 */
static std::string build_unmap_buf(const std::string& id, const char *options)
{
  if (options[0] == '\0') {
    return id;
  }

  std::string buf = id;
  buf.append(" ");
  buf.append(options);
  return buf;
}
574 | ||
575 | static int wait_for_udev_remove(struct udev_monitor *mon, dev_t devno) | |
576 | { | |
577 | for (;;) { | |
578 | struct pollfd fds[1]; | |
579 | struct udev_device *dev; | |
11fdf7f2 | 580 | int r; |
7c673cae FG |
581 | |
582 | fds[0].fd = udev_monitor_get_fd(mon); | |
583 | fds[0].events = POLLIN; | |
11fdf7f2 TL |
584 | r = poll(fds, 1, POLL_TIMEOUT); |
585 | if (r < 0) | |
7c673cae FG |
586 | return -errno; |
587 | ||
11fdf7f2 TL |
588 | if (r == 0) |
589 | return -ETIMEDOUT; | |
590 | ||
7c673cae FG |
591 | dev = udev_monitor_receive_device(mon); |
592 | if (!dev) | |
593 | continue; | |
594 | ||
595 | if (strcmp(udev_device_get_action(dev), "remove") == 0 && | |
596 | udev_device_get_devnum(dev) == devno) { | |
597 | udev_device_unref(dev); | |
598 | break; | |
599 | } | |
600 | ||
601 | udev_device_unref(dev); | |
602 | } | |
603 | ||
604 | return 0; | |
605 | } | |
606 | ||
607 | static int do_unmap(struct udev *udev, dev_t devno, const string& buf) | |
608 | { | |
609 | struct udev_monitor *mon; | |
610 | int r; | |
611 | ||
612 | mon = udev_monitor_new_from_netlink(udev, "udev"); | |
613 | if (!mon) | |
614 | return -ENOMEM; | |
615 | ||
616 | r = udev_monitor_filter_add_match_subsystem_devtype(mon, "block", "disk"); | |
617 | if (r < 0) | |
618 | goto out_mon; | |
619 | ||
620 | r = udev_monitor_enable_receiving(mon); | |
621 | if (r < 0) | |
622 | goto out_mon; | |
623 | ||
624 | /* | |
625 | * On final device close(), kernel sends a block change event, in | |
626 | * response to which udev apparently runs blkid on the device. This | |
627 | * makes unmap fail with EBUSY, if issued right after final close(). | |
628 | * Try to circumvent this with a retry before turning to udev. | |
629 | */ | |
630 | for (int tries = 0; ; tries++) { | |
631 | r = sysfs_write_rbd_remove(buf); | |
632 | if (r >= 0) { | |
633 | break; | |
634 | } else if (r == -EBUSY && tries < 2) { | |
635 | if (!tries) { | |
636 | usleep(250 * 1000); | |
637 | } else { | |
638 | /* | |
639 | * libudev does not provide the "wait until the queue is empty" | |
640 | * API or the sufficient amount of primitives to build it from. | |
641 | */ | |
11fdf7f2 | 642 | string err = run_cmd("udevadm", "settle", "--timeout", "10", (char*)NULL); |
7c673cae FG |
643 | if (!err.empty()) |
644 | cerr << "rbd: " << err << std::endl; | |
645 | } | |
646 | } else { | |
647 | cerr << "rbd: sysfs write failed" << std::endl; | |
648 | goto out_mon; | |
649 | } | |
650 | } | |
651 | ||
652 | r = wait_for_udev_remove(mon, devno); | |
653 | if (r < 0) { | |
654 | cerr << "rbd: wait failed" << std::endl; | |
655 | goto out_mon; | |
656 | } | |
657 | ||
658 | out_mon: | |
659 | udev_monitor_unref(mon); | |
660 | return r; | |
661 | } | |
662 | ||
663 | static int unmap_image(struct krbd_ctx *ctx, const char *devnode, | |
664 | const char *options) | |
665 | { | |
666 | struct stat sb; | |
667 | dev_t wholedevno = 0; | |
668 | string id; | |
669 | int r; | |
670 | ||
671 | if (stat(devnode, &sb) < 0 || !S_ISBLK(sb.st_mode)) { | |
672 | cerr << "rbd: '" << devnode << "' is not a block device" << std::endl; | |
673 | return -EINVAL; | |
674 | } | |
675 | ||
676 | r = blkid_devno_to_wholedisk(sb.st_rdev, NULL, 0, &wholedevno); | |
677 | if (r < 0) { | |
678 | cerr << "rbd: couldn't compute wholedevno: " << cpp_strerror(r) | |
679 | << std::endl; | |
680 | /* | |
681 | * Ignore the error: we are given whole disks most of the time, and | |
682 | * if it turns out this is a partition we will fail later anyway. | |
683 | */ | |
684 | wholedevno = sb.st_rdev; | |
685 | } | |
686 | ||
687 | r = devno_to_krbd_id(ctx->udev, wholedevno, &id); | |
688 | if (r < 0) { | |
689 | if (r == -ENOENT) { | |
690 | cerr << "rbd: '" << devnode << "' is not an rbd device" << std::endl; | |
691 | r = -EINVAL; | |
692 | } | |
693 | return r; | |
694 | } | |
695 | ||
696 | return do_unmap(ctx->udev, wholedevno, build_unmap_buf(id, options)); | |
697 | } | |
698 | ||
11fdf7f2 | 699 | static int unmap_image(struct krbd_ctx *ctx, const krbd_spec& spec, |
7c673cae FG |
700 | const char *options) |
701 | { | |
702 | dev_t devno = 0; | |
703 | string id; | |
704 | int r; | |
705 | ||
11fdf7f2 | 706 | r = spec_to_devno_and_krbd_id(ctx->udev, spec, &devno, &id); |
7c673cae FG |
707 | if (r < 0) { |
708 | if (r == -ENOENT) { | |
11fdf7f2 TL |
709 | cerr << "rbd: " << spec << ": not a mapped image or snapshot" |
710 | << std::endl; | |
7c673cae FG |
711 | r = -EINVAL; |
712 | } | |
713 | return r; | |
714 | } | |
715 | ||
716 | return do_unmap(ctx->udev, devno, build_unmap_buf(id, options)); | |
717 | } | |
718 | ||
719 | static bool dump_one_image(Formatter *f, TextTable *tbl, | |
720 | struct udev_device *dev) | |
721 | { | |
722 | const char *id = udev_device_get_sysname(dev); | |
11fdf7f2 | 723 | auto spec = spec_from_dev(dev); |
7c673cae FG |
724 | string kname = get_kernel_rbd_name(id); |
725 | ||
11fdf7f2 | 726 | if (!spec) |
7c673cae FG |
727 | return false; |
728 | ||
729 | if (f) { | |
11fdf7f2 TL |
730 | f->open_object_section("device"); |
731 | f->dump_string("id", id); | |
732 | f->dump_string("pool", spec->pool_name); | |
733 | f->dump_string("namespace", spec->nspace_name); | |
734 | f->dump_string("name", spec->image_name); | |
735 | f->dump_string("snap", spec->snap_name); | |
7c673cae FG |
736 | f->dump_string("device", kname); |
737 | f->close_section(); | |
738 | } else { | |
11fdf7f2 TL |
739 | *tbl << id << spec->pool_name << spec->nspace_name << spec->image_name |
740 | << spec->snap_name << kname << TextTable::endrow; | |
7c673cae FG |
741 | } |
742 | ||
743 | return true; | |
744 | } | |
745 | ||
746 | static int do_dump(struct udev *udev, Formatter *f, TextTable *tbl) | |
747 | { | |
748 | struct udev_enumerate *enm; | |
11fdf7f2 | 749 | struct udev_list_entry *l = NULL; |
7c673cae FG |
750 | bool have_output = false; |
751 | int r; | |
752 | ||
753 | enm = udev_enumerate_new(udev); | |
754 | if (!enm) | |
755 | return -ENOMEM; | |
756 | ||
757 | r = udev_enumerate_add_match_subsystem(enm, "rbd"); | |
758 | if (r < 0) | |
759 | goto out_enm; | |
760 | ||
761 | r = udev_enumerate_scan_devices(enm); | |
762 | if (r < 0) | |
763 | goto out_enm; | |
764 | ||
765 | udev_list_entry_foreach(l, udev_enumerate_get_list_entry(enm)) { | |
766 | struct udev_device *dev; | |
767 | ||
768 | dev = udev_device_new_from_syspath(udev, udev_list_entry_get_name(l)); | |
769 | if (dev) { | |
770 | have_output |= dump_one_image(f, tbl, dev); | |
771 | udev_device_unref(dev); | |
772 | } | |
773 | } | |
774 | ||
775 | r = have_output; | |
776 | out_enm: | |
777 | udev_enumerate_unref(enm); | |
778 | return r; | |
779 | } | |
780 | ||
781 | int dump_images(struct krbd_ctx *ctx, Formatter *f) | |
782 | { | |
783 | TextTable tbl; | |
784 | int r; | |
785 | ||
786 | if (f) { | |
11fdf7f2 | 787 | f->open_array_section("devices"); |
7c673cae FG |
788 | } else { |
789 | tbl.define_column("id", TextTable::LEFT, TextTable::LEFT); | |
790 | tbl.define_column("pool", TextTable::LEFT, TextTable::LEFT); | |
11fdf7f2 | 791 | tbl.define_column("namespace", TextTable::LEFT, TextTable::LEFT); |
7c673cae FG |
792 | tbl.define_column("image", TextTable::LEFT, TextTable::LEFT); |
793 | tbl.define_column("snap", TextTable::LEFT, TextTable::LEFT); | |
794 | tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); | |
795 | } | |
796 | ||
797 | r = do_dump(ctx->udev, f, &tbl); | |
798 | ||
799 | if (f) { | |
800 | f->close_section(); | |
801 | f->flush(cout); | |
802 | } else { | |
803 | if (r > 0) | |
804 | cout << tbl; | |
805 | } | |
806 | ||
807 | return r; | |
808 | } | |
809 | ||
11fdf7f2 TL |
810 | static int is_mapped_image(struct udev *udev, const krbd_spec& spec, |
811 | string *pname) | |
812 | { | |
813 | struct udev_enumerate *enm; | |
814 | struct udev_list_entry *l; | |
815 | int r; | |
816 | ||
817 | r = enumerate_devices(udev, spec, &enm); | |
818 | if (r < 0) | |
819 | return r; | |
820 | ||
821 | l = udev_enumerate_get_list_entry(enm); | |
822 | if (l) { | |
823 | struct udev_device *dev; | |
824 | ||
825 | dev = udev_device_new_from_syspath(udev, udev_list_entry_get_name(l)); | |
826 | if (!dev) { | |
827 | r = -ENOMEM; | |
828 | goto out_enm; | |
829 | } | |
830 | ||
831 | r = 1; | |
832 | *pname = get_kernel_rbd_name(udev_device_get_sysname(dev)); | |
833 | udev_device_unref(dev); | |
834 | } else { | |
835 | r = 0; /* not mapped */ | |
836 | } | |
837 | ||
838 | out_enm: | |
839 | udev_enumerate_unref(enm); | |
840 | return r; | |
841 | } | |
842 | ||
7c673cae FG |
843 | extern "C" int krbd_create_from_context(rados_config_t cct, |
844 | struct krbd_ctx **pctx) | |
845 | { | |
846 | struct krbd_ctx *ctx = new struct krbd_ctx(); | |
847 | ||
848 | ctx->cct = reinterpret_cast<CephContext *>(cct); | |
849 | ctx->udev = udev_new(); | |
850 | if (!ctx->udev) { | |
851 | delete ctx; | |
852 | return -ENOMEM; | |
853 | } | |
854 | ||
855 | *pctx = ctx; | |
856 | return 0; | |
857 | } | |
858 | ||
859 | extern "C" void krbd_destroy(struct krbd_ctx *ctx) | |
860 | { | |
861 | if (!ctx) | |
862 | return; | |
863 | ||
864 | udev_unref(ctx->udev); | |
865 | ||
866 | delete ctx; | |
867 | } | |
868 | ||
11fdf7f2 TL |
869 | extern "C" int krbd_map(struct krbd_ctx *ctx, |
870 | const char *pool_name, | |
871 | const char *nspace_name, | |
872 | const char *image_name, | |
873 | const char *snap_name, | |
874 | const char *options, | |
875 | char **pdevnode) | |
7c673cae | 876 | { |
11fdf7f2 | 877 | krbd_spec spec(pool_name, nspace_name, image_name, snap_name); |
7c673cae FG |
878 | string name; |
879 | char *devnode; | |
880 | int r; | |
881 | ||
11fdf7f2 | 882 | r = map_image(ctx, spec, options, &name); |
7c673cae FG |
883 | if (r < 0) |
884 | return r; | |
885 | ||
886 | devnode = strdup(name.c_str()); | |
887 | if (!devnode) | |
888 | return -ENOMEM; | |
889 | ||
890 | *pdevnode = devnode; | |
891 | return r; | |
892 | } | |
893 | ||
894 | extern "C" int krbd_unmap(struct krbd_ctx *ctx, const char *devnode, | |
895 | const char *options) | |
896 | { | |
897 | return unmap_image(ctx, devnode, options); | |
898 | } | |
899 | ||
11fdf7f2 TL |
900 | extern "C" int krbd_unmap_by_spec(struct krbd_ctx *ctx, |
901 | const char *pool_name, | |
902 | const char *nspace_name, | |
903 | const char *image_name, | |
904 | const char *snap_name, | |
7c673cae FG |
905 | const char *options) |
906 | { | |
11fdf7f2 TL |
907 | krbd_spec spec(pool_name, nspace_name, image_name, snap_name); |
908 | return unmap_image(ctx, spec, options); | |
7c673cae FG |
909 | } |
910 | ||
911 | int krbd_showmapped(struct krbd_ctx *ctx, Formatter *f) | |
912 | { | |
913 | return dump_images(ctx, f); | |
914 | } | |
11fdf7f2 TL |
915 | |
916 | extern "C" int krbd_is_mapped(struct krbd_ctx *ctx, | |
917 | const char *pool_name, | |
918 | const char *nspace_name, | |
919 | const char *image_name, | |
920 | const char *snap_name, | |
921 | char **pdevnode) | |
922 | { | |
923 | krbd_spec spec(pool_name, nspace_name, image_name, snap_name); | |
924 | string name; | |
925 | char *devnode; | |
926 | int r; | |
927 | ||
928 | r = is_mapped_image(ctx->udev, spec, &name); | |
929 | if (r <= 0) /* error or not mapped */ | |
930 | return r; | |
931 | ||
932 | devnode = strdup(name.c_str()); | |
933 | if (!devnode) | |
934 | return -ENOMEM; | |
935 | ||
936 | *pdevnode = devnode; | |
937 | return r; | |
938 | } |