]> git.proxmox.com Git - mirror_zfs.git/blob - lib/libzutil/os/linux/zutil_import_os.c
zpool: Add slot power control, print power status
[mirror_zfs.git] / lib / libzutil / os / linux / zutil_import_os.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
25 * Copyright 2015 RackTop Systems.
26 * Copyright (c) 2016, Intel Corporation.
27 */
28
29 /*
30 * Pool import support functions.
31 *
32 * Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
33 * these commands are expected to run in the global zone, we can assume
34 * that the devices are all readable when called.
35 *
36 * To import a pool, we rely on reading the configuration information from the
37 * ZFS label of each device. If we successfully read the label, then we
38 * organize the configuration information in the following hierarchy:
39 *
40 * pool guid -> toplevel vdev guid -> label txg
41 *
42 * Duplicate entries matching this same tuple will be discarded. Once we have
43 * examined every device, we pick the best label txg config for each toplevel
44 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
45 * update any paths that have changed. Finally, we attempt to import the pool
46 * using our derived config, and record the results.
47 */
48
49 #include <ctype.h>
50 #include <dirent.h>
51 #include <errno.h>
52 #include <libintl.h>
53 #include <libgen.h>
54 #include <stddef.h>
55 #include <stdlib.h>
56 #include <stdio.h>
57 #include <string.h>
58 #include <sys/stat.h>
59 #include <unistd.h>
60 #include <fcntl.h>
61 #include <sys/dktp/fdisk.h>
62 #include <sys/vdev_impl.h>
63 #include <sys/fs/zfs.h>
64
65 #include <thread_pool.h>
66 #include <libzutil.h>
67 #include <libnvpair.h>
68 #include <libzfs.h>
69
70 #include "zutil_import.h"
71
72 #ifdef HAVE_LIBUDEV
73 #include <libudev.h>
74 #include <sched.h>
75 #endif
76 #include <blkid/blkid.h>
77
78 #define DEV_BYID_PATH "/dev/disk/by-id/"
79
80 /*
81 * Skip devices with well known prefixes:
82 * there can be side effects when opening devices which need to be avoided.
83 *
84 * hpet - High Precision Event Timer
85 * watchdog[N] - Watchdog must be closed in a special way.
86 */
87 static boolean_t
88 should_skip_dev(const char *dev)
89 {
90 return ((strcmp(dev, "watchdog") == 0) ||
91 (strncmp(dev, "watchdog", 8) == 0 && isdigit(dev[8])) ||
92 (strcmp(dev, "hpet") == 0));
93 }
94
/*
 * Issue the BLKFLSBUF ioctl on the open descriptor 'fd' and hand the
 * ioctl() result straight back to the caller.
 */
int
zfs_dev_flush(int fd)
{
	int rc = ioctl(fd, BLKFLSBUF);

	return (rc);
}
100
101 void
102 zpool_open_func(void *arg)
103 {
104 rdsk_node_t *rn = arg;
105 libpc_handle_t *hdl = rn->rn_hdl;
106 struct stat64 statbuf;
107 nvlist_t *config;
108 uint64_t vdev_guid = 0;
109 int error;
110 int num_labels = 0;
111 int fd;
112
113 if (should_skip_dev(zfs_basename(rn->rn_name)))
114 return;
115
116 /*
117 * Ignore failed stats. We only want regular files and block devices.
118 * Ignore files that are too small to hold a zpool.
119 */
120 if (stat64(rn->rn_name, &statbuf) != 0 ||
121 (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)) ||
122 (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE))
123 return;
124
125 /*
126 * Preferentially open using O_DIRECT to bypass the block device
127 * cache which may be stale for multipath devices. An EINVAL errno
128 * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
129 */
130 fd = open(rn->rn_name, O_RDONLY | O_DIRECT | O_CLOEXEC);
131 if ((fd < 0) && (errno == EINVAL))
132 fd = open(rn->rn_name, O_RDONLY | O_CLOEXEC);
133 if ((fd < 0) && (errno == EACCES))
134 hdl->lpc_open_access_error = B_TRUE;
135 if (fd < 0)
136 return;
137
138 error = zpool_read_label(fd, &config, &num_labels);
139 if (error != 0) {
140 (void) close(fd);
141 return;
142 }
143
144 if (num_labels == 0) {
145 (void) close(fd);
146 nvlist_free(config);
147 return;
148 }
149
150 /*
151 * Check that the vdev is for the expected guid. Additional entries
152 * are speculatively added based on the paths stored in the labels.
153 * Entries with valid paths but incorrect guids must be removed.
154 */
155 error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
156 if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
157 (void) close(fd);
158 nvlist_free(config);
159 return;
160 }
161
162 (void) close(fd);
163
164 rn->rn_config = config;
165 rn->rn_num_labels = num_labels;
166
167 /*
168 * Add additional entries for paths described by this label.
169 */
170 if (rn->rn_labelpaths) {
171 const char *path = NULL;
172 const char *devid = NULL;
173 rdsk_node_t *slice;
174 avl_index_t where;
175 int error;
176
177 if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
178 return;
179
180 /*
181 * Allow devlinks to stabilize so all paths are available.
182 */
183 zpool_disk_wait(rn->rn_name);
184
185 if (path != NULL) {
186 slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
187 slice->rn_name = zutil_strdup(hdl, path);
188 slice->rn_vdev_guid = vdev_guid;
189 slice->rn_avl = rn->rn_avl;
190 slice->rn_hdl = hdl;
191 slice->rn_order = IMPORT_ORDER_PREFERRED_1;
192 slice->rn_labelpaths = B_FALSE;
193 pthread_mutex_lock(rn->rn_lock);
194 if (avl_find(rn->rn_avl, slice, &where)) {
195 pthread_mutex_unlock(rn->rn_lock);
196 free(slice->rn_name);
197 free(slice);
198 } else {
199 avl_insert(rn->rn_avl, slice, where);
200 pthread_mutex_unlock(rn->rn_lock);
201 zpool_open_func(slice);
202 }
203 }
204
205 if (devid != NULL) {
206 slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
207 error = asprintf(&slice->rn_name, "%s%s",
208 DEV_BYID_PATH, devid);
209 if (error == -1) {
210 free(slice);
211 return;
212 }
213
214 slice->rn_vdev_guid = vdev_guid;
215 slice->rn_avl = rn->rn_avl;
216 slice->rn_hdl = hdl;
217 slice->rn_order = IMPORT_ORDER_PREFERRED_2;
218 slice->rn_labelpaths = B_FALSE;
219 pthread_mutex_lock(rn->rn_lock);
220 if (avl_find(rn->rn_avl, slice, &where)) {
221 pthread_mutex_unlock(rn->rn_lock);
222 free(slice->rn_name);
223 free(slice);
224 } else {
225 avl_insert(rn->rn_avl, slice, where);
226 pthread_mutex_unlock(rn->rn_lock);
227 zpool_open_func(slice);
228 }
229 }
230 }
231 }
232
/*
 * Directories searched by default for importable devices.  The index of an
 * entry in this table is what zfs_path_order() reports as the order of a
 * device found below it, so the sequence of the entries must be kept.
 */
static const char *const zpool_default_import_path[] = {
	/* Custom rules, use first if they exist */
	"/dev/disk/by-vdev",
	/* Use multipath devices before components */
	"/dev/mapper",
	/* Single unique entry set by user */
	"/dev/disk/by-partlabel",
	/* Generated partition uuid */
	"/dev/disk/by-partuuid",
	/* Custom persistent labels */
	"/dev/disk/by-label",
	/* Single unique entry and persistent */
	"/dev/disk/by-uuid",
	/* May be multiple entries and persistent */
	"/dev/disk/by-id",
	/* Encodes physical location and persistent */
	"/dev/disk/by-path",
	/* UNSAFE device names will change */
	"/dev"
};
245
246 const char * const *
247 zpool_default_search_paths(size_t *count)
248 {
249 *count = ARRAY_SIZE(zpool_default_import_path);
250 return (zpool_default_import_path);
251 }
252
253 /*
254 * Given a full path to a device determine if that device appears in the
255 * import search path. If it does return the first match and store the
256 * index in the passed 'order' variable, otherwise return an error.
257 */
258 static int
259 zfs_path_order(const char *name, int *order)
260 {
261 const char *env = getenv("ZPOOL_IMPORT_PATH");
262
263 if (env) {
264 for (int i = 0; ; ++i) {
265 env += strspn(env, ":");
266 size_t dirlen = strcspn(env, ":");
267 if (dirlen) {
268 if (strncmp(name, env, dirlen) == 0) {
269 *order = i;
270 return (0);
271 }
272
273 env += dirlen;
274 } else
275 break;
276 }
277 } else {
278 for (int i = 0; i < ARRAY_SIZE(zpool_default_import_path);
279 ++i) {
280 if (strncmp(name, zpool_default_import_path[i],
281 strlen(zpool_default_import_path[i])) == 0) {
282 *order = i;
283 return (0);
284 }
285 }
286 }
287
288 return (ENOENT);
289 }
290
291 /*
292 * Use libblkid to quickly enumerate all known zfs devices.
293 */
294 int
295 zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
296 avl_tree_t **slice_cache)
297 {
298 rdsk_node_t *slice;
299 blkid_cache cache;
300 blkid_dev_iterate iter;
301 blkid_dev dev;
302 avl_index_t where;
303 int error;
304
305 *slice_cache = NULL;
306
307 error = blkid_get_cache(&cache, NULL);
308 if (error != 0)
309 return (error);
310
311 error = blkid_probe_all_new(cache);
312 if (error != 0) {
313 blkid_put_cache(cache);
314 return (error);
315 }
316
317 iter = blkid_dev_iterate_begin(cache);
318 if (iter == NULL) {
319 blkid_put_cache(cache);
320 return (EINVAL);
321 }
322
323 /* Only const char *s since 2.32 */
324 error = blkid_dev_set_search(iter,
325 (char *)"TYPE", (char *)"zfs_member");
326 if (error != 0) {
327 blkid_dev_iterate_end(iter);
328 blkid_put_cache(cache);
329 return (error);
330 }
331
332 *slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
333 avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
334 offsetof(rdsk_node_t, rn_node));
335
336 while (blkid_dev_next(iter, &dev) == 0) {
337 slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
338 slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
339 slice->rn_vdev_guid = 0;
340 slice->rn_lock = lock;
341 slice->rn_avl = *slice_cache;
342 slice->rn_hdl = hdl;
343 slice->rn_labelpaths = B_TRUE;
344
345 error = zfs_path_order(slice->rn_name, &slice->rn_order);
346 if (error == 0)
347 slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
348 else
349 slice->rn_order = IMPORT_ORDER_DEFAULT;
350
351 pthread_mutex_lock(lock);
352 if (avl_find(*slice_cache, slice, &where)) {
353 free(slice->rn_name);
354 free(slice);
355 } else {
356 avl_insert(*slice_cache, slice, where);
357 }
358 pthread_mutex_unlock(lock);
359 }
360
361 blkid_dev_iterate_end(iter);
362 blkid_put_cache(cache);
363
364 return (0);
365 }
366
367 /*
368 * Linux persistent device strings for vdev labels
369 *
370 * based on libudev for consistency with libudev disk add/remove events
371 */
372
/* Buffers for the persistent device strings of one vdev. */
struct vdev_dev_strs {
	char vds_devid[128];	/* filled in by zfs_device_get_devid() */
	char vds_devphys[128];	/* filled in by zfs_device_get_physical() */
};

typedef struct vdev_dev_strs vdev_dev_strs_t;
377
378 #ifdef HAVE_LIBUDEV
379
380 /*
381 * Obtain the persistent device id string (describes what)
382 *
383 * used by ZED vdev matching for auto-{online,expand,replace}
384 */
385 int
386 zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
387 {
388 struct udev_list_entry *entry;
389 const char *bus;
390 char devbyid[MAXPATHLEN];
391
392 /* The bus based by-id path is preferred */
393 bus = udev_device_get_property_value(dev, "ID_BUS");
394
395 if (bus == NULL) {
396 const char *dm_uuid;
397
398 /*
399 * For multipath nodes use the persistent uuid based identifier
400 *
401 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
402 */
403 dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
404 if (dm_uuid != NULL) {
405 (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
406 return (0);
407 }
408
409 /*
410 * For volumes use the persistent /dev/zvol/dataset identifier
411 */
412 entry = udev_device_get_devlinks_list_entry(dev);
413 while (entry != NULL) {
414 const char *name;
415
416 name = udev_list_entry_get_name(entry);
417 if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
418 (void) strlcpy(bufptr, name, buflen);
419 return (0);
420 }
421 entry = udev_list_entry_get_next(entry);
422 }
423
424 /*
425 * NVME 'by-id' symlinks are similar to bus case
426 */
427 struct udev_device *parent;
428
429 parent = udev_device_get_parent_with_subsystem_devtype(dev,
430 "nvme", NULL);
431 if (parent != NULL)
432 bus = "nvme"; /* continue with bus symlink search */
433 else
434 return (ENODATA);
435 }
436
437 /*
438 * locate the bus specific by-id link
439 */
440 (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
441 entry = udev_device_get_devlinks_list_entry(dev);
442 while (entry != NULL) {
443 const char *name;
444
445 name = udev_list_entry_get_name(entry);
446 if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
447 name += strlen(DEV_BYID_PATH);
448 (void) strlcpy(bufptr, name, buflen);
449 return (0);
450 }
451 entry = udev_list_entry_get_next(entry);
452 }
453
454 return (ENODATA);
455 }
456
457 /*
458 * Obtain the persistent physical location string (describes where)
459 *
460 * used by ZED vdev matching for auto-{online,expand,replace}
461 */
462 int
463 zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
464 {
465 const char *physpath = NULL;
466 struct udev_list_entry *entry;
467
468 /*
469 * Normal disks use ID_PATH for their physical path.
470 */
471 physpath = udev_device_get_property_value(dev, "ID_PATH");
472 if (physpath != NULL && strlen(physpath) > 0) {
473 (void) strlcpy(bufptr, physpath, buflen);
474 return (0);
475 }
476
477 /*
478 * Device mapper devices are virtual and don't have a physical
479 * path. For them we use ID_VDEV instead, which is setup via the
480 * /etc/vdev_id.conf file. ID_VDEV provides a persistent path
481 * to a virtual device. If you don't have vdev_id.conf setup,
482 * you cannot use multipath autoreplace with device mapper.
483 */
484 physpath = udev_device_get_property_value(dev, "ID_VDEV");
485 if (physpath != NULL && strlen(physpath) > 0) {
486 (void) strlcpy(bufptr, physpath, buflen);
487 return (0);
488 }
489
490 /*
491 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
492 */
493 entry = udev_device_get_devlinks_list_entry(dev);
494 while (entry != NULL) {
495 physpath = udev_list_entry_get_name(entry);
496 if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
497 (void) strlcpy(bufptr, physpath, buflen);
498 return (0);
499 }
500 entry = udev_list_entry_get_next(entry);
501 }
502
503 /*
504 * For all other devices fallback to using the by-uuid name.
505 */
506 entry = udev_device_get_devlinks_list_entry(dev);
507 while (entry != NULL) {
508 physpath = udev_list_entry_get_name(entry);
509 if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
510 (void) strlcpy(bufptr, physpath, buflen);
511 return (0);
512 }
513 entry = udev_list_entry_get_next(entry);
514 }
515
516 return (ENODATA);
517 }
518
519 /*
520 * A disk is considered a multipath whole disk when:
521 * DEVNAME key value has "dm-"
522 * DM_NAME key value has "mpath" prefix
523 * DM_UUID key exists
524 * ID_PART_TABLE_TYPE key does not exist or is not gpt
525 */
526 static boolean_t
527 udev_mpath_whole_disk(struct udev_device *dev)
528 {
529 const char *devname, *type, *uuid;
530
531 devname = udev_device_get_property_value(dev, "DEVNAME");
532 type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
533 uuid = udev_device_get_property_value(dev, "DM_UUID");
534
535 if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
536 ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
537 (uuid != NULL)) {
538 return (B_TRUE);
539 }
540
541 return (B_FALSE);
542 }
543
/*
 * Report whether udev has finished setting up 'dev'.  Returns non-zero when
 * the device is considered ready.
 */
static int
udev_device_is_ready(struct udev_device *dev)
{
	int ready;

#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	ready = udev_device_get_is_initialized(dev);
#else
	/* wait for DEVLINKS property to be initialized */
	ready = (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
#endif

	return (ready);
}
554
555 #else
556
/*
 * Variant used when built without libudev: no device id can be derived,
 * so the arguments are ignored and ENODATA is always returned.
 */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev;
	(void) bufptr;
	(void) buflen;

	return (ENODATA);
}
563
/*
 * Variant used when built without libudev: no physical location can be
 * derived, so the arguments are ignored and ENODATA is always returned.
 */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev;
	(void) bufptr;
	(void) buflen;

	return (ENODATA);
}
570
571 #endif /* HAVE_LIBUDEV */
572
573 /*
574 * Wait up to timeout_ms for udev to set up the device node. The device is
575 * considered ready when libudev determines it has been initialized, all of
576 * the device links have been verified to exist, and it has been allowed to
577 * settle. At this point the device can be accessed reliably. Depending on
578 * the complexity of the udev rules this process could take several seconds.
579 */
580 int
581 zpool_label_disk_wait(const char *path, int timeout_ms)
582 {
583 #ifdef HAVE_LIBUDEV
584 struct udev *udev;
585 struct udev_device *dev = NULL;
586 char nodepath[MAXPATHLEN];
587 char *sysname = NULL;
588 int ret = ENODEV;
589 int settle_ms = 50;
590 long sleep_ms = 10;
591 hrtime_t start, settle;
592
593 if ((udev = udev_new()) == NULL)
594 return (ENXIO);
595
596 start = gethrtime();
597 settle = 0;
598
599 do {
600 if (sysname == NULL) {
601 if (realpath(path, nodepath) != NULL) {
602 sysname = strrchr(nodepath, '/') + 1;
603 } else {
604 (void) usleep(sleep_ms * MILLISEC);
605 continue;
606 }
607 }
608
609 dev = udev_device_new_from_subsystem_sysname(udev,
610 "block", sysname);
611 if ((dev != NULL) && udev_device_is_ready(dev)) {
612 struct udev_list_entry *links, *link = NULL;
613
614 ret = 0;
615 links = udev_device_get_devlinks_list_entry(dev);
616
617 udev_list_entry_foreach(link, links) {
618 struct stat64 statbuf;
619 const char *name;
620
621 name = udev_list_entry_get_name(link);
622 errno = 0;
623 if (stat64(name, &statbuf) == 0 && errno == 0)
624 continue;
625
626 settle = 0;
627 ret = ENODEV;
628 break;
629 }
630
631 if (ret == 0) {
632 if (settle == 0) {
633 settle = gethrtime();
634 } else if (NSEC2MSEC(gethrtime() - settle) >=
635 settle_ms) {
636 udev_device_unref(dev);
637 break;
638 }
639 }
640 }
641
642 udev_device_unref(dev);
643 (void) usleep(sleep_ms * MILLISEC);
644
645 } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
646
647 udev_unref(udev);
648
649 return (ret);
650 #else
651 int settle_ms = 50;
652 long sleep_ms = 10;
653 hrtime_t start, settle;
654 struct stat64 statbuf;
655
656 start = gethrtime();
657 settle = 0;
658
659 do {
660 errno = 0;
661 if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
662 if (settle == 0)
663 settle = gethrtime();
664 else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
665 return (0);
666 } else if (errno != ENOENT) {
667 return (errno);
668 }
669
670 usleep(sleep_ms * MILLISEC);
671 } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
672
673 return (ENODEV);
674 #endif /* HAVE_LIBUDEV */
675 }
676
677 /*
678 * Simplified version of zpool_label_disk_wait() where we wait for a device
679 * to appear using the default timeouts.
680 */
681 int
682 zpool_disk_wait(const char *path)
683 {
684 int timeout;
685 timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS",
686 DISK_LABEL_WAIT);
687
688 return (zpool_label_disk_wait(path, timeout));
689 }
690
691 /*
692 * Encode the persistent devices strings
693 * used for the vdev disk label
694 */
695 static int
696 encode_device_strings(const char *path, vdev_dev_strs_t *ds,
697 boolean_t wholedisk)
698 {
699 #ifdef HAVE_LIBUDEV
700 struct udev *udev;
701 struct udev_device *dev = NULL;
702 char nodepath[MAXPATHLEN];
703 char *sysname;
704 int ret = ENODEV;
705 hrtime_t start;
706
707 if ((udev = udev_new()) == NULL)
708 return (ENXIO);
709
710 /* resolve path to a runtime device node instance */
711 if (realpath(path, nodepath) == NULL)
712 goto no_dev;
713
714 sysname = strrchr(nodepath, '/') + 1;
715
716 /*
717 * Wait up to 3 seconds for udev to set up the device node context
718 */
719 start = gethrtime();
720 do {
721 dev = udev_device_new_from_subsystem_sysname(udev, "block",
722 sysname);
723 if (dev == NULL)
724 goto no_dev;
725 if (udev_device_is_ready(dev))
726 break; /* udev ready */
727
728 udev_device_unref(dev);
729 dev = NULL;
730
731 if (NSEC2MSEC(gethrtime() - start) < 10)
732 (void) sched_yield(); /* yield/busy wait up to 10ms */
733 else
734 (void) usleep(10 * MILLISEC);
735
736 } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
737
738 if (dev == NULL)
739 goto no_dev;
740
741 /*
742 * Only whole disks require extra device strings
743 */
744 if (!wholedisk && !udev_mpath_whole_disk(dev))
745 goto no_dev;
746
747 ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
748 if (ret != 0)
749 goto no_dev_ref;
750
751 /* physical location string (optional) */
752 if (zfs_device_get_physical(dev, ds->vds_devphys,
753 sizeof (ds->vds_devphys)) != 0) {
754 ds->vds_devphys[0] = '\0'; /* empty string --> not available */
755 }
756
757 no_dev_ref:
758 udev_device_unref(dev);
759 no_dev:
760 udev_unref(udev);
761
762 return (ret);
763 #else
764 (void) path;
765 (void) ds;
766 (void) wholedisk;
767 return (ENOENT);
768 #endif
769 }
770
771 /*
772 * Rescan the enclosure sysfs path for turning on enclosure LEDs and store it
773 * in the nvlist * (if applicable). Like:
774 * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
775 *
776 * If an old path was in the nvlist, and the rescan can not find a new path,
777 * then keep the old path, since the disk may have been removed.
778 *
779 * path: The vdev path (value from ZPOOL_CONFIG_PATH)
780 * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
781 */
782 void
783 update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
784 const char *key)
785 {
786 char *upath, *spath;
787 const char *oldpath = NULL;
788
789 (void) nvlist_lookup_string(nv, key, &oldpath);
790
791 /* Add enclosure sysfs path (if disk is in an enclosure). */
792 upath = zfs_get_underlying_path(path);
793 spath = zfs_get_enclosure_sysfs_path(upath);
794
795 if (spath) {
796 (void) nvlist_add_string(nv, key, spath);
797 } else {
798 /*
799 * We couldn't dynamically scan the disk's enclosure sysfs path.
800 * This could be because the disk went away. If there's an old
801 * enclosure sysfs path in the nvlist, then keep using it.
802 */
803 if (!oldpath) {
804 (void) nvlist_remove_all(nv, key);
805 }
806 }
807
808 free(upath);
809 free(spath);
810 }
811
812 /*
813 * This will get called for each leaf vdev.
814 */
815 static int
816 sysfs_path_pool_vdev_iter_f(void *hdl_data, nvlist_t *nv, void *data)
817 {
818 (void) hdl_data, (void) data;
819
820 const char *path = NULL;
821 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
822 return (1);
823
824 /* Rescan our enclosure sysfs path for this vdev */
825 update_vdev_config_dev_sysfs_path(nv, path,
826 ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
827 return (0);
828 }
829
830 /*
831 * Given an nvlist for our pool (with vdev tree), iterate over all the
832 * leaf vdevs and update their ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH.
833 */
834 void
835 update_vdevs_config_dev_sysfs_path(nvlist_t *config)
836 {
837 nvlist_t *nvroot = NULL;
838 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
839 &nvroot) == 0);
840 for_each_vdev_in_nvlist(nvroot, sysfs_path_pool_vdev_iter_f, NULL);
841 }
842
843 /*
844 * Update a leaf vdev's persistent device strings
845 *
846 * - only applies for a dedicated leaf vdev (aka whole disk)
847 * - updated during pool create|add|attach|import
848 * - used for matching device matching during auto-{online,expand,replace}
849 * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
850 * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
851 *
852 * single device node example:
853 * devid: 'scsi-MG03SCA300_350000494a8cb3d67-part1'
854 * phys_path: 'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
855 *
856 * multipath device node example:
857 * devid: 'dm-uuid-mpath-35000c5006304de3f'
858 *
859 * We also store the enclosure sysfs path for turning on enclosure LEDs
860 * (if applicable):
861 * vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
862 */
863 void
864 update_vdev_config_dev_strs(nvlist_t *nv)
865 {
866 vdev_dev_strs_t vds;
867 const char *env, *type, *path;
868 uint64_t wholedisk = 0;
869
870 /*
871 * For the benefit of legacy ZFS implementations, allow
872 * for opting out of devid strings in the vdev label.
873 *
874 * example use:
875 * env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
876 *
877 * explanation:
878 * Older OpenZFS implementations had issues when attempting to
879 * display pool config VDEV names if a "devid" NVP value is
880 * present in the pool's config.
881 *
882 * For example, a pool that originated on illumos platform would
883 * have a devid value in the config and "zpool status" would fail
884 * when listing the config.
885 *
886 * A pool can be stripped of any "devid" values on import or
887 * prevented from adding them on zpool create|add by setting
888 * ZFS_VDEV_DEVID_OPT_OUT.
889 */
890 env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
891 if (env && (strtoul(env, NULL, 0) > 0 ||
892 !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
893 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
894 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
895 return;
896 }
897
898 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
899 strcmp(type, VDEV_TYPE_DISK) != 0) {
900 return;
901 }
902 if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
903 return;
904 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
905
906 /*
907 * Update device string values in the config nvlist.
908 */
909 if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
910 (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
911 if (vds.vds_devphys[0] != '\0') {
912 (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
913 vds.vds_devphys);
914 }
915 update_vdev_config_dev_sysfs_path(nv, path,
916 ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
917 } else {
918 /* Clear out any stale entries. */
919 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
920 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
921 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
922 }
923 }