New upstream version 0.7.2

diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c
index 0d4240406ce46be73813e2f376566b696439dd82..f371d925bc59c4223089260bcb482fdc0700035b 100644
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
  * CDDL HEADER END
  */
 /*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+ * Copyright 2015 RackTop Systems.
+ * Copyright (c) 2016, Intel Corporation.
  */
 
 /*
 #include <dirent.h>
 #include <errno.h>
 #include <libintl.h>
+#ifdef HAVE_LIBUDEV
+#include <libudev.h>
+#include <sched.h>
+#endif
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/vtoc.h>
 #include <sys/dktp/fdisk.h>
 #include <sys/efi_partition.h>
-
 #include <sys/vdev_impl.h>
-#ifdef HAVE_LIBBLKID
 #include <blkid/blkid.h>
-#endif
-
 #include "libzfs.h"
 #include "libzfs_impl.h"
+#include <libzfs.h>
 
 /*
  * Intermediate structures used to gather configuration information.
@@ -99,30 +102,427 @@ typedef struct pool_list {
 
 #define        DEV_BYID_PATH   "/dev/disk/by-id/"
 
-static char *
-get_devid(const char *path)
+/*
+ * Linux persistent device strings for vdev labels
+ *
+ * based on libudev for consistency with libudev disk add/remove events
+ */
+#ifdef HAVE_LIBUDEV
+
+typedef struct vdev_dev_strs {
+       char    vds_devid[128];
+       char    vds_devphys[128];
+} vdev_dev_strs_t;
+
+/*
+ * Obtain the persistent device id string (describes what)
+ *
+ * used by ZED vdev matching for auto-{online,expand,replace}
+ */
+int
+zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
 {
-       int fd;
-       ddi_devid_t devid;
-       char *minor, *ret;
+       struct udev_list_entry *entry;
+       const char *bus;
+       char devbyid[MAXPATHLEN];
 
-       if ((fd = open(path, O_RDONLY)) < 0)
-               return (NULL);
+       /* The bus based by-id path is preferred */
+       bus = udev_device_get_property_value(dev, "ID_BUS");
+
+       if (bus == NULL) {
+               const char *dm_uuid;
 
-       minor = NULL;
-       ret = NULL;
-       if (devid_get(fd, &devid) == 0) {
-               if (devid_get_minor_name(fd, &minor) == 0)
-                       ret = devid_str_encode(devid, minor);
-               if (minor != NULL)
-                       devid_str_free(minor);
-               devid_free(devid);
+               /*
+                * For multipath nodes use the persistent uuid based identifier
+                *
+                * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
+                */
+               dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
+               if (dm_uuid != NULL) {
+                       (void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
+                       return (0);
+               }
+               return (ENODATA);
        }
-       (void) close(fd);
+
+       /*
+        * locate the bus specific by-id link
+        */
+       (void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);
+       entry = udev_device_get_devlinks_list_entry(dev);
+       while (entry != NULL) {
+               const char *name;
+
+               name = udev_list_entry_get_name(entry);
+               if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
+                       name += strlen(DEV_BYID_PATH);
+                       (void) strlcpy(bufptr, name, buflen);
+                       return (0);
+               }
+               entry = udev_list_entry_get_next(entry);
+       }
+
+       return (ENODATA);
+}
+
+/*
+ * Obtain the persistent physical location string (describes where)
+ *
+ * used by ZED vdev matching for auto-{online,expand,replace}
+ */
+int
+zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
+{
+       const char *physpath = NULL;
+
+       /*
+        * Normal disks use ID_PATH for their physical path.  Device mapper
+        * devices are virtual and don't have a physical path.  For them we
+        * use ID_VDEV instead, which is set up via the /etc/vdev_id.conf
+        * file.  ID_VDEV provides a persistent path to a virtual device.  If
+        * you don't have vdev_id.conf set up, you cannot use multipath
+        * autoreplace.
+        */
+       if (!((physpath = udev_device_get_property_value(dev, "ID_PATH")) &&
+           physpath[0])) {
+               if (!((physpath =
+                   udev_device_get_property_value(dev, "ID_VDEV")) &&
+                   physpath[0])) {
+                       return (ENODATA);
+               }
+       }
+
+       (void) strlcpy(bufptr, physpath, buflen);
+
+       return (0);
+}
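
For reference, a minimal standalone caller could exercise the two helpers above like this. This is only a sketch: it assumes the helpers are linked in, that libudev development headers are installed, and that a block device named "sda" exists (build with: cc demo.c -ludev).

#include <stdio.h>
#include <libudev.h>

extern int zfs_device_get_devid(struct udev_device *, char *, size_t);
extern int zfs_device_get_physical(struct udev_device *, char *, size_t);

int
main(void)
{
        struct udev *udev;
        struct udev_device *dev;
        char devid[128], physpath[128];

        if ((udev = udev_new()) == NULL)
                return (1);

        /* "sda" is a placeholder sysname; substitute a real disk */
        dev = udev_device_new_from_subsystem_sysname(udev, "block", "sda");
        if (dev != NULL) {
                if (zfs_device_get_devid(dev, devid, sizeof (devid)) == 0)
                        (void) printf("devid:     %s\n", devid);
                if (zfs_device_get_physical(dev, physpath,
                    sizeof (physpath)) == 0)
                        (void) printf("phys_path: %s\n", physpath);
                udev_device_unref(dev);
        }
        udev_unref(udev);
        return (0);
}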
+
+boolean_t
+udev_is_mpath(struct udev_device *dev)
+{
+       return (udev_device_get_property_value(dev, "DM_UUID") &&
+           udev_device_get_property_value(dev, "MPATH_SBIN_PATH"));
+}
+
+/*
+ * A disk is considered a multipath whole disk when:
+ *     DEVNAME key value has the "/dev/dm-" prefix
+ *     DM_UUID key exists
+ *     ID_PART_TABLE_TYPE key does not exist or is not gpt
+ */
+static boolean_t
+udev_mpath_whole_disk(struct udev_device *dev)
+{
+       const char *devname, *type, *uuid;
+
+       devname = udev_device_get_property_value(dev, "DEVNAME");
+       type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
+       uuid = udev_device_get_property_value(dev, "DM_UUID");
+
+       if ((devname != NULL && strncmp(devname, "/dev/dm-", 8) == 0) &&
+           ((type == NULL) || (strcmp(type, "gpt") != 0)) &&
+           (uuid != NULL)) {
+               return (B_TRUE);
+       }
+
+       return (B_FALSE);
+}
+
+/*
+ * Check if a disk is effectively a multipath whole disk
+ */
+boolean_t
+is_mpath_whole_disk(const char *path)
+{
+       struct udev *udev;
+       struct udev_device *dev = NULL;
+       char nodepath[MAXPATHLEN];
+       char *sysname;
+       boolean_t wholedisk = B_FALSE;
+
+       if (realpath(path, nodepath) == NULL)
+               return (B_FALSE);
+       sysname = strrchr(nodepath, '/') + 1;
+       if (strncmp(sysname, "dm-", 3) != 0)
+               return (B_FALSE);
+       if ((udev = udev_new()) == NULL)
+               return (B_FALSE);
+       if ((dev = udev_device_new_from_subsystem_sysname(udev, "block",
+           sysname)) == NULL) {
+               udev_unref(udev);
+               return (B_FALSE);
+       }
+
+       wholedisk = udev_mpath_whole_disk(dev);
+
+       udev_device_unref(dev);
+       udev_unref(udev);
+       return (wholedisk);
+}
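
To inspect which of the keys above a given device node actually carries, a small diagnostic helper can dump them. Hypothetical sketch; dump_mpath_keys is not part of this file:

#include <stdio.h>
#include <libudev.h>

/* Print the udev properties consulted by udev_mpath_whole_disk() */
static void
dump_mpath_keys(struct udev_device *dev)
{
        static const char *keys[] = {
                "DEVNAME", "DM_UUID", "ID_PART_TABLE_TYPE"
        };
        unsigned int i;

        for (i = 0; i < sizeof (keys) / sizeof (keys[0]); i++) {
                const char *val;

                val = udev_device_get_property_value(dev, keys[i]);
                (void) printf("%-20s = %s\n", keys[i],
                    val != NULL ? val : "(unset)");
        }
}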
+
+static int
+udev_device_is_ready(struct udev_device *dev)
+{
+#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
+       return (udev_device_get_is_initialized(dev));
+#else
+       /* wait for DEVLINKS property to be initialized */
+       return (udev_device_get_property_value(dev, "DEVLINKS") != NULL);
+#endif
+}
+
+/*
+ * Wait up to timeout_ms for udev to set up the device node.  The device is
+ * considered ready when libudev determines it has been initialized, all of
+ * the device links have been verified to exist, and it has been allowed to
+ * settle.  At this point the device can be accessed reliably.
+ * Depending on the complexity of the udev rules this process could take
+ * several seconds.
+ */
+int
+zpool_label_disk_wait(char *path, int timeout_ms)
+{
+       struct udev *udev;
+       struct udev_device *dev = NULL;
+       char nodepath[MAXPATHLEN];
+       char *sysname = NULL;
+       int ret = ENODEV;
+       int settle_ms = 50;
+       long sleep_ms = 10;
+       hrtime_t start, settle;
+
+       if ((udev = udev_new()) == NULL)
+               return (ENXIO);
+
+       start = gethrtime();
+       settle = 0;
+
+       do {
+               if (sysname == NULL) {
+                       if (realpath(path, nodepath) != NULL) {
+                               sysname = strrchr(nodepath, '/') + 1;
+                       } else {
+                               (void) usleep(sleep_ms * MILLISEC);
+                               continue;
+                       }
+               }
+
+               dev = udev_device_new_from_subsystem_sysname(udev,
+                   "block", sysname);
+               if ((dev != NULL) && udev_device_is_ready(dev)) {
+                       struct udev_list_entry *links, *link;
+
+                       ret = 0;
+                       links = udev_device_get_devlinks_list_entry(dev);
+
+                       udev_list_entry_foreach(link, links) {
+                               struct stat64 statbuf;
+                               const char *name;
+
+                               name = udev_list_entry_get_name(link);
+                               errno = 0;
+                               if (stat64(name, &statbuf) == 0 && errno == 0)
+                                       continue;
+
+                               settle = 0;
+                               ret = ENODEV;
+                               break;
+                       }
+
+                       if (ret == 0) {
+                               if (settle == 0) {
+                                       settle = gethrtime();
+                               } else if (NSEC2MSEC(gethrtime() - settle) >=
+                                   settle_ms) {
+                                       udev_device_unref(dev);
+                                       break;
+                               }
+                       }
+               }
+
+               udev_device_unref(dev);
+               (void) usleep(sleep_ms * MILLISEC);
+
+       } while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
+
+       udev_unref(udev);
+
+       return (ret);
+}
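
Usage sketch (hypothetical wrapper, not part of this file): after zpool_label_disk() creates a partition, a caller would wait on the new node before opening it. The 3000 ms timeout is arbitrary here.

#include <stdio.h>
#include <sys/param.h>

extern int zpool_label_disk_wait(char *, int);

/* Block until the first partition of a just-labeled disk settles. */
static int
wait_for_part1(const char *byid_base)
{
        char path[MAXPATHLEN];

        (void) snprintf(path, sizeof (path), "%s-part1", byid_base);
        return (zpool_label_disk_wait(path, 3000));
}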
+
+/*
+ * Encode the persistent device strings
+ * used for the vdev disk label
+ */
+static int
+encode_device_strings(const char *path, vdev_dev_strs_t *ds,
+    boolean_t wholedisk)
+{
+       struct udev *udev;
+       struct udev_device *dev = NULL;
+       char nodepath[MAXPATHLEN];
+       char *sysname;
+       int ret = ENODEV;
+       hrtime_t start;
+
+       if ((udev = udev_new()) == NULL)
+               return (ENXIO);
+
+       /* resolve path to a runtime device node instance */
+       if (realpath(path, nodepath) == NULL)
+               goto no_dev;
+
+       sysname = strrchr(nodepath, '/') + 1;
+
+       /*
+        * Wait up to 3 seconds for udev to set up the device node context
+        */
+       start = gethrtime();
+       do {
+               dev = udev_device_new_from_subsystem_sysname(udev, "block",
+                   sysname);
+               if (dev == NULL)
+                       goto no_dev;
+               if (udev_device_is_ready(dev))
+                       break;  /* udev ready */
+
+               udev_device_unref(dev);
+               dev = NULL;
+
+               if (NSEC2MSEC(gethrtime() - start) < 10)
+                       (void) sched_yield();   /* yield/busy wait up to 10ms */
+               else
+                       (void) usleep(10 * MILLISEC);
+
+       } while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));
+
+       if (dev == NULL)
+               goto no_dev;
+
+       /*
+        * Only whole disks require extra device strings
+        */
+       if (!wholedisk && !udev_mpath_whole_disk(dev))
+               goto no_dev;
+
+       ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
+       if (ret != 0)
+               goto no_dev_ref;
+
+       /* physical location string (optional) */
+       if (zfs_device_get_physical(dev, ds->vds_devphys,
+           sizeof (ds->vds_devphys)) != 0) {
+               ds->vds_devphys[0] = '\0'; /* empty string --> not available */
+       }
+
+no_dev_ref:
+       udev_device_unref(dev);
+no_dev:
+       udev_unref(udev);
 
        return (ret);
 }
 
+/*
+ * Update a leaf vdev's persistent device strings (Linux only)
+ *
+ * - only applies for a dedicated leaf vdev (aka whole disk)
+ * - updated during pool create|add|attach|import
+ * - used for device matching during auto-{online,expand,replace}
+ * - stored in a leaf disk config label (i.e. alongside 'path' NVP)
+ * - these strings are currently not used in kernel (i.e. for vdev_disk_open)
+ *
+ * single device node example:
+ *     devid:          'scsi-MG03SCA300_350000494a8cb3d67-part1'
+ *     phys_path:      'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
+ *
+ * multipath device node example:
+ *     devid:          'dm-uuid-mpath-35000c5006304de3f'
+ *
+ * We also store the enclosure sysfs path for turning on enclosure LEDs
+ * (if applicable):
+ *     vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
+ */
+void
+update_vdev_config_dev_strs(nvlist_t *nv)
+{
+       vdev_dev_strs_t vds;
+       char *env, *type, *path;
+       uint64_t wholedisk = 0;
+       char *upath, *spath;
+
+       /*
+        * For the benefit of legacy ZFS implementations, allow
+        * for opting out of devid strings in the vdev label.
+        *
+        * example use:
+        *      env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
+        *
+        * explanation:
+        * Older ZFS on Linux implementations had issues when attempting to
+        * display pool config VDEV names if a "devid" NVP value is present
+        * in the pool's config.
+        *
+        * For example, a pool that originated on illumos platform would
+        * have a devid value in the config and "zpool status" would fail
+        * when listing the config.
+        *
+        * A pool can be stripped of any "devid" values on import or
+        * prevented from adding them on zpool create|add by setting
+        * ZFS_VDEV_DEVID_OPT_OUT.
+        */
+       env = getenv("ZFS_VDEV_DEVID_OPT_OUT");
+       if (env && (strtoul(env, NULL, 0) > 0 ||
+           !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2))) {
+               (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+               (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
+               return;
+       }
+
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0 ||
+           strcmp(type, VDEV_TYPE_DISK) != 0) {
+               return;
+       }
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
+               return;
+       (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
+
+       /*
+        * Update device string values in config nvlist
+        */
+       if (encode_device_strings(path, &vds, (boolean_t)wholedisk) == 0) {
+               (void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
+               if (vds.vds_devphys[0] != '\0') {
+                       (void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
+                           vds.vds_devphys);
+               }
+
+               /* Add enclosure sysfs path (if disk is in an enclosure) */
+               upath = zfs_get_underlying_path(path);
+               spath = zfs_get_enclosure_sysfs_path(upath);
+               if (spath)
+                       nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
+                           spath);
+               else
+                       nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+
+               free(upath);
+               free(spath);
+       } else {
+               /* clear out any stale entries */
+               (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
+               (void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
+               (void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+       }
+}
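
Caller-pattern sketch (make_leaf_config is hypothetical; the ZPOOL_CONFIG_* and VDEV_TYPE_DISK constants come in via libzfs.h): build a minimal whole-disk leaf nvlist and let the function above attach the persistent strings.

#include <libzfs.h>

extern void update_vdev_config_dev_strs(nvlist_t *);

/* Construct a leaf-disk config and refresh its devid/phys_path NVPs. */
static nvlist_t *
make_leaf_config(const char *path)
{
        nvlist_t *nv = NULL;

        if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0)
                return (NULL);
        if (nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
            nvlist_add_string(nv, ZPOOL_CONFIG_PATH, path) != 0 ||
            nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, 1ULL) != 0) {
                nvlist_free(nv);
                return (NULL);
        }
        update_vdev_config_dev_strs(nv);
        return (nv);
}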
+#else
+
+boolean_t
+is_mpath_whole_disk(const char *path)
+{
+       return (B_FALSE);
+}
+
 /*
  * Wait up to timeout_ms for udev to set up the device node.  The device is
 * considered ready when the provided path has been verified to exist and
@@ -158,6 +558,13 @@ zpool_label_disk_wait(char *path, int timeout_ms)
        return (ENODEV);
 }
 
+void
+update_vdev_config_dev_strs(nvlist_t *nv)
+{
+}
+
+#endif /* HAVE_LIBUDEV */
+
 /*
  * Go through and fix up any path and/or devid information for the given vdev
  * configuration.
@@ -169,7 +576,7 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
        uint_t c, children;
        uint64_t guid;
        name_entry_t *ne, *best;
-       char *path, *devid;
+       char *path;
 
        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
            &child, &children) == 0) {
@@ -235,13 +642,8 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
        if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
                return (-1);
 
-       if ((devid = get_devid(best->ne_name)) == NULL) {
-               (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
-       } else {
-               if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
-                       return (-1);
-               devid_str_free(devid);
-       }
+       /* Linux only - update ZPOOL_CONFIG_DEVID and ZPOOL_CONFIG_PHYS_PATH */
+       update_vdev_config_dev_strs(nv);
 
        return (0);
 }
@@ -268,11 +670,14 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
            &state) == 0 &&
            (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
            nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
-               if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+               if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) {
+                       nvlist_free(config);
                        return (-1);
+               }
 
                if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
                        free(ne);
+                       nvlist_free(config);
                        return (-1);
                }
                ne->ne_guid = vdev_guid;
@@ -280,6 +685,7 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
                ne->ne_num_labels = num_labels;
                ne->ne_next = pl->names;
                pl->names = ne;
+               nvlist_free(config);
                return (0);
        }
 
@@ -387,118 +793,6 @@ add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
        return (0);
 }
 
-#ifdef HAVE_LIBBLKID
-static int
-add_path(libzfs_handle_t *hdl, pool_list_t *pools, uint64_t pool_guid,
-    uint64_t vdev_guid, const char *path, int order)
-{
-       nvlist_t *label;
-       uint64_t guid;
-       int error, fd, num_labels;
-
-       fd = open64(path, O_RDONLY);
-       if (fd < 0)
-               return (errno);
-
-       error = zpool_read_label(fd, &label, &num_labels);
-       close(fd);
-
-       if (error || label == NULL)
-               return (ENOENT);
-
-       error = nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &guid);
-       if (error || guid != pool_guid) {
-               nvlist_free(label);
-               return (EINVAL);
-       }
-
-       error = nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid);
-       if (error || guid != vdev_guid) {
-               nvlist_free(label);
-               return (EINVAL);
-       }
-
-       error = add_config(hdl, pools, path, order, num_labels, label);
-
-       return (error);
-}
-
-static int
-add_configs_from_label_impl(libzfs_handle_t *hdl, pool_list_t *pools,
-    nvlist_t *nvroot, uint64_t pool_guid, uint64_t vdev_guid)
-{
-       char udevpath[MAXPATHLEN];
-       char *path;
-       nvlist_t **child;
-       uint_t c, children;
-       uint64_t guid;
-       int error;
-
-       if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
-           &child, &children) == 0) {
-               for (c = 0; c < children; c++) {
-                       error  = add_configs_from_label_impl(hdl, pools,
-                           child[c], pool_guid, vdev_guid);
-                       if (error)
-                               return (error);
-               }
-               return (0);
-       }
-
-       if (nvroot == NULL)
-               return (0);
-
-       error = nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_GUID, &guid);
-       if ((error != 0) || (guid != vdev_guid))
-               return (0);
-
-       error = nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &path);
-       if (error == 0)
-               (void) add_path(hdl, pools, pool_guid, vdev_guid, path, 0);
-
-       error = nvlist_lookup_string(nvroot, ZPOOL_CONFIG_DEVID, &path);
-       if (error == 0) {
-               sprintf(udevpath, "%s%s", DEV_BYID_PATH, path);
-               (void) add_path(hdl, pools, pool_guid, vdev_guid, udevpath, 1);
-       }
-
-       return (0);
-}
-
-/*
- * Given a disk label call add_config() for all known paths to the device
- * as described by the label itself.  The paths are added in the following
- * priority order: 'path', 'devid', 'devnode'.  As these alternate paths are
- * added the labels are verified to make sure they refer to the same device.
- */
-static int
-add_configs_from_label(libzfs_handle_t *hdl, pool_list_t *pools,
-    char *devname, int num_labels, nvlist_t *label)
-{
-       nvlist_t *nvroot;
-       uint64_t pool_guid;
-       uint64_t vdev_guid;
-       int error;
-
-       if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
-           nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &pool_guid) ||
-           nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &vdev_guid))
-               return (ENOENT);
-
-       /* Allow devlinks to stabilize so all paths are available. */
-       zpool_label_disk_wait(devname, DISK_LABEL_WAIT);
-
-       /* Add alternate paths as described by the label vdev_tree. */
-       (void) add_configs_from_label_impl(hdl, pools, nvroot,
-           pool_guid, vdev_guid);
-
-       /* Add the device node /dev/sdX path as a last resort. */
-       error = add_config(hdl, pools, devname, 100, num_labels, label);
-
-       return (error);
-}
-#endif /* HAVE_LIBBLKID */
-
 /*
  * Returns true if the named pool matches the given GUID.
  */
@@ -531,13 +825,14 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
 {
        nvlist_t *nvl;
        zfs_cmd_t zc = {"\0"};
-       int err;
+       int err, dstbuf_size;
 
        if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
                return (NULL);
 
-       if (zcmd_alloc_dst_nvlist(hdl, &zc,
-           zc.zc_nvlist_conf_size * 2) != 0) {
+       dstbuf_size = MAX(CONFIG_BUF_MINSIZE, zc.zc_nvlist_conf_size * 4);
+
+       if (zcmd_alloc_dst_nvlist(hdl, &zc, dstbuf_size) != 0) {
                zcmd_free_nvlists(&zc);
                return (NULL);
        }
@@ -822,8 +1117,10 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
                                    nvlist_add_uint64(holey,
                                    ZPOOL_CONFIG_ID, c) != 0 ||
                                    nvlist_add_uint64(holey,
-                                   ZPOOL_CONFIG_GUID, 0ULL) != 0)
+                                   ZPOOL_CONFIG_GUID, 0ULL) != 0) {
+                                       nvlist_free(holey);
                                        goto nomem;
+                               }
                                child[c] = holey;
                        }
                }
@@ -1019,6 +1316,7 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels)
        vdev_label_t *label;
        nvlist_t *expected_config = NULL;
        uint64_t expected_guid = 0, size;
+       int error;
 
        *config = NULL;
 
@@ -1026,7 +1324,8 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels)
                return (0);
        size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
 
-       if ((label = malloc(sizeof (vdev_label_t))) == NULL)
+       error = posix_memalign((void **)&label, PAGESIZE, sizeof (*label));
+       if (error)
                return (-1);
 
        for (l = 0; l < VDEV_LABELS; l++) {
@@ -1080,6 +1379,270 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels)
        return (0);
 }
 
+typedef struct rdsk_node {
+       char *rn_name;                  /* Full path to device */
+       int rn_order;                   /* Preferred order (low to high) */
+       int rn_num_labels;              /* Number of valid labels */
+       uint64_t rn_vdev_guid;          /* Expected vdev guid when set */
+       libzfs_handle_t *rn_hdl;
+       nvlist_t *rn_config;            /* Label config */
+       avl_tree_t *rn_avl;
+       avl_node_t rn_node;
+       kmutex_t *rn_lock;
+       boolean_t rn_labelpaths;
+} rdsk_node_t;
+
+/*
+ * Sorted by vdev guid and full path to allow for multiple entries with
+ * the same full path name.  This is required because it's possible to
+ * have multiple block devices with labels that refer to the same
+ * ZPOOL_CONFIG_PATH yet have different vdev guids.  In this case both
+ * entries need to be added to the cache.  Scenarios where this can occur
+ * include overwritten pool labels, devices which are visible from multiple
+ * hosts and multipath devices.
+ */
+static int
+slice_cache_compare(const void *arg1, const void *arg2)
+{
+       const char  *nm1 = ((rdsk_node_t *)arg1)->rn_name;
+       const char  *nm2 = ((rdsk_node_t *)arg2)->rn_name;
+       uint64_t guid1 = ((rdsk_node_t *)arg1)->rn_vdev_guid;
+       uint64_t guid2 = ((rdsk_node_t *)arg2)->rn_vdev_guid;
+       int rv;
+
+       rv = AVL_CMP(guid1, guid2);
+       if (rv)
+               return (rv);
+
+       return (AVL_ISIGN(strcmp(nm1, nm2)));
+}
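
To illustrate why the comparator keys on the guid first, here is a sketch (it assumes placement in this file, next to rdsk_node_t and slice_cache_compare) in which two entries sharing one path coexist in the cache:

#include <stddef.h>
#include <string.h>

/* Two slices with equal names but distinct guids both fit in the tree. */
static void
slice_cache_example(avl_tree_t *cache, rdsk_node_t *a, rdsk_node_t *b)
{
        avl_create(cache, slice_cache_compare, sizeof (rdsk_node_t),
            offsetof(rdsk_node_t, rn_node));

        a->rn_name = strdup("/dev/sda1");
        a->rn_vdev_guid = 0x1111;       /* original label */
        b->rn_name = strdup("/dev/sda1");
        b->rn_vdev_guid = 0x2222;       /* overwritten label */

        avl_add(cache, a);
        avl_add(cache, b);      /* different guid, so no collision */
}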
+
+static boolean_t
+is_watchdog_dev(char *dev)
+{
+       /* For 'watchdog' dev */
+       if (strcmp(dev, "watchdog") == 0)
+               return (B_TRUE);
+
+       /* For 'watchdog<digit><whatever>' */
+       if (strstr(dev, "watchdog") == dev && isdigit(dev[8]))
+               return (B_TRUE);
+
+       return (B_FALSE);
+}
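
A few hypothetical assertions make the matching rule concrete:

#include <assert.h>

/* Sketch: names the helper accepts and rejects. */
static void
watchdog_dev_examples(void)
{
        assert(is_watchdog_dev("watchdog"));    /* bare name */
        assert(is_watchdog_dev("watchdog0"));   /* numbered instance */
        assert(!is_watchdog_dev("watchdogs"));  /* no digit at index 8 */
        assert(!is_watchdog_dev("sda"));
}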
+
+static int
+label_paths_impl(libzfs_handle_t *hdl, nvlist_t *nvroot, uint64_t pool_guid,
+    uint64_t vdev_guid, char **path, char **devid)
+{
+       nvlist_t **child;
+       uint_t c, children;
+       uint64_t guid;
+       char *val;
+       int error;
+
+       if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+           &child, &children) == 0) {
+               for (c = 0; c < children; c++) {
+                       error  = label_paths_impl(hdl, child[c],
+                           pool_guid, vdev_guid, path, devid);
+                       if (error)
+                               return (error);
+               }
+               return (0);
+       }
+
+       if (nvroot == NULL)
+               return (0);
+
+       error = nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_GUID, &guid);
+       if ((error != 0) || (guid != vdev_guid))
+               return (0);
+
+       error = nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &val);
+       if (error == 0)
+               *path = val;
+
+       error = nvlist_lookup_string(nvroot, ZPOOL_CONFIG_DEVID, &val);
+       if (error == 0)
+               *devid = val;
+
+       return (0);
+}
+
+/*
+ * Given a disk label, fetch the ZPOOL_CONFIG_PATH and ZPOOL_CONFIG_DEVID
+ * strings and return them through the path and devid arguments.
+ * The returned pointers are only valid as long as label remains valid.
+ */
+static int
+label_paths(libzfs_handle_t *hdl, nvlist_t *label, char **path, char **devid)
+{
+       nvlist_t *nvroot;
+       uint64_t pool_guid;
+       uint64_t vdev_guid;
+
+       *path = NULL;
+       *devid = NULL;
+
+       if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
+           nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, &pool_guid) ||
+           nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &vdev_guid))
+               return (ENOENT);
+
+       return (label_paths_impl(hdl, nvroot, pool_guid, vdev_guid, path,
+           devid));
+}
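
A usage sketch underscoring the lifetime rule above (label_paths_example is hypothetical and assumes placement in this file):

#include <stdio.h>

/* Read a label from an open fd and borrow its path/devid strings. */
static void
label_paths_example(libzfs_handle_t *hdl, int fd)
{
        nvlist_t *label = NULL;
        char *path, *devid;
        int num_labels;

        if (zpool_read_label(fd, &label, &num_labels) != 0 || label == NULL)
                return;

        if (label_paths(hdl, label, &path, &devid) == 0) {
                if (path != NULL)
                        (void) printf("path:  %s\n", path);
                if (devid != NULL)
                        (void) printf("devid: %s\n", devid);
        }

        nvlist_free(label);     /* path and devid now dangle */
}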
+
+static void
+zpool_open_func(void *arg)
+{
+       rdsk_node_t *rn = arg;
+       libzfs_handle_t *hdl = rn->rn_hdl;
+       struct stat64 statbuf;
+       nvlist_t *config;
+       char *bname, *dupname;
+       uint64_t vdev_guid = 0;
+       int error;
+       int num_labels;
+       int fd;
+
+       /*
+        * Skip devices with well known prefixes; there can be side effects
+        * when opening these devices which need to be avoided.
+        *
+        * hpet     - High Precision Event Timer
+        * watchdog - Watchdog must be closed in a special way.
+        */
+       dupname = zfs_strdup(hdl, rn->rn_name);
+       bname = basename(dupname);
+       error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname));
+       free(dupname);
+       if (error)
+               return;
+
+       /*
+        * Ignore failed stats.  We only want regular files and block devices.
+        */
+       if (stat64(rn->rn_name, &statbuf) != 0 ||
+           (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)))
+               return;
+
+       /*
+        * Preferentially open using O_DIRECT to bypass the block device
+        * cache which may be stale for multipath devices.  An EINVAL errno
+        * indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
+        */
+       fd = open(rn->rn_name, O_RDONLY | O_DIRECT);
+       if ((fd < 0) && (errno == EINVAL))
+               fd = open(rn->rn_name, O_RDONLY);
+
+       if (fd < 0)
+               return;
+
+       /*
+        * This file is too small to hold a zpool
+        */
+       if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
+               (void) close(fd);
+               return;
+       }
+
+       error = zpool_read_label(fd, &config, &num_labels);
+       if (error != 0) {
+               (void) close(fd);
+               return;
+       }
+
+       if (num_labels == 0) {
+               (void) close(fd);
+               nvlist_free(config);
+               return;
+       }
+
+       /*
+        * Check that the vdev is for the expected guid.  Additional entries
+        * are speculatively added based on the paths stored in the labels.
+        * Entries with valid paths but incorrect guids must be removed.
+        */
+       error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
+       if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
+               (void) close(fd);
+               nvlist_free(config);
+               return;
+       }
+
+       (void) close(fd);
+
+       rn->rn_config = config;
+       rn->rn_num_labels = num_labels;
+
+       /*
+        * Add additional entries for paths described by this label.
+        */
+       if (rn->rn_labelpaths) {
+               char *path = NULL;
+               char *devid = NULL;
+               rdsk_node_t *slice;
+               avl_index_t where;
+               int error;
+
+               if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
+                       return;
+
+               /*
+                * Allow devlinks to stabilize so all paths are available.
+                */
+               zpool_label_disk_wait(rn->rn_name, DISK_LABEL_WAIT);
+
+               if (path != NULL) {
+                       slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+                       slice->rn_name = zfs_strdup(hdl, path);
+                       slice->rn_vdev_guid = vdev_guid;
+                       slice->rn_avl = rn->rn_avl;
+                       slice->rn_hdl = hdl;
+                       slice->rn_order = IMPORT_ORDER_PREFERRED_1;
+                       slice->rn_labelpaths = B_FALSE;
+                       mutex_enter(rn->rn_lock);
+                       if (avl_find(rn->rn_avl, slice, &where)) {
+                               mutex_exit(rn->rn_lock);
+                               free(slice->rn_name);
+                               free(slice);
+                       } else {
+                               avl_insert(rn->rn_avl, slice, where);
+                               mutex_exit(rn->rn_lock);
+                               zpool_open_func(slice);
+                       }
+               }
+
+               if (devid != NULL) {
+                       slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+                       error = asprintf(&slice->rn_name, "%s%s",
+                           DEV_BYID_PATH, devid);
+                       if (error == -1) {
+                               free(slice);
+                               return;
+                       }
+
+                       slice->rn_vdev_guid = vdev_guid;
+                       slice->rn_avl = rn->rn_avl;
+                       slice->rn_hdl = hdl;
+                       slice->rn_order = IMPORT_ORDER_PREFERRED_2;
+                       slice->rn_labelpaths = B_FALSE;
+                       mutex_enter(rn->rn_lock);
+                       if (avl_find(rn->rn_avl, slice, &where)) {
+                               mutex_exit(rn->rn_lock);
+                               free(slice->rn_name);
+                               free(slice);
+                       } else {
+                               avl_insert(rn->rn_avl, slice, where);
+                               mutex_exit(rn->rn_lock);
+                               zpool_open_func(slice);
+                       }
+               }
+       }
+}
+
 /*
  * Given a file descriptor, clear (zero) the label information.  This function
  * is used in the appliance stack as part of the ZFS sysevent module and
@@ -1112,82 +1675,172 @@ zpool_clear_label(int fd)
        return (0);
 }
 
-#ifdef HAVE_LIBBLKID
 /*
- * Use libblkid to quickly search for zfs devices
+ * Scan a list of directories for zfs devices.
  */
 static int
-zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
+zpool_find_import_scan(libzfs_handle_t *hdl, kmutex_t *lock,
+    avl_tree_t **slice_cache, char **dir, int dirs)
 {
+       avl_tree_t *cache;
+       rdsk_node_t *slice;
+       void *cookie;
+       int i, error;
+
+       *slice_cache = NULL;
+       cache = zfs_alloc(hdl, sizeof (avl_tree_t));
+       avl_create(cache, slice_cache_compare, sizeof (rdsk_node_t),
+           offsetof(rdsk_node_t, rn_node));
+
+       for (i = 0; i < dirs; i++) {
+               char path[MAXPATHLEN];
+               struct dirent64 *dp;
+               DIR *dirp;
+
+               if (realpath(dir[i], path) == NULL) {
+                       error = errno;
+                       if (error == ENOENT)
+                               continue;
+
+                       zfs_error_aux(hdl, strerror(error));
+                       (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(
+                           TEXT_DOMAIN, "cannot resolve path '%s'"), dir[i]);
+                       goto error;
+               }
+
+               dirp = opendir(path);
+               if (dirp == NULL) {
+                       error = errno;
+                       zfs_error_aux(hdl, strerror(error));
+                       (void) zfs_error_fmt(hdl, EZFS_BADPATH,
+                           dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
+                       goto error;
+               }
+
+               while ((dp = readdir64(dirp)) != NULL) {
+                       const char *name = dp->d_name;
+                       if (name[0] == '.' &&
+                           (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+                               continue;
+
+                       slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+                       error = asprintf(&slice->rn_name, "%s/%s", path, name);
+                       if (error == -1) {
+                               free(slice);
+                               continue;
+                       }
+                       slice->rn_vdev_guid = 0;
+                       slice->rn_lock = lock;
+                       slice->rn_avl = cache;
+                       slice->rn_hdl = hdl;
+                       slice->rn_order = i + IMPORT_ORDER_SCAN_OFFSET;
+                       slice->rn_labelpaths = B_FALSE;
+                       mutex_enter(lock);
+                       avl_add(cache, slice);
+                       mutex_exit(lock);
+               }
+
+               (void) closedir(dirp);
+       }
+
+       *slice_cache = cache;
+       return (0);
+
+error:
+       cookie = NULL;
+       while ((slice = avl_destroy_nodes(cache, &cookie)) != NULL) {
+               free(slice->rn_name);
+               free(slice);
+       }
+       free(cache);
+
+       return (error);
+}
+
+/*
+ * Use libblkid to quickly enumerate all known zfs devices.
+ */
+static int
+zpool_find_import_blkid(libzfs_handle_t *hdl, kmutex_t *lock,
+    avl_tree_t **slice_cache)
+{
+       rdsk_node_t *slice;
        blkid_cache cache;
        blkid_dev_iterate iter;
        blkid_dev dev;
-       int err;
+       avl_index_t where;
+       int error;
 
-       err = blkid_get_cache(&cache, NULL);
-       if (err != 0) {
-               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
-                   dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err);
-               goto err_blkid1;
-       }
+       *slice_cache = NULL;
 
-       err = blkid_probe_all(cache);
-       if (err != 0) {
-               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
-                   dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err);
-               goto err_blkid2;
+       error = blkid_get_cache(&cache, NULL);
+       if (error != 0)
+               return (error);
+
+       error = blkid_probe_all_new(cache);
+       if (error != 0) {
+               blkid_put_cache(cache);
+               return (error);
        }
 
        iter = blkid_dev_iterate_begin(cache);
        if (iter == NULL) {
-               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
-                   dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()"));
-               goto err_blkid2;
+               blkid_put_cache(cache);
+               return (EINVAL);
        }
 
-       err = blkid_dev_set_search(iter, "TYPE", "zfs_member");
-       if (err != 0) {
-               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
-                   dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err);
-               goto err_blkid3;
+       error = blkid_dev_set_search(iter, "TYPE", "zfs_member");
+       if (error != 0) {
+               blkid_dev_iterate_end(iter);
+               blkid_put_cache(cache);
+               return (error);
        }
 
-       while (blkid_dev_next(iter, &dev) == 0) {
-               nvlist_t *label;
-               char *devname;
-               int fd, num_labels;
-
-               devname = (char *) blkid_dev_devname(dev);
-               if ((fd = open64(devname, O_RDONLY)) < 0)
-                       continue;
+       *slice_cache = zfs_alloc(hdl, sizeof (avl_tree_t));
+       avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
+           offsetof(rdsk_node_t, rn_node));
 
-               err = zpool_read_label(fd, &label, &num_labels);
-               (void) close(fd);
-
-               if (err || label == NULL)
-                       continue;
+       while (blkid_dev_next(iter, &dev) == 0) {
+               slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
+               slice->rn_name = zfs_strdup(hdl, blkid_dev_devname(dev));
+               slice->rn_vdev_guid = 0;
+               slice->rn_lock = lock;
+               slice->rn_avl = *slice_cache;
+               slice->rn_hdl = hdl;
+               slice->rn_labelpaths = B_TRUE;
+
+               error = zfs_path_order(slice->rn_name, &slice->rn_order);
+               if (error == 0)
+                       slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
+               else
+                       slice->rn_order = IMPORT_ORDER_DEFAULT;
 
-               add_configs_from_label(hdl, pools, devname, num_labels, label);
+               mutex_enter(lock);
+               if (avl_find(*slice_cache, slice, &where)) {
+                       free(slice->rn_name);
+                       free(slice);
+               } else {
+                       avl_insert(*slice_cache, slice, where);
+               }
+               mutex_exit(lock);
        }
-       err = 0;
 
-err_blkid3:
        blkid_dev_iterate_end(iter);
-err_blkid2:
        blkid_put_cache(cache);
-err_blkid1:
-       return (err);
+
+       return (0);
 }
-#endif /* HAVE_LIBBLKID */
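
For comparison, the same device enumeration can be reproduced standalone with nothing but libblkid (a sketch; build with: cc list_zfs_members.c -lblkid):

#include <stdio.h>
#include <blkid/blkid.h>

int
main(void)
{
        blkid_cache cache;
        blkid_dev_iterate iter;
        blkid_dev dev;

        if (blkid_get_cache(&cache, NULL) != 0)
                return (1);
        (void) blkid_probe_all_new(cache);

        iter = blkid_dev_iterate_begin(cache);
        if (iter != NULL) {
                /* restrict the walk to devices blkid typed as zfs members */
                (void) blkid_dev_set_search(iter, "TYPE", "zfs_member");
                while (blkid_dev_next(iter, &dev) == 0)
                        (void) printf("%s\n", blkid_dev_devname(dev));
                blkid_dev_iterate_end(iter);
        }
        blkid_put_cache(cache);
        return (0);
}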
 
 char *
 zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
        "/dev/disk/by-vdev",    /* Custom rules, use first if they exist */
        "/dev/mapper",          /* Use multipath devices before components */
+       "/dev/disk/by-partlabel", /* Single unique entry set by user */
+       "/dev/disk/by-partuuid", /* Generated partition uuid */
+       "/dev/disk/by-label",   /* Custom persistent labels */
        "/dev/disk/by-uuid",    /* Single unique entry and persistent */
        "/dev/disk/by-id",      /* May be multiple entries and persistent */
        "/dev/disk/by-path",    /* Encodes physical location and persistent */
-       "/dev/disk/by-label",   /* Custom persistent labels */
        "/dev"                  /* UNSAFE device names will change */
 };
 
@@ -1201,207 +1854,137 @@ zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
 static nvlist_t *
 zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
 {
-       int i, num_labels, dirs = iarg->paths;
-       DIR *dirp = NULL;
-       struct dirent64 *dp;
-       char path[MAXPATHLEN];
-       char *end, **dir = iarg->path;
-       size_t pathleft;
-       struct stat64 statbuf;
-       nvlist_t *ret = NULL, *config;
-       int fd;
+       nvlist_t *ret = NULL;
        pool_list_t pools = { 0 };
        pool_entry_t *pe, *penext;
        vdev_entry_t *ve, *venext;
        config_entry_t *ce, *cenext;
        name_entry_t *ne, *nenext;
+       kmutex_t lock;
+       avl_tree_t *cache;
+       rdsk_node_t *slice;
+       void *cookie;
+       taskq_t *t;
 
        verify(iarg->poolname == NULL || iarg->guid == 0);
+       mutex_init(&lock, NULL, MUTEX_DEFAULT, NULL);
 
-       if (dirs == 0) {
-#ifdef HAVE_LIBBLKID
-               /* Use libblkid to scan all device for their type */
-               if (zpool_find_import_blkid(hdl, &pools) == 0)
-                       goto skip_scanning;
+       /*
+        * Locate pool member vdevs using libblkid or by directory scanning.
+        * On success a newly allocated AVL tree, populated with an entry
+        * for each discovered vdev, is returned as the cache.  It's the
+        * caller's responsibility to consume and destroy this tree.
+        */
+       if (iarg->scan || iarg->paths != 0) {
+               int dirs = iarg->paths;
+               char **dir = iarg->path;
 
-               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
-                   dgettext(TEXT_DOMAIN, "blkid failure falling back "
-                   "to manual probing"));
-#endif /* HAVE_LIBBLKID */
+               if (dirs == 0) {
+                       dir = zpool_default_import_path;
+                       dirs = DEFAULT_IMPORT_PATH_SIZE;
+               }
 
-               dir = zpool_default_import_path;
-               dirs = DEFAULT_IMPORT_PATH_SIZE;
+               if (zpool_find_import_scan(hdl, &lock, &cache, dir, dirs) != 0)
+                       return (NULL);
+       } else {
+               if (zpool_find_import_blkid(hdl, &lock, &cache) != 0)
+                       return (NULL);
        }
 
        /*
-        * Go through and read the label configuration information from every
-        * possible device, organizing the information according to pool GUID
-        * and toplevel GUID.
+        * Create a thread pool to parallelize the process of reading and
+        * validating labels; a large number of threads can be used due to
+        * minimal contention.
         */
-       for (i = 0; i < dirs; i++) {
-               char *rdsk;
-               int dfd;
-
-               /* use realpath to normalize the path */
-               if (realpath(dir[i], path) == 0) {
-
-                       /* it is safe to skip missing search paths */
-                       if (errno == ENOENT)
-                               continue;
-
-                       zfs_error_aux(hdl, strerror(errno));
-                       (void) zfs_error_fmt(hdl, EZFS_BADPATH,
-                           dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
-                       goto error;
-               }
-               end = &path[strlen(path)];
-               *end++ = '/';
-               *end = 0;
-               pathleft = &path[sizeof (path)] - end;
+       t = taskq_create("z_import", 2 * boot_ncpus, defclsyspri,
+           2 * boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
 
-               /*
-                * Using raw devices instead of block devices when we're
-                * reading the labels skips a bunch of slow operations during
-                * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
-                */
-               if (strcmp(path, "/dev/dsk/") == 0)
-                       rdsk = "/dev/rdsk/";
-               else
-                       rdsk = path;
+       for (slice = avl_first(cache); slice;
+           (slice = avl_walk(cache, slice, AVL_AFTER)))
+               (void) taskq_dispatch(t, zpool_open_func, slice, TQ_SLEEP);
 
-               if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
-                   (dirp = fdopendir(dfd)) == NULL) {
-                       zfs_error_aux(hdl, strerror(errno));
-                       (void) zfs_error_fmt(hdl, EZFS_BADPATH,
-                           dgettext(TEXT_DOMAIN, "cannot open '%s'"),
-                           rdsk);
-                       goto error;
-               }
+       taskq_wait(t);
+       taskq_destroy(t);
 
-               /*
-                * This is not MT-safe, but we have no MT consumers of libzfs
-                */
-               while ((dp = readdir64(dirp)) != NULL) {
-                       const char *name = dp->d_name;
-                       if (name[0] == '.' &&
-                           (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
-                               continue;
-
-                       /*
-                        * Skip checking devices with well known prefixes:
-                        * watchdog - A special close is required to avoid
-                        *            triggering it and resetting the system.
-                        * fuse     - Fuse control device.
-                        * ppp      - Generic PPP driver.
-                        * tty*     - Generic serial interface.
-                        * vcs*     - Virtual console memory.
-                        * parport* - Parallel port interface.
-                        * lp*      - Printer interface.
-                        * fd*      - Floppy interface.
-                        * hpet     - High Precision Event Timer, crashes qemu
-                        *            when accessed from a virtual machine.
-                        * core     - Symlink to /proc/kcore, causes a crash
-                        *            when access from Xen dom0.
-                        */
-                       if ((strncmp(name, "watchdog", 8) == 0) ||
-                           (strncmp(name, "fuse", 4) == 0) ||
-                           (strncmp(name, "ppp", 3) == 0) ||
-                           (strncmp(name, "tty", 3) == 0) ||
-                           (strncmp(name, "vcs", 3) == 0) ||
-                           (strncmp(name, "parport", 7) == 0) ||
-                           (strncmp(name, "lp", 2) == 0) ||
-                           (strncmp(name, "fd", 2) == 0) ||
-                           (strncmp(name, "hpet", 4) == 0) ||
-                           (strncmp(name, "core", 4) == 0))
-                               continue;
+       /*
+        * Process the cache, filtering out any entries which are not
+        * for the specified pool, then adding matching label configs.
+        */
+       cookie = NULL;
+       while ((slice = avl_destroy_nodes(cache, &cookie)) != NULL) {
+               if (slice->rn_config != NULL) {
+                       nvlist_t *config = slice->rn_config;
+                       boolean_t matched = B_TRUE;
+                       boolean_t aux = B_FALSE;
+                       int fd;
 
                        /*
-                        * Ignore failed stats.  We only want regular
-                        * files and block devices.
+                        * Check if it's a spare or l2cache device. If it is,
+                        * we need to skip the name and guid check since they
+                        * don't exist on aux device label.
                         */
-                       if ((fstatat64(dfd, name, &statbuf, 0) != 0) ||
-                           (!S_ISREG(statbuf.st_mode) &&
-                           !S_ISBLK(statbuf.st_mode)))
-                               continue;
-
-                       if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
-                               continue;
-
-                       if ((zpool_read_label(fd, &config, &num_labels))) {
-                               (void) close(fd);
-                               (void) no_memory(hdl);
-                               goto error;
+                       if (iarg->poolname != NULL || iarg->guid != 0) {
+                               uint64_t state;
+                               aux = nvlist_lookup_uint64(config,
+                                   ZPOOL_CONFIG_POOL_STATE, &state) == 0 &&
+                                   (state == POOL_STATE_SPARE ||
+                                   state == POOL_STATE_L2CACHE);
                        }
 
-                       (void) close(fd);
-
-                       if (config != NULL) {
-                               boolean_t matched = B_TRUE;
-                               boolean_t aux = B_FALSE;
+                       if (iarg->poolname != NULL && !aux) {
                                char *pname;
 
+                               matched = nvlist_lookup_string(config,
+                                   ZPOOL_CONFIG_POOL_NAME, &pname) == 0 &&
+                                   strcmp(iarg->poolname, pname) == 0;
+                       } else if (iarg->guid != 0 && !aux) {
+                               uint64_t this_guid;
+
+                               matched = nvlist_lookup_uint64(config,
+                                   ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0 &&
+                                   iarg->guid == this_guid;
+                       }
+                       if (!matched) {
+                               nvlist_free(config);
+                       } else {
                                /*
-                                * Check if it's a spare or l2cache device. If
-                                * it is, we need to skip the name and guid
-                                * check since they don't exist on aux device
-                                * label.
+                                * Verify all remaining entries can be opened
+                                * exclusively. This will prune all underlying
+                                * multipath devices which otherwise could
+                                * result in the vdev appearing as UNAVAIL.
+                                *
+                                * Under zdb, this step isn't required and
+                                * would prevent a zdb -e of active pools with
+                                * no cachefile.
                                 */
-                               if (iarg->poolname != NULL ||
-                                   iarg->guid != 0) {
-                                       uint64_t state;
-                                       aux = nvlist_lookup_uint64(config,
-                                           ZPOOL_CONFIG_POOL_STATE,
-                                           &state) == 0 &&
-                                           (state == POOL_STATE_SPARE ||
-                                           state == POOL_STATE_L2CACHE);
-                               }
-
-                               if ((iarg->poolname != NULL) && !aux &&
-                                   (nvlist_lookup_string(config,
-                                   ZPOOL_CONFIG_POOL_NAME, &pname) == 0)) {
-
-                                       if (strcmp(iarg->poolname, pname))
-                                               matched = B_FALSE;
-
-                               } else if (iarg->guid != 0 && !aux) {
-                                       uint64_t this_guid;
-
-                                       matched = nvlist_lookup_uint64(config,
-                                           ZPOOL_CONFIG_POOL_GUID,
-                                           &this_guid) == 0 &&
-                                           iarg->guid == this_guid;
-                               }
-                               if (!matched) {
+                               fd = open(slice->rn_name, O_RDONLY | O_EXCL);
+                               if (fd >= 0 || iarg->can_be_active) {
+                                       if (fd >= 0)
+                                               close(fd);
+                                       add_config(hdl, &pools,
+                                           slice->rn_name, slice->rn_order,
+                                           slice->rn_num_labels, config);
+                               } else {
                                        nvlist_free(config);
-                                       config = NULL;
-                                       continue;
                                }
-                               /* use the non-raw path for the config */
-                               (void) strlcpy(end, name, pathleft);
-                               if (add_config(hdl, &pools, path, i+1,
-                                   num_labels, config))
-                                       goto error;
                        }
                }
-
-               (void) closedir(dirp);
-               dirp = NULL;
+               free(slice->rn_name);
+               free(slice);
        }
+       avl_destroy(cache);
+       free(cache);
+       mutex_destroy(&lock);
 
-#ifdef HAVE_LIBBLKID
-skip_scanning:
-#endif
        ret = get_configs(hdl, &pools, iarg->can_be_active);
 
-error:
        for (pe = pools.pools; pe != NULL; pe = penext) {
                penext = pe->pe_next;
                for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
                        venext = ve->ve_next;
                        for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
                                cenext = ce->ce_next;
-                               if (ce->ce_config)
-                                       nvlist_free(ce->ce_config);
+                               nvlist_free(ce->ce_config);
                                free(ce);
                        }
                        free(ve);
@@ -1411,14 +1994,10 @@ error:
 
        for (ne = pools.names; ne != NULL; ne = nenext) {
                nenext = ne->ne_next;
-               if (ne->ne_name)
-                       free(ne->ne_name);
+               free(ne->ne_name);
                free(ne);
        }
 
-       if (dirp)
-               (void) closedir(dirp);
-
        return (ret);
 }
 
@@ -1587,6 +2166,80 @@ zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
        return (zpool_find_import_impl(hdl, import));
 }
 
+static boolean_t
+pool_match(nvlist_t *cfg, char *tgt)
+{
+       uint64_t v, guid = strtoull(tgt, NULL, 0);
+       char *s;
+
+       if (guid != 0) {
+               if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
+                       return (v == guid);
+       } else {
+               if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
+                       return (strcmp(s, tgt) == 0);
+       }
+       return (B_FALSE);
+}
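
Since strtoull() yields 0 for a non-numeric string, a name such as "tank" falls through to the name comparison, while an all-digits target is treated as a guid. A sketch (the guid value is made up):

/* Both target forms route through pool_match(). */
static boolean_t
pool_match_example(nvlist_t *cfg)
{
        return (pool_match(cfg, "tank") ||              /* by name */
            pool_match(cfg, "9036735344827706343"));    /* by guid */
}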
+
+int
+zpool_tryimport(libzfs_handle_t *hdl, char *target, nvlist_t **configp,
+    importargs_t *args)
+{
+       nvlist_t *pools;
+       nvlist_t *match = NULL;
+       nvlist_t *config = NULL;
+       char *name = NULL, *sepp = NULL;
+       char sep = '\0';
+       int count = 0;
+       char *targetdup = strdup(target);
+
+       *configp = NULL;
+
+       if ((sepp = strpbrk(targetdup, "/@")) != NULL) {
+               sep = *sepp;
+               *sepp = '\0';
+       }
+
+       pools = zpool_search_import(hdl, args);
+
+       if (pools != NULL) {
+               nvpair_t *elem = NULL;
+               while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
+                       VERIFY0(nvpair_value_nvlist(elem, &config));
+                       if (pool_match(config, targetdup)) {
+                               count++;
+                               if (match != NULL) {
+                                       /* multiple matches found */
+                                       continue;
+                               } else {
+                                       match = config;
+                                       name = nvpair_name(elem);
+                               }
+                       }
+               }
+       }
+
+       if (count == 0) {
+               (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                   "no pools found"));
+               free(targetdup);
+               return (ENOENT);
+       }
+
+       if (count > 1) {
+               (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                   "%d pools found, use pool GUID\n"), count);
+               free(targetdup);
+               return (EINVAL);
+       }
+
+       *configp = match;
+       free(targetdup);
+
+       return (0);
+}
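
A hedged usage sketch (find_pool_config is hypothetical; it assumes an initialized libzfs handle, with importargs_t being the search-argument struct from libzfs.h):

#include <stdio.h>
#include <libzfs.h>

extern int zpool_tryimport(libzfs_handle_t *, char *, nvlist_t **,
    importargs_t *);

/* Resolve a pool name or guid to its discovered config, sans import. */
static int
find_pool_config(libzfs_handle_t *hdl, char *target)
{
        importargs_t args = { 0 };
        nvlist_t *config = NULL;
        int error;

        error = zpool_tryimport(hdl, target, &config, &args);
        if (error == 0)
                (void) printf("found config for '%s'\n", target);

        return (error);
}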
+
 boolean_t
 find_guid(nvlist_t *nv, uint64_t guid)
 {
@@ -1776,9 +2429,9 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
                cb.cb_type = ZPOOL_CONFIG_SPARES;
                if (zpool_iter(hdl, find_aux, &cb) == 1) {
                        name = (char *)zpool_get_name(cb.cb_zhp);
-                       ret = TRUE;
+                       ret = B_TRUE;
                } else {
-                       ret = FALSE;
+                       ret = B_FALSE;
                }
                break;
 
@@ -1792,9 +2445,9 @@ zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
                cb.cb_type = ZPOOL_CONFIG_L2CACHE;
                if (zpool_iter(hdl, find_aux, &cb) == 1) {
                        name = (char *)zpool_get_name(cb.cb_zhp);
-                       ret = TRUE;
+                       ret = B_TRUE;
                } else {
-                       ret = FALSE;
+                       ret = B_FALSE;
                }
                break;