]> git.proxmox.com Git - mirror_zfs-debian.git/blobdiff - cmd/zpool/zpool_vdev.c
New upstream version 0.7.6
[mirror_zfs-debian.git] / cmd / zpool / zpool_vdev.c
index 10007c14927f6961051c68e0eed420e6e958b454..fd6bd9e7677d1554da2c7e2ca5662987876d1993 100644 (file)
  */
 
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2016 Intel Corporation.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
  */
 
 /*
@@ -51,7 +53,7 @@
  *
  *     1. Construct the vdev specification.  Performs syntax validation and
  *         makes sure each device is valid.
- *     2. Check for devices in use.  Using libdiskmgt, makes sure that no
+ *     2. Check for devices in use.  Uses libblkid to make sure that no
  *         devices are also in use.  Some can be overridden using the 'force'
  *         flag, others cannot.
  *     3. Check for replication errors if the 'force' flag is not specified.
  */
 
 #include <assert.h>
+#include <ctype.h>
 #include <devid.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <libdiskmgt.h>
 #include <libintl.h>
 #include <libnvpair.h>
+#include <limits.h>
+#include <sys/spa.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 #include <sys/stat.h>
 #include <sys/vtoc.h>
 #include <sys/mntent.h>
-
+#include <uuid/uuid.h>
+#include <blkid/blkid.h>
 #include "zpool_util.h"
-
-#define        DISK_ROOT       "/dev/dsk"
-#define        RDISK_ROOT      "/dev/rdsk"
-#define        BACKUP_SLICE    "s2"
+#include <sys/zfs_context.h>
 
 /*
  * For any given vdev specification, we can have multiple errors.  The
 boolean_t error_seen;
 boolean_t is_force;
 
-/*PRINTFLIKE1*/
-static void
-vdev_error(const char *fmt, ...)
+typedef struct vdev_disk_db_entry
 {
-       va_list ap;
-
-       if (!error_seen) {
-               (void) fprintf(stderr, gettext("invalid vdev specification\n"));
-               if (!is_force)
-                       (void) fprintf(stderr, gettext("use '-f' to override "
-                           "the following errors:\n"));
-               else
-                       (void) fprintf(stderr, gettext("the following errors "
-                           "must be manually repaired:\n"));
-               error_seen = B_TRUE;
-       }
-
-       va_start(ap, fmt);
-       (void) vfprintf(stderr, fmt, ap);
-       va_end(ap);
-}
-
-static void
-libdiskmgt_error(int error)
-{
-       /*
-        * ENXIO/ENODEV is a valid error message if the device doesn't live in
-        * /dev/dsk.  Don't bother printing an error message in this case.
-        */
-       if (error == ENXIO || error == ENODEV)
-               return;
-
-       (void) fprintf(stderr, gettext("warning: device in use checking "
-           "failed: %s\n"), strerror(error));
-}
+       char id[24];
+       int sector_size;
+} vdev_disk_db_entry_t;
 
 /*
- * Validate a device, passing the bulk of the work off to libdiskmgt.
+ * Database of block devices that lie about physical sector sizes.  Each
+ * identification string must be exactly 24 characters, because all 24
+ * bytes are compared against the INQUIRY reply; anything else never matches.
  */
-static int
-check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
-{
-       char *msg;
-       int error = 0;
-       dm_who_type_t who;
-
-       if (force)
-               who = DM_WHO_ZPOOL_FORCE;
-       else if (isspare)
-               who = DM_WHO_ZPOOL_SPARE;
-       else
-               who = DM_WHO_ZPOOL;
-
-       if (dm_inuse((char *)path, &msg, who, &error) || error) {
-               if (error != 0) {
-                       libdiskmgt_error(error);
-                       return (0);
-               } else {
-                       vdev_error("%s", msg);
-                       free(msg);
-                       return (-1);
-               }
-       }
+static vdev_disk_db_entry_t vdev_disk_database[] = {
+       {"ATA     ADATA SSD S396 3", 8192},
+       {"ATA     APPLE SSD SM128E", 8192},
+       {"ATA     APPLE SSD SM256E", 8192},
+       {"ATA     APPLE SSD SM512E", 8192},
+       {"ATA     APPLE SSD SM768E", 8192},
+       {"ATA     C400-MTFDDAC064M", 8192},
+       {"ATA     C400-MTFDDAC128M", 8192},
+       {"ATA     C400-MTFDDAC256M", 8192},
+       {"ATA     C400-MTFDDAC512M", 8192},
+       {"ATA     Corsair Force 3 ", 8192},
+       {"ATA     Corsair Force GS", 8192},
+       {"ATA     INTEL SSDSA2CT04", 8192},
+       {"ATA     INTEL SSDSA2BZ10", 8192},
+       {"ATA     INTEL SSDSA2BZ20", 8192},
+       {"ATA     INTEL SSDSA2BZ30", 8192},
+       {"ATA     INTEL SSDSA2CW04", 8192},
+       {"ATA     INTEL SSDSA2CW08", 8192},
+       {"ATA     INTEL SSDSA2CW12", 8192},
+       {"ATA     INTEL SSDSA2CW16", 8192},
+       {"ATA     INTEL SSDSA2CW30", 8192},
+       {"ATA     INTEL SSDSA2CW60", 8192},
+       {"ATA     INTEL SSDSC2CT06", 8192},
+       {"ATA     INTEL SSDSC2CT12", 8192},
+       {"ATA     INTEL SSDSC2CT18", 8192},
+       {"ATA     INTEL SSDSC2CT24", 8192},
+       {"ATA     INTEL SSDSC2CW06", 8192},
+       {"ATA     INTEL SSDSC2CW12", 8192},
+       {"ATA     INTEL SSDSC2CW18", 8192},
+       {"ATA     INTEL SSDSC2CW24", 8192},
+       {"ATA     INTEL SSDSC2CW48", 8192},
+       {"ATA     KINGSTON SH100S3", 8192},
+       {"ATA     KINGSTON SH103S3", 8192},
+       {"ATA     M4-CT064M4SSD2  ", 8192},
+       {"ATA     M4-CT128M4SSD2  ", 8192},
+       {"ATA     M4-CT256M4SSD2  ", 8192},
+       {"ATA     M4-CT512M4SSD2  ", 8192},
+       {"ATA     OCZ-AGILITY2    ", 8192},
+       {"ATA     OCZ-AGILITY3    ", 8192},
+       {"ATA     OCZ-VERTEX2 3.5 ", 8192},
+       {"ATA     OCZ-VERTEX3     ", 8192},
+       {"ATA     OCZ-VERTEX3 LT  ", 8192},
+       {"ATA     OCZ-VERTEX3 MI  ", 8192},
+       {"ATA     OCZ-VERTEX4     ", 8192},
+       {"ATA     SAMSUNG MZ7WD120", 8192},
+       {"ATA     SAMSUNG MZ7WD240", 8192},
+       {"ATA     SAMSUNG MZ7WD480", 8192},
+       {"ATA     SAMSUNG MZ7WD960", 8192},
+       {"ATA     SAMSUNG SSD 830 ", 8192},
+       {"ATA     Samsung SSD 840 ", 8192},
+       {"ATA     SanDisk SSD U100", 8192},
+       {"ATA     TOSHIBA THNSNH06", 8192},
+       {"ATA     TOSHIBA THNSNH12", 8192},
+       {"ATA     TOSHIBA THNSNH25", 8192},
+       {"ATA     TOSHIBA THNSNH51", 8192},
+       {"ATA     APPLE SSD TS064C", 4096},
+       {"ATA     APPLE SSD TS128C", 4096},
+       {"ATA     APPLE SSD TS256C", 4096},
+       {"ATA     APPLE SSD TS512C", 4096},
+       {"ATA     INTEL SSDSA2M040", 4096},
+       {"ATA     INTEL SSDSA2M080", 4096},
+       {"ATA     INTEL SSDSA2M160", 4096},
+       {"ATA     INTEL SSDSC2MH12", 4096},
+       {"ATA     INTEL SSDSC2MH25", 4096},
+       {"ATA     OCZ CORE_SSD    ", 4096},
+       {"ATA     OCZ-VERTEX      ", 4096},
+       {"ATA     SAMSUNG MCCOE32G", 4096},
+       {"ATA     SAMSUNG MCCOE64G", 4096},
+       {"ATA     SAMSUNG SSD PM80", 4096},
+       /* Flash drives optimized for 4KB IOs on larger pages */
+       {"ATA     INTEL SSDSC2BA10", 4096},
+       {"ATA     INTEL SSDSC2BA20", 4096},
+       {"ATA     INTEL SSDSC2BA40", 4096},
+       {"ATA     INTEL SSDSC2BA80", 4096},
+       {"ATA     INTEL SSDSC2BB08", 4096},
+       {"ATA     INTEL SSDSC2BB12", 4096},
+       {"ATA     INTEL SSDSC2BB16", 4096},
+       {"ATA     INTEL SSDSC2BB24", 4096},
+       {"ATA     INTEL SSDSC2BB30", 4096},
+       {"ATA     INTEL SSDSC2BB40", 4096},
+       {"ATA     INTEL SSDSC2BB48", 4096},
+       {"ATA     INTEL SSDSC2BB60", 4096},
+       {"ATA     INTEL SSDSC2BB80", 4096},
+       {"ATA     INTEL SSDSC2BW24", 4096},
+       {"ATA     INTEL SSDSC2BP24", 4096},
+       {"ATA     INTEL SSDSC2BP48", 4096},
+       {"NA      SmrtStorSDLKAE9W", 4096},
+       /* Imported from Open Solaris */
+       {"ATA     MARVELL SD88SA02", 4096},
+       /* Advanced format Hard drives */
+       {"ATA     Hitachi HDS5C303", 4096},
+       {"ATA     SAMSUNG HD204UI ", 4096},
+       {"ATA     ST2000DL004 HD20", 4096},
+       {"ATA     WDC WD10EARS-00M", 4096},
+       {"ATA     WDC WD10EARS-00S", 4096},
+       {"ATA     WDC WD10EARS-00Z", 4096},
+       {"ATA     WDC WD15EARS-00M", 4096},
+       {"ATA     WDC WD15EARS-00S", 4096},
+       {"ATA     WDC WD15EARS-00Z", 4096},
+       {"ATA     WDC WD20EARS-00M", 4096},
+       {"ATA     WDC WD20EARS-00S", 4096},
+       {"ATA     WDC WD20EARS-00Z", 4096},
+       {"ATA     WDC WD1600BEVT-0", 4096},
+       {"ATA     WDC WD2500BEVT-0", 4096},
+       {"ATA     WDC WD3200BEVT-0", 4096},
+       {"ATA     WDC WD5000BEVT-0", 4096},
+       /* Virtual disks: Assume zvols with default volblocksize */
+#if 0
+       {"ATA     QEMU HARDDISK   ", 8192},
+       {"IET     VIRTUAL-DISK    ", 8192},
+       {"OI      COMSTAR         ", 8192},
+       {"SUN     COMSTAR         ", 8192},
+       {"NETAPP  LUN             ", 8192},
+#endif
+};
+
+static const int vdev_disk_database_size =
+       sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
+
+#define        INQ_REPLY_LEN   96
+#define        INQ_CMD_LEN     6
 
-       /*
-        * If we're given a whole disk, ignore overlapping slices since we're
-        * about to label it anyway.
-        */
-       error = 0;
-       if (!wholedisk && !force &&
-           (dm_isoverlapping((char *)path, &msg, &error) || error)) {
-               if (error == 0) {
-                       /* dm_isoverlapping returned -1 */
-                       vdev_error(gettext("%s overlaps with %s\n"), path, msg);
-                       free(msg);
-                       return (-1);
-               } else if (error != ENODEV) {
-                       /* libdiskmgt's devcache only handles physical drives */
-                       libdiskmgt_error(error);
-                       return (0);
-               }
-       }
-
-       return (0);
-}
-
-
-/*
- * Validate a whole disk.  Iterate over all slices on the disk and make sure
- * that none is in use by calling check_slice().
- */
-static int
-check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
+static boolean_t
+check_sector_size_database(char *path, int *sector_size)
 {
-       dm_descriptor_t *drive, *media, *slice;
-       int err = 0;
+       unsigned char inq_buff[INQ_REPLY_LEN];
+       unsigned char sense_buffer[32];
+       unsigned char inq_cmd_blk[INQ_CMD_LEN] =
+           {INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
+       sg_io_hdr_t io_hdr;
+       int error;
+       int fd;
        int i;
-       int ret;
 
-       /*
-        * Get the drive associated with this disk.  This should never fail,
-        * because we already have an alias handle open for the device.
-        */
-       if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
-           &err)) == NULL || *drive == NULL) {
-               if (err)
-                       libdiskmgt_error(err);
-               return (0);
-       }
-
-       if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
-           &err)) == NULL) {
-               dm_free_descriptors(drive);
-               if (err)
-                       libdiskmgt_error(err);
-               return (0);
-       }
-
-       dm_free_descriptors(drive);
-
-       /*
-        * It is possible that the user has specified a removable media drive,
-        * and the media is not present.
-        */
-       if (*media == NULL) {
-               dm_free_descriptors(media);
-               vdev_error(gettext("'%s' has no media in drive\n"), name);
-               return (-1);
-       }
+       /* Prepare INQUIRY command */
+       memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
+       io_hdr.interface_id = 'S';
+       io_hdr.cmd_len = sizeof (inq_cmd_blk);
+       io_hdr.mx_sb_len = sizeof (sense_buffer);
+       io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+       io_hdr.dxfer_len = INQ_REPLY_LEN;
+       io_hdr.dxferp = inq_buff;
+       io_hdr.cmdp = inq_cmd_blk;
+       io_hdr.sbp = sense_buffer;
+       io_hdr.timeout = 10;            /* 10 milliseconds is ample time */
+
+       if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
+               return (B_FALSE);
 
-       if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
-           &err)) == NULL) {
-               dm_free_descriptors(media);
-               if (err)
-                       libdiskmgt_error(err);
-               return (0);
-       }
+       error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
 
-       dm_free_descriptors(media);
+       (void) close(fd);
 
-       ret = 0;
+       if (error < 0)
+               return (B_FALSE);
 
-       /*
-        * Iterate over all slices and report any errors.  We don't care about
-        * overlapping slices because we are using the whole disk.
-        */
-       for (i = 0; slice[i] != NULL; i++) {
-               char *name = dm_get_name(slice[i], &err);
+       if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
+               return (B_FALSE);
 
-               if (check_slice(name, force, B_TRUE, isspare) != 0)
-                       ret = -1;
+       for (i = 0; i < vdev_disk_database_size; i++) {
+               if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
+                       continue;
 
-               dm_free_name(name);
+               *sector_size = vdev_disk_database[i].sector_size;
+               return (B_TRUE);
        }
 
-       dm_free_descriptors(slice);
-       return (ret);
+       return (B_FALSE);
 }
 
-/*
- * Validate a device.
- */
-static int
-check_device(const char *path, boolean_t force, boolean_t isspare)
+/*PRINTFLIKE1*/
+static void
+vdev_error(const char *fmt, ...)
 {
-       dm_descriptor_t desc;
-       int err;
-       char *dev;
+       va_list ap;
 
-       /*
-        * For whole disks, libdiskmgt does not include the leading dev path.
-        */
-       dev = strrchr(path, '/');
-       assert(dev != NULL);
-       dev++;
-       if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
-               err = check_disk(path, desc, force, isspare);
-               dm_free_descriptor(desc);
-               return (err);
+       if (!error_seen) {
+               (void) fprintf(stderr, gettext("invalid vdev specification\n"));
+               if (!is_force)
+                       (void) fprintf(stderr, gettext("use '-f' to override "
+                           "the following errors:\n"));
+               else
+                       (void) fprintf(stderr, gettext("the following errors "
+                           "must be manually repaired:\n"));
+               error_seen = B_TRUE;
        }
 
-       return (check_slice(path, force, B_FALSE, isspare));
+       va_start(ap, fmt);
+       (void) vfprintf(stderr, fmt, ap);
+       va_end(ap);
 }
 
 /*
@@ -283,19 +306,9 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
        char  *name;
        int fd;
        int ret = 0;
-       int err;
        pool_state_t state;
        boolean_t inuse;
 
-       if (dm_inuse_swap(file, &err)) {
-               if (err)
-                       libdiskmgt_error(err);
-               else
-                       vdev_error(gettext("%s is currently used by swap. "
-                           "Please see swap(1M).\n"), file);
-               return (-1);
-       }
-
        if ((fd = open(file, O_RDONLY)) < 0)
                return (0);
 
@@ -323,8 +336,11 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
                /*
                 * Allow hot spares to be shared between pools.
                 */
-               if (state == POOL_STATE_SPARE && isspare)
+               if (state == POOL_STATE_SPARE && isspare) {
+                       free(name);
+                       (void) close(fd);
                        return (0);
+               }
 
                if (state == POOL_STATE_ACTIVE ||
                    state == POOL_STATE_SPARE || !force) {
@@ -348,33 +364,222 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
        return (ret);
 }
 
+/*
+ * Inspect one device node or partition using the libblkid "TYPE" tag.
+ *
+ * Returns 0 when no type is detected, or when a non-ZFS type is detected
+ * and 'force' is set.  Returns -1 (after reporting through vdev_error())
+ * for a non-ZFS type without 'force'.  For a "zfs_member" the decision is
+ * delegated to check_file() and its result is returned unchanged.
+ */
+static int
+check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
+{
+       char *fstype = blkid_get_tag_value(cache, "TYPE", path);
+       int rc = 0;
+
+       /* libblkid detected no valid type, so the device is safe to use. */
+       if (fstype == NULL)
+               return (0);
+
+       if (strcmp(fstype, "zfs_member") == 0) {
+               /*
+                * A ZFS device is examined by check_file(); the one safe
+                * case is a spare device shared between multiple pools.
+                */
+               rc = check_file(path, force, isspare);
+       } else if (!force) {
+               /* Some other filesystem is present and '-f' was not given. */
+               vdev_error(gettext("%s contains a filesystem of "
+                   "type '%s'\n"), path, fstype);
+               rc = -1;
+       }
+
+       /* The tag value is a heap copy that this function must release. */
+       free(fstype);
+
+       return (rc);
+}
 
 /*
- * By "whole disk" we mean an entire physical disk (something we can
- * label, toggle the write cache on, etc.) as opposed to the full
- * capacity of a pseudo-device such as lofi or did.  We act as if we
- * are labeling the disk, which should be a pretty good test of whether
- * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
- * it isn't.
+ * Validate that a disk, including all of its partitions, is safe to use.
+ *
+ * For EFI labeled disks this can be done relatively easily with the libefi
+ * library.  The partition numbers are extracted from the label and used
+ * to generate the expected /dev/ paths.  Each partition can then be
+ * checked for conflicts.
+ *
+ * For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
+ * but due to the lack of readily available libraries this scanning is
+ * not implemented.  Instead only the device path as given is checked.
+ */
+static int
+check_disk(const char *path, blkid_cache cache, int force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+       struct dk_gpt *vtoc;
+       char slice_path[MAXPATHLEN];
+       const char *part_sep;
+       int err = 0;
+       int fd, i;
+
+       /* Anything that is not a whole disk is checked exactly as given. */
+       if (!iswholedisk)
+               return (check_slice(path, cache, force, isspare));
+
+       /* A failed exclusive open is reported as the device being in use. */
+       if ((fd = open(path, O_RDONLY|O_DIRECT|O_EXCL)) < 0) {
+               char *value = blkid_get_tag_value(cache, "TYPE", path);
+               (void) fprintf(stderr, gettext("%s is in use and contains "
+                   "a %s filesystem.\n"), path, value ? value : "unknown");
+               /* The tag value is a heap copy; release it (NULL is fine). */
+               free(value);
+               return (-1);
+       }
+
+       /*
+        * Expected to fail for non-EFI labeled disks.  Just check the device
+        * as given and do not attempt to detect and scan partitions.
+        */
+       err = efi_alloc_and_read(fd, &vtoc);
+       if (err) {
+               (void) close(fd);
+               return (check_slice(path, cache, force, isspare));
+       }
+
+       /*
+        * The primary efi partition label is damaged however the secondary
+        * label at the end of the device is intact.  Rather than use this
+        * label we should play it safe and treat this as a non efi device.
+        */
+       if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
+               efi_free(vtoc);
+               (void) close(fd);
+
+               if (force) {
+                       /* Partitions will now be created using the backup */
+                       return (0);
+               } else {
+                       vdev_error(gettext("%s contains a corrupt primary "
+                           "EFI label.\n"), path);
+                       return (-1);
+               }
+       }
+
+       /*
+        * The partition suffix depends only on the disk path, so choose it
+        * once: paths under UDISK_ROOT use "-part<N>", paths ending in a
+        * digit use "p<N>", and all others simply append "<N>".  The cast
+        * keeps isdigit() defined when plain char is signed.
+        */
+       if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
+               part_sep = "-part";
+       else if (isdigit((unsigned char)path[strlen(path) - 1]))
+               part_sep = "p";
+       else
+               part_sep = "";
+
+       for (i = 0; i < vtoc->efi_nparts; i++) {
+
+               /* Skip unassigned entries and entries with a null GUID. */
+               if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
+                   uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
+                       continue;
+
+               /* Partition numbers in device names are 1-based. */
+               (void) snprintf(slice_path, sizeof (slice_path),
+                   "%s%s%d", path, part_sep, i + 1);
+
+               /* Stop at the first partition that is not safe to use. */
+               err = check_slice(slice_path, cache, force, isspare);
+               if (err)
+                       break;
+       }
+
+       efi_free(vtoc);
+       (void) close(fd);
+
+       return (err);
+}
+
+/*
+ * Run the in-use checks for 'path' with a blkid cache obtained for the
+ * duration of the call.  Returns -1 if the cache cannot be obtained,
+ * otherwise the result of check_disk().
+ */
+static int
+check_device(const char *path, boolean_t force,
+    boolean_t isspare, boolean_t iswholedisk)
+{
+       blkid_cache cache;
+       int ret;
+
+       if (blkid_get_cache(&cache, NULL) != 0) {
+               (void) fprintf(stderr, gettext("unable to access the blkid "
+                   "cache.\n"));
+               return (-1);
+       }
+
+       ret = check_disk(path, cache, force, isspare, iswholedisk);
+
+       /* Hand the cache back before reporting the outcome. */
+       blkid_put_cache(cache);
+
+       return (ret);
+}
+
+/*
+ * This may be a shorthand device path or it could be total gibberish.
+ * Check to see if it is a known device available in zfs_vdev_paths.
+ * As part of this check, see if we've been given an entire disk
+ * (minus the slice number).
+ */
+static int
+is_shorthand_path(const char *arg, char *path, size_t path_size,
+    struct stat64 *statbuf, boolean_t *wholedisk)
+{
+       int rc = zfs_resolve_shortname(arg, path, path_size);
+
+       if (rc == 0) {
+               *wholedisk = zfs_dev_is_whole_disk(path);
+
+               /* A whole disk is accepted without calling stat64(). */
+               if (*wholedisk)
+                       return (0);
+               if (stat64(path, statbuf) == 0)
+                       return (0);
+       }
+
+       /*
+        * Not usable as a shorthand name: hand back 'arg' unchanged with
+        * the other outputs cleared.  The value returned is whatever
+        * zfs_resolve_shortname() produced, which is 0 when the name
+        * resolved but the stat64() above failed.
+        */
+       *wholedisk = B_FALSE;
+       memset(statbuf, 0, sizeof (*statbuf));
+       strlcpy(path, arg, path_size);
+
+       return (rc);
+}
+
+/*
+ * Determine if the given path is a hot spare within the given configuration.
+ * If no configuration is given we rely solely on the label.
  */
 static boolean_t
-is_whole_disk(const char *arg)
+is_spare(nvlist_t *config, const char *path)
 {
-       struct dk_gpt *label;
-       int     fd;
-       char    path[MAXPATHLEN];
+       int fd;
+       pool_state_t state;
+       char *name = NULL;
+       nvlist_t *label;
+       uint64_t guid, spareguid;
+       nvlist_t *nvroot;
+       nvlist_t **spares;
+       uint_t i, nspares;
+       boolean_t inuse;
 
-       (void) snprintf(path, sizeof (path), "%s%s%s",
-           RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
-       if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
+       if ((fd = open(path, O_RDONLY)) < 0)
                return (B_FALSE);
-       if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
+
+       if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
+           !inuse ||
+           state != POOL_STATE_SPARE ||
+           zpool_read_label(fd, &label, NULL) != 0) {
+               free(name);
                (void) close(fd);
                return (B_FALSE);
        }
-       efi_free(label);
+       free(name);
        (void) close(fd);
-       return (B_TRUE);
+
+       if (config == NULL) {
+               nvlist_free(label);
+               return (B_TRUE);
+       }
+
+       verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
+       nvlist_free(label);
+
+       verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+           &nvroot) == 0);
+       if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+           &spares, &nspares) == 0) {
+               for (i = 0; i < nspares; i++) {
+                       verify(nvlist_lookup_uint64(spares[i],
+                           ZPOOL_CONFIG_GUID, &spareguid) == 0);
+                       if (spareguid == guid)
+                               return (B_TRUE);
+               }
+       }
+
+       return (B_FALSE);
 }
 
 /*
@@ -382,18 +587,20 @@ is_whole_disk(const char *arg)
  * device, fill in the device id to make a complete nvlist.  Valid forms for a
  * leaf vdev are:
  *
- *     /dev/dsk/xxx    Complete disk path
- *     /xxx            Full path to file
- *     xxx             Shorthand for /dev/dsk/xxx
+ *     /dev/xxx        Complete disk path
+ *     /xxx            Full path to file
+ *     xxx             Shorthand for <zfs_vdev_paths>/xxx
  */
 static nvlist_t *
-make_leaf_vdev(const char *arg, uint64_t is_log)
+make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
 {
        char path[MAXPATHLEN];
        struct stat64 statbuf;
        nvlist_t *vdev = NULL;
        char *type = NULL;
        boolean_t wholedisk = B_FALSE;
+       uint64_t ashift = 0;
+       int err;
 
        /*
         * Determine what type of vdev this is, and put the full path into
@@ -403,28 +610,32 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
        if (arg[0] == '/') {
                /*
                 * Complete device or file path.  Exact type is determined by
-                * examining the file descriptor afterwards.
+                * examining the file descriptor afterwards.  Symbolic links
+                * are resolved to their real paths to determine whole disk
+                * and S_ISBLK/S_ISREG type checks.  However, we are careful
+                * to store the given path as ZPOOL_CONFIG_PATH to ensure we
+                * can leverage udev's persistent device labels.
                 */
-               wholedisk = is_whole_disk(arg);
-               if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
+               if (realpath(arg, path) == NULL) {
+                       (void) fprintf(stderr,
+                           gettext("cannot resolve path '%s'\n"), arg);
+                       return (NULL);
+               }
+
+               wholedisk = zfs_dev_is_whole_disk(path);
+               if (!wholedisk && (stat64(path, &statbuf) != 0)) {
                        (void) fprintf(stderr,
                            gettext("cannot open '%s': %s\n"),
-                           arg, strerror(errno));
+                           path, strerror(errno));
                        return (NULL);
                }
 
-               (void) strlcpy(path, arg, sizeof (path));
+               /* After whole disk check restore original passed path */
+               strlcpy(path, arg, sizeof (path));
        } else {
-               /*
-                * This may be a short path for a device, or it could be total
-                * gibberish.  Check to see if it's a known device in
-                * /dev/dsk/.  As part of this check, see if we've been given a
-                * an entire disk (minus the slice number).
-                */
-               (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT,
-                   arg);
-               wholedisk = is_whole_disk(path);
-               if (!wholedisk && (stat64(path, &statbuf) != 0)) {
+               err = is_shorthand_path(arg, path, sizeof (path),
+                   &statbuf, &wholedisk);
+               if (err != 0) {
                        /*
                         * If we got ENOENT, then the user gave us
                         * gibberish, so try to direct them with a
@@ -432,7 +643,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
                         * regurgitate strerror() since it's the best we
                         * can do.
                         */
-                       if (errno == ENOENT) {
+                       if (err == ENOENT) {
                                (void) fprintf(stderr,
                                    gettext("cannot open '%s': no such "
                                    "device in %s\n"), arg, DISK_ROOT);
@@ -476,40 +687,44 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
                    (uint64_t)wholedisk) == 0);
 
        /*
-        * For a whole disk, defer getting its devid until after labeling it.
+        * Override defaults if custom properties are provided.
         */
-       if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
-               /*
-                * Get the devid for the device.
-                */
-               int fd;
-               ddi_devid_t devid;
-               char *minor = NULL, *devid_str = NULL;
-
-               if ((fd = open(path, O_RDONLY)) < 0) {
-                       (void) fprintf(stderr, gettext("cannot open '%s': "
-                           "%s\n"), path, strerror(errno));
-                       nvlist_free(vdev);
-                       return (NULL);
-               }
+       if (props != NULL) {
+               char *value = NULL;
 
-               if (devid_get(fd, &devid) == 0) {
-                       if (devid_get_minor_name(fd, &minor) == 0 &&
-                           (devid_str = devid_str_encode(devid, minor)) !=
-                           NULL) {
-                               verify(nvlist_add_string(vdev,
-                                   ZPOOL_CONFIG_DEVID, devid_str) == 0);
+               if (nvlist_lookup_string(props,
+                   zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) {
+                       if (zfs_nicestrtonum(NULL, value, &ashift) != 0) {
+                               (void) fprintf(stderr,
+                                   gettext("ashift must be a number.\n"));
+                               return (NULL);
+                       }
+                       if (ashift != 0 &&
+                           (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) {
+                               (void) fprintf(stderr,
+                                   gettext("invalid 'ashift=%" PRIu64 "' "
+                                   "property: only values between %" PRId32 " "
+                                   "and %" PRId32 " are allowed.\n"),
+                                   ashift, ASHIFT_MIN, ASHIFT_MAX);
+                               return (NULL);
                        }
-                       if (devid_str != NULL)
-                               devid_str_free(devid_str);
-                       if (minor != NULL)
-                               devid_str_free(minor);
-                       devid_free(devid);
                }
+       }
 
-               (void) close(fd);
+       /*
+        * If the device is known to incorrectly report its physical sector
+        * size explicitly provide the known correct value.
+        */
+       if (ashift == 0) {
+               int sector_size;
+
+               if (check_sector_size_database(path, &sector_size) == B_TRUE)
+                       ashift = highbit64(sector_size) - 1;
        }
 
+       if (ashift > 0)
+               (void) nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT, ashift);
+
        return (vdev);
 }
 
@@ -534,6 +749,19 @@ typedef struct replication_level {
 
 #define        ZPOOL_FUZZ      (16 * 1024 * 1024)
 
+/*
+ * Report whether 'a' is a "raidz" level and 'b' is a "mirror" level.  On a
+ * match the two are returned through 'raidz' and 'mirror' and B_TRUE is
+ * returned; otherwise the output pointers are left untouched.  Only this
+ * ordering of the arguments is tested.
+ */
+static boolean_t
+is_raidz_mirror(replication_level_t *a, replication_level_t *b,
+    replication_level_t **raidz, replication_level_t **mirror)
+{
+       if (strcmp(a->zprl_type, "raidz") != 0)
+               return (B_FALSE);
+       if (strcmp(b->zprl_type, "mirror") != 0)
+               return (B_FALSE);
+
+       *raidz = a;
+       *mirror = b;
+       return (B_TRUE);
+}
+
 /*
  * Given a list of toplevel vdevs, return the current replication level.  If
  * the config is inconsistent, then NULL is returned.  If 'fatal' is set, then
@@ -548,7 +776,10 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
        uint_t c, children;
        nvlist_t *nv;
        char *type;
-       replication_level_t lastrep, rep, *ret;
+       replication_level_t lastrep = {0};
+       replication_level_t rep;
+       replication_level_t *ret;
+       replication_level_t *raidz, *mirror;
        boolean_t dontreport;
 
        ret = safe_malloc(sizeof (replication_level_t));
@@ -556,7 +787,6 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
        verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
            &top, &toplevels) == 0);
 
-       lastrep.zprl_type = NULL;
        for (t = 0; t < toplevels; t++) {
                uint64_t is_log = B_FALSE;
 
@@ -570,8 +800,11 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
                if (is_log)
                        continue;
 
-               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
-                   &type) == 0);
+               /* Ignore holes introduced by removing aux devices */
+               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+               if (strcmp(type, VDEV_TYPE_HOLE) == 0)
+                       continue;
+
                if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
                    &child, &children) != 0) {
                        /*
@@ -627,9 +860,11 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 
                                /*
                                 * If this is a replacing or spare vdev, then
-                                * get the real first child of the vdev.
+                                * get the real first child of the vdev: do this
+                                * in a loop because replacing and spare vdevs
+                                * can be nested.
                                 */
-                               if (strcmp(childtype,
+                               while (strcmp(childtype,
                                    VDEV_TYPE_REPLACING) == 0 ||
                                    strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
                                        nvlist_t **rchild;
@@ -685,7 +920,7 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
                                 * this device altogether.
                                 */
                                if ((fd = open(path, O_RDONLY)) >= 0) {
-                                       err = fstat64(fd, &statbuf);
+                                       err = fstat64_blk(fd, &statbuf);
                                        (void) close(fd);
                                } else {
                                        err = stat64(path, &statbuf);
@@ -732,7 +967,35 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
                 * different.
                 */
                if (lastrep.zprl_type != NULL) {
-                       if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
+                       if (is_raidz_mirror(&lastrep, &rep, &raidz, &mirror) ||
+                           is_raidz_mirror(&rep, &lastrep, &raidz, &mirror)) {
+                               /*
+                                * Accept a mix of raidz and mirror vdevs when
+                                * they can handle the same number of disk failures.
+                                */
+                               if (raidz->zprl_parity !=
+                                   mirror->zprl_children - 1) {
+                                       if (ret != NULL)
+                                               free(ret);
+                                       ret = NULL;
+                                       if (fatal)
+                                               vdev_error(gettext(
+                                                   "mismatched replication "
+                                                   "level: "
+                                                   "%s and %s vdevs with "
+                                                   "different redundancy, "
+                                                   "%llu vs. %llu (%llu-way) "
+                                                   "are present\n"),
+                                                   raidz->zprl_type,
+                                                   mirror->zprl_type,
+                                                   raidz->zprl_parity,
+                                                   mirror->zprl_children - 1,
+                                                   mirror->zprl_children);
+                                       else
+                                               return (NULL);
+                               }
+                       } else if (strcmp(lastrep.zprl_type, rep.zprl_type) !=
+                           0) {
                                if (ret != NULL)
                                        free(ret);
                                ret = NULL;
@@ -795,6 +1058,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
        nvlist_t **child;
        uint_t  children;
        replication_level_t *current = NULL, *new;
+       replication_level_t *raidz, *mirror;
        int ret;
 
        /*
@@ -842,7 +1106,21 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
         */
        ret = 0;
        if (current != NULL) {
-               if (strcmp(current->zprl_type, new->zprl_type) != 0) {
+               if (is_raidz_mirror(current, new, &raidz, &mirror) ||
+                   is_raidz_mirror(new, current, &raidz, &mirror)) {
+                       if (raidz->zprl_parity != mirror->zprl_children - 1) {
+                               vdev_error(gettext(
+                                   "mismatched replication level: pool and "
+                                   "new vdev with different redundancy, %s "
+                                   "and %s vdevs, %llu vs. %llu (%llu-way)\n"),
+                                   raidz->zprl_type,
+                                   mirror->zprl_type,
+                                   raidz->zprl_parity,
+                                   mirror->zprl_children - 1,
+                                   mirror->zprl_children);
+                               ret = -1;
+                       }
+               } else if (strcmp(current->zprl_type, new->zprl_type) != 0) {
                        vdev_error(gettext(
                            "mismatched replication level: pool uses %s "
                            "and new vdev is %s\n"),
@@ -871,6 +1149,39 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
        return (ret);
 }
 
+/*
+ * Overwrite the first 4096 bytes of 'path' with zeros so that a magic
+ * value left behind by a previous filesystem is wiped.  The path is opened
+ * with O_WRONLY|O_EXCL and the write is followed by fdatasync() before the
+ * descriptor is closed.
+ *
+ * Returns 0 when all 4096 bytes were written, or -1 (after printing a
+ * message to stderr) if the open fails, the write fails, or the write
+ * is short.
+ */
+static int
+zero_label(char *path)
+{
+       /* A true compile-time constant, so 'buf' is not a VLA. */
+       enum { ZERO_LABEL_SIZE = 4096 };
+       char buf[ZERO_LABEL_SIZE] = { 0 };
+       ssize_t written;
+       int fd, write_errno;
+
+       if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) {
+               (void) fprintf(stderr, gettext("cannot open '%s': %s\n"),
+                   path, strerror(errno));
+               return (-1);
+       }
+
+       written = write(fd, buf, sizeof (buf));
+       /*
+        * fdatasync() and close() may overwrite errno, so capture the
+        * value belonging to write() before calling them.
+        */
+       write_errno = errno;
+       (void) fdatasync(fd);
+       (void) close(fd);
+
+       if (written == -1) {
+               (void) fprintf(stderr, gettext("cannot zero first %d bytes "
+                   "of '%s': %s\n"), ZERO_LABEL_SIZE, path,
+                   strerror(write_errno));
+               return (-1);
+       }
+
+       if (written != ZERO_LABEL_SIZE) {
+               (void) fprintf(stderr, gettext("could only zero %d/%d bytes "
+                   "of '%s'\n"), (int)written, ZERO_LABEL_SIZE, path);
+               return (-1);
+       }
+
+       return (0);
+}
+
 /*
  * Go through and find any whole disks in the vdev specification, labelling them
  * as appropriate.  When constructing the vdev spec, we were unable to open this
@@ -886,13 +1197,14 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
 {
        nvlist_t **child;
        uint_t c, children;
-       char *type, *path, *diskname;
-       char buf[MAXPATHLEN];
+       char *type, *path;
+       char devpath[MAXPATHLEN];
+       char udevpath[MAXPATHLEN];
        uint64_t wholedisk;
+       struct stat64 statbuf;
+       int is_exclusive = 0;
        int fd;
        int ret;
-       ddi_devid_t devid;
-       char *minor = NULL, *devid_str = NULL;
 
        verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
 
@@ -903,55 +1215,110 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
                        return (0);
 
                /*
-                * We have a disk device.  Get the path to the device
-                * and see if it's a whole disk by appending the backup
-                * slice and stat()ing the device.
+                * We have a disk device.  If this is a whole disk write
+                * out the EFI partition table, otherwise write zeros to
+                * the first 4k of the partition.  This is to ensure that
+                * libblkid will not misidentify the partition due to a
+                * magic value left by the previous filesystem.
                 */
-               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-               if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
-                   &wholedisk) != 0 || !wholedisk)
+               verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
+               verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+                   &wholedisk));
+
+               if (!wholedisk) {
+                       /*
+                        * Update device id string for mpath nodes (Linux only)
+                        */
+                       if (is_mpath_whole_disk(path))
+                               update_vdev_config_dev_strs(nv);
+
+                       if (!is_spare(NULL, path))
+                               (void) zero_label(path);
                        return (0);
+               }
 
-               diskname = strrchr(path, '/');
-               assert(diskname != NULL);
-               diskname++;
-               if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
-                       return (-1);
+               if (realpath(path, devpath) == NULL) {
+                       ret = errno;
+                       (void) fprintf(stderr,
+                           gettext("cannot resolve path '%s'\n"), path);
+                       return (ret);
+               }
 
                /*
-                * Fill in the devid, now that we've labeled the disk.
+                * Remove any previously existing symlink from a udev path to
+                * the device before labeling the disk.  This ensures that
+                * only newly created links are used.  Otherwise there is a
+                * window between when udev deletes and recreates the link
+                * during which access attempts will fail with ENOENT.
                 */
-               (void) snprintf(buf, sizeof (buf), "%ss0", path);
-               if ((fd = open(buf, O_RDONLY)) < 0) {
-                       (void) fprintf(stderr,
-                           gettext("cannot open '%s': %s\n"),
-                           buf, strerror(errno));
-                       return (-1);
+               strlcpy(udevpath, path, MAXPATHLEN);
+               (void) zfs_append_partition(udevpath, MAXPATHLEN);
+
+               fd = open(devpath, O_RDWR|O_EXCL);
+               if (fd == -1) {
+                       if (errno == EBUSY)
+                               is_exclusive = 1;
+               } else {
+                       (void) close(fd);
                }
 
-               if (devid_get(fd, &devid) == 0) {
-                       if (devid_get_minor_name(fd, &minor) == 0 &&
-                           (devid_str = devid_str_encode(devid, minor)) !=
-                           NULL) {
-                               verify(nvlist_add_string(nv,
-                                   ZPOOL_CONFIG_DEVID, devid_str) == 0);
+               /*
+                * If the partition exists, contains a valid spare label,
+                * and is opened exclusively there is no need to partition
+                * it.  Hot spares have already been partitioned and are
+                * held open exclusively by the kernel as a safety measure.
+                *
+                * If the provided path is for a /dev/disk/ device its
+                * symbolic link will be removed, partition table created,
+                * and then block until udev creates the new link.
+                */
+               if (!is_exclusive || !is_spare(NULL, udevpath)) {
+                       char *devnode = strrchr(devpath, '/') + 1;
+
+                       ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT));
+                       if (ret == 0) {
+                               ret = lstat64(udevpath, &statbuf);
+                               if (ret == 0 && S_ISLNK(statbuf.st_mode))
+                                       (void) unlink(udevpath);
                        }
-                       if (devid_str != NULL)
-                               devid_str_free(devid_str);
-                       if (minor != NULL)
-                               devid_str_free(minor);
-                       devid_free(devid);
+
+                       /*
+                        * When labeling a pool the raw device node name
+                        * is provided as it appears under /dev/.
+                        */
+                       if (zpool_label_disk(g_zfs, zhp, devnode) == -1)
+                               return (-1);
+
+                       /*
+                        * Wait for udev to signal the device is available
+                        * by the provided path.
+                        */
+                       ret = zpool_label_disk_wait(udevpath, DISK_LABEL_WAIT);
+                       if (ret) {
+                               (void) fprintf(stderr,
+                                   gettext("missing link: %s was "
+                                   "partitioned but %s is missing\n"),
+                                   devnode, udevpath);
+                               return (ret);
+                       }
+
+                       ret = zero_label(udevpath);
+                       if (ret)
+                               return (ret);
                }
 
                /*
-                * Update the path to refer to the 's0' slice.  The presence of
+                * Update the path to refer to the partition.  The presence of
                 * the 'whole_disk' field indicates to the CLI that we should
-                * chop off the slice number when displaying the device in
+                * chop off the partition number when displaying the device in
                 * future output.
                 */
-               verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
+               verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, udevpath) == 0);
 
-               (void) close(fd);
+               /*
+                * Update device id strings for whole disks (Linux only)
+                */
+               update_vdev_config_dev_strs(nv);
 
                return (0);
        }
@@ -975,138 +1342,110 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
        return (0);
 }
 
-/*
- * Determine if the given path is a hot spare within the given configuration.
- */
-static boolean_t
-is_spare(nvlist_t *config, const char *path)
-{
-       int fd;
-       pool_state_t state;
-       char *name = NULL;
-       nvlist_t *label;
-       uint64_t guid, spareguid;
-       nvlist_t *nvroot;
-       nvlist_t **spares;
-       uint_t i, nspares;
-       boolean_t inuse;
-
-       if ((fd = open(path, O_RDONLY)) < 0)
-               return (B_FALSE);
-
-       if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
-           !inuse ||
-           state != POOL_STATE_SPARE ||
-           zpool_read_label(fd, &label) != 0) {
-               free(name);
-               (void) close(fd);
-               return (B_FALSE);
-       }
-       free(name);
-
-       (void) close(fd);
-       verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
-       nvlist_free(label);
-
-       verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-           &nvroot) == 0);
-       if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
-           &spares, &nspares) == 0) {
-               for (i = 0; i < nspares; i++) {
-                       verify(nvlist_lookup_uint64(spares[i],
-                           ZPOOL_CONFIG_GUID, &spareguid) == 0);
-                       if (spareguid == guid)
-                               return (B_TRUE);
-               }
-       }
-
-       return (B_FALSE);
-}
-
 /*
  * Go through and find any devices that are in use.  We rely on libdiskmgt for
  * the majority of this task.
  */
-static int
-check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
-    int isspare)
+static boolean_t
+is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
+    boolean_t replacing, boolean_t isspare)
 {
        nvlist_t **child;
        uint_t c, children;
        char *type, *path;
-       int ret;
+       int ret = 0;
        char buf[MAXPATHLEN];
-       uint64_t wholedisk;
+       uint64_t wholedisk = B_FALSE;
+       boolean_t anyinuse = B_FALSE;
 
        verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
 
        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
            &child, &children) != 0) {
 
-               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+               verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
+               if (strcmp(type, VDEV_TYPE_DISK) == 0)
+                       verify(!nvlist_lookup_uint64(nv,
+                           ZPOOL_CONFIG_WHOLE_DISK, &wholedisk));
 
                /*
                 * As a generic check, we look to see if this is a replace of a
                 * hot spare within the same pool.  If so, we allow it
-                * regardless of what libdiskmgt or zpool_in_use() says.
+                * regardless of what libblkid or zpool_in_use() says.
                 */
-               if (isreplacing) {
-                       if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
-                           &wholedisk) == 0 && wholedisk)
-                               (void) snprintf(buf, sizeof (buf), "%ss0",
-                                   path);
-                       else
-                               (void) strlcpy(buf, path, sizeof (buf));
+               if (replacing) {
+                       (void) strlcpy(buf, path, sizeof (buf));
+                       if (wholedisk) {
+                               ret = zfs_append_partition(buf,  sizeof (buf));
+                               if (ret == -1)
+                                       return (-1);
+                       }
+
                        if (is_spare(config, buf))
-                               return (0);
+                               return (B_FALSE);
                }
 
                if (strcmp(type, VDEV_TYPE_DISK) == 0)
-                       ret = check_device(path, force, isspare);
+                       ret = check_device(path, force, isspare, wholedisk);
 
-               if (strcmp(type, VDEV_TYPE_FILE) == 0)
+               else if (strcmp(type, VDEV_TYPE_FILE) == 0)
                        ret = check_file(path, force, isspare);
 
-               return (ret);
+               return (ret != 0);
        }
 
        for (c = 0; c < children; c++)
-               if ((ret = check_in_use(config, child[c], force,
-                   isreplacing, B_FALSE)) != 0)
-                       return (ret);
+               if (is_device_in_use(config, child[c], force, replacing,
+                   B_FALSE))
+                       anyinuse = B_TRUE;
 
        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
            &child, &children) == 0)
                for (c = 0; c < children; c++)
-                       if ((ret = check_in_use(config, child[c], force,
-                           isreplacing, B_TRUE)) != 0)
-                               return (ret);
+                       if (is_device_in_use(config, child[c], force, replacing,
+                           B_TRUE))
+                               anyinuse = B_TRUE;
 
        if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
            &child, &children) == 0)
                for (c = 0; c < children; c++)
-                       if ((ret = check_in_use(config, child[c], force,
-                           isreplacing, B_FALSE)) != 0)
-                               return (ret);
+                       if (is_device_in_use(config, child[c], force, replacing,
+                           B_FALSE))
+                               anyinuse = B_TRUE;
 
-       return (0);
+       return (anyinuse);
 }
 
 static const char *
-is_grouping(const char *type, int *mindev)
+is_grouping(const char *type, int *mindev, int *maxdev)
 {
-       if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
-               if (mindev != NULL)
-                       *mindev = 2;
-               return (VDEV_TYPE_RAIDZ);
-       }
+       if (strncmp(type, "raidz", 5) == 0) {
+               const char *p = type + 5;
+               char *end;
+               long nparity;
+
+               if (*p == '\0') {
+                       nparity = 1;
+               } else if (*p == '0') {
+                       return (NULL); /* no zero prefixes allowed */
+               } else {
+                       errno = 0;
+                       nparity = strtol(p, &end, 10);
+                       if (errno != 0 || nparity < 1 || nparity >= 255 ||
+                           *end != '\0')
+                               return (NULL);
+               }
 
-       if (strcmp(type, "raidz2") == 0) {
                if (mindev != NULL)
-                       *mindev = 3;
+                       *mindev = nparity + 1;
+               if (maxdev != NULL)
+                       *maxdev = 255;
                return (VDEV_TYPE_RAIDZ);
        }
 
+       if (maxdev != NULL)
+               *maxdev = INT_MAX;
+
        if (strcmp(type, "mirror") == 0) {
                if (mindev != NULL)
                        *mindev = 2;
@@ -1141,10 +1480,10 @@ is_grouping(const char *type, int *mindev)
  * because the program is just going to exit anyway.
  */
 nvlist_t *
-construct_spec(int argc, char **argv)
+construct_spec(nvlist_t *props, int argc, char **argv)
 {
        nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
-       int t, toplevels, mindev, nspares, nlogs, nl2cache;
+       int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
        const char *type;
        uint64_t is_log;
        boolean_t seen_logs;
@@ -1158,6 +1497,7 @@ construct_spec(int argc, char **argv)
        nl2cache = 0;
        is_log = B_FALSE;
        seen_logs = B_FALSE;
+       nvroot = NULL;
 
        while (argc > 0) {
                nv = NULL;
@@ -1166,7 +1506,7 @@ construct_spec(int argc, char **argv)
                 * If it's a mirror or raidz, the subsequent arguments are
                 * its leaves -- until we encounter the next mirror or raidz.
                 */
-               if ((type = is_grouping(argv[0], &mindev)) != NULL) {
+               if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
                        nvlist_t **child = NULL;
                        int c, children = 0;
 
@@ -1176,7 +1516,7 @@ construct_spec(int argc, char **argv)
                                            gettext("invalid vdev "
                                            "specification: 'spare' can be "
                                            "specified only once\n"));
-                                       return (NULL);
+                                       goto spec_out;
                                }
                                is_log = B_FALSE;
                        }
@@ -1187,7 +1527,7 @@ construct_spec(int argc, char **argv)
                                            gettext("invalid vdev "
                                            "specification: 'log' can be "
                                            "specified only once\n"));
-                                       return (NULL);
+                                       goto spec_out;
                                }
                                seen_logs = B_TRUE;
                                is_log = B_TRUE;
@@ -1206,7 +1546,7 @@ construct_spec(int argc, char **argv)
                                            gettext("invalid vdev "
                                            "specification: 'cache' can be "
                                            "specified only once\n"));
-                                       return (NULL);
+                                       goto spec_out;
                                }
                                is_log = B_FALSE;
                        }
@@ -1217,22 +1557,27 @@ construct_spec(int argc, char **argv)
                                            gettext("invalid vdev "
                                            "specification: unsupported 'log' "
                                            "device: %s\n"), type);
-                                       return (NULL);
+                                       goto spec_out;
                                }
                                nlogs++;
                        }
 
                        for (c = 1; c < argc; c++) {
-                               if (is_grouping(argv[c], NULL) != NULL)
+                               if (is_grouping(argv[c], NULL, NULL) != NULL)
                                        break;
                                children++;
                                child = realloc(child,
                                    children * sizeof (nvlist_t *));
                                if (child == NULL)
                                        zpool_no_memory();
-                               if ((nv = make_leaf_vdev(argv[c], B_FALSE))
-                                   == NULL)
-                                       return (NULL);
+                               if ((nv = make_leaf_vdev(props, argv[c],
+                                   B_FALSE)) == NULL) {
+                                       for (c = 0; c < children - 1; c++)
+                                               nvlist_free(child[c]);
+                                       free(child);
+                                       goto spec_out;
+                               }
+
                                child[children - 1] = nv;
                        }
 
@@ -1240,7 +1585,20 @@ construct_spec(int argc, char **argv)
                                (void) fprintf(stderr, gettext("invalid vdev "
                                    "specification: %s requires at least %d "
                                    "devices\n"), argv[0], mindev);
-                               return (NULL);
+                               for (c = 0; c < children; c++)
+                                       nvlist_free(child[c]);
+                               free(child);
+                               goto spec_out;
+                       }
+
+                       if (children > maxdev) {
+                               (void) fprintf(stderr, gettext("invalid vdev "
+                                   "specification: %s supports no more than "
+                                   "%d devices\n"), argv[0], maxdev);
+                               for (c = 0; c < children; c++)
+                                       nvlist_free(child[c]);
+                               free(child);
+                               goto spec_out;
                        }
 
                        argc -= c;
@@ -1279,8 +1637,10 @@ construct_spec(int argc, char **argv)
                         * We have a device.  Pass off to make_leaf_vdev() to
                         * construct the appropriate nvlist describing the vdev.
                         */
-                       if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
-                               return (NULL);
+                       if ((nv = make_leaf_vdev(props, argv[0],
+                           is_log)) == NULL)
+                               goto spec_out;
+
                        if (is_log)
                                nlogs++;
                        argc--;
@@ -1298,13 +1658,13 @@ construct_spec(int argc, char **argv)
                (void) fprintf(stderr, gettext("invalid vdev "
                    "specification: at least one toplevel vdev must be "
                    "specified\n"));
-               return (NULL);
+               goto spec_out;
        }
 
        if (seen_logs && nlogs == 0) {
                (void) fprintf(stderr, gettext("invalid vdev specification: "
                    "log requires at least 1 device\n"));
-               return (NULL);
+               goto spec_out;
        }
 
        /*
@@ -1322,21 +1682,66 @@ construct_spec(int argc, char **argv)
                verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
                    l2cache, nl2cache) == 0);
 
+spec_out:
        for (t = 0; t < toplevels; t++)
                nvlist_free(top[t]);
        for (t = 0; t < nspares; t++)
                nvlist_free(spares[t]);
        for (t = 0; t < nl2cache; t++)
                nvlist_free(l2cache[t]);
-       if (spares)
-               free(spares);
-       if (l2cache)
-               free(l2cache);
+
+       free(spares);
+       free(l2cache);
        free(top);
 
        return (nvroot);
 }
 
+/*
+ * Split a new pool named 'newname' off the pool behind 'zhp'.
+ *
+ * When devices are named on the command line (argc > 0) a vdev tree is
+ * built from them with construct_spec(), the disks are prepared through
+ * make_disks() unless flags.dryrun is set, and the request is rejected if
+ * any top-level child has a path that is_grouping() recognizes as a
+ * grouping keyword.  With no device arguments the tree pointer stays NULL
+ * and its address is handed to zpool_vdev_split() as is.
+ *
+ * Returns the tree as it stands after zpool_vdev_split() succeeds, or
+ * NULL on any failure; a tree built here is freed before NULL is returned.
+ */
+nvlist_t *
+split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
+    splitflags_t flags, int argc, char **argv)
+{
+       nvlist_t *newroot = NULL;
+
+       if (argc > 0) {
+               nvlist_t **kids;
+               uint_t nkids, i;
+
+               newroot = construct_spec(props, argc, argv);
+               if (newroot == NULL) {
+                       (void) fprintf(stderr, gettext("Unable to build a "
+                           "pool from the specified devices\n"));
+                       return (NULL);
+               }
+
+               if (!flags.dryrun && make_disks(zhp, newroot) != 0)
+                       goto fail;
+
+               /* avoid any tricks in the spec */
+               verify(nvlist_lookup_nvlist_array(newroot,
+                   ZPOOL_CONFIG_CHILDREN, &kids, &nkids) == 0);
+               for (i = 0; i < nkids; i++) {
+                       const char *grouping;
+                       char *path;
+                       int lo, hi;
+
+                       verify(nvlist_lookup_string(kids[i],
+                           ZPOOL_CONFIG_PATH, &path) == 0);
+                       grouping = is_grouping(path, &lo, &hi);
+                       if (grouping == NULL)
+                               continue;
+
+                       (void) fprintf(stderr, gettext("Cannot use "
+                           "'%s' as a device for splitting\n"), grouping);
+                       goto fail;
+               }
+       }
+
+       if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0)
+               goto fail;
+
+       return (newroot);
+
+fail:
+       /* Single cleanup path: release whatever tree we hold, if any. */
+       nvlist_free(newroot);
+       return (NULL);
+}
 
 /*
  * Get and validate the contents of the given vdev specification.  This ensures
@@ -1349,8 +1754,8 @@ construct_spec(int argc, char **argv)
  * added, even if they appear in use.
  */
 nvlist_t *
-make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
-    boolean_t isreplacing, boolean_t dryrun, int argc, char **argv)
+make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
+    boolean_t replacing, boolean_t dryrun, int argc, char **argv)
 {
        nvlist_t *newroot;
        nvlist_t *poolconfig = NULL;
@@ -1361,11 +1766,13 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
         * that we have a valid specification, and that all devices can be
         * opened.
         */
-       if ((newroot = construct_spec(argc, argv)) == NULL)
+       if ((newroot = construct_spec(props, argc, argv)) == NULL)
                return (NULL);
 
-       if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
+       if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) {
+               nvlist_free(newroot);
                return (NULL);
+       }
 
        /*
         * Validate each device to make sure that its not shared with another
@@ -1373,8 +1780,7 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
         * uses (such as a dedicated dump device) that even '-f' cannot
         * override.
         */
-       if (check_in_use(poolconfig, newroot, force, isreplacing,
-           B_FALSE) != 0) {
+       if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
                nvlist_free(newroot);
                return (NULL);
        }