Add linux user disk support

author Brian Behlendorf <behlendorf1@llnl.gov>

Thu, 26 Aug 2010 18:56:53 +0000 (11:56 -0700)

committer Brian Behlendorf <behlendorf1@llnl.gov>

Tue, 31 Aug 2010 20:42:00 +0000 (13:42 -0700)
author Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 26 Aug 2010 18:56:53 +0000 (11:56 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Tue, 31 Aug 2010 20:42:00 +0000 (13:42 -0700)
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c

index 459445bb729d2eeeaf5ab6ada739f7acc43ad236..d0354809d33196927842c8385c94c0981253f34e 100644 (file)
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -55,7 +55,6 @@
  #include <sys/arc.h>
  #include <sys/ddt.h>
  #undef ZFS_MAXNAMELEN
-#undef verify
  #include <libzfs.h>
  
  #define        ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \
diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c

index 9cb69c1bf21ea95266d84634bbb8504d2e2368e5..ebcec2d6e26d7bd3b263a251fcdcaa112c7efe3d 100644 (file)
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -318,6 +318,7 @@ safe_malloc(size_t size)
         return (data);
  }
  
+#ifdef HAVE_ZPL
  static char *
  safe_strdup(char *str)
  {
@@ -328,6 +329,7 @@ safe_strdup(char *str)
  
         return (dupstr);
  }
+#endif /* HAVE_ZPL */
  
  /*
   * Callback routine that will print out information for each of
@@ -495,6 +497,7 @@ parse_depth(char *opt, int *flags)
  
  #define        PROGRESS_DELAY 2                /* seconds */
  
+#ifdef HAVE_ZPL
  static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
  static time_t pt_begin;
  static char *pt_header = NULL;
@@ -546,6 +549,8 @@ finish_progress(char *done)
         free(pt_header);
         pt_header = NULL;
  }
+#endif /* HAVE_ZPL */
+
  /*
   * zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
   *
@@ -626,6 +631,7 @@ zfs_do_clone(int argc, char **argv)
         ret = zfs_clone(zhp, argv[1], props);
  
         /* create the mountpoint if necessary */
+#ifdef HAVE_ZPL
         if (ret == 0) {
                 zfs_handle_t *clone;
  
@@ -637,6 +643,7 @@ zfs_do_clone(int argc, char **argv)
                         zfs_close(clone);
                 }
         }
+#endif /* HAVE_ZPL */
  
         zfs_close(zhp);
         nvlist_free(props);
@@ -824,6 +831,7 @@ zfs_do_create(int argc, char **argv)
          * verbose error message to let the user know that their filesystem was
          * in fact created, even if we failed to mount or share it.
          */
+#ifdef HAVE_ZPL
         if (canmount == ZFS_CANMOUNT_ON) {
                 if (zfs_mount(zhp, NULL, 0) != 0) {
                         (void) fprintf(stderr, gettext("filesystem "
@@ -835,6 +843,7 @@ zfs_do_create(int argc, char **argv)
                         ret = 1;
                 }
         }
+#endif /* HAVE_ZPL */
  
  error:
         if (zhp)
@@ -2940,6 +2949,7 @@ zfs_do_release(int argc, char **argv)
  #define        SPINNER_TIME 3          /* seconds */
  #define        MOUNT_TIME 5            /* seconds */
  
+#ifdef HAVE_ZPL
  static int
  get_one_dataset(zfs_handle_t *zhp, void *data)
  {
@@ -3387,6 +3397,7 @@ share_mount(int op, int argc, char **argv)
  
         return (ret);
  }
+#endif  /* HAVE_ZPL */
  
  /*
   * zfs mount -a [nfs]
@@ -3397,7 +3408,11 @@ share_mount(int op, int argc, char **argv)
  static int
  zfs_do_mount(int argc, char **argv)
  {
+#ifdef HAVE_ZPL
         return (share_mount(OP_MOUNT, argc, argv));
+#else
+       /* share_mount() is compiled out without HAVE_ZPL; report ENOSYS. */
+       return ENOSYS;
+#endif  /* HAVE_ZPL */
  }
  
  /*
@@ -3409,9 +3424,14 @@ zfs_do_mount(int argc, char **argv)
  static int
  zfs_do_share(int argc, char **argv)
  {
+#ifdef HAVE_ZPL
         return (share_mount(OP_SHARE, argc, argv));
+#else
+       /* share_mount() is compiled out without HAVE_ZPL; report ENOSYS. */
+       return ENOSYS;
+#endif  /* HAVE_ZPL */
  }
  
+#ifdef HAVE_ZPL
  typedef struct unshare_unmount_node {
         zfs_handle_t    *un_zhp;
         char            *un_mountp;
@@ -3795,6 +3815,7 @@ unshare_unmount(int op, int argc, char **argv)
  
         return (ret);
  }
+#endif  /* HAVE_ZPL */
  
  /*
   * zfs unmount -a
@@ -3805,7 +3826,11 @@ unshare_unmount(int op, int argc, char **argv)
  static int
  zfs_do_unmount(int argc, char **argv)
  {
+#ifdef HAVE_ZPL
         return (unshare_unmount(OP_MOUNT, argc, argv));
+#else
+       /* unshare_unmount() is compiled out without HAVE_ZPL; report ENOSYS. */
+       return ENOSYS;
+#endif  /* HAVE_ZPL */
  }
  
  /*
@@ -3817,7 +3842,11 @@ zfs_do_unmount(int argc, char **argv)
  static int
  zfs_do_unshare(int argc, char **argv)
  {
+#ifdef HAVE_ZPL
         return (unshare_unmount(OP_SHARE, argc, argv));
+#else
+       /* unshare_unmount() is compiled out without HAVE_ZPL; report ENOSYS. */
+       return ENOSYS;
+#endif  /* HAVE_ZPL */
  }
  
  /* ARGSUSED */
@@ -3833,6 +3862,7 @@ zfs_do_python(int argc, char **argv)
   * Called when invoked as /etc/fs/zfs/mount.  Do the mount if the mountpoint is
   * 'legacy'.  Otherwise, complain that use should be using 'zfs mount'.
   */
+#ifdef HAVE_ZPL
  static int
  manual_mount(int argc, char **argv)
  {
@@ -3963,6 +3993,7 @@ manual_unmount(int argc, char **argv)
  
         return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE));
  }
+#endif /* HAVE_ZPL */
  
  static int
  find_command_idx(char *command, int *idx)
@@ -4061,7 +4092,9 @@ main(int argc, char **argv)
  {
         int ret;
         int i = 0;
+#ifdef HAVE_ZPL
         char *progname;
+#endif
         char *cmdname;
  
         (void) setlocale(LC_ALL, "");
@@ -4086,6 +4119,7 @@ main(int argc, char **argv)
                 return (1);
         }
  
+#ifdef HAVE_ZPL
         /*
          * This command also doubles as the /etc/fs mount and unmount program.
          * Determine if we should take this behavior based on argv[0].
@@ -4096,6 +4130,9 @@ main(int argc, char **argv)
         } else if (strcmp(progname, "umount") == 0) {
                 ret = manual_unmount(argc, argv);
         } else {
+#else
+       {
+#endif /* HAVE_ZPL */
                 /*
                  * Make sure the user has specified some command.
                  */
diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c

index 60c53ceb3fce2d52259185ad7a88ff3f3a5c903a..643d73e7fd8c44ea654d7127f4bed4b76f8359e1 100644 (file)
--- a/cmd/zinject/zinject.c
+++ b/cmd/zinject/zinject.c
@@ -954,17 +954,20 @@ main(int argc, char **argv)
         if (dataset[0] != '\0' && domount) {
                 if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
                         return (1);
-
+#ifdef HAVE_ZPL
                 if (zfs_unmount(zhp, NULL, 0) != 0)
                         return (1);
+#endif /* HAVE_ZPL */
         }
  
         record.zi_error = error;
  
         ret = register_handler(pool, flags, &record, quiet);
  
+#ifdef HAVE_ZPL
         if (dataset[0] != '\0' && domount)
                 ret = (zfs_mount(zhp, NULL, 0) != 0);
+#endif /* HAVE_ZPL */
  
         libzfs_fini(g_zfs);
  
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c

index 074f76e81eb1c575438bf2f84d62340e1c2e2965..b1b71acf841efd4c8fb0c05952e39e77a23207ad 100644 (file)
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -716,7 +716,9 @@ zpool_do_create(int argc, char **argv)
             (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 &&
             strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) {
                 char buf[MAXPATHLEN];
+#ifdef HAVE_ZPL
                 DIR *dirp;
+#endif
  
                 if (mountpoint && mountpoint[0] != '/') {
                         (void) fprintf(stderr, gettext("invalid mountpoint "
@@ -741,6 +743,7 @@ zpool_do_create(int argc, char **argv)
                                     mountpoint);
                 }
  
+#ifdef HAVE_ZPL
                 if ((dirp = opendir(buf)) == NULL && errno != ENOENT) {
                         (void) fprintf(stderr, gettext("mountpoint '%s' : "
                             "%s\n"), buf, strerror(errno));
@@ -763,6 +766,7 @@ zpool_do_create(int argc, char **argv)
                                 goto errout;
                         }
                 }
+#endif /* HAVE_ZPL */
         }
  
         if (dryrun) {
@@ -793,8 +797,12 @@ zpool_do_create(int argc, char **argv)
                                             zfs_prop_to_name(
                                             ZFS_PROP_MOUNTPOINT),
                                             mountpoint) == 0);
+#ifdef HAVE_ZPL
                                 if (zfs_mount(pool, NULL, 0) == 0)
                                         ret = zfs_shareall(pool);
+#else
+                               ret = 0;
+#endif /* HAVE_ZPL */
                                 zfs_close(pool);
                         }
                 } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) {
@@ -1571,12 +1579,14 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
         if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL)
                 return (1);
  
+#ifdef HAVE_ZPL
+       /* Enable datasets unless the pool is unavailable or import-only. */
         if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
             !(flags & ZFS_IMPORT_ONLY) &&
             zpool_enable_datasets(zhp, mntopts, 0) != 0) {
                 zpool_close(zhp);
                 return (1);
         }
+#endif /* HAVE_ZPL */
  
         zpool_close(zhp);
         return (0);
@@ -1592,7 +1602,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
   *      -c     Read pool information from a cachefile instead of searching
   *             devices.
   *
- *       -d    Scan in a specific directory, other than /dev/dsk.  More than
+ *       -d    Scan in a specific directory, other than /dev/.  More than
   *             one directory can be specified using multiple '-d' options.
   *
   *       -D     Scan for previously destroyed pools or import all or only
@@ -1773,12 +1783,6 @@ zpool_do_import(int argc, char **argv)
             nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0)
                 goto error;
  
-       if (searchdirs == NULL) {
-               searchdirs = safe_malloc(sizeof (char *));
-               searchdirs[0] = "/dev/dsk";
-               nsearch = 1;
-       }
-
         /* check argument count */
         if (do_all) {
                 if (argc != 0) {
@@ -1799,7 +1803,9 @@ zpool_do_import(int argc, char **argv)
                 if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) {
                         (void) fprintf(stderr, gettext("cannot "
                             "discover pools: permission denied\n"));
-                       free(searchdirs);
+                       if (searchdirs != NULL)
+                               free(searchdirs);
+
                         nvlist_free(policy);
                         return (1);
                 }
@@ -1867,7 +1873,8 @@ zpool_do_import(int argc, char **argv)
         }
  
         if (err == 1) {
-               free(searchdirs);
+               if (searchdirs != NULL)
+                       free(searchdirs);
                 nvlist_free(policy);
                 return (1);
         }
@@ -1968,7 +1975,8 @@ error:
         nvlist_free(props);
         nvlist_free(pools);
         nvlist_free(policy);
-       free(searchdirs);
+       if (searchdirs != NULL)
+               free(searchdirs);
  
         return (err ? 1 : 0);
  }
diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c

index 0c224903d85ada8944f4ce496998b1ae86006a29..febdda95f48bba4588af76a5ea5b3ace1dcb35be 100644 (file)
--- a/cmd/zpool/zpool_vdev.c
+++ b/cmd/zpool/zpool_vdev.c
@@ -50,7 +50,7 @@
   *
   *     1. Construct the vdev specification.  Performs syntax validation and
   *         makes sure each device is valid.
- *     2. Check for devices in use.  Using libdiskmgt, makes sure that no
+ *     2. Check for devices in use.  Uses libblkid to make sure that no
   *         devices are also in use.  Some can be overridden using the 'force'
   *         flag, others cannot.
   *     3. Check for replication errors if the 'force' flag is not specified.
@@ -60,10 +60,10 @@
   */
  
  #include <assert.h>
+#include <ctype.h>
  #include <devid.h>
  #include <errno.h>
  #include <fcntl.h>
-#include <libdiskmgt.h>
  #include <libintl.h>
  #include <libnvpair.h>
  #include <limits.h>
@@ -74,13 +74,15 @@
  #include <sys/stat.h>
  #include <sys/vtoc.h>
  #include <sys/mntent.h>
+#include <uuid/uuid.h>
+#ifdef HAVE_LIBBLKID
+#include <blkid/blkid.h>
+#else
+#define blkid_cache void *
+#endif /* HAVE_LIBBLKID */
  
  #include "zpool_util.h"
  
-#define        DISK_ROOT       "/dev/dsk"
-#define        RDISK_ROOT      "/dev/rdsk"
-#define        BACKUP_SLICE    "s2"
-
  /*
   * For any given vdev specification, we can have multiple errors.  The
   * vdev_error() function keeps track of whether we have seen an error yet, and
@@ -111,168 +113,6 @@ vdev_error(const char *fmt, ...)
         va_end(ap);
  }
  
-static void
-libdiskmgt_error(int error)
-{
-       /*
-        * ENXIO/ENODEV is a valid error message if the device doesn't live in
-        * /dev/dsk.  Don't bother printing an error message in this case.
-        */
-       if (error == ENXIO || error == ENODEV)
-               return;
-
-       (void) fprintf(stderr, gettext("warning: device in use checking "
-           "failed: %s\n"), strerror(error));
-}
-
-/*
- * Validate a device, passing the bulk of the work off to libdiskmgt.
- */
-static int
-check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
-{
-       char *msg;
-       int error = 0;
-       dm_who_type_t who;
-
-       if (force)
-               who = DM_WHO_ZPOOL_FORCE;
-       else if (isspare)
-               who = DM_WHO_ZPOOL_SPARE;
-       else
-               who = DM_WHO_ZPOOL;
-
-       if (dm_inuse((char *)path, &msg, who, &error) || error) {
-               if (error != 0) {
-                       libdiskmgt_error(error);
-                       return (0);
-               } else {
-                       vdev_error("%s", msg);
-                       free(msg);
-                       return (-1);
-               }
-       }
-
-       /*
-        * If we're given a whole disk, ignore overlapping slices since we're
-        * about to label it anyway.
-        */
-       error = 0;
-       if (!wholedisk && !force &&
-           (dm_isoverlapping((char *)path, &msg, &error) || error)) {
-               if (error == 0) {
-                       /* dm_isoverlapping returned -1 */
-                       vdev_error(gettext("%s overlaps with %s\n"), path, msg);
-                       free(msg);
-                       return (-1);
-               } else if (error != ENODEV) {
-                       /* libdiskmgt's devcache only handles physical drives */
-                       libdiskmgt_error(error);
-                       return (0);
-               }
-       }
-
-       return (0);
-}
-
-
-/*
- * Validate a whole disk.  Iterate over all slices on the disk and make sure
- * that none is in use by calling check_slice().
- */
-static int
-check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
-{
-       dm_descriptor_t *drive, *media, *slice;
-       int err = 0;
-       int i;
-       int ret;
-
-       /*
-        * Get the drive associated with this disk.  This should never fail,
-        * because we already have an alias handle open for the device.
-        */
-       if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
-           &err)) == NULL || *drive == NULL) {
-               if (err)
-                       libdiskmgt_error(err);
-               return (0);
-       }
-
-       if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
-           &err)) == NULL) {
-               dm_free_descriptors(drive);
-               if (err)
-                       libdiskmgt_error(err);
-               return (0);
-       }
-
-       dm_free_descriptors(drive);
-
-       /*
-        * It is possible that the user has specified a removable media drive,
-        * and the media is not present.
-        */
-       if (*media == NULL) {
-               dm_free_descriptors(media);
-               vdev_error(gettext("'%s' has no media in drive\n"), name);
-               return (-1);
-       }
-
-       if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
-           &err)) == NULL) {
-               dm_free_descriptors(media);
-               if (err)
-                       libdiskmgt_error(err);
-               return (0);
-       }
-
-       dm_free_descriptors(media);
-
-       ret = 0;
-
-       /*
-        * Iterate over all slices and report any errors.  We don't care about
-        * overlapping slices because we are using the whole disk.
-        */
-       for (i = 0; slice[i] != NULL; i++) {
-               char *name = dm_get_name(slice[i], &err);
-
-               if (check_slice(name, force, B_TRUE, isspare) != 0)
-                       ret = -1;
-
-               dm_free_name(name);
-       }
-
-       dm_free_descriptors(slice);
-       return (ret);
-}
-
-/*
- * Validate a device.
- */
-static int
-check_device(const char *path, boolean_t force, boolean_t isspare)
-{
-       dm_descriptor_t desc;
-       int err;
-       char *dev;
-
-       /*
-        * For whole disks, libdiskmgt does not include the leading dev path.
-        */
-       dev = strrchr(path, '/');
-       assert(dev != NULL);
-       dev++;
-       if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
-               err = check_disk(path, desc, force, isspare);
-               dm_free_descriptor(desc);
-               return (err);
-       }
-
-       return (check_slice(path, force, B_FALSE, isspare));
-}
-
  /*
   * Check that a file is valid.  All we can do in this case is check that it's
   * not in use by another pool, and not in use by swap.
@@ -283,19 +123,9 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
         char  *name;
         int fd;
         int ret = 0;
-       int err;
         pool_state_t state;
         boolean_t inuse;
  
-       if (dm_inuse_swap(file, &err)) {
-               if (err)
-                       libdiskmgt_error(err);
-               else
-                       vdev_error(gettext("%s is currently used by swap. "
-                           "Please see swap(1M).\n"), file);
-               return (-1);
-       }
-
         if ((fd = open(file, O_RDONLY)) < 0)
                 return (0);
  
@@ -348,6 +178,175 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
         return (ret);
  }
  
+/*
+ * Print a warning to stderr that the device in-use check itself could not
+ * be completed, including the strerror() text for 'err'.
+ */
+static void
+check_error(int err)
+{
+       const char *reason = strerror(err);
+
+       (void) fprintf(stderr,
+           gettext("warning: device in use checking failed: %s\n"), reason);
+}
+
+/*
+ * Check a single device or partition.  The path must be stat-able.  When
+ * built with libblkid the detected "TYPE" tag decides the outcome: no tag
+ * means the device is accepted, "zfs" defers to check_file(), and any
+ * other type is rejected unless 'force' is set.  Without libblkid the
+ * decision is left entirely to check_file().
+ *
+ * Returns 0 when the device may be used, -1 when stat64() fails or a
+ * foreign filesystem is found without 'force', otherwise whatever
+ * check_file() returns.
+ */
+static int
+check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
+{
+       struct stat64 sb;
+       int rc;
+#ifdef HAVE_LIBBLKID
+       char *fstype;
+#endif /* HAVE_LIBBLKID */
+
+       if (stat64(path, &sb) != 0) {
+               vdev_error(gettext("cannot stat %s: %s\n"),
+                          path, strerror(errno));
+               return (-1);
+       }
+
+#ifdef HAVE_LIBBLKID
+       /* No valid type was detected, so the device is safe to use. */
+       fstype = blkid_get_tag_value(cache, "TYPE", path);
+       if (fstype == NULL)
+               return (0);
+
+       if (strcmp(fstype, "zfs") != 0) {
+               /* A foreign filesystem can only be overridden by 'force'. */
+               rc = force ? 0 : -1;
+               if (rc != 0)
+                       vdev_error(gettext("%s contains a filesystem of "
+                                  "type '%s'\n"), path, fstype);
+       } else {
+               /*
+                * A device libblkid identifies as ZFS is vetted by
+                * check_file().  The one safe case is a spare device
+                * shared between multiple pools.
+                */
+               rc = check_file(path, force, isspare);
+       }
+
+       free(fstype);
+#else
+       rc = check_file(path, force, isspare);
+#endif /* HAVE_LIBBLKID */
+
+       return (rc);
+}
+
+/*
+ * Validate a disk.  A partition ('iswholedisk' false) is checked directly
+ * with check_slice().  For a whole disk the EFI label is read and every
+ * assigned partition is passed to check_slice(), stopping at the first
+ * failure.  Returns 0 when every check passes or is overridden by 'force',
+ * otherwise -1 or the failing check_slice() result.
+ */
+static int
+check_disk(const char *path, blkid_cache cache, int force,
+          boolean_t isspare, boolean_t iswholedisk)
+{
+       struct dk_gpt *vtoc;
+       char slice_path[MAXPATHLEN];
+       const char *sep;
+       size_t len;
+       int err = 0;
+       int fd, i;
+
+       /* This is not a whole disk, so only the given partition is checked. */
+       if (!iswholedisk)
+               return (check_slice(path, cache, force, isspare));
+
+       /*
+        * When the device is a whole disk try to read the EFI partition
+        * label.  If this is successful we can safely check all of the
+        * partitions.  However, when it fails it may simply be because
+        * the disk is partitioned via the MBR.  Since we currently cannot
+        * easily decode the MBR, return a failure and prompt the user to
+        * use the force option since we cannot check the partitions.
+        */
+       if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) {
+               check_error(errno);
+               return (-1);
+       }
+
+       if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) {
+               (void) close(fd);
+
+               if (force) {
+                       return (0);
+               } else {
+                       vdev_error(gettext("%s does not contain an EFI "
+                           "label but it may contain partition\n"
+                           "information in the MBR.\n"), path);
+                       return (-1);
+               }
+       }
+
+       /*
+        * The primary EFI partition label is damaged, however the secondary
+        * label at the end of the device is intact.  Rather than use this
+        * label we should play it safe and treat this as a non-EFI device.
+        */
+       if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
+               efi_free(vtoc);
+               (void) close(fd);
+
+               if (force) {
+                       /* Partitions will now be created using the backup */
+                       return (0);
+               } else {
+                       vdev_error(gettext("%s contains a corrupt primary "
+                           "EFI label.\n"), path);
+                       return (-1);
+               }
+       }
+
+       /*
+        * The partition name separator does not depend on the partition
+        * index, so pick it once: paths under UDISK_ROOT use "-part<N>",
+        * other paths append "<N>", preceded by "p" when the disk name
+        * itself ends in a digit.  The cast keeps isdigit() defined for
+        * bytes with the high bit set.
+        */
+       len = strlen(path);
+       if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
+               sep = "-part";
+       else if (len > 0 && isdigit((unsigned char)path[len - 1]))
+               sep = "p";
+       else
+               sep = "";
+
+       for (i = 0; i < vtoc->efi_nparts; i++) {
+               /* Skip table entries that do not describe a partition. */
+               if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
+                   uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
+                       continue;
+
+               (void) snprintf(slice_path, sizeof (slice_path),
+                   "%s%s%d", path, sep, i + 1);
+
+               err = check_slice(slice_path, cache, force, isspare);
+               if (err)
+                       break;
+       }
+
+       efi_free(vtoc);
+       (void) close(fd);
+
+       return (err);
+}
+
+/*
+ * Validate a device via check_disk().  When built with HAVE_LIBBLKID the
+ * libblkid cache is created and probed on first use and kept in a
+ * function-local static for later calls.  Returns -1 if the cache cannot
+ * be set up, otherwise the result of check_disk().
+ */
+static int
+check_device(const char *path, boolean_t force,
+            boolean_t isspare, boolean_t iswholedisk)
+{
+       static blkid_cache cache = NULL;
+
+#ifdef HAVE_LIBBLKID
+       /*
+        * There is no easy way to add a correct blkid_put_cache() call,
+        * memory will be reclaimed when the command exits.
+        */
+       if (cache == NULL) {
+               int err;
+
+               if ((err = blkid_get_cache(&cache, NULL)) != 0) {
+                       check_error(err);
+                       return (-1);
+               }
+
+               if ((err = blkid_probe_all(cache)) != 0) {
+                       /*
+                        * Forget the released cache so that a later call
+                        * sets it up again instead of reusing it.
+                        */
+                       blkid_put_cache(cache);
+                       cache = NULL;
+                       check_error(err);
+                       return (-1);
+               }
+       }
+#endif /* HAVE_LIBBLKID */
+
+       return (check_disk(path, cache, force, isspare, iswholedisk));
+}
  
  /*
   * By "whole disk" we mean an entire physical disk (something we can
@@ -358,15 +357,12 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
   * it isn't.
   */
  static boolean_t
-is_whole_disk(const char *arg)
+is_whole_disk(const char *path)
  {
         struct dk_gpt *label;
         int     fd;
-       char    path[MAXPATHLEN];
  
-       (void) snprintf(path, sizeof (path), "%s%s%s",
-           RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
-       if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
+       if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0)
                 return (B_FALSE);
         if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
                 (void) close(fd);
@@ -377,14 +373,52 @@ is_whole_disk(const char *arg)
         return (B_TRUE);
  }
  
+/*
+ * This may be a shorthand device path or it could be total gibberish.
+ * Check to see if it's a known device in /dev/, /dev/disk/by-id,
+ * /dev/disk/by-label, /dev/disk/by-path, /dev/disk/by-uuid, or
+ * /dev/disk/zpool/.  As part of this check, see if we've been given
+ * an entire disk (minus the slice number).
+ *
+ * 'path' must point to a buffer of at least MAXPATHLEN bytes.  Returns 0
+ * as soon as a candidate is a whole disk or can be stat'ed; 'path' then
+ * holds that candidate, 'wholedisk' the is_whole_disk() result, and
+ * 'statbuf' the stat64() result if that call succeeded.  Otherwise
+ * returns ENOENT with 'arg' copied to 'path', 'statbuf' zeroed and
+ * 'wholedisk' set to B_FALSE.
+ */
+static int
+is_shorthand_path(const char *arg, char *path,
+                  struct stat64 *statbuf, boolean_t *wholedisk)
+{
+       /*
+        * Pointers to string literals rather than a fixed-width char
+        * matrix, so every name (including the 8-character "by-label")
+        * keeps its terminating NUL.
+        */
+       static const char *const dirs[] =
+           {"by-id", "by-label", "by-path", "by-uuid", "zpool"};
+       const int ndirs = sizeof (dirs) / sizeof (dirs[0]);
+       int i, err;
+
+       /* /dev/<name> */
+       (void) snprintf(path, MAXPATHLEN, "%s/%s", DISK_ROOT, arg);
+       *wholedisk = is_whole_disk(path);
+       err = stat64(path, statbuf);
+       if (*wholedisk || err == 0)
+               return (0);
+
+       /* /dev/disk/<dirs>/<name> */
+       for (i = 0; i < ndirs; i++) {
+               (void) snprintf(path, MAXPATHLEN, "%s/%s/%s",
+                   UDISK_ROOT, dirs[i], arg);
+               *wholedisk = is_whole_disk(path);
+               err = stat64(path, statbuf);
+               if (*wholedisk || err == 0)
+                       return (0);
+       }
+
+       /*
+        * 'path' is a pointer here, so the buffer size has to be given
+        * explicitly; sizeof (path) would only be the size of the pointer.
+        */
+       (void) strlcpy(path, arg, MAXPATHLEN);
+       memset(statbuf, 0, sizeof (*statbuf));
+       *wholedisk = B_FALSE;
+
+       return (ENOENT);
+}
+
  /*
   * Create a leaf vdev.  Determine if this is a file or a device.  If it's a
   * device, fill in the device id to make a complete nvlist.  Valid forms for a
   * leaf vdev are:
   *
- *     /dev/dsk/xxx    Complete disk path
+ *     /dev/xxx        Complete disk path
   *     /xxx            Full path to file
- *     xxx             Shorthand for /dev/dsk/xxx
+ *     xxx             Shorthand for /dev/xxx or /dev/disk/yyy/xxx
   */
  static nvlist_t *
  make_leaf_vdev(const char *arg, uint64_t is_log)
@@ -394,6 +428,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
         nvlist_t *vdev = NULL;
         char *type = NULL;
         boolean_t wholedisk = B_FALSE;
+       int err;
  
         /*
          * Determine what type of vdev this is, and put the full path into
@@ -403,28 +438,31 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
         if (arg[0] == '/') {
                 /*
                  * Complete device or file path.  Exact type is determined by
-                * examining the file descriptor afterwards.
+                * examining the file descriptor afterwards.  Symbolic links
+                * are resolved to their real paths for the is_whole_disk()
+                * and S_ISBLK/S_ISREG type checks.  However, we are careful
+                * to store the given path as ZPOOL_CONFIG_PATH to ensure we
+                * can leverage udev's persistent device labels.
                  */
-               wholedisk = is_whole_disk(arg);
-               if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
+               if (realpath(arg, path) == NULL) {
                         (void) fprintf(stderr,
-                           gettext("cannot open '%s': %s\n"),
-                           arg, strerror(errno));
+                           gettext("cannot resolve path '%s'\n"), arg);
                         return (NULL);
                 }
  
-               (void) strlcpy(path, arg, sizeof (path));
-       } else {
-               /*
-                * This may be a short path for a device, or it could be total
-                * gibberish.  Check to see if it's a known device in
-                * /dev/dsk/.  As part of this check, see if we've been given a
-                * an entire disk (minus the slice number).
-                */
-               (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT,
-                   arg);
                 wholedisk = is_whole_disk(path);
                 if (!wholedisk && (stat64(path, &statbuf) != 0)) {
+                       (void) fprintf(stderr,
+                           gettext("cannot open '%s': %s\n"),
+                           path, strerror(errno));
+                       return (NULL);
+               }
+
+               /* After is_whole_disk() check restore original passed path */
+               strlcpy(path, arg, MAXPATHLEN);
+       } else {
+               err = is_shorthand_path(arg, path, &statbuf, &wholedisk);
+               if (err != 0) {
                         /*
                          * If we got ENOENT, then the user gave us
                          * gibberish, so try to direct them with a
@@ -432,7 +470,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
                          * regurgitate strerror() since it's the best we
                          * can do.
                          */
-                       if (errno == ENOENT) {
+                       if (err == ENOENT) {
                                 (void) fprintf(stderr,
                                     gettext("cannot open '%s': no such "
                                     "device in %s\n"), arg, DISK_ROOT);
@@ -475,6 +513,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
                 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
                     (uint64_t)wholedisk) == 0);
  
+#if defined(__sun__) || defined(__sun)
         /*
          * For a whole disk, defer getting its devid until after labeling it.
          */
@@ -486,7 +525,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
                 ddi_devid_t devid;
                 char *minor = NULL, *devid_str = NULL;
  
-               if ((fd = open(path, O_RDONLY)) < 0) {
+               if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) {
                         (void) fprintf(stderr, gettext("cannot open '%s': "
                             "%s\n"), path, strerror(errno));
                         nvlist_free(vdev);
@@ -509,6 +548,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
  
                 (void) close(fd);
         }
+#endif
  
         return (vdev);
  }
@@ -871,6 +911,39 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
         return (ret);
  }
  
+/*
+ * Overwrite the first 4096 bytes of 'path' with zeros and flush them with
+ * fdatasync().  Returns 0 on success, or -1 after printing a message to
+ * stderr if the open fails, the write fails, or the write is short.
+ */
+static int
+zero_label(char *path)
+{
+       enum { ZERO_SIZE = 4096 };
+       char buf[ZERO_SIZE];
+       ssize_t written;
+       int fd, saved_errno;
+
+       if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) {
+               (void) fprintf(stderr, gettext("cannot open '%s': %s\n"),
+                   path, strerror(errno));
+               return (-1);
+       }
+
+       memset(buf, 0, sizeof (buf));
+       written = write(fd, buf, sizeof (buf));
+       /* fdatasync() and close() may overwrite errno; keep write()'s. */
+       saved_errno = errno;
+       (void) fdatasync(fd);
+       (void) close(fd);
+
+       if (written == -1) {
+               (void) fprintf(stderr, gettext("cannot zero first %d bytes "
+                   "of '%s': %s\n"), ZERO_SIZE, path, strerror(saved_errno));
+               return (-1);
+       }
+
+       if (written != ZERO_SIZE) {
+               (void) fprintf(stderr, gettext("could only zero %d/%d bytes "
+                   "of '%s'\n"), (int)written, ZERO_SIZE, path);
+               return (-1);
+       }
+
+       return (0);
+}
+
  /*
   * Go through and find any whole disks in the vdev specification, labelling them
   * as appropriate.  When constructing the vdev spec, we were unable to open this
@@ -889,10 +962,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
         char *type, *path, *diskname;
         char buf[MAXPATHLEN];
         uint64_t wholedisk;
-       int fd;
         int ret;
-       ddi_devid_t devid;
-       char *minor = NULL, *devid_str = NULL;
  
         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
  
@@ -903,55 +973,66 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
                         return (0);
  
                 /*
-                * We have a disk device.  Get the path to the device
-                * and see if it's a whole disk by appending the backup
-                * slice and stat()ing the device.
+                * We have a disk device.  If this is a whole disk write
+                * out the efi partition table, otherwise write zeros to
+                * the first 4k of the partition.  This is to ensure that
+                * libblkid will not misidentify the partition due to a
+                * magic value left by the previous filesystem.
                  */
-               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-               if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
-                   &wholedisk) != 0 || !wholedisk)
-                       return (0);
+               verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
+               verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+                   &wholedisk));
+
+               if (!wholedisk) {
+                       ret = zero_label(path);
+                       return (ret);
+               }
+
+               if (realpath(path, buf) == NULL) {
+                       ret = errno;
+                       (void) fprintf(stderr,
+                           gettext("cannot resolve path '%s'\n"), path);
+                       return (ret);
+               }
  
-               diskname = strrchr(path, '/');
+               diskname = strrchr(buf, '/');
                 assert(diskname != NULL);
                 diskname++;
                 if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
                         return (-1);
  
                 /*
-                * Fill in the devid, now that we've labeled the disk.
+                * Now that we've labeled the disk and the partitions have
+                * been created.  We still need to wait for udev to create
+                * the symlinks to those partitions.  If we are accessing
+                * the devices via a udev disk path, /dev/disk, then wait
+                * for *-part# to be created.  Otherwise just use the normal
+                * syntax for devices in /dev.
                  */
-               (void) snprintf(buf, sizeof (buf), "%ss0", path);
-               if ((fd = open(buf, O_RDONLY)) < 0) {
+               if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
+                       (void) snprintf(buf, sizeof (buf),
+                           "%s%s%s", path, "-part", FIRST_SLICE);
+               else
+                       (void) snprintf(buf, sizeof (buf),
+                           "%s%s%s", path, isdigit(path[strlen(path)-1]) ?
+                           "p" : "", FIRST_SLICE);
+
+               if ((ret = zpool_label_disk_wait(buf, 1000)) != 0) {
                         (void) fprintf(stderr,
-                           gettext("cannot open '%s': %s\n"),
-                           buf, strerror(errno));
+                           gettext( "cannot resolve path '%s'\n"), buf);
                         return (-1);
                 }
  
-               if (devid_get(fd, &devid) == 0) {
-                       if (devid_get_minor_name(fd, &minor) == 0 &&
-                           (devid_str = devid_str_encode(devid, minor)) !=
-                           NULL) {
-                               verify(nvlist_add_string(nv,
-                                   ZPOOL_CONFIG_DEVID, devid_str) == 0);
-                       }
-                       if (devid_str != NULL)
-                               devid_str_free(devid_str);
-                       if (minor != NULL)
-                               devid_str_free(minor);
-                       devid_free(devid);
-               }
-
                 /*
-                * Update the path to refer to the 's0' slice.  The presence of
+                * Update the path to refer to FIRST_SLICE.  The presence of
                  * the 'whole_disk' field indicates to the CLI that we should
                  * chop off the slice number when displaying the device in
                  * future output.
                  */
                 verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
  
-               (void) close(fd);
+               /* Just in case this partition already existed. */
+               (void) zero_label(buf);
  
                 return (0);
         }
@@ -991,7 +1072,7 @@ is_spare(nvlist_t *config, const char *path)
         uint_t i, nspares;
         boolean_t inuse;
  
-       if ((fd = open(path, O_RDONLY)) < 0)
+       if ((fd = open(path, O_RDONLY|O_EXCL)) < 0)
                 return (B_FALSE);
  
         if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
@@ -1034,25 +1115,27 @@ check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
         nvlist_t **child;
         uint_t c, children;
         char *type, *path;
-       int ret;
+       int ret = 0;
         char buf[MAXPATHLEN];
-       uint64_t wholedisk;
+       uint64_t wholedisk = B_FALSE;
  
         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
  
         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
             &child, &children) != 0) {
  
-               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
+               verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
+               if (strcmp(type, VDEV_TYPE_DISK) == 0)
+                       verify(!nvlist_lookup_uint64(nv,
+                              ZPOOL_CONFIG_WHOLE_DISK, &wholedisk));
  
                 /*
                  * As a generic check, we look to see if this is a replace of a
                  * hot spare within the same pool.  If so, we allow it
-                * regardless of what libdiskmgt or zpool_in_use() says.
+                * regardless of what libblkid or zpool_in_use() says.
                  */
                 if (replacing) {
-                       if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
-                           &wholedisk) == 0 && wholedisk)
+                       if (wholedisk)
                                 (void) snprintf(buf, sizeof (buf), "%ss0",
                                     path);
                         else
@@ -1063,7 +1146,7 @@ check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
                 }
  
                 if (strcmp(type, VDEV_TYPE_DISK) == 0)
-                       ret = check_device(path, force, isspare);
+                       ret = check_device(path, force, isspare, wholedisk);
  
                 if (strcmp(type, VDEV_TYPE_FILE) == 0)
                         ret = check_file(path, force, isspare);
diff --git a/lib/libefi/include/sys/uuid.h b/lib/libefi/include/sys/uuid.h

index 9ce872e345f7ed5965bfff79c2ba0cbd6d1698b9..eab4622a6d9a4a99a6769d8e59584672aa09d8c5 100644 (file)
--- a/lib/libefi/include/sys/uuid.h
+++ b/lib/libefi/include/sys/uuid.h
@@ -74,12 +74,8 @@ struct uuid {
         uint8_t         node_addr[6];
  };
  
-#define        UUID_LEN        16
-
  #define        UUID_PRINTABLE_STRING_LENGTH 37
  
-typedef uchar_t                uuid_t[UUID_LEN];
-
  /*
   * Convert a uuid to/from little-endian format
   */
diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c

index e682b840a708bfb7467e05cf3fb538e208c68405..da71e3486c2270886a3621403cd9ddb17b4ba29b 100644 (file)
--- a/lib/libefi/rdwr_efi.c
+++ b/lib/libefi/rdwr_efi.c
@@ -29,6 +29,7 @@
  #include <strings.h>
  #include <unistd.h>
  #include <uuid/uuid.h>
+#include <zlib.h>
  #include <libintl.h>
  #include <sys/types.h>
  #include <sys/dkio.h>
@@ -38,7 +39,9 @@
  #include <sys/dktp/fdisk.h>
  #include <sys/efi_partition.h>
  #include <sys/byteorder.h>
-#include <sys/ddi.h>
+#if defined(__linux__)
+#include <linux/fs.h>
+#endif
  
  static struct uuid_to_ptag {
         struct uuid     uuid;
@@ -49,11 +52,11 @@ static struct uuid_to_ptag {
         { EFI_SWAP },
         { EFI_USR },
         { EFI_BACKUP },
-       { 0 },                  /* STAND is never used */
+       { EFI_UNUSED },         /* STAND is never used */
         { EFI_VAR },
         { EFI_HOME },
         { EFI_ALTSCTR },
-       { 0 },                  /* CACHE (cachefs) is never used */
+       { EFI_UNUSED },         /* CACHE (cachefs) is never used */
         { EFI_RESERVED },
         { EFI_SYSTEM },
         { EFI_LEGACY_MBR },
@@ -107,19 +110,142 @@ int efi_debug = 1;
  int efi_debug = 0;
  #endif
  
-extern unsigned int    efi_crc32(const unsigned char *, unsigned int);
-static int             efi_read(int, struct dk_gpt *);
+static int efi_read(int, struct dk_gpt *);
+
+/*
+ * Compute the 32-bit CRC of the 'size' bytes starting at 'buf'.  The
+ * pre- and post-conditioning (one's complement) is left to zlib's crc32().
+ */
+static uint32_t
+efi_crc32(const unsigned char *buf, unsigned int size)
+{
+       /* crc32(0, Z_NULL, 0) yields the initial value zlib expects. */
+       const uint32_t seed = crc32(0, Z_NULL, 0);
+       const uint32_t sum = crc32(seed, buf, size);
+
+       return (sum);
+}
  
  static int
  read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
  {
-       struct dk_minfo         disk_info;
+       int sector_size;
+       unsigned long long capacity_size;
+
+       /*
+        * Query the sector size and the total size of the device open on
+        * 'fd'.  '*lbsize' receives the sector size and '*capacity' the
+        * total size divided by the sector size.  Returns 0 on success or
+        * -1 if either ioctl fails, in which case neither output is written.
+        */
+        if (ioctl(fd, BLKSSZGET, &sector_size) < 0)
+                return (-1);
+
+       if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0)
+               return (-1);
+
+       *lbsize = (uint_t)sector_size;
+       *capacity = (diskaddr_t)(capacity_size / sector_size);
+
+       return (0);
+}
  
-       if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1)
-               return (errno);
-       *capacity = disk_info.dki_capacity;
-       *lbsize = disk_info.dki_lbsize;
+/*
+ * Fill in 'dki_info' for the block device open on 'fd'.
+ *
+ * On Linux the controller name, controller type, device name and partition
+ * number are derived from the device's path under /dev; on other platforms
+ * the DKIOCINFO ioctl is issued instead.
+ *
+ * Returns 0 on success, otherwise VT_EIO, VT_EINVAL or VT_ERROR depending
+ * on errno.  A device whose name matches none of the known prefixes leaves
+ * 'rval' at 0 and is therefore rejected with VT_EINVAL.
+ */
+static int
+efi_get_info(int fd, struct dk_cinfo *dki_info)
+{
+#if defined(__linux__)
+       char *path;
+       char *dev_path;
+       int rval = 0;
+
+       memset(dki_info, 0, sizeof(*dki_info));
+
+       path = calloc(PATH_MAX, 1);
+       if (path == NULL)
+               goto error;
+
+       /*
+        * The simplest way to get the partition number under linux is
+        * to parse it out of the /dev/<disk><partition> block device name.
+        * The kernel creates this using the partition number when it
+        * populates /dev/ so it may be trusted.  The tricky bit here is
+        * that the naming convention is based on the block device type.
+        * So we need to take this into account when parsing out the
+        * partition information.  Another issue is that the libefi API
+        * only provides the open fd and not the file path.  To handle
+        * this realpath(3) is used to resolve the block device name from
+        * /proc/self/fd/<fd>.  Aside from the partition number we collect
+        * some additional device info.
+        */
+       (void) snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd);
+       dev_path = realpath(path, NULL);
+       free(path);
+
+       if (dev_path == NULL)
+               goto error;
+
+       /*
+        * NOTE(review): the %[...] conversions below carry no field width;
+        * confirm dki_dname is large enough for any device name.
+        */
+       if ((strncmp(dev_path, "/dev/sd", 7) == 0)) {
+               strcpy(dki_info->dki_cname, "sd");
+               dki_info->dki_ctype = DKC_SCSI_CCS;
+               rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
+                             dki_info->dki_dname,
+                             &dki_info->dki_partition);
+       } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) {
+               strcpy(dki_info->dki_cname, "hd");
+               dki_info->dki_ctype = DKC_DIRECT;
+               rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
+                             dki_info->dki_dname,
+                             &dki_info->dki_partition);
+       } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) {
+               strcpy(dki_info->dki_cname, "pseudo");
+               dki_info->dki_ctype = DKC_MD;
+               rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+                             dki_info->dki_dname,
+                             &dki_info->dki_partition);
+       } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) {
+               strcpy(dki_info->dki_cname, "pseudo");
+               dki_info->dki_ctype = DKC_VBD;
+               rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9-]p%hu",
+                             dki_info->dki_dname,
+                             &dki_info->dki_partition);
+       } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) {
+               strcpy(dki_info->dki_cname, "pseudo");
+               dki_info->dki_ctype = DKC_PCMCIA_MEM;
+               rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+                             dki_info->dki_dname,
+                             &dki_info->dki_partition);
+       } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) {
+               strcpy(dki_info->dki_cname, "pseudo");
+               dki_info->dki_ctype = DKC_VBD;
+               rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9]p%hu",
+                             dki_info->dki_dname,
+                             &dki_info->dki_partition);
+       } else {
+               strcpy(dki_info->dki_dname, "unknown");
+               strcpy(dki_info->dki_cname, "unknown");
+               dki_info->dki_ctype = DKC_UNKNOWN;
+       }
+
+       switch (rval) {
+       case 0:
+               /* Nothing was parsed; release dev_path before failing. */
+               free(dev_path);
+               errno = EINVAL;
+               goto error;
+       case 1:
+               /* Only the device name matched, no partition suffix. */
+               dki_info->dki_partition = 0;
+               break;
+       }
+
+       free(dev_path);
+#else
+       if (ioctl(fd, DKIOCINFO, (caddr_t)dki_info) == -1)
+               goto error;
+#endif
         return (0);
+error:
+       if (efi_debug)
+               (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
+
+       /* Translate errno into the VT_* codes this function returns. */
+       switch (errno) {
+       case EIO:
+               return (VT_EIO);
+       case EINVAL:
+               return (VT_EINVAL);
+       default:
+               return (VT_ERROR);
+       }
  }
  
  /*
@@ -135,12 +261,13 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
  int
  efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
  {
-       diskaddr_t      capacity;
-       uint_t          lbsize;
+       diskaddr_t      capacity = 0;
+       uint_t          lbsize = 0;
         uint_t          nblocks;
         size_t          length;
         struct dk_gpt   *vptr;
         struct uuid     uuid;
+       struct dk_cinfo dki_info;
  
         if (read_disk_info(fd, &capacity, &lbsize) != 0) {
                 if (efi_debug)
@@ -148,6 +275,22 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
                             "couldn't read disk information\n");
                 return (-1);
         }
+#if defined(__linux__)
+       if (efi_get_info(fd, &dki_info) != 0) {
+               if (efi_debug)
+                       (void) fprintf(stderr,
+                           "couldn't read disk information\n");
+               return (-1);
+       }
+
+       if (dki_info.dki_partition != 0)
+               return (-1);
+
+       if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) ||
+           (dki_info.dki_ctype == DKC_VBD) ||
+           (dki_info.dki_ctype == DKC_UNKNOWN))
+               return (-1);
+#endif
  
         nblocks = NBLOCKS(nparts, lbsize);
         if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
@@ -243,14 +386,138 @@ efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
  {
         void *data = dk_ioc->dki_data;
         int error;
+#if defined(__linux__)
+       diskaddr_t capacity;
+       uint_t lbsize;
+
+       /*
+        * When the IO is not being performed in kernel as an ioctl we need
+        * to know the sector size so we can seek to the proper byte offset.
+        */
+       if (read_disk_info(fd, &capacity, &lbsize) == -1) {
+               if (efi_debug)
+                       fprintf(stderr,"unable to read disk info: %d",errno);
+
+               errno = EIO;
+               return -1;
+       }
+
+       switch (cmd) {
+       case DKIOCGETEFI:
+               if (lbsize == 0) {
+                       if (efi_debug)
+                               (void) fprintf(stderr, "DKIOCGETEFI assuming "
+                                              "LBA %d bytes\n", DEV_BSIZE);
+
+                       lbsize = DEV_BSIZE;
+               }
+
+               error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
+               if (error == -1) {
+                       if (efi_debug)
+                               (void) fprintf(stderr, "DKIOCGETEFI lseek "
+                                              "error: %d\n", errno);
+                       return error;
+               }
+
+               error = read(fd, data, dk_ioc->dki_length);
+               if (error == -1) {
+                       if (efi_debug)
+                               (void) fprintf(stderr, "DKIOCGETEFI read "
+                                              "error: %d\n", errno);
+                       return error;
+               }
  
+               if (error != dk_ioc->dki_length) {
+                       if (efi_debug)
+                               (void) fprintf(stderr, "DKIOCGETEFI short "
+                                              "read of %d bytes\n", error);
+                       errno = EIO;
+                       return -1;
+               }
+               error = 0;
+               break;
+
+       case DKIOCSETEFI:
+               if (lbsize == 0) {
+                       if (efi_debug)
+                               (void) fprintf(stderr, "DKIOCSETEFI unknown "
+                                              "LBA size\n");
+                       errno = EIO;
+                       return -1;
+               }
+
+               error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
+               if (error == -1) {
+                       if (efi_debug)
+                               (void) fprintf(stderr, "DKIOCSETEFI lseek "
+                                              "error: %d\n", errno);
+                       return error;
+               }
+
+               error = write(fd, data, dk_ioc->dki_length);
+               if (error == -1) {
+                       if (efi_debug)
+                               (void) fprintf(stderr, "DKIOCSETEFI write "
+                                              "error: %d\n", errno);
+                       return error;
+               }
+
+               if (error != dk_ioc->dki_length) {
+                       if (efi_debug)
+                               (void) fprintf(stderr, "DKIOCSETEFI short "
+                                              "write of %d bytes\n", error);
+                       errno = EIO;
+                       return -1;
+               }
+
+               /* Sync the new EFI table to disk */
+               error = fsync(fd);
+               if (error == -1)
+                       return error;
+
+               /* Ensure any local disk cache is also flushed */
+               if (ioctl(fd, BLKFLSBUF, 0) == -1)
+                       return error;
+
+               error = 0;
+               break;
+
+       default:
+               if (efi_debug)
+                       (void) fprintf(stderr, "unsupported ioctl()\n");
+
+               errno = EIO;
+               return -1;
+       }
+#else
         dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data;
         error = ioctl(fd, cmd, (void *)dk_ioc);
         dk_ioc->dki_data = data;
-
+#endif
         return (error);
  }
  
+#if defined(__linux__)
+/*
+ * Ask the kernel to re-read the partition table of the device open on 'fd'.
+ *
+ * Returns 0 once BLKRRPART succeeds.  If it still fails on the fifth
+ * attempt a message is printed to stderr and -1 is returned.
+ */
+static int
+efi_rescan(int fd)
+{
+       int retry = 5;
+       int error;
+
+       /* Notify the kernel a device's partition table has been updated */
+       while ((error = ioctl(fd, BLKRRPART)) != 0) {
+               if (--retry == 0) {
+                       (void) fprintf(stderr, "the kernel failed to rescan "
+                                      "the partition table: %d\n", errno);
+                       return (-1);
+               }
+
+               /*
+                * Pause briefly before retrying so a transient failure
+                * (e.g. the device still being busy) has time to clear;
+                * without this all attempts are issued back-to-back.
+                */
+               (void) usleep(50000);
+       }
+
+       return (0);
+}
+#endif
+
  static int
  check_label(int fd, dk_efi_t *dk_ioc)
  {
@@ -305,6 +572,8 @@ efi_read(int fd, struct dk_gpt *vtoc)
         int                     rval = 0;
         int                     md_flag = 0;
         int                     vdc_flag = 0;
+       diskaddr_t              capacity = 0;
+       uint_t                  lbsize = 0;
         struct dk_minfo         disk_info;
         dk_efi_t                dk_ioc;
         efi_gpt_t               *efi;
@@ -316,19 +585,9 @@ efi_read(int fd, struct dk_gpt *vtoc)
         /*
          * get the partition number for this file descriptor.
          */
-       if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
-               if (efi_debug) {
-                       (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
-               }
-               switch (errno) {
-               case EIO:
-                       return (VT_EIO);
-               case EINVAL:
-                       return (VT_EINVAL);
-               default:
-                       return (VT_ERROR);
-               }
-       }
+       if ((rval = efi_get_info(fd, &dki_info)) != 0)
+               return rval;
+
         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
                 md_flag++;
@@ -342,14 +601,18 @@ efi_read(int fd, struct dk_gpt *vtoc)
         }
  
         /* get the LBA size */
-       if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) {
+       if (read_disk_info(fd, &capacity, &lbsize) == -1) {
                 if (efi_debug) {
                         (void) fprintf(stderr,
-                           "assuming LBA 512 bytes %d\n",
-                           errno);
+                                      "unable to read disk info: %d",
+                                      errno);
                 }
-               disk_info.dki_lbsize = DEV_BSIZE;
+               return (VT_EINVAL);
         }
+
+       disk_info.dki_lbsize = lbsize;
+       disk_info.dki_capacity = capacity;
+
         if (disk_info.dki_lbsize == 0) {
                 if (efi_debug) {
                         (void) fprintf(stderr,
@@ -374,9 +637,11 @@ efi_read(int fd, struct dk_gpt *vtoc)
                 }
         }
  
-       if ((dk_ioc.dki_data = calloc(label_len, 1)) == NULL)
+       if (posix_memalign((void **)&dk_ioc.dki_data,
+                          disk_info.dki_lbsize, label_len))
                 return (VT_ERROR);
  
+       memset(dk_ioc.dki_data, 0, label_len);
         dk_ioc.dki_length = disk_info.dki_lbsize;
         user_length = vtoc->efi_nparts;
         efi = dk_ioc.dki_data;
@@ -572,12 +837,14 @@ write_pmbr(int fd, struct dk_gpt *vtoc)
         int             len;
  
         len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize;
-       buf = calloc(len, 1);
+       if (posix_memalign((void **)&buf, len, len))
+               return (VT_ERROR);
  
         /*
          * Preserve any boot code and disk signature if the first block is
          * already an MBR.
          */
+       memset(buf, 0, len);
         dk_ioc.dki_lba = 0;
         dk_ioc.dki_length = len;
         /* LINTED -- always longlong aligned */
@@ -663,10 +930,9 @@ check_input(struct dk_gpt *vtoc)
                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
                     (vtoc->efi_parts[i].p_size != 0)) {
                         if (efi_debug) {
-                               (void) fprintf(stderr,
-"partition %d is \"unassigned\" but has a size of %llu",
-                                   i,
-                                   vtoc->efi_parts[i].p_size);
+                               (void) fprintf(stderr, "partition %d is "
+                                   "\"unassigned\" but has a size of %llu",
+                                   i, vtoc->efi_parts[i].p_size);
                         }
                         return (VT_EINVAL);
                 }
@@ -679,9 +945,9 @@ check_input(struct dk_gpt *vtoc)
                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
                         if (resv_part != -1) {
                                 if (efi_debug) {
-                                       (void) fprintf(stderr,
-"found duplicate reserved partition at %d\n",
-                                           i);
+                                       (void) fprintf(stderr, "found "
+                                           "duplicate reserved partition "
+                                           "at %d\n", i);
                                 }
                                 return (VT_EINVAL);
                         }
@@ -732,8 +998,8 @@ check_input(struct dk_gpt *vtoc)
                                     (istart <= endsect)) {
                                         if (efi_debug) {
                                                 (void) fprintf(stderr,
-"Partition %d overlaps partition %d.",
-                                                   i, j);
+                                                   "Partition %d overlaps "
+                                                   "partition %d.", i, j);
                                         }
                                         return (VT_EINVAL);
                                 }
@@ -839,22 +1105,13 @@ efi_write(int fd, struct dk_gpt *vtoc)
         efi_gpe_t               *efi_parts;
         int                     i, j;
         struct dk_cinfo         dki_info;
+       int                     rval;
         int                     md_flag = 0;
         int                     nblocks;
         diskaddr_t              lba_backup_gpt_hdr;
  
-       if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
-               if (efi_debug)
-                       (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
-               switch (errno) {
-               case EIO:
-                       return (VT_EIO);
-               case EINVAL:
-                       return (VT_EINVAL);
-               default:
-                       return (VT_ERROR);
-               }
-       }
+       if ((rval = efi_get_info(fd, &dki_info)) != 0)
+               return rval;
  
         /* check if we are dealing wih a metadevice */
         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
@@ -892,9 +1149,11 @@ efi_write(int fd, struct dk_gpt *vtoc)
          * for backup GPT header.
          */
         lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks;
-       if ((dk_ioc.dki_data = calloc(dk_ioc.dki_length, 1)) == NULL)
+       if (posix_memalign((void **)&dk_ioc.dki_data,
+                          vtoc->efi_lbasize, dk_ioc.dki_length))
                 return (VT_ERROR);
  
+       memset(dk_ioc.dki_data, 0, dk_ioc.dki_length);
         efi = dk_ioc.dki_data;
  
         /* stuff user's input into EFI struct */
@@ -941,6 +1200,10 @@ efi_write(int fd, struct dk_gpt *vtoc)
                         return (VT_EINVAL);
                 }
  
+               /* Zeros should be written for empty partitions */
+               if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
+                       continue;
+
                 efi_parts[i].efi_gpe_StartingLBA =
                     LE_64(vtoc->efi_parts[i].p_start);
                 efi_parts[i].efi_gpe_EndingLBA =
@@ -1032,6 +1295,13 @@ efi_write(int fd, struct dk_gpt *vtoc)
         /* write the PMBR */
         (void) write_pmbr(fd, vtoc);
         free(dk_ioc.dki_data);
+
+#if defined(__linux__)
+       rval = efi_rescan(fd);
+       if (rval)
+               return (VT_ERROR);
+#endif
+
         return (0);
  }
  
@@ -1049,6 +1319,7 @@ efi_free(struct dk_gpt *ptr)
  int
  efi_type(int fd)
  {
+#if 0
         struct vtoc vtoc;
         struct extvtoc extvtoc;
  
@@ -1062,6 +1333,9 @@ efi_type(int fd)
                 }
         }
         return (0);
+#else
+       return (ENOSYS);
+#endif
  }
  
  void
@@ -1175,7 +1449,7 @@ efi_auto_sense(int fd, struct dk_gpt **vtoc)
                 return (-1);
         }
  
-       for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) {
+       for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) {
                 (*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag;
                 (*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag;
                 (*vtoc)->efi_parts[i].p_start = 0;
diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h

index 197e2eefc761cfc402cf4cc14d4b554522d09d2d..dcbd283acf076813c1ca156ae54194f003e71547 100644 (file)
--- a/lib/libzfs/include/libzfs.h
+++ b/lib/libzfs/include/libzfs.h
@@ -48,6 +48,26 @@ extern "C" {
  #define        ZFS_MAXPROPLEN          MAXPATHLEN
  #define        ZPOOL_MAXPROPLEN        MAXPATHLEN
  
+/*
+ * Default device paths
+ */
+
+#if defined(__sun__) || defined(__sun)
+#define        DISK_ROOT       "/dev/dsk"
+#define        RDISK_ROOT      "/dev/rdsk"
+#define        UDISK_ROOT      RDISK_ROOT
+#define        FIRST_SLICE     "s0"
+#define        BACKUP_SLICE    "s2"
+#endif
+
+#ifdef __linux__
+#define        DISK_ROOT       "/dev"
+#define        RDISK_ROOT      DISK_ROOT
+#define        UDISK_ROOT      "/dev/disk"
+#define        FIRST_SLICE     "1"
+#define        BACKUP_SLICE    ""
+#endif
+
  /*
   * libzfs errors
   */
@@ -248,6 +268,7 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
      boolean_t *, boolean_t *);
  extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
      boolean_t *, boolean_t *, boolean_t *);
+extern int zpool_label_disk_wait(char *, int);
  extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
  
  /*
@@ -661,9 +682,6 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
  extern int zpool_read_label(int, nvlist_t **);
  extern int zpool_clear_label(int);
  
-/* is this zvol valid for use as a dump device? */
-extern int zvol_check_dump_config(char *);
-
  /*
   * Management interfaces for SMB ACL files
   */
diff --git a/lib/libzfs/include/libzfs_impl.h b/lib/libzfs/include/libzfs_impl.h

index 3d001df076b1ea7f99716ed01f337e30b0406de9..2389b7823aaa2148a70ac302be6238de663507e7 100644 (file)
--- a/lib/libzfs/include/libzfs_impl.h
+++ b/lib/libzfs/include/libzfs_impl.h
@@ -191,6 +191,8 @@ zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
  
  int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
  
+int zvol_create_link(libzfs_handle_t *, const char *);
+int zvol_remove_link(libzfs_handle_t *, const char *);
  boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);
  
  int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c

index 0bcfc0423b6b5e6d4c00addecbfa52f898377fd7..6f067d563631bfa7e6ef0a020789e9a19cb093d4 100644 (file)
--- a/lib/libzfs/libzfs_changelist.c
+++ b/lib/libzfs/libzfs_changelist.c
@@ -93,6 +93,7 @@ struct prop_changelist {
  int
  changelist_prefix(prop_changelist_t *clp)
  {
+#ifdef HAVE_ZPL
         prop_changenode_t *cn;
         int ret = 0;
  
@@ -141,6 +142,9 @@ changelist_prefix(prop_changelist_t *clp)
                 (void) changelist_postfix(clp);
  
         return (ret);
+#else
+       return 0;
+#endif  /* HAVE_ZPL */
  }
  
  /*
@@ -155,6 +159,7 @@ changelist_prefix(prop_changelist_t *clp)
  int
  changelist_postfix(prop_changelist_t *clp)
  {
+#ifdef HAVE_ZPL
         prop_changenode_t *cn;
         char shareopts[ZFS_MAXPROPLEN];
         int errors = 0;
@@ -255,6 +260,9 @@ changelist_postfix(prop_changelist_t *clp)
         }
  
         return (errors ? -1 : 0);
+#else
+       return 0;
+#endif  /* HAVE_ZPL */
  }
  
  /*
@@ -317,6 +325,7 @@ changelist_rename(prop_changelist_t *clp, const char *src, const char *dst)
  int
  changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
  {
+#ifdef HAVE_ZPL
         prop_changenode_t *cn;
         int ret = 0;
  
@@ -331,6 +340,9 @@ changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
         }
  
         return (ret);
+#else
+       return 0;
+#endif
  }
  
  /*
diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c

index baf289b64e516d3226395b9a65c901573c791b59..d876e5d1fc425b99cf77bd7af4688edbf944c931 100644 (file)
--- a/lib/libzfs/libzfs_dataset.c
+++ b/lib/libzfs/libzfs_dataset.c
@@ -57,6 +57,7 @@
  #include "libzfs_impl.h"
  #include "zfs_deleg.h"
  
+static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
  static int userquota_propname_decode(const char *propname, boolean_t zoned,
      zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp);
  
@@ -994,6 +995,7 @@ badlabel:
  
                         /*FALLTHRU*/
  
+#ifdef HAVE_ZPL
                 case ZFS_PROP_SHARESMB:
                 case ZFS_PROP_SHARENFS:
                         /*
@@ -1104,6 +1106,7 @@ badlabel:
                         }
  
                         break;
+#endif /* HAVE_ZPL */
                 case ZFS_PROP_UTF8ONLY:
                         chosen_utf = (int)intval;
                         break;
@@ -2742,6 +2745,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
                         goto ancestorerr;
                 }
  
+#ifdef HAVE_ZPL
                 if (zfs_mount(h, NULL, 0) != 0) {
                         opname = dgettext(TEXT_DOMAIN, "mount");
                         goto ancestorerr;
@@ -2751,6 +2755,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
                         opname = dgettext(TEXT_DOMAIN, "share");
                         goto ancestorerr;
                 }
+#endif /* HAVE_ZPL */
  
                 zfs_close(h);
         }
@@ -2887,6 +2892,18 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
         /* create the dataset */
         ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
  
+       if (ret == 0 && type == ZFS_TYPE_VOLUME) {
+               ret = zvol_create_link(hdl, path);
+               if (ret) {
+                       (void) zfs_standard_error(hdl, errno,
+                           dgettext(TEXT_DOMAIN,
+                           "Volume successfully created, but device links "
+                           "were not created"));
+                       zcmd_free_nvlists(&zc);
+                       return (-1);
+               }
+       }
+
         zcmd_free_nvlists(&zc);
  
         /* check for failure */
@@ -2949,6 +2966,9 @@ zfs_destroy(zfs_handle_t *zhp, boolean_t defer)
         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
  
         if (ZFS_IS_VOLUME(zhp)) {
+               if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
+                       return (-1);
+
                 zc.zc_objset_type = DMU_OST_ZVOL;
         } else {
                 zc.zc_objset_type = DMU_OST_ZFS;
@@ -2991,9 +3011,17 @@ zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
                 zfs_close(szhp);
         }
  
+       if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+               (void) zvol_remove_link(zhp->zfs_hdl, name);
+               /*
+                * NB: this is simply a best-effort.  We don't want to
+                * return an error, because then we wouldn't visit all
+                * the volumes.
+                */
+       }
+
         dd->closezhp = B_TRUE;
-       if (!dd->gotone)
-               rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg);
+       rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, arg);
         if (closezhp)
                 zfs_close(zhp);
         return (rv);
@@ -3128,11 +3156,70 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
                         return (zfs_standard_error(zhp->zfs_hdl, errno,
                             errbuf));
                 }
+       } else if (ZFS_IS_VOLUME(zhp)) {
+               ret = zvol_create_link(zhp->zfs_hdl, target);
         }
  
         return (ret);
  }
  
+typedef struct promote_data {  /* state for the zfs_promote() snapshot walks */
+       char cb_mountpoint[MAXPATHLEN]; /* mountpoint, set in zfs_promote() */
+       const char *cb_target;  /* dataset being promoted; name prefix */
+       const char *cb_errbuf;  /* message handed to zfs_error() */
+       uint64_t cb_pivot_txg;  /* snapshots with a later createtxg are skipped */
+} promote_data_t;
+
+static int
+promote_snap_cb(zfs_handle_t *zhp, void *data)  /* data: promote_data_t */
+{
+       promote_data_t *pd = data;
+       zfs_handle_t *szhp;
+       char snapname[MAXPATHLEN];
+       int rv = 0;
+
+       /* Pre-promote pass: skip (and close) snapshots newer than the pivot */
+       if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) {
+               zfs_close(zhp);
+               return (0);
+       }
+
+       /* Remove the device link if it's a zvol; failure is ignored. */
+       if (ZFS_IS_VOLUME(zhp))
+               (void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
+
+       /* A handle for <cb_target>@<snap> means that name is already taken */
+       (void) strlcpy(snapname, pd->cb_target, sizeof (snapname));
+       (void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname));
+       szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
+       if (szhp != NULL) {
+               zfs_close(szhp);
+               zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+                   "snapshot name '%s' from origin \n"
+                   "conflicts with '%s' from target"),
+                   zhp->zfs_name, snapname);
+               rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf);
+       }
+       zfs_close(zhp);         /* zhp is closed on every path */
+       return (rv);            /* 0, or the zfs_error() result on conflict */
+}
+
+static int
+promote_snap_done_cb(zfs_handle_t *zhp, void *data)    /* promote_data_t */
+{
+       promote_data_t *pd = data;
+
+       /* Only snapshots at or before the pivot get a link created again */
+       if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
+               /* Create the device link if it's a zvol; errors ignored. */
+               if (ZFS_IS_VOLUME(zhp))
+                       (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+       }
+
+       zfs_close(zhp);         /* zhp is closed here */
+       return (0);             /* always 0 */
+}
+
  /*
   * Promotes the given clone fs to be the clone parent.
   */
@@ -3142,7 +3229,10 @@ zfs_promote(zfs_handle_t *zhp)
         libzfs_handle_t *hdl = zhp->zfs_hdl;
         zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
         char parent[MAXPATHLEN];
+       char *cp;
         int ret;
+       zfs_handle_t *pzhp;
+       promote_data_t pd;
         char errbuf[1024];
  
         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -3160,7 +3250,29 @@ zfs_promote(zfs_handle_t *zhp)
                     "not a cloned filesystem"));
                 return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
         }
+       cp = strchr(parent, '@');
+       *cp = '\0';
+
+       /* Walk the snapshots we will be moving */
+       pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
+       if (pzhp == NULL)
+               return (-1);
+       pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
+       zfs_close(pzhp);
+       pd.cb_target = zhp->zfs_name;
+       pd.cb_errbuf = errbuf;
+       pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET);
+       if (pzhp == NULL)
+               return (-1);
+       (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
+           sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
+       ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
+       if (ret != 0) {
+               zfs_close(pzhp);
+               return (-1);
+       }
  
+       /* issue the ioctl */
         (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin,
             sizeof (zc.zc_value));
         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
@@ -3169,9 +3281,16 @@ zfs_promote(zfs_handle_t *zhp)
         if (ret != 0) {
                 int save_errno = errno;
  
+               (void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
+               zfs_close(pzhp);
+
                 switch (save_errno) {
                 case EEXIST:
-                       /* There is a conflicting snapshot name. */
+                       /*
+                        * There is a conflicting snapshot name.  We
+                        * should have caught this above, but they could
+                        * have renamed something in the mean time.
+                        */
                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
                             "conflicting snapshot '%s' from parent '%s'"),
                             zc.zc_string, parent);
@@ -3180,7 +3299,44 @@ zfs_promote(zfs_handle_t *zhp)
                 default:
                         return (zfs_standard_error(hdl, save_errno, errbuf));
                 }
+       } else {
+               (void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
         }
+
+       zfs_close(pzhp);
+       return (ret);
+}
+
+struct createdata {            /* argument for zfs_create_link_cb() */
+       const char *cd_snapname;        /* text appended after '@' */
+       int cd_ifexists;        /* passed on to zvol_create_link_common() */
+};
+
+static int
+zfs_create_link_cb(zfs_handle_t *zhp, void *arg)       /* arg: createdata */
+{
+       struct createdata *cd = arg;
+       int ret;                /* result of the child-filesystem walk */
+
+       if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+               char name[MAXPATHLEN];  /* <volume>@<cd_snapname> */
+
+               (void) strlcpy(name, zhp->zfs_name, sizeof (name));
+               (void) strlcat(name, "@", sizeof (name));
+               (void) strlcat(name, cd->cd_snapname, sizeof (name));
+               (void) zvol_create_link_common(zhp->zfs_hdl, name,
+                   cd->cd_ifexists);
+               /*
+                * NB: this is simply a best-effort.  We don't want to
+                * return an error, because then we wouldn't visit all
+                * the volumes.
+                */
+       }
+
+       ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
+
+       zfs_close(zhp);         /* closed only after the recursion */
+
         return (ret);
  }
  
@@ -3244,12 +3400,32 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
          * if it was recursive, the one that actually failed will be in
          * zc.zc_name.
          */
-       if (ret != 0) {
+       if (ret != 0)
                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
                     "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
-               (void) zfs_standard_error(hdl, errno, errbuf);
+
+       if (ret == 0 && recursive) {
+               struct createdata cd;
+
+               cd.cd_snapname = delim + 1;
+               cd.cd_ifexists = B_FALSE;
+               (void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
+       }
+       if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
+               ret = zvol_create_link(zhp->zfs_hdl, path);
+               if (ret != 0) {
+                       (void) zfs_standard_error(hdl, errno,
+                           dgettext(TEXT_DOMAIN,
+                           "Volume successfully snapshotted, but device links "
+                           "were not created"));
+                       zfs_close(zhp);
+                       return (-1);
+               }
         }
  
+       if (ret != 0)
+               (void) zfs_standard_error(hdl, errno, errbuf);
+
         zfs_close(zhp);
  
         return (ret);
@@ -3351,6 +3527,8 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
          */
  
         if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
+               if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
+                       return (-1);
                 if (zfs_which_resv_prop(zhp, &resv_prop) < 0)
                         return (-1);
                 old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
@@ -3388,6 +3566,10 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
          */
         if ((zhp->zfs_type == ZFS_TYPE_VOLUME) &&
             (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) {
+               if ((err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name))) {
+                       zfs_close(zhp);
+                       return (err);
+               }
                 if (restore_resv) {
                         new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
                         if (old_volsize != new_volsize)
@@ -3536,6 +3718,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
         }
  
         if (recursive) {
+               struct destroydata dd;
  
                 parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
                 if (parentname == NULL) {
@@ -3550,6 +3733,15 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
                         goto error;
                 }
  
+               dd.snapname = delim + 1;
+               dd.gotone = B_FALSE;
+               dd.closezhp = B_TRUE;
+
+               /* We remove any zvol links prior to renaming them */
+               ret = zfs_iter_filesystems(zhrp, zfs_check_snap_cb, &dd);
+               if (ret) {
+                       goto error;
+               }
         } else {
                 if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0)) == NULL)
                         return (-1);
@@ -3598,10 +3790,27 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
                  * On failure, we still want to remount any filesystems that
                  * were previously mounted, so we don't alter the system state.
                  */
-               if (!recursive)
+               if (recursive) {
+                       struct createdata cd;
+
+                       /* only create links for datasets that had existed */
+                       cd.cd_snapname = delim + 1;
+                       cd.cd_ifexists = B_TRUE;
+                       (void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+                           &cd);
+               } else {
                         (void) changelist_postfix(cl);
+               }
         } else {
-               if (!recursive) {
+               if (recursive) {
+                       struct createdata cd;
+
+                       /* only create links for datasets that had existed */
+                       cd.cd_snapname = strchr(target, '@') + 1;
+                       cd.cd_ifexists = B_TRUE;
+                       ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
+                           &cd);
+               } else {
                         changelist_rename(cl, zfs_get_name(zhp), target);
                         ret = changelist_postfix(cl);
                 }
@@ -3620,19 +3829,103 @@ error:
         return (ret);
  }
  
-nvlist_t *
-zfs_get_user_props(zfs_handle_t *zhp)
+/*
+ * Create the minor node for zvol 'dataset' and wait briefly for udev to
+ * create the /dev link.  A dataset unknown to the kernel is an error here.
+ */
+int
+zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
  {
-       return (zhp->zfs_user_props);
+       return (zvol_create_link_common(hdl, dataset, B_FALSE));
+}
+
+static int     /* 0 unless the ioctl fails with an errno not tolerated below */
+zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
+{
+       zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+       char path[MAXPATHLEN];
+       int error;
+
+       (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+       /*
+        * Issue ZFS_IOC_CREATE_MINOR for 'dataset' and classify any errno.
+        */
+       if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
+               switch (errno) {
+               case EEXIST:
+                       /*
+                        * Silently ignore the case where the link already
+                        * exists.  This allows 'zfs volinit' to be run multiple
+                        * times without errors.
+                        */
+                       return (0);
+
+               case ENOENT:
+                       /*
+                        * Dataset does not exist in the kernel.  If we
+                        * don't care (see zfs_rename), then ignore the
+                        * error quietly.
+                        */
+                       if (ifexists) {
+                               return (0);
+                       }
+
+                       /* FALLTHROUGH */
+
+               default:
+                       return (zfs_standard_error_fmt(hdl, errno,
+                           dgettext(TEXT_DOMAIN, "cannot create device links "
+                           "for '%s'"), dataset));
+               }
+       }
+
+       /*
+        * Wait for udev to create the device (10000 is presumably ms).
+        */
+       (void) snprintf(path, sizeof (path), "%s/%s", ZVOL_DIR, dataset);
+       error = zpool_label_disk_wait(path, 10000);
+       if (error)
+               (void) printf(gettext("%s may not be immediately "
+                   "available\n"), path);
+
+       return (0);     /* a failed wait is only reported, never returned */
+}
+
+/*
+ * Remove a minor node for the given zvol and the associated /dev links.
+ */
+int            /* 0 on success or ENXIO, else the error-report result */
+zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
+{
+       zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
+
+       (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
+
+       if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
+               switch (errno) {
+               case ENXIO:
+                       /*
+                        * Silently ignore the case where the link no longer
+                        * exists, so that 'zfs volfini' can be run multiple
+                        * times without errors.
+                        */
+                       return (0);
+
+               default:
+                       return (zfs_standard_error_fmt(hdl, errno,
+                           dgettext(TEXT_DOMAIN, "cannot remove device "
+                           "links for '%s'"), dataset));
+               }
+       }
+
+       return (0);
  }
  
  nvlist_t *
-zfs_get_recvd_props(zfs_handle_t *zhp)
+zfs_get_user_props(zfs_handle_t *zhp)
  {
-       if (zhp->zfs_recvd_props == NULL)
-               if (get_recvd_props_ioctl(zhp) != 0)
-                       return (NULL);
-       return (zhp->zfs_recvd_props);
+       return (zhp->zfs_user_props);   /* the handle's list, not a copy */
  }
  
  /*
@@ -3744,6 +4037,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received)
         return (0);
  }
  
+#ifdef HAVE_ZPL
  int
  zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
      char *resource, void *export, void *sharetab,
@@ -3763,6 +4057,7 @@ zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
         error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc);
         return (error);
  }
+#endif /* HAVE_ZPL */
  
  void
  zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)
diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c

index 386ab002f20b30a7178b6100af68204a3c754c70..ee00648920ae606c554357c3f1a96a7528b9a46b 100644 (file)
--- a/lib/libzfs/libzfs_import.c
+++ b/lib/libzfs/libzfs_import.c
@@ -52,9 +52,11 @@
  #include <sys/vtoc.h>
  #include <sys/dktp/fdisk.h>
  #include <sys/efi_partition.h>
-#include <thread_pool.h>
  
  #include <sys/vdev_impl.h>
+#ifdef HAVE_LIBBLKID
+#include <blkid/blkid.h>
+#endif
  
  #include "libzfs.h"
  #include "libzfs_impl.h"
@@ -904,211 +906,76 @@ zpool_read_label(int fd, nvlist_t **config)
         return (0);
  }
  
-typedef struct rdsk_node {
-       char *rn_name;
-       int rn_dfd;
-       libzfs_handle_t *rn_hdl;
-       nvlist_t *rn_config;
-       avl_tree_t *rn_avl;
-       avl_node_t rn_node;
-       boolean_t rn_nozpool;
-} rdsk_node_t;
-
+#ifdef HAVE_LIBBLKID
+/*
+ * Use libblkid to find zfs devices and add their configs to 'pools'.
+ */
  static int
-slice_cache_compare(const void *arg1, const void *arg2)
-{
-       const char  *nm1 = ((rdsk_node_t *)arg1)->rn_name;
-       const char  *nm2 = ((rdsk_node_t *)arg2)->rn_name;
-       char *nm1slice, *nm2slice;
-       int rv;
-
-       /*
-        * slices zero and two are the most likely to provide results,
-        * so put those first
-        */
-       nm1slice = strstr(nm1, "s0");
-       nm2slice = strstr(nm2, "s0");
-       if (nm1slice && !nm2slice) {
-               return (-1);
-       }
-       if (!nm1slice && nm2slice) {
-               return (1);
-       }
-       nm1slice = strstr(nm1, "s2");
-       nm2slice = strstr(nm2, "s2");
-       if (nm1slice && !nm2slice) {
-               return (-1);
-       }
-       if (!nm1slice && nm2slice) {
-               return (1);
-       }
-
-       rv = strcmp(nm1, nm2);
-       if (rv == 0)
-               return (0);
-       return (rv > 0 ? 1 : -1);
-}
-
-static void
-check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
-    diskaddr_t size, uint_t blksz)
-{
-       rdsk_node_t tmpnode;
-       rdsk_node_t *node;
-       char sname[MAXNAMELEN];
-
-       tmpnode.rn_name = &sname[0];
-       (void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
-           diskname, partno);
-       /*
-        * protect against division by zero for disk labels that
-        * contain a bogus sector size
-        */
-       if (blksz == 0)
-               blksz = DEV_BSIZE;
-       /* too small to contain a zpool? */
-       if ((size < (SPA_MINDEVSIZE / blksz)) &&
-           (node = avl_find(r, &tmpnode, NULL)))
-               node->rn_nozpool = B_TRUE;
-}
-
-static void
-nozpool_all_slices(avl_tree_t *r, const char *sname)
-{
-       char diskname[MAXNAMELEN];
-       char *ptr;
-       int i;
-
-       (void) strncpy(diskname, sname, MAXNAMELEN);
-       if (((ptr = strrchr(diskname, 's')) == NULL) &&
-           ((ptr = strrchr(diskname, 'p')) == NULL))
-               return;
-       ptr[0] = 's';
-       ptr[1] = '\0';
-       for (i = 0; i < NDKMAP; i++)
-               check_one_slice(r, diskname, i, 0, 1);
-       ptr[0] = 'p';
-       for (i = 0; i <= FD_NUMPART; i++)
-               check_one_slice(r, diskname, i, 0, 1);
-}
-
-static void
-check_slices(avl_tree_t *r, int fd, const char *sname)
+zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools)
  {
-       struct extvtoc vtoc;
-       struct dk_gpt *gpt;
-       char diskname[MAXNAMELEN];
-       char *ptr;
-       int i;
-
-       (void) strncpy(diskname, sname, MAXNAMELEN);
-       if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
-               return;
-       ptr[1] = '\0';
-
-       if (read_extvtoc(fd, &vtoc) >= 0) {
-               for (i = 0; i < NDKMAP; i++)
-                       check_one_slice(r, diskname, i,
-                           vtoc.v_part[i].p_size, vtoc.v_sectorsz);
-       } else if (efi_alloc_and_read(fd, &gpt) >= 0) {
-               /*
-                * on x86 we'll still have leftover links that point
-                * to slices s[9-15], so use NDKMAP instead
-                */
-               for (i = 0; i < NDKMAP; i++)
-                       check_one_slice(r, diskname, i,
-                           gpt->efi_parts[i].p_size, gpt->efi_lbasize);
-               /* nodes p[1-4] are never used with EFI labels */
-               ptr[0] = 'p';
-               for (i = 1; i <= FD_NUMPART; i++)
-                       check_one_slice(r, diskname, i, 0, 1);
-               efi_free(gpt);
-       }
-}
-
-static void
-zpool_open_func(void *arg)
-{
-       rdsk_node_t *rn = arg;
-       struct stat64 statbuf;
+       blkid_cache cache;
+       blkid_dev_iterate iter;
+       blkid_dev dev;
+       const char *devname;
         nvlist_t *config;
-       int fd;
+       int fd, err;
  
-       if (rn->rn_nozpool)
-               return;
-       if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
-               /* symlink to a device that's no longer there */
-               if (errno == ENOENT)
-                       nozpool_all_slices(rn->rn_avl, rn->rn_name);
-               return;
-       }
-       /*
-        * Ignore failed stats.  We only want regular
-        * files, character devs and block devs.
-        */
-       if (fstat64(fd, &statbuf) != 0 ||
-           (!S_ISREG(statbuf.st_mode) &&
-           !S_ISCHR(statbuf.st_mode) &&
-           !S_ISBLK(statbuf.st_mode))) {
-               (void) close(fd);
-               return;
-       }
-       /* this file is too small to hold a zpool */
-       if (S_ISREG(statbuf.st_mode) &&
-           statbuf.st_size < SPA_MINDEVSIZE) {
-               (void) close(fd);
-               return;
-       } else if (!S_ISREG(statbuf.st_mode)) {
-               /*
-                * Try to read the disk label first so we don't have to
-                * open a bunch of minor nodes that can't have a zpool.
-                */
-               check_slices(rn->rn_avl, fd, rn->rn_name);
+       err = blkid_get_cache(&cache, NULL);
+       if (err != 0) {
+               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+                   dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err);
+               goto err_blkid1;
         }
  
-       if ((zpool_read_label(fd, &config)) != 0) {
-               (void) close(fd);
-               (void) no_memory(rn->rn_hdl);
-               return;
+       err = blkid_probe_all(cache);
+       if (err != 0) {
+               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+                   dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err);
+               goto err_blkid2;
         }
-       (void) close(fd);
  
+       if ((iter = blkid_dev_iterate_begin(cache)) == NULL) {
+               err = -1;       /* was still 0, i.e. "success" */
+               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+                   dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()"));
+               goto err_blkid2;
+       }
  
-       rn->rn_config = config;
-       if (config != NULL) {
-               assert(rn->rn_nozpool == B_FALSE);
+       err = blkid_dev_set_search(iter, "TYPE", "zfs");
+       if (err != 0) {
+               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+                   dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err);
+               goto err_blkid3;
         }
-}
  
-/*
- * Given a file descriptor, clear (zero) the label information.  This function
- * is currently only used in the appliance stack as part of the ZFS sysevent
- * module.
- */
-int
-zpool_clear_label(int fd)
-{
-       struct stat64 statbuf;
-       int l;
-       vdev_label_t *label;
-       uint64_t size;
+       while (blkid_dev_next(iter, &dev) == 0) {
+               devname = blkid_dev_devname(dev);
+               if ((fd = open64(devname, O_RDONLY)) < 0)
+                       continue;       /* unopenable devices are skipped */
  
-       if (fstat64(fd, &statbuf) == -1)
-               return (0);
-       size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
+               err = zpool_read_label(fd, &config);
+               (void) close(fd);
  
-       if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
-               return (-1);
+               if (err != 0) {
+                       (void) no_memory(hdl);
+                       goto err_blkid3;
+               }
  
-       for (l = 0; l < VDEV_LABELS; l++) {
-               if (pwrite64(fd, label, sizeof (vdev_label_t),
-                   label_offset(size, l)) != sizeof (vdev_label_t))
-                       return (-1);
+               if (config != NULL) {
+                       err = add_config(hdl, pools, devname, config);
+                       if (err != 0)
+                               goto err_blkid3;
+               }
         }
  
-       free(label);
-       return (0);
+err_blkid3:
+       blkid_dev_iterate_end(iter);
+err_blkid2:
+       blkid_put_cache(cache);
+err_blkid1:
+       return (err);   /* non-zero on any blkid, label or add_config failure */
  }
+#endif /* HAVE_LIBBLKID */
  
  /*
   * Given a list of directories to search, find all pools stored on disk.  This
@@ -1126,18 +993,28 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
         char path[MAXPATHLEN];
         char *end, **dir = iarg->path;
         size_t pathleft;
-       nvlist_t *ret = NULL;
-       static char *default_dir = "/dev/dsk";
+       struct stat64 statbuf;
+       nvlist_t *ret = NULL, *config;
+       static char *default_dir = DISK_ROOT;
+       int fd;
         pool_list_t pools = { 0 };
         pool_entry_t *pe, *penext;
         vdev_entry_t *ve, *venext;
         config_entry_t *ce, *cenext;
         name_entry_t *ne, *nenext;
-       avl_tree_t slice_cache;
-       rdsk_node_t *slice;
-       void *cookie;
+
+       verify(iarg->poolname == NULL || iarg->guid == 0);
  
         if (dirs == 0) {
+#ifdef HAVE_LIBBLKID
+               /* Use libblkid to scan all devices for their type */
+               if (zpool_find_import_blkid(hdl, &pools) == 0)
+                       goto skip_scanning;
+
+               (void) zfs_error_fmt(hdl, EZFS_BADCACHE,
+                   dgettext(TEXT_DOMAIN, "blkid failure falling back "
+                   "to manual probing"));
+#endif /* HAVE_LIBBLKID */
                 dirs = 1;
                 dir = &default_dir;
         }
@@ -1148,7 +1025,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
          * and toplevel GUID.
          */
         for (i = 0; i < dirs; i++) {
-               tpool_t *t;
                 char *rdsk;
                 int dfd;
  
@@ -1182,8 +1058,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
                         goto error;
                 }
  
-               avl_create(&slice_cache, slice_cache_compare,
-                   sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
                 /*
                  * This is not MT-safe, but we have no MT consumers of libzfs
                  */
@@ -1193,37 +1067,51 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
                             (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
                                 continue;
  
-                       slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
-                       slice->rn_name = zfs_strdup(hdl, name);
-                       slice->rn_avl = &slice_cache;
-                       slice->rn_dfd = dfd;
-                       slice->rn_hdl = hdl;
-                       slice->rn_nozpool = B_FALSE;
-                       avl_add(&slice_cache, slice);
-               }
-               /*
-                * create a thread pool to do all of this in parallel;
-                * rn_nozpool is not protected, so this is racy in that
-                * multiple tasks could decide that the same slice can
-                * not hold a zpool, which is benign.  Also choose
-                * double the number of processors; we hold a lot of
-                * locks in the kernel, so going beyond this doesn't
-                * buy us much.
-                */
-               t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
-                   0, NULL);
-               for (slice = avl_first(&slice_cache); slice;
-                   (slice = avl_walk(&slice_cache, slice,
-                   AVL_AFTER)))
-                       (void) tpool_dispatch(t, zpool_open_func, slice);
-               tpool_wait(t);
-               tpool_destroy(t);
-
-               cookie = NULL;
-               while ((slice = avl_destroy_nodes(&slice_cache,
-                   &cookie)) != NULL) {
-                       if (slice->rn_config != NULL) {
-                               nvlist_t *config = slice->rn_config;
+                       /*
+                        * Skip checking devices with well known prefixes:
+                        * watchdog - A special close is required to avoid
+                        *            triggering it and resetting the system.
+                        * fuse     - Fuse control device.
+                        * ppp      - Generic PPP driver.
+                        * tty*     - Generic serial interface.
+                        * vcs*     - Virtual console memory.
+                        * parport* - Parallel port interface.
+                        * lp*      - Printer interface.
+                        * fd*      - Floppy interface.
+                        */
+                       if ((strncmp(name, "watchdog", 8) == 0) ||
+                           (strncmp(name, "fuse", 4) == 0)     ||
+                           (strncmp(name, "ppp", 3) == 0)      ||
+                           (strncmp(name, "tty", 3) == 0)      ||
+                           (strncmp(name, "vcs", 3) == 0)      ||
+                           (strncmp(name, "parport", 7) == 0)  ||
+                           (strncmp(name, "lp", 2) == 0)       ||
+                           (strncmp(name, "fd", 2) == 0))
+                               continue;
+
+                       if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
+                               continue;
+
+                       /*
+                        * Ignore failed stats.  We only want regular
+                        * files and block devs.
+                        */
+                       if (fstat64(fd, &statbuf) != 0 ||
+                           (!S_ISREG(statbuf.st_mode) &&
+                           !S_ISBLK(statbuf.st_mode))) {
+                               (void) close(fd);
+                               continue;
+                       }
+
+                       if ((zpool_read_label(fd, &config)) != 0) {
+                               (void) close(fd);
+                               (void) no_memory(hdl);
+                               goto error;
+                       }
+
+                       (void) close(fd);
+
+                       if (config != NULL) {
                                 boolean_t matched = B_TRUE;
  
                                 if (iarg->poolname != NULL) {
@@ -1247,19 +1135,19 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
                                         continue;
                                 }
                                 /* use the non-raw path for the config */
-                               (void) strlcpy(end, slice->rn_name, pathleft);
+                               (void) strlcpy(end, name, pathleft);
                                 if (add_config(hdl, &pools, path, config) != 0)
                                         goto error;
                         }
-                       free(slice->rn_name);
-                       free(slice);
                 }
-               avl_destroy(&slice_cache);
  
                 (void) closedir(dirp);
                 dirp = NULL;
         }
  
+#ifdef HAVE_LIBBLKID
+skip_scanning:
+#endif
         ret = get_configs(hdl, &pools, iarg->can_be_active);
  
  error:
diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c

index c31a123712c3ecedca01667910090a180b1aca57..4b9038de8d4de5e76f0ff08407b552ecedddb972 100644 (file)
--- a/lib/libzfs/libzfs_mount.c
+++ b/lib/libzfs/libzfs_mount.c
@@ -81,6 +81,7 @@
  #include <sys/systeminfo.h>
  #define        MAXISALEN       257     /* based on sysinfo(2) man page */
  
+#ifdef HAVE_ZPL
  static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
  zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
      zfs_share_proto_t);
@@ -1268,3 +1269,53 @@ out:
  
         return (ret);
  }
+
+#else  /* HAVE_ZPL */
+
+int
+zfs_unshare_iscsi(zfs_handle_t *zhp)
+{
+       return (0);             /* stub built when HAVE_ZPL is undefined */
+}
+
+int
+zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
+{
+       return (0);             /* stub: performs no unmount, reports 0 */
+}
+
+void
+remove_mountpoint(zfs_handle_t *zhp)
+{
+}
+
+boolean_t
+is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
+{
+       return (B_FALSE);       /* stub: 'where' is never written */
+}
+
+boolean_t
+zfs_is_mounted(zfs_handle_t *zhp, char **where)
+{
+       return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
+}
+
+boolean_t
+zfs_is_shared(zfs_handle_t *zhp)
+{
+       return (B_FALSE);       /* stub: always reports "not shared" */
+}
+
+int
+zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+{
+       return (0);             /* int return: use 0, not B_FALSE */
+}
+
+int
+zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
+{
+       return (0);             /* int return: use 0, not B_FALSE */
+}
+#endif /* HAVE_ZPL */
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c

index 42f303894aed40cd800d1689998c77440baa0f6b..ec27b575682451868641ef7a161fb83ae9f1c2d6 100644 (file)
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -32,6 +32,8 @@
  #include <stdlib.h>
  #include <strings.h>
  #include <unistd.h>
+#include <zone.h>
+#include <sys/stat.h>
  #include <sys/efi_partition.h>
  #include <sys/vtoc.h>
  #include <sys/zfs_ioctl.h>
@@ -44,10 +46,6 @@
  
  static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
  
-#define        DISK_ROOT       "/dev/dsk"
-#define        RDISK_ROOT      "/dev/rdsk"
-#define        BACKUP_SLICE    "s2"
-
  typedef struct prop_flags {
         int create:1;   /* Validate property on creation */
         int import:1;   /* Validate property on import */
@@ -651,9 +649,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
  
  /*
   * Don't start the slice at the default block of 34; many storage
- * devices will use a stripe width of 128k, so start there instead.
+ * devices will use a stripe width of 128k, other vendors prefer a 1m
+ * alignment.  It is best to play it safe and ensure a 1m alignment
+ * given 512b blocks.  When the block size is larger by a power of 2
+ * we will still be 1m aligned.
   */
-#define        NEW_START_BLOCK 256
+#define        NEW_START_BLOCK 2048
  
  /*
   * Validate the given pool name, optionally putting an extended error message in
@@ -948,10 +949,12 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
                          * This can happen if the user has specified the same
                          * device multiple times.  We can't reliably detect this
                          * until we try to add it and see we already have a
-                        * label.
+                        * label.  This can also happen if the device is
+                        * part of an active md or lvm device.
                          */
                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                           "one or more vdevs refer to the same device"));
+                           "one or more vdevs refer to the same device, or one of\n"
+                           "the devices is part of an active md or lvm device"));
                         return (zfs_error(hdl, EZFS_BADDEV, msg));
  
                 case EOVERFLOW:
@@ -1928,7 +1931,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
         } else if (zpool_vdev_is_interior(path)) {
                 verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0);
         } else if (path[0] != '/') {
-               (void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
+               (void) snprintf(buf, sizeof (buf), "%s/%s", DISK_ROOT, path);
                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
         } else {
                 verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
@@ -2101,22 +2104,14 @@ zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
   * the disk to use the new unallocated space.
   */
  static int
-zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
+zpool_relabel_disk(libzfs_handle_t *hdl, const char *path)
  {
-       char path[MAXPATHLEN];
         char errbuf[1024];
         int fd, error;
-       int (*_efi_use_whole_disk)(int);
-
-       if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
-           "efi_use_whole_disk")) == NULL)
-               return (-1);
  
-       (void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);
-
-       if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+       if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
-                   "relabel '%s': unable to open device"), name);
+                   "relabel '%s': unable to open device"), path);
                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
         }
  
@@ -2125,11 +2120,11 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
          * does not have any unallocated space left. If so, we simply
          * ignore that error and continue on.
          */
-       error = _efi_use_whole_disk(fd);
+       error = efi_use_whole_disk(fd);
         (void) close(fd);
         if (error && error != VT_ENOSPC) {
                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
-                   "relabel '%s': unable to read disk capacity"), name);
+                   "relabel '%s': unable to read disk capacity"), path);
                 return (zfs_error(hdl, EZFS_NOCAP, errbuf));
         }
         return (0);
@@ -3071,7 +3066,7 @@ char *
  zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
      boolean_t verbose)
  {
-       char *path, *devid;
+       char *path, *devid, *type;
         uint64_t value;
         char buf[64];
         vdev_stat_t *vs;
@@ -3085,7 +3080,6 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
                     (u_longlong_t)value);
                 path = buf;
         } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
-
                 /*
                  * If the device is dead (faulted, offline, etc) then don't
                  * bother opening it.  Otherwise we may be forcing the user to
@@ -3124,9 +3118,19 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
                                 devid_str_free(newdevid);
                 }
  
-               if (strncmp(path, "/dev/dsk/", 9) == 0)
-                       path += 9;
+               /*
+                * For a block device only use the name.
+                */
+               verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
+               if (strcmp(type, VDEV_TYPE_DISK) == 0) {
+                       path = strrchr(path, '/');
+                       path++;
+               }
  
+#if defined(__sun__) || defined(__sun)
+               /*
+                * The following code strips the slice from the device path.
+                */
                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
                     &value) == 0 && value) {
                         int pathlen = strlen(path);
@@ -3148,6 +3152,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
                         }
                         return (tmp);
                 }
+#endif
         } else {
                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
  
@@ -3629,7 +3634,7 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb)
  
         (void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
             strrchr(path, '/'));
-       if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
+       if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) {
                 struct dk_gpt *vtoc;
  
                 if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
@@ -3675,6 +3680,54 @@ find_start_block(nvlist_t *config)
         return (MAXOFFSET_T);
  }
  
+int
+zpool_label_disk_wait(char *path, int timeout)
+{
+       struct stat64 statbuf;
+       int i;
+
+       /*
+        * Poll (up to timeout times, 1ms apart) for a newly created device
+        * to appear at path: the /dev/ node can exist before its udev symlink
+        * does, and udev may need a few seconds.  Check before sleeping so an
+        * existing path costs no delay.  Returns 0 if found, else ENOENT.
+        */
+       for (i = 0; i < timeout; i++) {
+               /* A zero return from stat64() is success; errno is moot. */
+               if (stat64(path, &statbuf) == 0)
+                       return (0);
+
+               (void) usleep(1000);
+       }
+
+       return (ENOENT);
+}
+
+int
+zpool_label_disk_check(char *path)
+{
+       struct dk_gpt *vtoc;
+       int fd, err;
+
+       /*
+        * Read the EFI label back from path.  Returns 0 when it is intact,
+        * EIDRM when the primary GPT is corrupt, else the open/read error.
+        */
+       if ((fd = open(path, O_RDWR|O_DIRECT)) < 0)
+               return (errno);
+
+       /* Only negative returns are failures, as in read_efi_label(). */
+       if ((err = efi_alloc_and_read(fd, &vtoc)) < 0) {
+               (void) close(fd);
+               return (err);
+       }
+
+       err = (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) ? EIDRM : 0;
+       efi_free(vtoc);
+       (void) close(fd);
+       return (err);
+}
+
  /*
   * Label an individual disk.  The name provided is the short name,
   * stripped of any leading /dev path.
@@ -3684,7 +3737,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
  {
         char path[MAXPATHLEN];
         struct dk_gpt *vtoc;
-       int fd;
+       int rval, fd;
         size_t resv = EFI_MIN_RESV_SIZE;
         uint64_t slice_size;
         diskaddr_t start_block;
@@ -3720,13 +3773,13 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
         (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
             BACKUP_SLICE);
  
-       if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
+       if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) {
                 /*
                  * This shouldn't happen.  We've long since verified that this
                  * is a valid device.
                  */
-               zfs_error_aux(hdl,
-                   dgettext(TEXT_DOMAIN, "unable to open device"));
+               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                   "unable to open device '%s': %d"), path, errno);
                 return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
         }
  
@@ -3769,7 +3822,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
         vtoc->efi_parts[8].p_size = resv;
         vtoc->efi_parts[8].p_tag = V_RESERVED;
  
-       if (efi_write(fd, vtoc) != 0) {
+       if ((rval = efi_write(fd, vtoc)) != 0) {
                 /*
                  * Some block drivers (like pcata) may not support EFI
                  * GPT labels.  Print out a helpful error message dir-
@@ -3779,123 +3832,34 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
                 (void) close(fd);
                 efi_free(vtoc);
  
-               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                   "try using fdisk(1M) and then provide a specific slice"));
+               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using "
+                   "parted(8) and then provide a specific slice: %d"), rval);
                 return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
         }
  
         (void) close(fd);
         efi_free(vtoc);
-       return (0);
-}
-
-static boolean_t
-supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
-{
-       char *type;
-       nvlist_t **child;
-       uint_t children, c;
-
-       verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
-       if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
-           strcmp(type, VDEV_TYPE_FILE) == 0 ||
-           strcmp(type, VDEV_TYPE_LOG) == 0 ||
-           strcmp(type, VDEV_TYPE_HOLE) == 0 ||
-           strcmp(type, VDEV_TYPE_MISSING) == 0) {
-               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                   "vdev type '%s' is not supported"), type);
-               (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
-               return (B_FALSE);
-       }
-       if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
-           &child, &children) == 0) {
-               for (c = 0; c < children; c++) {
-                       if (!supported_dump_vdev_type(hdl, child[c], errbuf))
-                               return (B_FALSE);
-               }
-       }
-       return (B_TRUE);
-}
-
-/*
- * check if this zvol is allowable for use as a dump device; zero if
- * it is, > 0 if it isn't, < 0 if it isn't a zvol
- */
-int
-zvol_check_dump_config(char *arg)
-{
-       zpool_handle_t *zhp = NULL;
-       nvlist_t *config, *nvroot;
-       char *p, *volname;
-       nvlist_t **top;
-       uint_t toplevels;
-       libzfs_handle_t *hdl;
-       char errbuf[1024];
-       char poolname[ZPOOL_MAXNAMELEN];
-       int pathlen = strlen(ZVOL_FULL_DEV_DIR);
-       int ret = 1;
-
-       if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
-               return (-1);
-       }
-
-       (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-           "dump is not supported on device '%s'"), arg);
  
-       if ((hdl = libzfs_init()) == NULL)
-               return (1);
-       libzfs_print_on_error(hdl, B_TRUE);
-
-       volname = arg + pathlen;
-
-       /* check the configuration of the pool */
-       if ((p = strchr(volname, '/')) == NULL) {
-               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                   "malformed dataset name"));
-               (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
-               return (1);
-       } else if (p - volname >= ZFS_MAXNAMELEN) {
-               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                   "dataset name is too long"));
-               (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
-               return (1);
-       } else {
-               (void) strncpy(poolname, volname, p - volname);
-               poolname[p - volname] = '\0';
-       }
-
-       if ((zhp = zpool_open(hdl, poolname)) == NULL) {
-               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                   "could not open pool '%s'"), poolname);
-               (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
-               goto out;
-       }
-       config = zpool_get_config(zhp, NULL);
-       if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-           &nvroot) != 0) {
-               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                   "could not obtain vdev configuration for  '%s'"), poolname);
-               (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
-               goto out;
-       }
-
-       verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
-           &top, &toplevels) == 0);
-       if (toplevels != 1) {
-               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-                   "'%s' has multiple top level vdevs"), poolname);
-               (void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
-               goto out;
+       /* Wait for the first expected slice to appear. */
+       (void) snprintf(path, sizeof (path), "%s/%s%s%s", DISK_ROOT, name,
+           isdigit(name[strlen(name)-1]) ? "p" : "", FIRST_SLICE);
+       rval = zpool_label_disk_wait(path, 3000);
+       if (rval) {
+               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to "
+                   "detect device partitions on '%s': %d"), path, rval);
+               return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
         }
  
-       if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
-               goto out;
+       /* We can't be too paranoid.  Read the label back and verify it. */
+       (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name);
+       rval = zpool_label_disk_check(path);
+       if (rval) {
+               zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written "
+                   "EFI label on '%s' is damaged.  Ensure\nthis device "
+                   "is not in in use, and is functioning properly: %d"),
+                   path, rval);
+               return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
         }
-       ret = 0;
  
-out:
-       if (zhp)
-               zpool_close(zhp);
-       libzfs_fini(hdl);
-       return (ret);
+       return 0;
  }
diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c

index 87ffd124fd44e574cb394a87887c64d0e8ece92c..40d1d2e5352a766213ad6b11956c7d2e1644175e 100644 (file)
--- a/lib/libzfs/libzfs_sendrecv.c
+++ b/lib/libzfs/libzfs_sendrecv.c
@@ -2608,6 +2608,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
                                 return (-1);
                         }
                 }
+               if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
+                   zvol_remove_link(hdl, zhp->zfs_name) != 0) {
+                       zfs_close(zhp);
+                       zcmd_free_nvlists(&zc);
+                       return (-1);
+               }
                 zfs_close(zhp);
         } else {
                 /*
@@ -2813,6 +2819,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
                 if (h != NULL) {
                         if (h->zfs_type == ZFS_TYPE_VOLUME) {
                                 *cp = '@';
+                               err = zvol_create_link(hdl, h->zfs_name);
+                               if (err == 0 && ioctl_err == 0)
+                                       err = zvol_create_link(hdl,
+                                           zc.zc_value);
                         } else if (newfs || stream_avl) {
                                 /*
                                  * Track the first/top of hierarchy fs,
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c

index cb7d87cb2814acd73b4788012bfbd16ea201d2ca..71f81831b23fe02b4f0bee8903c8f5a02026581b 100644 (file)
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -36,6 +36,7 @@
  #include <unistd.h>
  #include <ctype.h>
  #include <math.h>
+#include <sys/stat.h>
  #include <sys/mnttab.h>
  #include <sys/mntent.h>
  #include <sys/types.h>
@@ -648,7 +649,9 @@ libzfs_fini(libzfs_handle_t *hdl)
  #endif
         if (hdl->libzfs_sharetab)
                 (void) fclose(hdl->libzfs_sharetab);
+#ifdef HAVE_ZPL
         zfs_uninit_libshare(hdl);
+#endif
         if (hdl->libzfs_log_str)
                 (void) free(hdl->libzfs_log_str);
         zpool_free_handles(hdl);
diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c

index 494e544ea7f8ba8746f0909f2de0921e1b0a1e4f..6f06f4001293309d03aa0b400859e88d6aed7654 100644 (file)
--- a/lib/libzpool/kernel.c
+++ b/lib/libzpool/kernel.c
@@ -35,6 +35,8 @@
  #include <sys/processor.h>
  #include <sys/zfs_context.h>
  #include <sys/utsname.h>
+#include <sys/time.h>
+#include <sys/mount.h> /* for BLKGETSIZE64 */
  #include <sys/systeminfo.h>
  
  /*
@@ -533,7 +535,11 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
          * for its size.  So -- gag -- we open the block device to get
          * its size, and remember it for subsequent VOP_GETATTR().
          */
+#if defined(__sun__) || defined(__sun)
         if (strncmp(path, "/dev/", 5) == 0) {
+#else
+       if (0) {
+#endif
                 char *dsk;
                 fd = open64(path, O_RDONLY);
                 if (fd == -1) {
@@ -562,6 +568,14 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
                 }
         }
  
+       if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) {
+#ifdef __linux__
+               flags |= O_DIRECT;
+#endif
+               /* We shouldn't be writing to block devices in userspace */
+               VERIFY(!(flags & FWRITE));
+       }
+
         if (flags & FCREAT)
                 old_umask = umask(0);
  
@@ -584,6 +598,16 @@ vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
                 return (err);
         }
  
+#ifdef __linux__
+       /* In Linux, use an ioctl to get the size of a block device. */
+       if (S_ISBLK(st.st_mode)) {
+               if (ioctl(fd, BLKGETSIZE64, &st.st_size) != 0) {
+                       err = errno;
+                       close(fd);
+                       return (err);
+               }
+       }
+#endif
         (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
  
         *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
@@ -637,6 +661,16 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
                 }
         }
  
+#ifdef __linux__
+       if (rc == -1 && errno == EINVAL) {
+               /*
+                * Under Linux, this most likely means an alignment issue
+                * (memory or disk) due to O_DIRECT, so we abort() in order to
+                * catch the offender.
+                */
+                abort();
+       }
+#endif
         if (rc == -1)
                 return (errno);
  
diff --git a/module/zfs/sa.c b/module/zfs/sa.c

index 37b815f94e8dcfe25216c73ab4be3c8973e2d238..ee1140ffbb433cd4b5961e8c3335bac233482784 100644 (file)
--- a/module/zfs/sa.c
+++ b/module/zfs/sa.c
@@ -1436,6 +1436,7 @@ sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen)
  int
  sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
  {
+#ifdef HAVE_ZPL
         int error;
         sa_bulk_attr_t bulk;
  
@@ -1452,7 +1453,9 @@ sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
         }
         mutex_exit(&hdl->sa_lock);
         return (error);
-
+#else
+       return ENOSYS;
+#endif /* HAVE_ZPL */
  }
  #endif
  
diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c

index d141e43d722a06eff607fb8f700e8b3371c835ae..88fd789668ac2d692f8df06e8ec223584f2f01e9 100644 (file)
--- a/module/zfs/zfs_sa.c
+++ b/module/zfs/zfs_sa.c
@@ -67,7 +67,7 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = {
  };
  
  #ifdef _KERNEL
-
+#ifdef HAVE_ZPL
  int
  zfs_sa_readlink(znode_t *zp, uio_t *uio)
  {
@@ -331,4 +331,5 @@ zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp)
         }
  }
  
+#endif /* HAVE_ZPL */
  #endif
author	Brian Behlendorf <behlendorf1@llnl.gov>
	Thu, 26 Aug 2010 18:56:53 +0000 (11:56 -0700)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Tue, 31 Aug 2010 20:42:00 +0000 (13:42 -0700)
cmd/zdb/zdb.c		patch \| blob \| blame \| history
cmd/zfs/zfs_main.c		patch \| blob \| blame \| history
cmd/zinject/zinject.c		patch \| blob \| blame \| history
cmd/zpool/zpool_main.c		patch \| blob \| blame \| history
cmd/zpool/zpool_vdev.c		patch \| blob \| blame \| history
lib/libefi/include/sys/uuid.h		patch \| blob \| blame \| history
lib/libefi/rdwr_efi.c		patch \| blob \| blame \| history
lib/libzfs/include/libzfs.h		patch \| blob \| blame \| history
lib/libzfs/include/libzfs_impl.h		patch \| blob \| blame \| history
lib/libzfs/libzfs_changelist.c		patch \| blob \| blame \| history
lib/libzfs/libzfs_dataset.c		patch \| blob \| blame \| history
lib/libzfs/libzfs_import.c		patch \| blob \| blame \| history
lib/libzfs/libzfs_mount.c		patch \| blob \| blame \| history
lib/libzfs/libzfs_pool.c		patch \| blob \| blame \| history
lib/libzfs/libzfs_sendrecv.c		patch \| blob \| blame \| history
lib/libzfs/libzfs_util.c		patch \| blob \| blame \| history
lib/libzpool/kernel.c		patch \| blob \| blame \| history
module/zfs/sa.c		patch \| blob \| blame \| history
module/zfs/zfs_sa.c		patch \| blob \| blame \| history