zpool: Add slot power control, print power status
author     Tony Hutter <hutter2@llnl.gov>
           Thu, 21 Dec 2023 18:53:16 +0000 (10:53 -0800)
committer  Tony Hutter <hutter2@llnl.gov>
           Mon, 29 Jan 2024 23:12:06 +0000 (15:12 -0800)
Add `zpool` flags to control the slot power to drives.  This assumes
your SAS or NVMe enclosure supports slot power control via sysfs.

The new `--power` flag is added to `zpool offline|online|clear`:

    zpool offline --power <pool> <device>    Turn off device slot power
    zpool online --power <pool> <device>     Turn on device slot power
    zpool clear --power <pool> [device]      Turn on device slot power

If the ZPOOL_AUTO_POWER_ON_SLOT env var is set, then the '--power'
option is automatically implied for `zpool online` and `zpool clear`
and does not need to be passed.
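
    For example (pool 'tank' and disk 'sda' below are hypothetical):

        ZPOOL_AUTO_POWER_ON_SLOT=1 zpool clear tank sda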

zpool status also gets a --power option to print the slot power status.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mart Frauenlob <AllKind@fastest.cc>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #15662

16 files changed:
cmd/zpool/os/freebsd/zpool_vdev_os.c
cmd/zpool/os/linux/zpool_vdev_os.c
cmd/zpool/zpool_iter.c
cmd/zpool/zpool_main.c
cmd/zpool/zpool_util.h
include/libzfs.h
include/libzutil.h
lib/libzfs/libzfs.abi
lib/libzfs/libzfs_pool.c
lib/libzutil/os/linux/zutil_import_os.c
lib/libzutil/zutil_import.c
lib/libzutil/zutil_pool.c
man/man8/zpool-clear.8
man/man8/zpool-offline.8
man/man8/zpool-status.8
man/man8/zpool.8

index 231ca97f1f6fe5e4979a80f484c1bbe4655a116c..9dd733989e2c79b42f8379f2d13754526673092c 100644 (file)
@@ -124,3 +124,17 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
 {
        return (check_file_generic(file, force, isspare));
 }
+
+int
+zpool_power_current_state(zpool_handle_t *zhp, char *vdev)
+{
+       /* Enclosure slot power not supported on FreeBSD yet */
+       return (-1);
+}
+
+int
+zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on)
+{
+       /* Enclosure slot power not supported on FreeBSD yet */
+       return (ENOTSUP);
+}
index 7f4486e062fe9e529f1f13942521385498cdcb31..006a3a7d8e01893f28b1034af24ea2bceacd818a 100644 (file)
@@ -416,3 +416,258 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
 {
        return (check_file_generic(file, force, isspare));
 }
+
+/*
+ * Read from a sysfs file and return an allocated string.  Removes
+ * the newline from the end of the string if there is one.
+ *
+ * Returns a string on success (which must be freed), or NULL on error.
+ */
+static char *zpool_sysfs_gets(char *path)
+{
+       int fd;
+       struct stat statbuf;
+       char *buf = NULL;
+       ssize_t count = 0;
+       fd = open(path, O_RDONLY);
+       if (fd < 0)
+               return (NULL);
+
+       if (fstat(fd, &statbuf) != 0) {
+               close(fd);
+               return (NULL);
+       }
+
+       buf = calloc(sizeof (*buf), statbuf.st_size + 1);
+       if (buf == NULL) {
+               close(fd);
+               return (NULL);
+       }
+
+       /*
+        * Note: we can read fewer bytes than st_size, and that's ok.  Sysfs
+        * files will report a size of 4k even if they only return a small
+        * string.
+        */
+       count = read(fd, buf, statbuf.st_size);
+       if (count < 0) {
+               /* Error doing read() or we overran the buffer */
+               close(fd);
+               free(buf);
+               return (NULL);
+       }
+
+       /* Remove trailing newline (guard against a zero-length read) */
+       if (count > 0 && buf[count - 1] == '\n')
+               buf[count - 1] = 0;
+
+       close(fd);
+
+       return (buf);
+}
+
+/*
+ * Write a string to a sysfs file.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+static int zpool_sysfs_puts(char *path, char *str)
+{
+       FILE *file;
+
+       file = fopen(path, "w");
+       if (!file) {
+               return (-1);
+       }
+
+       if (fputs(str, file) < 0) {
+               fclose(file);
+               return (-2);
+       }
+       fclose(file);
+       return (0);
+}
+
+/* Given a vdev nvlist_t, rescan its enclosure sysfs path */
+static void
+rescan_vdev_config_dev_sysfs_path(nvlist_t *vdev_nv)
+{
+       update_vdev_config_dev_sysfs_path(vdev_nv,
+           fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH),
+           ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+}
+
+/*
+ * Given a power string: "on", "off", "1", or "0", return 0 if it's an
+ * off value, 1 if it's an on value, and -1 if the value is unrecognized.
+ */
+static int zpool_power_parse_value(char *str)
+{
+       if ((strcmp(str, "off") == 0) || (strcmp(str, "0") == 0))
+               return (0);
+
+       if ((strcmp(str, "on") == 0) || (strcmp(str, "1") == 0))
+               return (1);
+
+       return (-1);
+}
+
+/*
+ * Given a vdev string return an allocated string containing the sysfs path to
+ * its power control file.  Also do a check if the power control file really
+ * exists and has correct permissions.
+ *
+ * Example returned strings:
+ *
+ * /sys/class/enclosure/0:0:122:0/10/power_status
+ * /sys/bus/pci/slots/10/power
+ *
+ * Returns allocated string on success (which must be freed), NULL on failure.
+ */
+static char *
+zpool_power_sysfs_path(zpool_handle_t *zhp, char *vdev)
+{
+       const char *enc_sysfs_dir = NULL;
+       char *path = NULL;
+       nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL);
+
+       if (vdev_nv == NULL) {
+               return (NULL);
+       }
+
+       /* Make sure we're getting the updated enclosure sysfs path */
+       rescan_vdev_config_dev_sysfs_path(vdev_nv);
+
+       if (nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
+           &enc_sysfs_dir) != 0) {
+               return (NULL);
+       }
+
+       if (asprintf(&path, "%s/power_status", enc_sysfs_dir) == -1)
+               return (NULL);
+
+       if (access(path, W_OK) != 0) {
+               free(path);
+               path = NULL;
+               /* No HDD 'power_status' file, maybe it's NVMe? */
+               if (asprintf(&path, "%s/power", enc_sysfs_dir) == -1) {
+                       return (NULL);
+               }
+
+               if (access(path, R_OK | W_OK) != 0) {
+                       /* Not NVMe either */
+                       free(path);
+                       return (NULL);
+               }
+       }
+
+       return (path);
+}
+
+/*
+ * Given a path to a sysfs power control file, return B_TRUE if you should use
+ * "on/off" words to control it, or B_FALSE otherwise ("0/1" to control).
+ */
+static boolean_t
+zpool_power_use_word(char *sysfs_path)
+{
+       if (strcmp(&sysfs_path[strlen(sysfs_path) - strlen("power_status")],
+           "power_status") == 0) {
+               return (B_TRUE);
+       }
+       return (B_FALSE);
+}
+
+/*
+ * Check the sysfs power control value for a vdev.
+ *
+ * Returns:
+ *  0 - Power is off
+ *  1 - Power is on
+ * -1 - Error or unsupported
+ */
+int
+zpool_power_current_state(zpool_handle_t *zhp, char *vdev)
+{
+       char *val;
+       int rc;
+
+       char *path = zpool_power_sysfs_path(zhp, vdev);
+       if (path == NULL)
+               return (-1);
+
+       val = zpool_sysfs_gets(path);
+       if (val == NULL) {
+               free(path);
+               return (-1);
+       }
+
+       rc = zpool_power_parse_value(val);
+       free(val);
+       free(path);
+       return (rc);
+}
+
+/*
+ * Turn a device's enclosure slot power on or off.
+ *
+ * Device path is the full path to the device (like /dev/sda or /dev/sda1).
+ *
+ * Return code:
+ * 0:          Success
+ * ENOTSUP:    Power control not supported on this OS
+ * EBADSLT:    Couldn't read current power state
+ * ENOENT:     No sysfs path to power control
+ * EIO:        Couldn't write sysfs power value
+ * EBADE:      Sysfs power value didn't change
+ */
+int
+zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on)
+{
+       char *sysfs_path;
+       const char *val;
+       int rc;
+       int timeout_ms;
+
+       rc = zpool_power_current_state(zhp, vdev);
+       if (rc == -1) {
+               return (EBADSLT);
+       }
+
+       /* Already correct value? */
+       if (rc == (int)turn_on)
+               return (0);
+
+       sysfs_path = zpool_power_sysfs_path(zhp, vdev);
+       if (sysfs_path == NULL)
+               return (ENOENT);
+
+       if (zpool_power_use_word(sysfs_path)) {
+               val = turn_on ? "on" : "off";
+       } else {
+               val = turn_on ? "1" : "0";
+       }
+
+       rc = zpool_sysfs_puts(sysfs_path, (char *)val);
+
+       free(sysfs_path);
+       if (rc != 0) {
+               return (EIO);
+       }
+
+       /*
+        * Wait up to 30 seconds for sysfs power value to change after
+        * writing it.
+        */
+       timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS", 30000);
+       for (int i = 0; i < MAX(1, timeout_ms / 200); i++) {
+               rc = zpool_power_current_state(zhp, vdev);
+               if (rc == (int)turn_on)
+                       return (0);     /* success */
+
+               fsleep(0.200);  /* 200ms */
+       }
+
+       /* sysfs value never changed */
+       return (EBADE);
+}
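
As an illustration only (not part of this commit), a minimal caller of the two
functions above might look like the sketch below; the vdev name "sda" is
hypothetical, and the declarations come from cmd/zpool/zpool_util.h:

	#include <stdio.h>
	#include <libzfs.h>
	#include "zpool_util.h"	/* declares zpool_power*() for the zpool cmd */

	/* Sketch: power-cycle a drive's enclosure slot and report the state */
	static void
	power_cycle_example(zpool_handle_t *zhp)
	{
		char vdev[] = "sda";	/* hypothetical vdev name */
		int rc;

		rc = zpool_power(zhp, vdev, B_FALSE);	/* slot off */
		if (rc != 0)
			(void) fprintf(stderr, "power off failed: %d\n", rc);

		rc = zpool_power(zhp, vdev, B_TRUE);	/* slot back on */
		if (rc != 0)
			(void) fprintf(stderr, "power on failed: %d\n", rc);

		/* 0 = off, 1 = on, -1 = error or unsupported */
		(void) printf("slot power: %d\n",
		    zpool_power_current_state(zhp, vdev));
	}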
index 506b529dce486b9660d4e168b6e352793736f4e8..ae2e9da9108de3deaa3d73229b85dc3e59ff7218 100644 (file)
@@ -554,6 +554,10 @@ for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl)
        if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
                return (1);
 
+       /* Make sure we're getting the updated enclosure sysfs path */
+       update_vdev_config_dev_sysfs_path(nv, path,
+           ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
+
        nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
            &vdev_enc_sysfs_path);
 
index 5f96dc8d0040eb826c1dd0cad2e392f10f4d2921..6687a4464459815914c779a51b12ca76ea223fb5 100644 (file)
@@ -353,7 +353,7 @@ get_usage(zpool_help_t idx)
                return (gettext("\tattach [-fsw] [-o property=value] "
                    "<pool> <device> <new-device>\n"));
        case HELP_CLEAR:
-               return (gettext("\tclear [-nF] <pool> [device]\n"));
+               return (gettext("\tclear [[--power]|[-nF]] <pool> [device]\n"));
        case HELP_CREATE:
                return (gettext("\tcreate [-fnd] [-o property=value] ... \n"
                    "\t    [-O file-system-property=value] ... \n"
@@ -389,9 +389,11 @@ get_usage(zpool_help_t idx)
                    "[-T d|u] [pool] ... \n"
                    "\t    [interval [count]]\n"));
        case HELP_OFFLINE:
-               return (gettext("\toffline [-f] [-t] <pool> <device> ...\n"));
+               return (gettext("\toffline [--power]|[[-f][-t]] <pool> "
+                   "<device> ...\n"));
        case HELP_ONLINE:
-               return (gettext("\tonline [-e] <pool> <device> ...\n"));
+               return (gettext("\tonline [--power][-e] <pool> <device> "
+                   "...\n"));
        case HELP_REPLACE:
                return (gettext("\treplace [-fsw] [-o property=value] "
                    "<pool> <device> [new-device]\n"));
@@ -410,7 +412,7 @@ get_usage(zpool_help_t idx)
                return (gettext("\ttrim [-dw] [-r <rate>] [-c | -s] <pool> "
                    "[<device> ...]\n"));
        case HELP_STATUS:
-               return (gettext("\tstatus [-c [script1,script2,...]] "
+               return (gettext("\tstatus [--power] [-c [script1,script2,...]] "
                    "[-igLpPstvxD]  [-T d|u] [pool] ... \n"
                    "\t    [interval [count]]\n"));
        case HELP_UPGRADE:
@@ -516,6 +518,77 @@ print_vdev_prop_cb(int prop, void *cb)
        return (ZPROP_CONT);
 }
 
+/*
+ * Given a leaf vdev name like 'L5' return its ZPOOL_CONFIG_PATH, like
+ * '/dev/disk/by-vdev/L5'.
+ */
+static const char *
+vdev_name_to_path(zpool_handle_t *zhp, char *vdev)
+{
+       nvlist_t *vdev_nv = zpool_find_vdev(zhp, vdev, NULL, NULL, NULL);
+       if (vdev_nv == NULL) {
+               return (NULL);
+       }
+       return (fnvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH));
+}
+
+static int
+zpool_power_on(zpool_handle_t *zhp, char *vdev)
+{
+       return (zpool_power(zhp, vdev, B_TRUE));
+}
+
+static int
+zpool_power_on_and_disk_wait(zpool_handle_t *zhp, char *vdev)
+{
+       int rc;
+
+       rc = zpool_power_on(zhp, vdev);
+       if (rc != 0)
+               return (rc);
+
+       zpool_disk_wait(vdev_name_to_path(zhp, vdev));
+
+       return (0);
+}
+
+static int
+zpool_power_on_pool_and_wait_for_devices(zpool_handle_t *zhp)
+{
+       nvlist_t *nv;
+       const char *path = NULL;
+       int rc;
+
+       /* Power up all the devices first */
+       FOR_EACH_REAL_LEAF_VDEV(zhp, nv) {
+               path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH);
+               if (path != NULL) {
+                       rc = zpool_power_on(zhp, (char *)path);
+                       if (rc != 0) {
+                               return (rc);
+                       }
+               }
+       }
+
+       /*
+        * Wait for their devices to show up.  Since we powered them on
+        * at roughly the same time, they should all come online around
+        * the same time.
+        */
+       FOR_EACH_REAL_LEAF_VDEV(zhp, nv) {
+               path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH);
+               zpool_disk_wait(path);
+       }
+
+       return (0);
+}
+
+static int
+zpool_power_off(zpool_handle_t *zhp, char *vdev)
+{
+       return (zpool_power(zhp, vdev, B_FALSE));
+}
+
 /*
  * Display usage message.  If we're inside a command, display only the usage for
  * that command.  Otherwise, iterate over the entire command table and display
@@ -2093,6 +2166,7 @@ typedef struct status_cbdata {
        boolean_t       cb_print_vdev_init;
        boolean_t       cb_print_vdev_trim;
        vdev_cmd_data_list_t    *vcdl;
+       boolean_t       cb_print_power;
 } status_cbdata_t;
 
 /* Return 1 if string is NULL, empty, or whitespace; return 0 otherwise. */
@@ -2378,6 +2452,26 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
                        else
                                printf(" %5s", rbuf);
                }
+               if (cb->cb_print_power) {
+                       if (children == 0)  {
+                               /* Only leaf vdevs have physical slots */
+                               switch (zpool_power_current_state(zhp, (char *)
+                                   fnvlist_lookup_string(nv,
+                                   ZPOOL_CONFIG_PATH))) {
+                               case 0:
+                                       printf_color(ANSI_RED, " %5s",
+                                           gettext("off"));
+                                       break;
+                               case 1:
+                                       printf(" %5s", gettext("on"));
+                                       break;
+                               default:
+                                       printf(" %5s", "-");
+                               }
+                       } else {
+                               printf(" %5s", "-");
+                       }
+               }
        }
 
        if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
@@ -5428,19 +5522,6 @@ get_interval_count_filter_guids(int *argc, char **argv, float *interval,
            interval, count);
 }
 
-/*
- * Floating point sleep().  Allows you to pass in a floating point value for
- * seconds.
- */
-static void
-fsleep(float sec)
-{
-       struct timespec req;
-       req.tv_sec = floor(sec);
-       req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC;
-       nanosleep(&req, NULL);
-}
-
 /*
  * Terminal height, in rows. Returns -1 if stdout is not connected to a TTY or
  * if we were unable to determine its size.
@@ -6939,10 +7020,12 @@ zpool_do_split(int argc, char **argv)
        return (ret);
 }
 
-
+#define        POWER_OPT 1024
 
 /*
- * zpool online <pool> <device> ...
+ * zpool online [--power] <pool> <device> ...
+ *
+ * --power: Power on the enclosure slot to the drive (if possible)
  */
 int
 zpool_do_online(int argc, char **argv)
@@ -6953,13 +7036,21 @@ zpool_do_online(int argc, char **argv)
        int ret = 0;
        vdev_state_t newstate;
        int flags = 0;
+       boolean_t is_power_on = B_FALSE;
+       struct option long_options[] = {
+               {"power", no_argument, NULL, POWER_OPT},
+               {0, 0, 0, 0}
+       };
 
        /* check options */
-       while ((c = getopt(argc, argv, "e")) != -1) {
+       while ((c = getopt_long(argc, argv, "e", long_options, NULL)) != -1) {
                switch (c) {
                case 'e':
                        flags |= ZFS_ONLINE_EXPAND;
                        break;
+               case POWER_OPT:
+                       is_power_on = B_TRUE;
+                       break;
                case '?':
                        (void) fprintf(stderr, gettext("invalid option '%c'\n"),
                            optopt);
@@ -6967,6 +7058,9 @@ zpool_do_online(int argc, char **argv)
                }
        }
 
+       if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT"))
+               is_power_on = B_TRUE;
+
        argc -= optind;
        argv += optind;
 
@@ -6988,6 +7082,18 @@ zpool_do_online(int argc, char **argv)
        for (i = 1; i < argc; i++) {
                vdev_state_t oldstate;
                boolean_t avail_spare, l2cache;
+               int rc;
+
+               if (is_power_on) {
+                       rc = zpool_power_on_and_disk_wait(zhp, argv[i]);
+                       if (rc == ENOTSUP) {
+                               (void) fprintf(stderr,
+                                   gettext("Power control not supported\n"));
+                       }
+                       if (rc != 0)
+                               return (rc);
+               }
+
                nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare,
                    &l2cache, NULL);
                if (tgt == NULL) {
@@ -7033,12 +7139,15 @@ zpool_do_online(int argc, char **argv)
 }
 
 /*
- * zpool offline [-ft] <pool> <device> ...
+ * zpool offline [-ft]|[--power] <pool> <device> ...
+ *
  *
  *     -f      Force the device into a faulted state.
  *
  *     -t      Only take the device off-line temporarily.  The offline/faulted
  *             state will not be persistent across reboots.
+ *
+ *     --power Power off the enclosure slot to the drive (if possible)
  */
 int
 zpool_do_offline(int argc, char **argv)
@@ -7049,9 +7158,15 @@ zpool_do_offline(int argc, char **argv)
        int ret = 0;
        boolean_t istmp = B_FALSE;
        boolean_t fault = B_FALSE;
+       boolean_t is_power_off = B_FALSE;
+
+       struct option long_options[] = {
+               {"power", no_argument, NULL, POWER_OPT},
+               {0, 0, 0, 0}
+       };
 
        /* check options */
-       while ((c = getopt(argc, argv, "ft")) != -1) {
+       while ((c = getopt_long(argc, argv, "ft", long_options, NULL)) != -1) {
                switch (c) {
                case 'f':
                        fault = B_TRUE;
@@ -7059,6 +7174,9 @@ zpool_do_offline(int argc, char **argv)
                case 't':
                        istmp = B_TRUE;
                        break;
+               case POWER_OPT:
+                       is_power_off = B_TRUE;
+                       break;
                case '?':
                        (void) fprintf(stderr, gettext("invalid option '%c'\n"),
                            optopt);
@@ -7066,6 +7184,20 @@ zpool_do_offline(int argc, char **argv)
                }
        }
 
+       if (is_power_off && fault) {
+               (void) fprintf(stderr,
+                   gettext("-0 and -f cannot be used together\n"));
+               usage(B_FALSE);
+               return (1);
+       }
+
+       if (is_power_off && istmp) {
+               (void) fprintf(stderr,
+                   gettext("-0 and -t cannot be used together\n"));
+               usage(B_FALSE);
+               return (1);
+       }
+
        argc -= optind;
        argv += optind;
 
@@ -7085,8 +7217,22 @@ zpool_do_offline(int argc, char **argv)
                return (1);
 
        for (i = 1; i < argc; i++) {
-               if (fault) {
-                       uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]);
+               uint64_t guid = zpool_vdev_path_to_guid(zhp, argv[i]);
+               if (is_power_off) {
+                       /*
+                        * Note: we have to power off first, then set REMOVED,
+                        * or else zpool_vdev_set_removed_state() returns
+                        * EAGAIN.
+                        */
+                       ret = zpool_power_off(zhp, argv[i]);
+                       if (ret != 0) {
+                               (void) fprintf(stderr, "%s %s %d\n",
+                                   gettext("unable to power off slot for"),
+                                   argv[i], ret);
+                       }
+                       zpool_vdev_set_removed_state(zhp, guid, VDEV_AUX_NONE);
+
+               } else if (fault) {
                        vdev_aux_t aux;
                        if (istmp == B_FALSE) {
                                /* Force the fault to persist across imports */
@@ -7109,7 +7255,7 @@ zpool_do_offline(int argc, char **argv)
 }
 
 /*
- * zpool clear <pool> [device]
+ * zpool clear [-nF]|[--power] <pool> [device]
  *
  * Clear all errors associated with a pool or a particular device.
  */
@@ -7121,13 +7267,20 @@ zpool_do_clear(int argc, char **argv)
        boolean_t dryrun = B_FALSE;
        boolean_t do_rewind = B_FALSE;
        boolean_t xtreme_rewind = B_FALSE;
+       boolean_t is_power_on = B_FALSE;
        uint32_t rewind_policy = ZPOOL_NO_REWIND;
        nvlist_t *policy = NULL;
        zpool_handle_t *zhp;
        char *pool, *device;
 
+       struct option long_options[] = {
+               {"power", no_argument, NULL, POWER_OPT},
+               {0, 0, 0, 0}
+       };
+
        /* check options */
-       while ((c = getopt(argc, argv, "FnX")) != -1) {
+       while ((c = getopt_long(argc, argv, "FnX", long_options,
+           NULL)) != -1) {
                switch (c) {
                case 'F':
                        do_rewind = B_TRUE;
@@ -7138,6 +7291,9 @@ zpool_do_clear(int argc, char **argv)
                case 'X':
                        xtreme_rewind = B_TRUE;
                        break;
+               case POWER_OPT:
+                       is_power_on = B_TRUE;
+                       break;
                case '?':
                        (void) fprintf(stderr, gettext("invalid option '%c'\n"),
                            optopt);
@@ -7145,6 +7301,9 @@ zpool_do_clear(int argc, char **argv)
                }
        }
 
+       if (libzfs_envvar_is_set("ZPOOL_AUTO_POWER_ON_SLOT"))
+               is_power_on = B_TRUE;
+
        argc -= optind;
        argv += optind;
 
@@ -7185,6 +7344,14 @@ zpool_do_clear(int argc, char **argv)
                return (1);
        }
 
+       if (is_power_on) {
+               if (device == NULL) {
+                       zpool_power_on_pool_and_wait_for_devices(zhp);
+               } else {
+                       zpool_power_on_and_disk_wait(zhp, device);
+               }
+       }
+
        if (zpool_clear(zhp, device, policy) != 0)
                ret = 1;
 
@@ -8801,6 +8968,10 @@ status_callback(zpool_handle_t *zhp, void *data)
                        printf_color(ANSI_BOLD, " %5s", gettext("SLOW"));
                }
 
+               if (cbp->cb_print_power) {
+                       printf_color(ANSI_BOLD, " %5s", gettext("POWER"));
+               }
+
                if (cbp->vcdl != NULL)
                        print_cmd_columns(cbp->vcdl, 0);
 
@@ -8847,8 +9018,8 @@ status_callback(zpool_handle_t *zhp, void *data)
 }
 
 /*
- * zpool status [-c [script1,script2,...]] [-igLpPstvx] [-T d|u] [pool] ...
- *              [interval [count]]
+ * zpool status [-c [script1,script2,...]] [-igLpPstvx] [--power] [-T d|u] ...
+ *              [pool] [interval [count]]
  *
  *     -c CMD  For each vdev, run command CMD
  *     -i      Display vdev initialization status.
@@ -8862,6 +9033,7 @@ status_callback(zpool_handle_t *zhp, void *data)
  *     -D      Display dedup status (undocumented)
  *     -t      Display vdev TRIM status.
  *     -T      Display a timestamp in date(1) or Unix format
+ *     --power Display vdev enclosure slot power status
  *
  * Describes the health status of all pools or some subset.
  */
@@ -8875,8 +9047,14 @@ zpool_do_status(int argc, char **argv)
        status_cbdata_t cb = { 0 };
        char *cmd = NULL;
 
+       struct option long_options[] = {
+               {"power", no_argument, NULL, POWER_OPT},
+               {0, 0, 0, 0}
+       };
+
        /* check options */
-       while ((c = getopt(argc, argv, "c:igLpPsvxDtT:")) != -1) {
+       while ((c = getopt_long(argc, argv, "c:igLpPsvxDtT:", long_options,
+           NULL)) != -1) {
                switch (c) {
                case 'c':
                        if (cmd != NULL) {
@@ -8935,6 +9113,9 @@ zpool_do_status(int argc, char **argv)
                case 'T':
                        get_timestamp_arg(*optarg);
                        break;
+               case POWER_OPT:
+                       cb.cb_print_power = B_TRUE;
+                       break;
                case '?':
                        if (optopt == 'c') {
                                print_zpool_script_list("status");
index db8e631dc6be89253530197f5dbf20207d45a3df..7f5406f063e1f6a255adf8fc2d5fb882032b884c 100644 (file)
@@ -138,6 +138,9 @@ int check_file(const char *file, boolean_t force, boolean_t isspare);
 void after_zpool_upgrade(zpool_handle_t *zhp);
 int check_file_generic(const char *file, boolean_t force, boolean_t isspare);
 
+int zpool_power(zpool_handle_t *zhp, char *vdev, boolean_t turn_on);
+int zpool_power_current_state(zpool_handle_t *zhp, char *vdev);
+
 #ifdef __cplusplus
 }
 #endif
index 4adfa38e87befc64394d22cafb0410b7146bfbe3..770c5e1f201ce47966a4fca250f905e282ed1d0b 100644 (file)
@@ -318,6 +318,9 @@ _LIBZFS_H int zpool_vdev_remove_wanted(zpool_handle_t *, const char *);
 
 _LIBZFS_H int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
 _LIBZFS_H int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
+_LIBZFS_H int zpool_vdev_set_removed_state(zpool_handle_t *, uint64_t,
+    vdev_aux_t);
+
 _LIBZFS_H int zpool_vdev_clear(zpool_handle_t *, uint64_t);
 
 _LIBZFS_H nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
index 9842c225b6f022412c6e9080c634e3dd51faaf80..839486fb62bfc4e761b1eeb8d4d6ef50f7ad3adc 100644 (file)
@@ -97,6 +97,7 @@ _LIBZUTIL_H int zpool_find_config(libpc_handle_t *, const char *, nvlist_t **,
 _LIBZUTIL_H const char * const * zpool_default_search_paths(size_t *count);
 _LIBZUTIL_H int zpool_read_label(int, nvlist_t **, int *);
 _LIBZUTIL_H int zpool_label_disk_wait(const char *, int);
+_LIBZUTIL_H int zpool_disk_wait(const char *);
 
 struct udev_device;
 
@@ -163,6 +164,8 @@ _LIBZUTIL_H void zfs_niceraw(uint64_t, char *, size_t);
 _LIBZUTIL_H void zpool_dump_ddt(const ddt_stat_t *, const ddt_histogram_t *);
 _LIBZUTIL_H int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***,
     uint_t *);
+_LIBZUTIL_H void fsleep(float sec);
+_LIBZUTIL_H int zpool_getenv_int(const char *env, int default_val);
 
 struct zfs_cmd;
 
@@ -205,6 +208,60 @@ _LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...);
 typedef int (*pool_vdev_iter_f)(void *, nvlist_t *, void *);
 int for_each_vdev_cb(void *zhp, nvlist_t *nv, pool_vdev_iter_f func,
     void *data);
+int for_each_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv, void *data);
+int for_each_real_leaf_vdev_macro_helper_func(void *zhp_data, nvlist_t *nv,
+    void *data);
+/*
+ * Often you'll want to iterate over all the vdevs in the pool, but don't want
+ * to use for_each_vdev() since it requires a callback function.
+ *
+ * Instead you can use FOR_EACH_VDEV():
+ *
+ *     zpool_handle_t *zhp      // Assume this is initialized
+ *     nvlist_t *nv
+ *     ...
+ *     FOR_EACH_VDEV(zhp, nv) {
+ *      const char *path = NULL;
+ *      nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path);
+ *      printf("Looking at vdev %s\n", path);
+ *     }
+ *
+ * Note: FOR_EACH_VDEV runs in O(n^2) time where n = number of vdevs.  However,
+ * there's an upper limit of 256 vdevs per dRAID top-level vdev (TLD), 255 for
+ * raidz2 TLDs, and a real-world limit of ~500 vdevs for mirrors, so this
+ * shouldn't really be an issue.
+ *
+ * Here are some micro-benchmarks of a complete FOR_EACH_VDEV loop on a RAID0
+ * pool:
+ *
+ * 100  vdevs = 0.7ms
+ * 500  vdevs = 17ms
+ * 750  vdevs = 40ms
+ * 1000 vdevs = 82ms
+ *
+ * The '__nv += 0' at the end of the for() loop gets around a "comma or
+ * semicolon followed by non-blank" checkstyle error.  Note that on most
+ * compilers the '__nv += 0' can just be replaced with 'NULL', but gcc on
+ * CentOS 7 will give a 'warning: statement with no effect' error if you
+ * do that.
+ */
+#define        __FOR_EACH_VDEV(__zhp, __nv, __func) { \
+       __nv = zpool_get_config(__zhp, NULL); \
+       VERIFY0(nvlist_lookup_nvlist(__nv, ZPOOL_CONFIG_VDEV_TREE, &__nv)); \
+       } \
+       for (nvlist_t *__root_nv = __nv, *__state = (nvlist_t *)0; \
+           for_each_vdev_cb(&__state, __root_nv, __func, &__nv) == 1; \
+           __nv += 0)
+
+#define        FOR_EACH_VDEV(__zhp, __nv) \
+       __FOR_EACH_VDEV(__zhp, __nv, for_each_vdev_macro_helper_func)
+
+/*
+ * "real leaf" vdevs are leaf vdevs that are real devices (disks or files).
+ * This excludes leaf vdevs like draid spares.
+ */
+#define        FOR_EACH_REAL_LEAF_VDEV(__zhp, __nv) \
+       __FOR_EACH_VDEV(__zhp, __nv, for_each_real_leaf_vdev_macro_helper_func)
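+
+/*
+ * Illustrative use of the macro above (the open pool handle 'zhp' is
+ * assumed):
+ *
+ *	nvlist_t *nv;
+ *	int leaves = 0;
+ *	FOR_EACH_REAL_LEAF_VDEV(zhp, nv)
+ *		leaves++;
+ */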
+
 int for_each_vdev_in_nvlist(nvlist_t *nvroot, pool_vdev_iter_f func,
     void *data);
 void update_vdevs_config_dev_sysfs_path(nvlist_t *config);
index 3c975397ed384c67501e2f0a970272a56d1add79..9bb8f6a47de12e9e86ffdcd02efa6a9890fb4606 100644 (file)
     <elf-symbol name='fletcher_4_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='fletcher_4_native_varsize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='fletcher_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='fsleep' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='get_dataset_depth' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='get_system_hostid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='getexecname' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_disable_datasets_os' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_disable_volume_os' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_discard_checkpoint' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_disk_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_dump_ddt' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_enable_datasets' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_events_clear' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_get_userprop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_get_vdev_prop' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_get_vdev_prop_value' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_getenv_int' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_history_unpack' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_import' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_import_props' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_vdev_script_alloc_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_vdev_script_free_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
+    <elf-symbol name='zpool_vdev_set_removed_state' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
     <qualified-type-def type-id='0897719a' const='yes' id='c4a7b189'/>
     <pointer-type-def type-id='c4a7b189' size-in-bits='64' id='36fca399'/>
     <qualified-type-def type-id='36fca399' restrict='yes' id='37e4897b'/>
-    <qualified-type-def type-id='a9c79a1f' const='yes' id='cd087e36'/>
-    <pointer-type-def type-id='cd087e36' size-in-bits='64' id='e05e8614'/>
     <qualified-type-def type-id='e05e8614' restrict='yes' id='0be2e71c'/>
     <pointer-type-def type-id='8037c762' size-in-bits='64' id='d74a6869'/>
     <qualified-type-def type-id='7292109c' restrict='yes' id='6942f6a4'/>
       <parameter type-id='9d774e0b' name='aux'/>
       <return type-id='95e97e5e'/>
     </function-decl>
+    <function-decl name='zpool_vdev_set_removed_state' mangled-name='zpool_vdev_set_removed_state' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_set_removed_state'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='9c313c2d' name='guid'/>
+      <parameter type-id='9d774e0b' name='aux'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
     <function-decl name='zpool_vdev_attach' mangled-name='zpool_vdev_attach' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_attach'>
       <parameter type-id='4c81de99' name='zhp'/>
       <parameter type-id='80f4b756' name='old_disk'/>
     <qualified-type-def type-id='d33f11cb' restrict='yes' id='5c53ba29'/>
     <pointer-type-def type-id='ffa52b96' size-in-bits='64' id='76c8174b'/>
     <pointer-type-def type-id='f3d87113' size-in-bits='64' id='0d2a0670'/>
+    <function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'>
+      <parameter type-id='b0382bb3'/>
+      <parameter type-id='4c81de99'/>
+      <parameter type-id='80f4b756'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
     <function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
       <return type-id='26a90f95'/>
     </function-decl>
     <function-decl name='libzfs_core_fini' visibility='default' binding='global' size-in-bits='64'>
       <return type-id='48b5725f'/>
     </function-decl>
+    <function-decl name='zfs_get_underlying_path' mangled-name='zfs_get_underlying_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_underlying_path'>
+      <parameter type-id='80f4b756'/>
+      <return type-id='26a90f95'/>
+    </function-decl>
     <function-decl name='zpool_prop_unsupported' mangled-name='zpool_prop_unsupported' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prop_unsupported'>
       <parameter type-id='80f4b756'/>
       <return type-id='c19b74c3'/>
       <parameter type-id='b59d7dce'/>
       <return type-id='95e97e5e'/>
     </function-decl>
+    <function-decl name='access' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='80f4b756'/>
+      <parameter type-id='95e97e5e'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
     <function-decl name='dup2' visibility='default' binding='global' size-in-bits='64'>
       <parameter type-id='95e97e5e'/>
       <parameter type-id='95e97e5e'/>
       <parameter is-variadic='yes'/>
       <return type-id='95e97e5e'/>
     </function-decl>
+    <function-decl name='zpool_vdev_script_alloc_env' mangled-name='zpool_vdev_script_alloc_env' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_script_alloc_env'>
+      <parameter type-id='80f4b756' name='pool_name'/>
+      <parameter type-id='80f4b756' name='vdev_path'/>
+      <parameter type-id='80f4b756' name='vdev_upath'/>
+      <parameter type-id='80f4b756' name='vdev_enc_sysfs_path'/>
+      <parameter type-id='80f4b756' name='opt_key'/>
+      <parameter type-id='80f4b756' name='opt_val'/>
+      <return type-id='9b23c9ad'/>
+    </function-decl>
+    <function-decl name='zpool_vdev_script_free_env' mangled-name='zpool_vdev_script_free_env' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_vdev_script_free_env'>
+      <parameter type-id='9b23c9ad' name='env'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_prepare_disk' mangled-name='zpool_prepare_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prepare_disk'>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='5ce45b60' name='vdev_nv'/>
+      <parameter type-id='80f4b756' name='prepare_str'/>
+      <parameter type-id='c0563f85' name='lines'/>
+      <parameter type-id='7292109c' name='lines_cnt'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
+    <function-decl name='zpool_prepare_and_label_disk' mangled-name='zpool_prepare_and_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_prepare_and_label_disk'>
+      <parameter type-id='b0382bb3' name='hdl'/>
+      <parameter type-id='4c81de99' name='zhp'/>
+      <parameter type-id='80f4b756' name='name'/>
+      <parameter type-id='5ce45b60' name='vdev_nv'/>
+      <parameter type-id='80f4b756' name='prepare_str'/>
+      <parameter type-id='c0563f85' name='lines'/>
+      <parameter type-id='7292109c' name='lines_cnt'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
   </abi-instr>
   <abi-instr address-size='64' path='lib/libzfs/os/linux/libzfs_mount_os.c' language='LANG_C99'>
     <pointer-type-def type-id='7359adad' size-in-bits='64' id='1d2c2b85'/>
       <parameter type-id='95e97e5e'/>
       <return type-id='95e97e5e'/>
     </function-decl>
-    <function-decl name='zpool_label_disk' mangled-name='zpool_label_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_label_disk'>
-      <parameter type-id='b0382bb3' name='hdl'/>
-      <parameter type-id='4c81de99' name='zhp'/>
-      <parameter type-id='80f4b756' name='name'/>
-      <return type-id='95e97e5e'/>
-    </function-decl>
   </abi-instr>
   <abi-instr address-size='64' path='lib/libzfs/os/linux/libzfs_util_os.c' language='LANG_C99'>
     <class-decl name='itimerspec' size-in-bits='256' is-struct='yes' visibility='default' id='acbdbcc6'>
     <pointer-type-def type-id='4ba62af7' size-in-bits='64' id='f39579e7'/>
     <pointer-type-def type-id='acbdbcc6' size-in-bits='64' id='116842ac'/>
     <pointer-type-def type-id='b440e872' size-in-bits='64' id='3ac36db0'/>
-    <function-decl name='access' visibility='default' binding='global' size-in-bits='64'>
-      <parameter type-id='80f4b756'/>
-      <parameter type-id='95e97e5e'/>
-      <return type-id='95e97e5e'/>
-    </function-decl>
     <function-decl name='__poll_chk' visibility='default' binding='global' size-in-bits='64'>
       <parameter type-id='3ac36db0'/>
       <parameter type-id='555eef66'/>
       <parameter type-id='80f4b756' name='dev_name'/>
       <return type-id='c19b74c3'/>
     </function-decl>
-    <function-decl name='zfs_get_underlying_path' mangled-name='zfs_get_underlying_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_get_underlying_path'>
-      <parameter type-id='80f4b756' name='dev_name'/>
-      <return type-id='26a90f95'/>
-    </function-decl>
     <function-decl name='is_mpath_whole_disk' mangled-name='is_mpath_whole_disk' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='is_mpath_whole_disk'>
       <parameter type-id='80f4b756' name='path'/>
       <return type-id='c19b74c3'/>
       <parameter type-id='b59d7dce' name='buflen'/>
       <return type-id='95e97e5e'/>
     </function-decl>
+    <function-decl name='zpool_disk_wait' mangled-name='zpool_disk_wait' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_disk_wait'>
+      <parameter type-id='80f4b756' name='path'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
     <function-decl name='update_vdev_config_dev_sysfs_path' mangled-name='update_vdev_config_dev_sysfs_path' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='update_vdev_config_dev_sysfs_path'>
       <parameter type-id='5ce45b60' name='nv'/>
       <parameter type-id='80f4b756' name='path'/>
       <parameter type-id='95e97e5e'/>
       <return type-id='95e97e5e'/>
     </function-decl>
+    <function-decl name='clearenv' visibility='default' binding='global' size-in-bits='64'>
+      <return type-id='95e97e5e'/>
+    </function-decl>
     <function-decl name='zfs_setproctitle_init' mangled-name='zfs_setproctitle_init' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_setproctitle_init'>
       <parameter type-id='95e97e5e' name='argc'/>
       <parameter type-id='9b23c9ad' name='argv'/>
     <array-type-def dimensions='1' type-id='853fd5dc' size-in-bits='32768' id='b505fc2f'>
       <subrange length='64' type-id='7359adad' id='b10be967'/>
     </array-type-def>
+    <type-decl name='float' size-in-bits='32' id='a6c45d85'/>
     <class-decl name='ddt_stat' size-in-bits='512' is-struct='yes' visibility='default' id='65242dfe'>
       <data-member access='public' layout-offset-in-bits='0'>
         <var-decl name='dds_blocks' type-id='9c313c2d' visibility='default'/>
     <pointer-type-def type-id='ec92d602' size-in-bits='64' id='932720f8'/>
     <qualified-type-def type-id='853fd5dc' const='yes' id='764c298c'/>
     <pointer-type-def type-id='764c298c' size-in-bits='64' id='dfe59052'/>
+    <qualified-type-def type-id='a9c79a1f' const='yes' id='cd087e36'/>
+    <pointer-type-def type-id='cd087e36' size-in-bits='64' id='e05e8614'/>
+    <function-decl name='nanosleep' visibility='default' binding='global' size-in-bits='64'>
+      <parameter type-id='e05e8614'/>
+      <parameter type-id='3d83ba87'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
     <function-decl name='zpool_dump_ddt' mangled-name='zpool_dump_ddt' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_dump_ddt'>
       <parameter type-id='dfe59052' name='dds_total'/>
       <parameter type-id='932720f8' name='ddh'/>
       <return type-id='48b5725f'/>
     </function-decl>
+    <function-decl name='fsleep' mangled-name='fsleep' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fsleep'>
+      <parameter type-id='a6c45d85' name='sec'/>
+      <return type-id='48b5725f'/>
+    </function-decl>
+    <function-decl name='zpool_getenv_int' mangled-name='zpool_getenv_int' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_getenv_int'>
+      <parameter type-id='80f4b756' name='env'/>
+      <parameter type-id='95e97e5e' name='default_val'/>
+      <return type-id='95e97e5e'/>
+    </function-decl>
   </abi-instr>
   <abi-instr address-size='64' path='module/avl/avl.c' language='LANG_C99'>
     <function-decl name='avl_last' mangled-name='avl_last' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='avl_last'>
index 4ebd112f452f7432657bfc8deb5fbd45c1c124b1..2f9ccbc2ab57c5f4da960ac3339a8779792656c2 100644 (file)
@@ -3036,6 +3036,9 @@ zpool_vdev_is_interior(const char *name)
        return (B_FALSE);
 }
 
+/*
+ * Lookup the nvlist for a given vdev.
+ */
 nvlist_t *
 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
     boolean_t *l2cache, boolean_t *log)
@@ -3043,6 +3046,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
        char *end;
        nvlist_t *nvroot, *search, *ret;
        uint64_t guid;
+       boolean_t __avail_spare, __l2cache, __log;
 
        search = fnvlist_alloc();
 
@@ -3058,6 +3062,18 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
        nvroot = fnvlist_lookup_nvlist(zhp->zpool_config,
            ZPOOL_CONFIG_VDEV_TREE);
 
+       /*
+        * User can pass NULL for avail_spare, l2cache, and log, but
+        * we still need to provide variables to vdev_to_nvlist_iter(), so
+        * just point them to junk variables here.
+        */
+       if (!avail_spare)
+               avail_spare = &__avail_spare;
+       if (!l2cache)
+               l2cache = &__l2cache;
+       if (!log)
+               log = &__log;
+
        *avail_spare = B_FALSE;
        *l2cache = B_FALSE;
        if (log != NULL)
@@ -3313,21 +3329,23 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
 }
 
 /*
- * Mark the given vdev degraded.
+ * Generic set vdev state function
  */
-int
-zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
+static int
+zpool_vdev_set_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux,
+    vdev_state_t state)
 {
        zfs_cmd_t zc = {"\0"};
        char errbuf[ERRBUFLEN];
        libzfs_handle_t *hdl = zhp->zpool_hdl;
 
        (void) snprintf(errbuf, sizeof (errbuf),
-           dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
+           dgettext(TEXT_DOMAIN, "cannot set %s %llu"),
+           zpool_state_to_name(state, aux), (u_longlong_t)guid);
 
        (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
        zc.zc_guid = guid;
-       zc.zc_cookie = VDEV_STATE_DEGRADED;
+       zc.zc_cookie = state;
        zc.zc_obj = aux;
 
        if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
@@ -3336,6 +3354,27 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
        return (zpool_standard_error(hdl, errno, errbuf));
 }
 
+/*
+ * Mark the given vdev degraded.
+ */
+int
+zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
+{
+       return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_DEGRADED));
+}
+
+/*
+ * Mark the given vdev as in a removed state (as if the device does not exist).
+ *
+ * This is different than zpool_vdev_remove() which does a removal of a device
+ * from the pool (but the device does exist).
+ */
+int
+zpool_vdev_set_removed_state(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
+{
+       return (zpool_vdev_set_state(zhp, guid, aux, VDEV_STATE_REMOVED));
+}
+
 /*
  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
  * a hot spare.
index fbfae4f7e685cee36066bdea4bf8a03f926096af..bb91dec5acfff934355ca142164db8fee0f4b20c 100644 (file)
@@ -170,25 +170,17 @@ zpool_open_func(void *arg)
        if (rn->rn_labelpaths) {
                const char *path = NULL;
                const char *devid = NULL;
-               const char *env = NULL;
                rdsk_node_t *slice;
                avl_index_t where;
-               int timeout;
                int error;
 
                if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
                        return;
 
-               env = getenv("ZPOOL_IMPORT_UDEV_TIMEOUT_MS");
-               if ((env == NULL) || sscanf(env, "%d", &timeout) != 1 ||
-                   timeout < 0) {
-                       timeout = DISK_LABEL_WAIT;
-               }
-
                /*
                 * Allow devlinks to stabilize so all paths are available.
                 */
-               zpool_label_disk_wait(rn->rn_name, timeout);
+               zpool_disk_wait(rn->rn_name);
 
                if (path != NULL) {
                        slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
@@ -682,6 +674,20 @@ zpool_label_disk_wait(const char *path, int timeout_ms)
 #endif /* HAVE_LIBUDEV */
 }
 
+/*
+ * Simplified version of zpool_label_disk_wait() where we wait for a device
+ * to appear using the default timeouts.
+ */
+int
+zpool_disk_wait(const char *path)
+{
+       int timeout;
+       timeout = zpool_getenv_int("ZPOOL_IMPORT_UDEV_TIMEOUT_MS",
+           DISK_LABEL_WAIT);
+
+       return (zpool_label_disk_wait(path, timeout));
+}
+
 /*
  * Encode the persistent devices strings
  * used for the vdev disk label
@@ -767,6 +773,10 @@ no_dev:
  * in the nvlist * (if applicable).  Like:
  *    vdev_enc_sysfs_path: '/sys/class/enclosure/11:0:1:0/SLOT 4'
  *
+ * If an old path was in the nvlist, and the rescan can not find a new path,
+ * then keep the old path, since the disk may have been removed.
+ *
+ * path: The vdev path (value from ZPOOL_CONFIG_PATH)
  * key: The nvlist_t name (like ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH)
  */
 void
@@ -774,6 +784,9 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
     const char *key)
 {
        char *upath, *spath;
+       const char *oldpath = NULL;
+
+       (void) nvlist_lookup_string(nv, key, &oldpath);
 
        /* Add enclosure sysfs path (if disk is in an enclosure). */
        upath = zfs_get_underlying_path(path);
@@ -782,7 +795,14 @@ update_vdev_config_dev_sysfs_path(nvlist_t *nv, const char *path,
        if (spath) {
                (void) nvlist_add_string(nv, key, spath);
        } else {
-               (void) nvlist_remove_all(nv, key);
+               /*
+                * We couldn't dynamically scan the disk's enclosure sysfs path.
+                * This could be because the disk went away.  If there's an old
+                * enclosure sysfs path in the nvlist, then keep using it.
+                */
+               if (!oldpath) {
+                       (void) nvlist_remove_all(nv, key);
+               }
        }
 
        free(upath);
index f7ef69a1d934505d954c52c3311722eebc334d97..eb91311904586f3402326153724f8f46a80562ee 100644 (file)
@@ -1922,6 +1922,104 @@ zpool_find_config(libpc_handle_t *hdl, const char *target, nvlist_t **configp,
        return (0);
 }
 
+/* Return B_TRUE if a vdev is a leaf vdev.  Note: draid spares are leaf vdevs. */
+static boolean_t
+vdev_is_leaf(nvlist_t *nv)
+{
+       uint_t children = 0;
+       nvlist_t **child;
+
+       (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+           &child, &children);
+
+       return (children == 0);
+}
+
+/* Return B_TRUE if a vdev is a leaf vdev and a real device (disk or file) */
+static boolean_t
+vdev_is_real_leaf(nvlist_t *nv)
+{
+       const char *type = NULL;
+       if (!vdev_is_leaf(nv))
+               return (B_FALSE);
+
+       (void) nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type);
+       if ((strcmp(type, VDEV_TYPE_DISK) == 0) ||
+           (strcmp(type, VDEV_TYPE_FILE) == 0)) {
+               return (B_TRUE);
+       }
+
+       return (B_FALSE);
+}
+
+/*
+ * This function is called by our FOR_EACH_VDEV() macros.
+ *
+ * state:   State machine status (stored inside of a (nvlist_t *))
+ * nv:      The current vdev nvlist_t we are iterating over.
+ * last_nv: The previous vdev nvlist_t we returned to the user in
+ *          the last iteration of FOR_EACH_VDEV().  We use it
+ *          to find the next vdev nvlist_t we should return.
+ * real_leaves_only: Only return leaf vdevs.
+ *
+ * Returns 1 if we found the next vdev nvlist_t for this iteration.  0 if
+ * we're still searching for it.
+ */
+static int
+__for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv,
+    boolean_t real_leaves_only)
+{
+       enum {FIRST_NV = 0, NEXT_IS_MATCH = 1, STOP_LOOKING = 2};
+
+       /* The very first entry in the NV list is a special case */
+       if (*((nvlist_t **)state) == (nvlist_t *)FIRST_NV) {
+               if (real_leaves_only && !vdev_is_real_leaf(nv))
+                       return (0);
+
+               *((nvlist_t **)last_nv) = nv;
+               *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING;
+               return (1);
+       }
+
+       /*
+        * We came across our last_nv, meaning the next one is the one we
+        * want
+        */
+       if (nv == *((nvlist_t **)last_nv)) {
+               /* Next iteration of this function will return the nvlist_t */
+               *((nvlist_t **)state) = (nvlist_t *)NEXT_IS_MATCH;
+               return (0);
+       }
+
+       /*
+        * We marked NEXT_IS_MATCH on the previous iteration, so this is the one
+        * we want.
+        */
+       if (*(nvlist_t **)state == (nvlist_t *)NEXT_IS_MATCH) {
+               if (real_leaves_only && !vdev_is_real_leaf(nv))
+                       return (0);
+
+               *((nvlist_t **)last_nv) = nv;
+               *((nvlist_t **)state) = (nvlist_t *)STOP_LOOKING;
+               return (1);
+       }
+
+       return (0);
+}
+
+int
+for_each_vdev_macro_helper_func(void *state, nvlist_t *nv, void *last_nv)
+{
+       return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_FALSE));
+}
+
+int
+for_each_real_leaf_vdev_macro_helper_func(void *state, nvlist_t *nv,
+    void *last_nv)
+{
+       return (__for_each_vdev_macro_helper_func(state, nv, last_nv, B_TRUE));
+}
+
 /*
  * Internal function for iterating over the vdevs.
  *
index 288a0033cd134cde7b7884c8908b87269c669464..86460de3fc61d724cfcc0d9afd6e68de8fce4e56 100644 (file)
@@ -28,6 +28,7 @@
 #include <string.h>
 #include <sys/nvpair.h>
 #include <sys/fs/zfs.h>
+#include <math.h>
 
 #include <libzutil.h>
 
@@ -144,3 +145,33 @@ zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
        *leftover = bytes_read;
        return (0);
 }
+
+/*
+ * Floating point sleep().  Allows you to pass in a floating point value for
+ * seconds.
+ */
+void
+fsleep(float sec)
+{
+       struct timespec req;
+       req.tv_sec = floor(sec);
+       req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC;
+       nanosleep(&req, NULL);
+}
+
+/*
+ * Get environment variable 'env' and return it as an integer.
+ * If 'env' is not set, then return 'default_val' instead.
+ */
+int
+zpool_getenv_int(const char *env, int default_val)
+{
+       char *str;
+       int val;
+       str = getenv(env);
+       if ((str == NULL) || sscanf(str, "%d", &val) != 1 ||
+           val < 0) {
+               val = default_val;
+       }
+       return (val);
+}
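
Taken together, these two helpers form the polling idiom used by zpool_power()
earlier in this commit; a condensed sketch (condition_met() is hypothetical):

	int timeout_ms = zpool_getenv_int("ZPOOL_POWER_ON_SLOT_TIMEOUT_MS",
	    30000);
	for (int i = 0; i < MAX(1, timeout_ms / 200); i++) {
		if (condition_met())
			break;		/* sysfs value reached its target */
		fsleep(0.200);		/* poll every 200ms */
	}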
index 7b9d40c74ebdc937f079f1d31de0df6e31079332..c61ecae483ac1cbe5779c923308cdfb54102513b 100644 (file)
@@ -36,6 +36,7 @@
 .Sh SYNOPSIS
 .Nm zpool
 .Cm clear
+.Op Fl -power
 .Ar pool
 .Oo Ar device Oc Ns …
 .
@@ -52,6 +53,16 @@ Pools with
 enabled which have been suspended cannot be resumed.
 While the pool was suspended, it may have been imported on
 another host, and resuming I/O could result in pool damage.
+.Bl -tag -width Ds
+.It Fl -power
+Power on the device's slot in the storage enclosure and wait for the device
+to show up before attempting to clear errors.
+This is done on all the devices specified.
+Alternatively, you can set the
+.Sy ZPOOL_AUTO_POWER_ON_SLOT
+environment variable to always enable this behavior.
+Note: This flag currently works on Linux only.
+.El
 .
 .Sh SEE ALSO
 .Xr zdb 8 ,
index edcf1d06ab6719687d46f37702e5d73a9a81b64b..1b6095d63709a296e6d86c39a9ca771682fa918b 100644 (file)
 .Sh SYNOPSIS
 .Nm zpool
 .Cm offline
-.Op Fl ft
+.Op Fl Sy -power Ns | Ns Op Fl Sy ft
 .Ar pool
 .Ar device Ns …
 .Nm zpool
 .Cm online
-.Op Fl e
+.Op Fl Sy -power
+.Op Fl Sy e
 .Ar pool
 .Ar device Ns …
 .
@@ -50,7 +51,7 @@
 .It Xo
 .Nm zpool
 .Cm offline
-.Op Fl ft
+.Op Fl Sy -power Ns | Ns Op Fl Sy ft
 .Ar pool
 .Ar device Ns …
 .Xc
@@ -60,6 +61,9 @@ While the
 is offline, no attempt is made to read or write to the device.
 This command is not applicable to spares.
 .Bl -tag -width Ds
+.It Fl -power
+Power off the device's slot in the storage enclosure.
+This flag currently works on Linux only.
 .It Fl f
 Force fault.
 Instead of offlining the disk, put it into a faulted state.
@@ -73,6 +77,7 @@ Upon reboot, the specified physical device reverts to its previous state.
 .It Xo
 .Nm zpool
 .Cm online
+.Op Fl -power
 .Op Fl e
 .Ar pool
 .Ar device Ns …
@@ -80,6 +85,13 @@ Upon reboot, the specified physical device reverts to its previous state.
 Brings the specified physical device online.
 This command is not applicable to spares.
 .Bl -tag -width Ds
+.It Fl -power
+Power on the device's slot in the storage enclosure and wait for the device
+to show up before attempting to online it.
+Alternatively, you can set the
+.Sy ZPOOL_AUTO_POWER_ON_SLOT
+environment variable to always enable this behavior.
+This flag currently works on Linux only.
 .It Fl e
 Expand the device to use all available space.
 If the device is part of a mirror or raidz then all devices must be expanded
index 10424b9f5b5dc1b42341906bbe8f8dfe0db3c9e9..56fa4aed057b60c6437cfce650f833ad2a7451ba 100644 (file)
@@ -57,6 +57,8 @@ and the estimated time to completion.
 Both of these are only approximate, because the amount of data in the pool and
 the other workloads on the system can change.
 .Bl -tag -width Ds
+.It Fl -power
+Display vdev enclosure slot power status (on or off).
 .It Fl c Op Ar SCRIPT1 Ns Oo , Ns Ar SCRIPT2 Oc Ns …
 Run a script (or scripts) on each vdev and include the output as a new column
 in the
index 4c4020bdd810e3a5f593b7570e845f04331c6412..fe44e15cabe18ed5a23f913cdfec62127cf48cc5 100644 (file)
@@ -444,7 +444,7 @@ rpool       14.6G  54.9G      4     55   250K  2.69M
 .Ed
 .
 .Sh ENVIRONMENT VARIABLES
-.Bl -tag -compact -width "ZPOOL_IMPORT_UDEV_TIMEOUT_MS"
+.Bl -tag -compact -width "ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE"
 .It Sy ZFS_ABORT
 Cause
 .Nm
@@ -456,6 +456,23 @@ Use ANSI color in
 and
 .Nm zpool Cm iostat
 output.
+.It Sy ZPOOL_AUTO_POWER_ON_SLOT
+Automatically attempt to turn on a drive's enclosure slot power when
+running the
+.Nm zpool Cm online
+or
+.Nm zpool Cm clear
+commands.
+This has the same effect as passing the
+.Fl -power
+option to those commands.
+.It Sy ZPOOL_POWER_ON_SLOT_TIMEOUT_MS
+The maximum time in milliseconds to wait for a slot power sysfs value
+to return the correct value after writing it.
+For example, after writing "on" to the sysfs enclosure slot power_control file,
+it can take some time for the enclosure to power down the slot and return
+"on" if you read back the 'power_control' value.
+Defaults to 30 seconds (30000ms) if not set.
 .It Sy ZPOOL_IMPORT_PATH
 The search path for devices or files to use with the pool.
 This is a colon-separated list of directories in which