git.proxmox.com Git - mirror_zfs.git/commitdiff
zed: add hotplug support for spare vdevs
author Ameer Hamza <106930537+ixhamza@users.noreply.github.com>
Mon, 9 Jan 2023 20:43:03 +0000 (01:43 +0500)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Mon, 27 Mar 2023 18:32:09 +0000 (11:32 -0700)
This commit adds hotplug support for spare vdevs.
A spare vdev, in every pool it is associated with,
will be marked as "Removed" when the drive is
physically detached and will become "Available"
when the drive is reattached. Currently, the spare
vdev status does not change when the drive is
removed, nor when it is reattached.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #14295

cmd/zed/agents/zfs_agents.c
cmd/zed/agents/zfs_mod.c
cmd/zed/agents/zfs_retire.c
include/sys/fs/zfs.h
lib/libzfs/libzfs_pool.c
module/zfs/spa_config.c

index c8774010d5eb2c068f8629ce8b9dfb484b0916b4..e148ae52dbf06b872389ea26775a09a7fcaed196 100644 (file)
@@ -170,7 +170,7 @@ zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
        }
 
        zpool_close(zhp);
-       return (gsp->gs_vdev_guid != 0);
+       return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0);
 }
 
 void
index 1945c298e6bc2ce19902d3c27c24ca269db08b2e..f67fd96af04509dd49b5389b7a6329b7190f457c 100644 (file)
@@ -185,10 +185,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
        uint64_t wholedisk = 0ULL;
        uint64_t offline = 0ULL, faulted = 0ULL;
        uint64_t guid = 0ULL;
+       uint64_t is_spare = 0;
        char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
        char rawpath[PATH_MAX], fullpath[PATH_MAX];
        char devpath[PATH_MAX];
        int ret;
+       int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
        boolean_t is_sd = B_FALSE;
        boolean_t is_mpath_wholedisk = B_FALSE;
        uint_t c;
@@ -214,6 +216,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
        (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted);
 
        (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);
+       (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_IS_SPARE, &is_spare);
 
        /*
         * Special case:
@@ -304,11 +307,13 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
                }
        }
 
+       if (is_spare)
+               online_flag |= ZFS_ONLINE_SPARE;
+
        /*
         * Attempt to online the device.
         */
-       if (zpool_vdev_online(zhp, fullpath,
-           ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
+       if (zpool_vdev_online(zhp, fullpath, online_flag, &newstate) == 0 &&
            (newstate == VDEV_STATE_HEALTHY ||
            newstate == VDEV_STATE_DEGRADED)) {
                zed_log_msg(LOG_INFO,
@@ -527,6 +532,7 @@ typedef struct dev_data {
        uint64_t                dd_vdev_guid;
        uint64_t                dd_new_vdev_guid;
        const char              *dd_new_devid;
+       uint64_t                dd_num_spares;
 } dev_data_t;
 
 static void
@@ -537,6 +543,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
        uint_t c, children;
        nvlist_t **child;
        uint64_t guid = 0;
+       uint64_t isspare = 0;
 
        /*
         * First iterate over any children.
@@ -562,7 +569,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
        }
 
        /* once a vdev was matched and processed there is nothing left to do */
-       if (dp->dd_found)
+       if (dp->dd_found && dp->dd_num_spares == 0)
                return;
        (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &guid);
 
@@ -612,6 +619,10 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
                }
        }
 
+       if (dp->dd_found == B_TRUE && nvlist_lookup_uint64(nvl,
+           ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
+               dp->dd_num_spares++;
+
        (dp->dd_func)(zhp, nvl, dp->dd_islabeled);
 }
 
@@ -672,7 +683,9 @@ zfs_iter_pool(zpool_handle_t *zhp, void *data)
        }
 
        zpool_close(zhp);
-       return (dp->dd_found);  /* cease iteration after a match */
+
+       /* cease iteration after a match */
+       return (dp->dd_found && dp->dd_num_spares == 0);
 }
 
 /*
index 3b71a63e0a772320a57f6596722c314dbbbd2ec9..b4794e31193f1ec1d2ea216ec78fec775dac342f 100644 (file)
@@ -75,6 +75,8 @@ typedef struct find_cbdata {
        uint64_t        cb_guid;
        zpool_handle_t  *cb_zhp;
        nvlist_t        *cb_vdev;
+       uint64_t        cb_vdev_guid;
+       uint64_t        cb_num_spares;
 } find_cbdata_t;
 
 static int
@@ -140,6 +142,64 @@ find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, uint64_t search_guid)
        return (NULL);
 }
 
+/*
+ * zpool_iter() callback used by find_and_remove_spares().
+ *
+ * Looks through the pool's ZPOOL_CONFIG_SPARES array for the spare whose
+ * guid equals cbp->cb_vdev_guid.  If that spare is not already in
+ * VDEV_STATE_REMOVED and zpool_vdev_remove_wanted() succeeds for it,
+ * cbp->cb_num_spares is incremented.  At most one spare per pool is
+ * examined, since the search stops at the first guid match.
+ *
+ * zhp  - pool handle; it is closed on every path before returning.
+ * data - find_cbdata_t carrying cb_vdev_guid (in) and cb_num_spares (out).
+ *
+ * Always returns 0.
+ */
+static int
+remove_spares(zpool_handle_t *zhp, void *data)
+{
+       nvlist_t *config, *nvroot;
+       nvlist_t **spares;
+       uint_t nspares;
+       find_cbdata_t *cbp = data;
+
+       config = zpool_get_config(zhp, NULL);
+       if (nvlist_lookup_nvlist(config,
+           ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) {
+               zpool_close(zhp);
+               return (0);
+       }
+
+       /* A pool without a spares array has nothing for us to do. */
+       if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+           &spares, &nspares) != 0) {
+               zpool_close(zhp);
+               return (0);
+       }
+
+       /* nspares is unsigned, so the index is unsigned as well. */
+       for (uint_t i = 0; i < nspares; i++) {
+               uint64_t spareguid = 0;
+               vdev_stat_t *vs = NULL;
+               unsigned int c = 0;
+               char *devname;
+
+               if (nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID,
+                   &spareguid) != 0 || spareguid != cbp->cb_vdev_guid)
+                       continue;
+
+               /*
+                * Without vdev stats the current state is unknown, so
+                * leave the spare alone rather than dereference an
+                * unset pointer.
+                */
+               if (nvlist_lookup_uint64_array(spares[i],
+                   ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0 &&
+                   vs != NULL && vs->vs_state != VDEV_STATE_REMOVED) {
+                       devname = zpool_vdev_name(NULL, zhp, spares[i],
+                           B_FALSE);
+                       if (devname != NULL &&
+                           zpool_vdev_remove_wanted(zhp, devname) == 0)
+                               cbp->cb_num_spares++;
+                       /*
+                        * Per libzfs, the name string is allocated for
+                        * the caller, who must release it.
+                        */
+                       free(devname);
+               }
+               break;
+       }
+
+       zpool_close(zhp);
+       return (0);
+}
+
+/*
+ * Given a vdev guid, find and remove all spares associated with it.
+ *
+ * Every pool known to zhdl is visited through zpool_iter() with
+ * remove_spares() as the callback.  The return value is the counter
+ * that callback accumulates in cb_num_spares.
+ */
+static int
+find_and_remove_spares(libzfs_handle_t *zhdl, uint64_t vdev_guid)
+{
+       find_cbdata_t search = {
+               .cb_vdev_guid = vdev_guid,
+               .cb_num_spares = 0,
+       };
+
+       zpool_iter(zhdl, remove_spares, &search);
+
+       /* The counter is 64-bit; the interface reports it as an int. */
+       return ((int)search.cb_num_spares);
+}
+
 /*
  * Given a (pool, vdev) GUID pair, find the matching pool and vdev.
  */
@@ -315,6 +375,8 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
        libzfs_handle_t *zhdl = zdp->zrd_hdl;
        boolean_t fault_device, degrade_device;
        boolean_t is_repair;
+       boolean_t l2arc = B_FALSE;
+       boolean_t spare = B_FALSE;
        char *scheme;
        nvlist_t *vdev = NULL;
        char *uuid;
@@ -323,7 +385,6 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
        boolean_t is_disk;
        vdev_aux_t aux;
        uint64_t state = 0;
-       int l2arc;
        vdev_stat_t *vs;
        unsigned int c;
 
@@ -342,10 +403,26 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
                char *devtype;
                char *devname;
 
+               if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
+                   &devtype) == 0) {
+                       if (strcmp(devtype, VDEV_TYPE_SPARE) == 0)
+                               spare = B_TRUE;
+                       else if (strcmp(devtype, VDEV_TYPE_L2CACHE) == 0)
+                               l2arc = B_TRUE;
+               }
+
+               if (nvlist_lookup_uint64(nvl,
+                   FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
+                       return;
+
+               if (spare) {
+                       int nspares = find_and_remove_spares(zhdl, vdev_guid);
+                       fmd_hdl_debug(hdl, "%d spares removed", nspares);
+                       return;
+               }
+
                if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
-                   &pool_guid) != 0 ||
-                   nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
-                   &vdev_guid) != 0)
+                   &pool_guid) != 0)
                        return;
 
                if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
@@ -366,10 +443,6 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
                    state == VDEV_STATE_REMOVED)
                        return;
 
-               l2arc = (nvlist_lookup_string(nvl,
-                   FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, &devtype) == 0 &&
-                   strcmp(devtype, VDEV_TYPE_L2CACHE) == 0);
-
                /* Remove the vdev since device is unplugged */
                if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
                        int status = zpool_vdev_remove_wanted(zhp, devname);
index f4dc2ab465364579123c9f3b259ef92a1e020772..111e70ece1516ca8125b05601931e3a09ee3c473 100644 (file)
@@ -1518,6 +1518,7 @@ typedef enum {
 #define        ZFS_ONLINE_UNSPARE      0x2
 #define        ZFS_ONLINE_FORCEFAULT   0x4
 #define        ZFS_ONLINE_EXPAND       0x8
+#define        ZFS_ONLINE_SPARE        0x10
 #define        ZFS_OFFLINE_TEMPORARY   0x1
 
 /*
index c8659c5fe2e3ffa4955eea35379fa3de016bebc4..29f077841da0b318a3f2367c6341d39255f5f0e2 100644 (file)
@@ -3051,7 +3051,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
 
        verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
-       if (avail_spare)
+       if (!(flags & ZFS_ONLINE_SPARE) && avail_spare)
                return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
        if ((flags & ZFS_ONLINE_EXPAND ||
@@ -3184,9 +3184,6 @@ zpool_vdev_remove_wanted(zpool_handle_t *zhp, const char *path)
 
        zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
 
-       if (avail_spare)
-               return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
-
        zc.zc_cookie = VDEV_STATE_REMOVED;
 
        if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
index 432f8b8f3696d9eeda1819f4fb4348dfa20d21a3..c4282b0cf3a8efdd309bd193dbc887b07f854821 100644 (file)
@@ -354,6 +354,8 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
                vdev_post_kobj_evt(target->spa_root_vdev);
                for (int i = 0; i < target->spa_l2cache.sav_count; i++)
                        vdev_post_kobj_evt(target->spa_l2cache.sav_vdevs[i]);
+               for (int i = 0; i < target->spa_spares.sav_count; i++)
+                       vdev_post_kobj_evt(target->spa_spares.sav_vdevs[i]);
        }
 }