]> git.proxmox.com Git - mirror_zfs.git/commitdiff
zed: Fix config_sync autoexpand flood
authorTony Hutter <hutter2@llnl.gov>
Thu, 8 Sep 2022 17:32:30 +0000 (10:32 -0700)
committerGitHub <noreply@github.com>
Thu, 8 Sep 2022 17:32:30 +0000 (10:32 -0700)
Users were seeing floods of `config_sync` events when autoexpand was
enabled.  This happened because all "disk status change" udev events
invoke the autoexpand codepath, which calls zpool_relabel_disk(),
which in turn cause another "disk status change" event to happen,
in a feedback loop.  Note that "disk status change" happens every time
a user calls close() on a block device.

This commit breaks the feedback loop by only allowing an autoexpand
to happen if the disk actually changed size.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes: #7132
Closes: #7366
Closes #13729

cmd/zed/agents/zfs_mod.c
cmd/zed/zed_disk_event.c
include/sys/sysevent/dev.h

index d75854f2875a8ff9df46fbc002bbd2a18e1c459c..7364dd2c6286f7e17f72a733d9e0f911175886a9 100644 (file)
@@ -894,14 +894,90 @@ zfs_deliver_check(nvlist_t *nvl)
        return (0);
 }
 
+/*
+ * Given a path to a vdev, lookup the vdev's physical size from its
+ * config nvlist.
+ *
+ * Returns the vdev's physical size in bytes on success, 0 on error.
+ */
+static uint64_t
+vdev_size_from_config(zpool_handle_t *zhp, const char *vdev_path)
+{
+       nvlist_t *nvl = NULL;
+       boolean_t avail_spare, l2cache, log;
+       vdev_stat_t *vs = NULL;
+       uint_t c;
+
+       nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
+       if (!nvl)
+               return (0);
+
+       verify(nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_VDEV_STATS,
+           (uint64_t **)&vs, &c) == 0);
+       if (!vs) {
+               zed_log_msg(LOG_INFO, "%s: no nvlist for '%s'", __func__,
+                   vdev_path);
+               return (0);
+       }
+
+       return (vs->vs_pspace);
+}
+
+/*
+ * Given a path to a vdev, lookup if the vdev is a "whole disk" in the
+ * config nvlist.  "whole disk" means that ZFS was passed a whole disk
+ * at pool creation time, which it partitioned up and has full control over.
+ * Thus a partition with wholedisk=1 set tells us that zfs created the
+ * partition at creation time.  A partition without whole disk set would have
+ * been created by externally (like with fdisk) and passed to ZFS.
+ *
+ * Returns the whole disk value (either 0 or 1).
+ */
+static uint64_t
+vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path)
+{
+       nvlist_t *nvl = NULL;
+       boolean_t avail_spare, l2cache, log;
+       uint64_t wholedisk;
+
+       nvl = zpool_find_vdev(zhp, vdev_path, &avail_spare, &l2cache, &log);
+       if (!nvl)
+               return (0);
+
+       verify(nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_WHOLE_DISK,
+           &wholedisk) == 0);
+
+       return (wholedisk);
+}
+
+/*
+ * If the device size grew more than 1% then return true.
+ */
+#define        DEVICE_GREW(oldsize, newsize) \
+                   ((newsize > oldsize) && \
+                   ((newsize / (newsize - oldsize)) <= 100))
+
 static int
 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
 {
-       char *devname = data;
        boolean_t avail_spare, l2cache;
+       nvlist_t *udev_nvl = data;
        nvlist_t *tgt;
        int error;
 
+       char *tmp_devname, devname[MAXPATHLEN];
+       uint64_t guid;
+
+       if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
+               sprintf(devname, "%llu", (u_longlong_t)guid);
+       } else if (nvlist_lookup_string(udev_nvl, DEV_PHYS_PATH,
+           &tmp_devname) == 0) {
+               strlcpy(devname, tmp_devname, MAXPATHLEN);
+               zfs_append_partition(devname, MAXPATHLEN);
+       } else {
+               zed_log_msg(LOG_INFO, "%s: no guid or physpath", __func__);
+       }
+
        zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
            devname, zpool_get_name(zhp));
 
@@ -953,12 +1029,75 @@ zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
                        vdev_state_t newstate;
 
                        if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
-                               error = zpool_vdev_online(zhp, fullpath, 0,
-                                   &newstate);
-                               zed_log_msg(LOG_INFO, "zfsdle_vdev_online: "
-                                   "setting device '%s' to ONLINE state "
-                                   "in pool '%s': %d", fullpath,
-                                   zpool_get_name(zhp), error);
+                               /*
+                                * If this disk size has not changed, then
+                                * there's no need to do an autoexpand.  To
+                                * check we look at the disk's size in its
+                                * config, and compare it to the disk size
+                                * that udev is reporting.
+                                */
+                               uint64_t udev_size = 0, conf_size = 0,
+                                   wholedisk = 0, udev_parent_size = 0;
+
+                               /*
+                                * Get the size of our disk that udev is
+                                * reporting.
+                                */
+                               if (nvlist_lookup_uint64(udev_nvl, DEV_SIZE,
+                                   &udev_size) != 0) {
+                                       udev_size = 0;
+                               }
+
+                               /*
+                                * Get the size of our disk's parent device
+                                * from udev (where sda1's parent is sda).
+                                */
+                               if (nvlist_lookup_uint64(udev_nvl,
+                                   DEV_PARENT_SIZE, &udev_parent_size) != 0) {
+                                       udev_parent_size = 0;
+                               }
+
+                               conf_size = vdev_size_from_config(zhp,
+                                   fullpath);
+
+                               wholedisk = vdev_whole_disk_from_config(zhp,
+                                   fullpath);
+
+                               /*
+                                * Only attempt an autoexpand if the vdev size
+                                * changed.  There are two different cases
+                                * to consider.
+                                *
+                                * 1. wholedisk=1
+                                * If you do a 'zpool create' on a whole disk
+                                * (like /dev/sda), then zfs will create
+                                * partitions on the disk (like /dev/sda1).  In
+                                * that case, wholedisk=1 will be set in the
+                                * partition's nvlist config.  So zed will need
+                                * to see if your parent device (/dev/sda)
+                                * expanded in size, and if so, then attempt
+                                * the autoexpand.
+                                *
+                                * 2. wholedisk=0
+                                * If you do a 'zpool create' on an existing
+                                * partition, or a device that doesn't allow
+                                * partitions, then wholedisk=0, and you will
+                                * simply need to check if the device itself
+                                * expanded in size.
+                                */
+                               if (DEVICE_GREW(conf_size, udev_size) ||
+                                   (wholedisk && DEVICE_GREW(conf_size,
+                                   udev_parent_size))) {
+                                       error = zpool_vdev_online(zhp, fullpath,
+                                           0, &newstate);
+
+                                       zed_log_msg(LOG_INFO,
+                                           "%s: autoexpanding '%s' from %llu"
+                                           " to %llu bytes in pool '%s': %d",
+                                           __func__, fullpath, conf_size,
+                                           MAX(udev_size, udev_parent_size),
+                                           zpool_get_name(zhp), error);
+                               }
                        }
                }
                zpool_close(zhp);
@@ -989,7 +1128,7 @@ zfs_deliver_dle(nvlist_t *nvl)
                zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
        }
 
-       if (zpool_iter(g_zfshdl, zfsdle_vdev_online, name) != 1) {
+       if (zpool_iter(g_zfshdl, zfsdle_vdev_online, nvl) != 1) {
                zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
                    "found", name);
                return (1);
index 8845c5b2d00944d8109756b0e4574b8b07bd64fc..3c8e2fb38c151306bffe29891b29278385505844 100644 (file)
@@ -78,6 +78,8 @@ zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
                zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
        if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
                zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
+       if (nvlist_lookup_uint64(nvl, DEV_PARENT_SIZE, &numval) == 0)
+               zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_PARENT_SIZE, numval);
        if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
                zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
        if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
@@ -130,6 +132,20 @@ dev_event_nvlist(struct udev_device *dev)
 
                numval *= strtoull(value, NULL, 10);
                (void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
+
+               /*
+                * If the device has a parent, then get the parent block
+                * device's size as well.  For example, /dev/sda1's parent
+                * is /dev/sda.
+                */
+               struct udev_device *parent_dev = udev_device_get_parent(dev);
+               if ((value = udev_device_get_sysattr_value(parent_dev, "size"))
+                   != NULL) {
+                       uint64_t numval = DEV_BSIZE;
+
+                       numval *= strtoull(value, NULL, 10);
+                       (void) nvlist_add_uint64(nvl, DEV_PARENT_SIZE, numval);
+               }
        }
 
        /*
index da6539b4a0d5b029719f193ddf1fe9182f2c5b19..0783d0073162452763bd4af95c6ceb1c84343bb9 100644 (file)
@@ -244,6 +244,9 @@ extern "C" {
 #define        DEV_PATH                "path"
 #define        DEV_IS_PART             "is_slice"
 #define        DEV_SIZE                "dev_size"
+
+/* Size of the whole parent block device (if dev is a partition) */
+#define        DEV_PARENT_SIZE         "dev_parent_size"
 #endif /* __linux__ */
 
 #define        EV_V1                   1