]> git.proxmox.com Git - mirror_zfs.git/commitdiff
zpool import progress kstat
authorOlaf Faaland <faaland1@llnl.gov>
Thu, 9 May 2019 17:08:05 +0000 (10:08 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Thu, 9 May 2019 17:08:05 +0000 (10:08 -0700)
When an import requires a long MMP activity check, or when the user
requests pool recovery, the import may take a long time.  The user may
not know why, or be able to tell whether the import is progressing or is
hung.

Add a kstat which lists all imports currently being processed by the
kernel (currently only one at a time is possible, but the kstat allows
for more than one).  The kstat is /proc/spl/kstat/zfs/import_progress.

The kstat contents are as follows:
pool_guid         load_state multihost_secs  max_txg pool_name
16667015954387398 3          15              0       tank3

load_state: the value of spa_load_state
multihost_secs:  seconds until the end of the multihost activity
                 check; if over, or none required, this is 0
max_txg: current spa_load_max_txg, if rewind is occurring

This could be used by outside tools, such as a pacemaker resource agent,
to report import progress, or as a part of manual troubleshooting.  The
zpool import subcommand could also be modified to report this
information.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #8696

include/sys/spa.h
module/zfs/spa.c
module/zfs/spa_misc.c

index 343977b30a3829b8d1d15ac9dc6a15b60e24d567..23434edbc72ef9c1d567a83912b8b00bd244e483 100644 (file)
@@ -965,6 +965,14 @@ extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type,
     uint64_t extents_written, uint64_t bytes_written,
     uint64_t extents_skipped, uint64_t bytes_skipped,
     uint64_t extents_failed, uint64_t bytes_failed);
+/*
+ * Import progress tracking.  Entries are keyed by pool guid; parameter
+ * names match the definitions and avoid shadowing the spa_guid() and
+ * spa_load_state() accessors.
+ */
+extern void spa_import_progress_add(spa_t *spa);
+extern void spa_import_progress_remove(uint64_t pool_guid);
+extern int spa_import_progress_set_mmp_check(uint64_t pool_guid,
+    uint64_t mmp_sec_remaining);
+extern int spa_import_progress_set_max_txg(uint64_t pool_guid,
+    uint64_t load_max_txg);
+extern int spa_import_progress_set_state(uint64_t pool_guid,
+    spa_load_state_t load_state);
 
 /* Pool configuration locks */
 extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
index 4d26d698e09949d8b467d9714528d2f31e26e55a..eb3ff91a073c8af8c3365d8beb5ff4154abc8f52 100644 (file)
@@ -1437,6 +1437,7 @@ spa_unload(spa_t *spa)
 
        ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
+       spa_import_progress_remove(spa_guid(spa));
        spa_load_note(spa, "UNLOADING");
 
        /*
@@ -2375,6 +2376,8 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
        int error;
 
        spa->spa_load_state = state;
+       (void) spa_import_progress_set_state(spa_guid(spa),
+           spa_load_state(spa));
 
        gethrestime(&spa->spa_loaded_ts);
        error = spa_load_impl(spa, type, &ereport);
@@ -2397,6 +2400,9 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
        spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
        spa->spa_ena = 0;
 
+       (void) spa_import_progress_set_state(spa_guid(spa),
+           spa_load_state(spa));
+
        return (error);
 }
 
@@ -2469,6 +2475,7 @@ spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label,
         */
        if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0)
                return (B_FALSE);
+
        /*
         * If the tryconfig_ values are nonzero, they are the results of an
         * earlier tryimport.  If they all match the uberblock we just found,
@@ -2617,10 +2624,14 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
        import_delay = spa_activity_check_duration(spa, ub);
 
        /* Add a small random factor in case of simultaneous imports (0-25%) */
-       import_expire = gethrtime() + import_delay +
-           (import_delay * spa_get_random(250) / 1000);
+       import_delay += import_delay * spa_get_random(250) / 1000;
+
+       import_expire = gethrtime() + import_delay;
 
        while (gethrtime() < import_expire) {
+               (void) spa_import_progress_set_mmp_check(spa_guid(spa),
+                   NSEC2SEC(import_expire - gethrtime()));
+
                vdev_uberblock_load(rvd, ub, &mmp_label);
 
                if (txg != ub->ub_txg || timestamp != ub->ub_timestamp ||
@@ -2987,6 +2998,10 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
                return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
        }
 
+       if (spa->spa_load_max_txg != UINT64_MAX) {
+               (void) spa_import_progress_set_max_txg(spa_guid(spa),
+                   (u_longlong_t)spa->spa_load_max_txg);
+       }
        spa_load_note(spa, "using uberblock with txg=%llu",
            (u_longlong_t)ub->ub_txg);
 
@@ -3916,6 +3931,8 @@ spa_ld_mos_init(spa_t *spa, spa_import_type_t type)
        if (error != 0)
                return (error);
 
+       spa_import_progress_add(spa);
+
        /*
         * Now that we have the vdev tree, try to open each vdev. This involves
         * opening the underlying physical device, retrieving its geometry and
@@ -4346,6 +4363,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                spa_config_exit(spa, SCL_CONFIG, FTAG);
        }
 
+       spa_import_progress_remove(spa_guid(spa));
        spa_load_note(spa, "LOADED");
 
        return (0);
@@ -4406,6 +4424,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
                 * from previous txgs when spa_load fails.
                 */
                ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
+               spa_import_progress_remove(spa_guid(spa));
                return (load_error);
        }
 
@@ -4417,6 +4436,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
 
        if (rewind_flags & ZPOOL_NEVER_REWIND) {
                nvlist_free(config);
+               spa_import_progress_remove(spa_guid(spa));
                return (load_error);
        }
 
@@ -4459,6 +4479,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
 
        if (state == SPA_LOAD_RECOVER) {
                ASSERT3P(loadinfo, ==, NULL);
+               spa_import_progress_remove(spa_guid(spa));
                return (rewind_error);
        } else {
                /* Store the rewind info as part of the initial load info */
@@ -4469,6 +4490,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
                fnvlist_free(spa->spa_load_info);
                spa->spa_load_info = loadinfo;
 
+               spa_import_progress_remove(spa_guid(spa));
                return (load_error);
        }
 }
index f4497ca1cbe2606d47cff4e3b4f14c8dbf8be187..e2d1ae3fcf130341082b8068e262d0a76b921353 100644 (file)
@@ -2019,6 +2019,214 @@ spa_dirty_data(spa_t *spa)
        return (spa->spa_dsl_pool->dp_dirty_total);
 }
 
+/*
+ * ==========================================================================
+ * SPA Import Progress Routines
+ * ==========================================================================
+ */
+
+typedef struct spa_import_progress {
+       uint64_t                pool_guid;      /* unique id for updates */
+       char                    *pool_name;     /* spa_strdup() copy, released with spa_strfree() */
+       spa_load_state_t        spa_load_state; /* printed in the load_state column */
+       uint64_t                mmp_sec_remaining;      /* MMP activity check */
+       uint64_t                spa_load_max_txg;       /* rewind txg */
+       procfs_list_node_t      smh_node;       /* list linkage; its offset is passed to procfs_list_install() */
+} spa_import_progress_t;
+
+spa_history_list_t *spa_import_progress_list = NULL;    /* allocated by spa_import_progress_init() */
+
+/*
+ * Print the column titles for the import_progress kstat.  Always
+ * returns 0.
+ */
+static int
+spa_import_progress_show_header(struct seq_file *f)
+{
+       seq_printf(f, "%-20s %-14s %-14s %-12s %s\n",
+           "pool_guid", "load_state", "multihost_secs", "max_txg",
+           "pool_name");
+
+       return (0);
+}
+
+/*
+ * Print one row of the import_progress kstat.  'data' is the
+ * spa_import_progress_t being displayed; a missing pool name is shown
+ * as "-".  Always returns 0.
+ */
+static int
+spa_import_progress_show(struct seq_file *f, void *data)
+{
+       spa_import_progress_t *entry = (spa_import_progress_t *)data;
+       const char *name = "-";
+
+       if (entry->pool_name)
+               name = entry->pool_name;
+
+       seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n",
+           (u_longlong_t)entry->pool_guid,
+           (u_longlong_t)entry->spa_load_state,
+           (u_longlong_t)entry->mmp_sec_remaining,
+           (u_longlong_t)entry->spa_load_max_txg,
+           name);
+
+       return (0);
+}
+
+/*
+ * Shrink the list to at most 'size' entries by repeatedly removing the
+ * head element, freeing its pool name (if any) and then the entry.
+ */
+static void
+spa_import_progress_truncate(spa_history_list_t *shl, unsigned int size)
+{
+       for (; shl->size > size; shl->size--) {
+               spa_import_progress_t *victim;
+
+               victim = list_remove_head(&shl->procfs_list.pl_list);
+               if (victim->pool_name)
+                       spa_strfree(victim->pool_name);
+               kmem_free(victim, sizeof (spa_import_progress_t));
+       }
+
+       IMPLY(size == 0, list_is_empty(&shl->procfs_list.pl_list));
+}
+
+/*
+ * Allocate the global import progress list and register it as the
+ * "import_progress" entry under the "zfs" kstat module.
+ */
+static void
+spa_import_progress_init(void)
+{
+       spa_history_list_t *shl;
+
+       shl = kmem_zalloc(sizeof (spa_history_list_t), KM_SLEEP);
+       spa_import_progress_list = shl;
+
+       shl->size = 0;
+       shl->procfs_list.pl_private = shl;
+
+       procfs_list_install("zfs", "import_progress", 0644,
+           &shl->procfs_list,
+           spa_import_progress_show, spa_import_progress_show_header,
+           NULL, offsetof(spa_import_progress_t, smh_node));
+}
+
+/*
+ * Tear down the global import progress list: unregister the kstat,
+ * free any remaining entries, destroy the embedded procfs list, and
+ * only then release the container.
+ */
+static void
+spa_import_progress_destroy(void)
+{
+       spa_history_list_t *shl = spa_import_progress_list;
+       procfs_list_uninstall(&shl->procfs_list);
+       spa_import_progress_truncate(shl, 0);
+       /*
+        * procfs_list is embedded in *shl, so it must be destroyed
+        * before shl is freed; the reverse order is a use-after-free.
+        */
+       procfs_list_destroy(&shl->procfs_list);
+       kmem_free(shl, sizeof (spa_history_list_t));
+}
+
+/*
+ * Record 'load_state' on the entry whose guid is 'pool_guid', searching
+ * from the tail of the list.  Returns 0 if the list is empty or the
+ * entry was updated, ENOENT if no entry matches.
+ */
+int
+spa_import_progress_set_state(uint64_t pool_guid,
+    spa_load_state_t load_state)
+{
+       spa_history_list_t *shl = spa_import_progress_list;
+       spa_import_progress_t *entry;
+       int rc = ENOENT;
+
+       /* An empty list is reported as success, without taking the lock. */
+       if (shl->size == 0)
+               return (0);
+
+       mutex_enter(&shl->procfs_list.pl_lock);
+       entry = list_tail(&shl->procfs_list.pl_list);
+       while (entry != NULL && entry->pool_guid != pool_guid)
+               entry = list_prev(&shl->procfs_list.pl_list, entry);
+       if (entry != NULL) {
+               entry->spa_load_state = load_state;
+               rc = 0;
+       }
+       mutex_exit(&shl->procfs_list.pl_lock);
+
+       return (rc);
+}
+
+/*
+ * Record 'load_max_txg' on the entry whose guid is 'pool_guid',
+ * searching from the tail of the list.  Returns 0 if the list is empty
+ * or the entry was updated, ENOENT if no entry matches.
+ */
+int
+spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg)
+{
+       spa_history_list_t *shl = spa_import_progress_list;
+       spa_import_progress_t *entry;
+       int rc = ENOENT;
+
+       /* An empty list is reported as success, without taking the lock. */
+       if (shl->size == 0)
+               return (0);
+
+       mutex_enter(&shl->procfs_list.pl_lock);
+       entry = list_tail(&shl->procfs_list.pl_list);
+       while (entry != NULL && entry->pool_guid != pool_guid)
+               entry = list_prev(&shl->procfs_list.pl_list, entry);
+       if (entry != NULL) {
+               entry->spa_load_max_txg = load_max_txg;
+               rc = 0;
+       }
+       mutex_exit(&shl->procfs_list.pl_lock);
+
+       return (rc);
+}
+
+/*
+ * Record 'mmp_sec_remaining' on the entry whose guid is 'pool_guid',
+ * searching from the tail of the list.  Returns 0 if the list is empty
+ * or the entry was updated, ENOENT if no entry matches.
+ */
+int
+spa_import_progress_set_mmp_check(uint64_t pool_guid,
+    uint64_t mmp_sec_remaining)
+{
+       spa_history_list_t *shl = spa_import_progress_list;
+       spa_import_progress_t *entry;
+       int rc = ENOENT;
+
+       /* An empty list is reported as success, without taking the lock. */
+       if (shl->size == 0)
+               return (0);
+
+       mutex_enter(&shl->procfs_list.pl_lock);
+       entry = list_tail(&shl->procfs_list.pl_list);
+       while (entry != NULL && entry->pool_guid != pool_guid)
+               entry = list_prev(&shl->procfs_list.pl_list, entry);
+       if (entry != NULL) {
+               entry->mmp_sec_remaining = mmp_sec_remaining;
+               rc = 0;
+       }
+       mutex_exit(&shl->procfs_list.pl_lock);
+
+       return (rc);
+}
+
+/*
+ * Start tracking an import: allocate an entry for 'spa', fill in its
+ * guid, current load state and a private copy of the pool name (taken
+ * from the config nvlist, falling back to spa_name()), then add it to
+ * the list under the list lock.
+ */
+void
+spa_import_progress_add(spa_t *spa)
+{
+       spa_history_list_t *shl = spa_import_progress_list;
+       spa_import_progress_t *entry;
+       char *name = NULL;
+
+       entry = kmem_zalloc(sizeof (spa_import_progress_t), KM_SLEEP);
+       entry->pool_guid = spa_guid(spa);
+       entry->spa_load_state = spa_load_state(spa);
+
+       /* The lookup result is ignored on purpose; 'name' stays NULL. */
+       (void) nvlist_lookup_string(spa->spa_config, ZPOOL_CONFIG_POOL_NAME,
+           &name);
+       entry->pool_name = spa_strdup(name != NULL ? name : spa_name(spa));
+
+       mutex_enter(&shl->procfs_list.pl_lock);
+       procfs_list_add(&shl->procfs_list, entry);
+       shl->size++;
+       mutex_exit(&shl->procfs_list.pl_lock);
+}
+
+/*
+ * Stop tracking the import identified by 'pool_guid': unlink the first
+ * matching entry found when searching from the tail, and free it along
+ * with its pool name.  Does nothing if no entry matches.
+ */
+void
+spa_import_progress_remove(uint64_t pool_guid)
+{
+       spa_history_list_t *shl = spa_import_progress_list;
+       spa_import_progress_t *entry;
+
+       mutex_enter(&shl->procfs_list.pl_lock);
+       entry = list_tail(&shl->procfs_list.pl_list);
+       while (entry != NULL && entry->pool_guid != pool_guid)
+               entry = list_prev(&shl->procfs_list.pl_list, entry);
+       if (entry != NULL) {
+               list_remove(&shl->procfs_list.pl_list, entry);
+               shl->size--;
+               if (entry->pool_name)
+                       spa_strfree(entry->pool_name);
+               kmem_free(entry, sizeof (spa_import_progress_t));
+       }
+       mutex_exit(&shl->procfs_list.pl_lock);
+}
+
 /*
  * ==========================================================================
  * Initialization and Termination
@@ -2099,6 +2307,7 @@ spa_init(int mode)
        l2arc_start();
        scan_init();
        qat_init();
+       spa_import_progress_init();
 }
 
 void
@@ -2123,6 +2332,7 @@ spa_fini(void)
        fm_fini();
        scan_fini();
        qat_fini();
+       spa_import_progress_destroy();
 
        avl_destroy(&spa_namespace_avl);
        avl_destroy(&spa_spare_avl);