git.proxmox.com Git - mirror_zfs.git/commitdiff
Report pool suspended due to MMP
author Olaf Faaland <faaland1@llnl.gov>
Thu, 15 Mar 2018 17:56:55 +0000 (10:56 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 15 Mar 2018 17:56:55 +0000 (10:56 -0700)
When the pool is suspended, record whether it was due to an I/O error or
due to MMP writes failing to succeed within the required time.

Change spa_suspended from uint8_t to zio_suspend_reason_t to store the
reason.

When userspace queries pool status via spa_get_stats(), report the
reason the pool was suspended in a new key,
ZPOOL_CONFIG_SUSPENDED_REASON.

In libzfs, when interpreting the returned config nvlist, report
suspension due to MMP with a new pool status enum value,
ZPOOL_STATUS_IO_FAILURE_MMP.

In status_callback(), which generates and emits the message when 'zpool
status' is executed, add a case to print an appropriate message for the
new pool status enum value.

Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes #7296

cmd/zpool/zpool_main.c
include/libzfs.h
include/sys/fs/zfs.h
include/sys/spa_impl.h
include/sys/zio.h
lib/libzfs/libzfs_status.c
module/zfs/mmp.c
module/zfs/spa.c
module/zfs/spa_misc.c
module/zfs/zio.c

index 1d31fde469a53b30a2af225d2bfc4af33313355c..a3537b1771a2bfefbd64b6c563a276d269af3041 100644 (file)
@@ -6467,6 +6467,15 @@ status_callback(zpool_handle_t *zhp, void *data)
                    "to be recovered.\n"));
                break;
 
+       case ZPOOL_STATUS_IO_FAILURE_MMP:
+               (void) printf(gettext("status: The pool is suspended because "
+                   "multihost writes failed or were delayed;\n\tanother "
+                   "system could import the pool undetected.\n"));
+               (void) printf(gettext("action: Make sure the pool's devices "
+                   "are connected, then reboot your system and\n\timport the "
+                   "pool.\n"));
+               break;
+
        case ZPOOL_STATUS_IO_FAILURE_WAIT:
        case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
                (void) printf(gettext("status: One or more devices are "
index 5288ff7f8c86695e9705567e1bbc46bcb1668ca9..00f22cfb11bffa9c0e06cccfafed099fe5b566bb 100644 (file)
@@ -333,6 +333,7 @@ typedef enum {
        ZPOOL_STATUS_HOSTID_REQUIRED,   /* multihost=on and hostid=0 */
        ZPOOL_STATUS_IO_FAILURE_WAIT,   /* failed I/O, failmode 'wait' */
        ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
+       ZPOOL_STATUS_IO_FAILURE_MMP,    /* failed MMP, failmode not 'panic' */
        ZPOOL_STATUS_BAD_LOG,           /* cannot read log chain(s) */
        ZPOOL_STATUS_ERRATA,            /* informational errata available */
 
index 88f590276fede2d6cbca75bba573a7d131005de3..e40c427f61d9729acc340bd35005e8400d18e790 100644 (file)
@@ -681,6 +681,7 @@ typedef struct zpool_rewind_policy {
 #define        ZPOOL_CONFIG_RESILVER_TXG       "resilver_txg"
 #define        ZPOOL_CONFIG_COMMENT            "comment"
 #define        ZPOOL_CONFIG_SUSPENDED          "suspended"     /* not stored on disk */
+#define        ZPOOL_CONFIG_SUSPENDED_REASON   "suspended_reason"      /* not stored */
 #define        ZPOOL_CONFIG_TIMESTAMP          "timestamp"     /* not stored on disk */
 #define        ZPOOL_CONFIG_BOOTFS             "bootfs"        /* not stored on disk */
 #define        ZPOOL_CONFIG_MISSING_DEVICES    "missing_vdevs" /* not stored on disk */
index 66fcafd9c60f0107f45fbda85fc48484b45f9464..af1d6aef0fe9abac99e3e702e38113d6ed70b903 100644 (file)
@@ -236,7 +236,7 @@ struct spa {
        zio_t           *spa_suspend_zio_root;  /* root of all suspended I/O */
        kmutex_t        spa_suspend_lock;       /* protects suspend_zio_root */
        kcondvar_t      spa_suspend_cv;         /* notification of resume */
-       uint8_t         spa_suspended;          /* pool is suspended */
+       zio_suspend_reason_t    spa_suspended;  /* pool is suspended */
        uint8_t         spa_claiming;           /* pool is doing zil_claim() */
        boolean_t       spa_debug;              /* debug enabled? */
        boolean_t       spa_is_root;            /* pool is root */
index 8d5f99d465366e98c59504ef4594175953889a94..9d3adb7f50e60d7897cdd0f2c56e34d544f02c4e 100644 (file)
@@ -167,6 +167,12 @@ enum zio_encrypt {
 #define        ZIO_FAILURE_MODE_CONTINUE       1
 #define        ZIO_FAILURE_MODE_PANIC          2
 
+typedef enum zio_suspend_reason {
+       ZIO_SUSPEND_NONE = 0,
+       ZIO_SUSPEND_IOERR,
+       ZIO_SUSPEND_MMP,
+} zio_suspend_reason_t;
+
 enum zio_flag {
        /*
         * Flags inherited by gang, ddt, and vdev children,
@@ -610,7 +616,7 @@ extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa,
 extern enum zio_compress zio_compress_select(spa_t *spa,
     enum zio_compress child, enum zio_compress parent);
 
-extern void zio_suspend(spa_t *spa, zio_t *zio);
+extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
 extern int zio_resume(spa_t *spa);
 extern void zio_resume_wait(spa_t *spa);
 
index f900ac723107fb40fdfd66e54ff835977a4f8f73..57d2deabfc87213f8716df4f90a8fa2766f4cdbe 100644 (file)
@@ -275,10 +275,16 @@ check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap)
                return (ZPOOL_STATUS_BAD_GUID_SUM);
 
        /*
-        * Check whether the pool has suspended due to failed I/O.
+        * Check whether the pool has suspended.
         */
        if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
            &suspended) == 0) {
+               uint64_t reason;
+
+               if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON,
+                   &reason) == 0 && reason == ZIO_SUSPEND_MMP)
+                       return (ZPOOL_STATUS_IO_FAILURE_MMP);
+
                if (suspended == ZIO_FAILURE_MODE_CONTINUE)
                        return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
                return (ZPOOL_STATUS_IO_FAILURE_WAIT);
index 757dfa82d144bbc5e1dcd64435119d98710a34b9..14379d804693aa71116c3aaa9b3d2dd925ed963c 100644 (file)
@@ -520,7 +520,7 @@ mmp_thread(void *arg)
                            "succeeded in over %llus; suspending pool",
                            spa_name(spa),
                            NSEC2SEC(start - mmp->mmp_last_write));
-                       zio_suspend(spa, NULL);
+                       zio_suspend(spa, NULL, ZIO_SUSPEND_MMP);
                }
 
                if (multihost && !suspended)
index 736b51feae1e67aa7cfab77e2dc987bbe52ba8b0..1e9e7b0131b935ed2c3cdde7011468ebe15cf691 100644 (file)
@@ -3766,10 +3766,14 @@ spa_get_stats(const char *name, nvlist_t **config,
                            ZPOOL_CONFIG_ERRCOUNT,
                            spa_get_errlog_size(spa)) == 0);
 
-                       if (spa_suspended(spa))
+                       if (spa_suspended(spa)) {
                                VERIFY(nvlist_add_uint64(*config,
                                    ZPOOL_CONFIG_SUSPENDED,
                                    spa->spa_failmode) == 0);
+                               VERIFY(nvlist_add_uint64(*config,
+                                   ZPOOL_CONFIG_SUSPENDED_REASON,
+                                   spa->spa_suspended) == 0);
+                       }
 
                        spa_add_spares(spa, *config);
                        spa_add_l2cache(spa, *config);
@@ -6984,7 +6988,7 @@ spa_sync(spa_t *spa, uint64_t txg)
 
                if (error == 0)
                        break;
-               zio_suspend(spa, NULL);
+               zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR);
                zio_resume_wait(spa);
        }
        dmu_tx_commit(tx);
index c67bacbbb0edde5b81cd5b09a140727849ae4aeb..d71468db3cc3155dfbb02b1d080add9a1b898848 100644 (file)
@@ -1709,7 +1709,7 @@ spa_get_failmode(spa_t *spa)
 boolean_t
 spa_suspended(spa_t *spa)
 {
-       return (spa->spa_suspended);
+       return (spa->spa_suspended != ZIO_SUSPEND_NONE);
 }
 
 uint64_t
index 7544cf4e3a3de6d5ec2a27fdf68ab24ae1633099..44cf984d0b00c75c95ef46c4d1e646e92a2bb640 100644 (file)
@@ -2092,7 +2092,7 @@ zio_reexecute(zio_t *pio)
 }
 
 void
-zio_suspend(spa_t *spa, zio_t *zio)
+zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
 {
        if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
                fm_panic("Pool '%s' has encountered an uncorrectable I/O "
@@ -2112,7 +2112,7 @@ zio_suspend(spa_t *spa, zio_t *zio)
                    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
                    ZIO_FLAG_GODFATHER);
 
-       spa->spa_suspended = B_TRUE;
+       spa->spa_suspended = reason;
 
        if (zio != NULL) {
                ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
@@ -2135,7 +2135,7 @@ zio_resume(spa_t *spa)
         * Reexecute all previously suspended i/o.
         */
        mutex_enter(&spa->spa_suspend_lock);
-       spa->spa_suspended = B_FALSE;
+       spa->spa_suspended = ZIO_SUSPEND_NONE;
        cv_broadcast(&spa->spa_suspend_cv);
        pio = spa->spa_suspend_zio_root;
        spa->spa_suspend_zio_root = NULL;
@@ -4390,7 +4390,7 @@ zio_done(zio_t *zio)
                         * We'd fail again if we reexecuted now, so suspend
                         * until conditions improve (e.g. device comes online).
                         */
-                       zio_suspend(zio->io_spa, zio);
+                       zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
                } else {
                        /*
                         * Reexecution is potentially a huge amount of work.