]> git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/spa_stats.c
Record skipped MMP writes in multihost_history
[mirror_zfs.git] / module / zfs / spa_stats.c
index c560d642f46cff4a9048e528f1c194fae4a0dab6..f604836c823fc06ee28e946618a3c379fa98eaea 100644 (file)
@@ -718,18 +718,28 @@ spa_io_history_destroy(spa_t *spa)
  */
 
 /*
- * MMP statistics - Information exported regarding each MMP update
+ * MMP statistics - Information exported regarding attempted MMP writes
+ *   For MMP writes issued, fields used as per comments below.
+ *   For MMP writes skipped, an entry represents a span of time when
+ *      writes were skipped for same reason (error from mmp_random_leaf).
+ *      Differences are:
+ *      timestamp      time first write skipped, if >1 skipped in a row
+ *      mmp_delay      delay value at timestamp
+ *      vdev_guid      number of writes skipped
+ *      io_error       one of enum mmp_error
+ *      duration       time span (ns) of skipped writes
  */
 
 typedef struct spa_mmp_history {
        uint64_t        mmp_kstat_id;   /* unique # for updates */
        uint64_t        txg;            /* txg of last sync */
-       uint64_t        timestamp;      /* UTC time of of last sync */
-       uint64_t        mmp_delay;      /* nanosec since last MMP write */
+       uint64_t        timestamp;      /* UTC time MMP write issued */
+       uint64_t        mmp_delay;      /* mmp_thread.mmp_delay at timestamp */
        uint64_t        vdev_guid;      /* unique ID of leaf vdev */
        char            *vdev_path;
-       uint64_t        vdev_label;     /* vdev label */
+       int             vdev_label;     /* vdev label */
        int             io_error;       /* error status of MMP write */
+       hrtime_t        error_start;    /* hrtime of start of error period */
        hrtime_t        duration;       /* time from submission to completion */
        list_node_t     smh_link;
 } spa_mmp_history_t;
@@ -747,9 +757,12 @@ static int
 spa_mmp_history_data(char *buf, size_t size, void *data)
 {
        spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
+       char skip_fmt[] = "%-10llu %-10llu %10llu %#6llx %10lld %12llu %-24llu "
+           "%-10lld %s\n";
+       char write_fmt[] = "%-10llu %-10llu %10llu %6lld %10lld %12llu %-24llu "
+           "%-10lld %s\n";
 
-       (void) snprintf(buf, size, "%-10llu %-10llu %-10llu %-6lld %-10lld "
-           "%-12llu %-24llu %-10llu %s\n",
+       (void) snprintf(buf, size, (smh->error_start ? skip_fmt : write_fmt),
            (u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg,
            (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
            (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
@@ -869,8 +882,42 @@ spa_mmp_history_destroy(spa_t *spa)
        mutex_destroy(&ssh->lock);
 }
 
+/*
+ * Set duration in existing "skip" record to how long we have waited for a leaf
+ * vdev to become available.
+ *
+ * Important that we start search at the head of the list where new
+ * records are inserted, so this is normally an O(1) operation.
+ */
+int
+spa_mmp_history_set_skip(spa_t *spa, uint64_t mmp_kstat_id)
+{
+       spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+       spa_mmp_history_t *smh;
+       int error = ENOENT;
+
+       if (zfs_multihost_history == 0 && ssh->size == 0)
+               return (0);
+
+       mutex_enter(&ssh->lock);
+       for (smh = list_head(&ssh->list); smh != NULL;
+           smh = list_next(&ssh->list, smh)) {
+               if (smh->mmp_kstat_id == mmp_kstat_id) {
+                       ASSERT3U(smh->io_error, !=, 0);
+                       smh->duration = gethrtime() - smh->error_start;
+                       smh->vdev_guid++;
+                       error = 0;
+                       break;
+               }
+       }
+       mutex_exit(&ssh->lock);
+
+       return (error);
+}
+
 /*
  * Set MMP write duration and error status in existing record.
+ * See comment re: search order above spa_mmp_history_set_skip().
  */
 int
 spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
@@ -887,6 +934,7 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
        for (smh = list_head(&ssh->list); smh != NULL;
            smh = list_next(&ssh->list, smh)) {
                if (smh->mmp_kstat_id == mmp_kstat_id) {
+                       ASSERT(smh->io_error == 0);
                        smh->io_error = io_error;
                        smh->duration = duration;
                        error = 0;
@@ -899,29 +947,39 @@ spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
 }
 
 /*
- * Add a new MMP write to historical record.
+ * Add a new MMP historical record.
+ * error == 0 : a write was issued.
+ * error != 0 : a write was not issued because no leaves were found.
  */
-void
-spa_mmp_history_add(uint64_t txg, uint64_t timestamp, uint64_t mmp_delay,
-    vdev_t *vd, int label, uint64_t mmp_kstat_id)
+void *
+spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
+    uint64_t mmp_delay, vdev_t *vd, int label, uint64_t mmp_kstat_id,
+    int error)
 {
-       spa_t *spa = vd->vdev_spa;
        spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
        spa_mmp_history_t *smh, *rm;
 
        if (zfs_multihost_history == 0 && ssh->size == 0)
-               return;
+               return (NULL);
 
        smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
        smh->txg = txg;
        smh->timestamp = timestamp;
        smh->mmp_delay = mmp_delay;
-       smh->vdev_guid = vd->vdev_guid;
-       if (vd->vdev_path)
-               smh->vdev_path = strdup(vd->vdev_path);
+       if (vd) {
+               smh->vdev_guid = vd->vdev_guid;
+               if (vd->vdev_path)
+                       smh->vdev_path = strdup(vd->vdev_path);
+       }
        smh->vdev_label = label;
        smh->mmp_kstat_id = mmp_kstat_id;
 
+       if (error) {
+               smh->io_error = error;
+               smh->error_start = gethrtime();
+               smh->vdev_guid = 1;
+       }
+
        mutex_enter(&ssh->lock);
 
        list_insert_head(&ssh->list, smh);
@@ -936,6 +994,7 @@ spa_mmp_history_add(uint64_t txg, uint64_t timestamp, uint64_t mmp_delay,
        }
 
        mutex_exit(&ssh->lock);
+       return ((void *)smh);
 }
 
 void