]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Add request size histograms (-r) to zpool iostat, minor man page fix
author Tony Hutter <hutter2@llnl.gov>
Wed, 25 May 2016 21:21:35 +0000 (14:21 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Wed, 25 May 2016 22:49:35 +0000 (15:49 -0700)
Add -r option to "zpool iostat" to print request size histograms for the leaf
ZIOs. This includes histograms of individual ZIOs ("ind") and aggregate ZIOs
("agg"). These stats can be useful for seeing how well the ZFS IO aggregator
is working.

$ zpool iostat -r
mypool        sync_read    sync_write    async_read    async_write      scrub
req_size      ind    agg    ind    agg    ind    agg    ind    agg    ind    agg
----------  -----  -----  -----  -----  -----  -----  -----  -----  -----  -----
512             0      0      0      0      0      0    530      0      0      0
1K              0      0    260      0      0      0    116    246      0      0
2K              0      0      0      0      0      0      0    431      0      0
4K              0      0      0      0      0      0      3    107      0      0
8K             15      0     35      0      0      0      0      6      0      0
16K             0      0      0      0      0      0      0     39      0      0
32K             0      0      0      0      0      0      0      0      0      0
64K            20      0     40      0      0      0      0      0      0      0
128K            0      0     20      0      0      0      0      0      0      0
256K            0      0      0      0      0      0      0      0      0      0
512K            0      0      0      0      0      0      0      0      0      0
1M              0      0      0      0      0      0      0      0      0      0
2M              0      0      0      0      0      0      0      0      0      0
4M              0      0      0      0      0      0    155     19      0      0
8M              0      0      0      0      0      0      0    811      0      0
16M             0      0      0      0      0      0      0     68      0      0
--------------------------------------------------------------------------------

Also rename the stray "-G" in the man page to be "-w" for latency histograms.

Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Closes #4659

cmd/zpool/zpool_main.c
include/sys/fs/zfs.h
man/man8/zpool.8
module/zfs/vdev.c
module/zfs/vdev_label.c
tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_003_neg.ksh
tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh

index 0603efdbae810063f852b18768f0e01a74fe0bde..072404264b0169857714892a9a1069222a429d55 100644 (file)
@@ -153,6 +153,7 @@ enum iostat_type {
        IOS_LATENCY = 1,
        IOS_QUEUES = 2,
        IOS_L_HISTO = 3,
+       IOS_RQ_HISTO = 4,
        IOS_COUNT,      /* always last element */
 };
 
@@ -161,6 +162,62 @@ enum iostat_type {
 #define        IOS_LATENCY_M   (1ULL << IOS_LATENCY)
 #define        IOS_QUEUES_M    (1ULL << IOS_QUEUES)
 #define        IOS_L_HISTO_M   (1ULL << IOS_L_HISTO)
+#define        IOS_RQ_HISTO_M  (1ULL << IOS_RQ_HISTO)
+
+/* Mask of all the histo bits */
+#define        IOS_ANYHISTO_M (IOS_L_HISTO_M | IOS_RQ_HISTO_M)
+
+/*
+ * Lookup table for iostat flags to nvlist names.  Basically a list
+ * of all the nvlists a flag requires.  Also specifies the order in
+ * which data gets printed in zpool iostat.
+ */
+static const char *vsx_type_to_nvlist[IOS_COUNT][11] = {
+       [IOS_L_HISTO] = {
+           ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
+           NULL},
+       [IOS_LATENCY] = {
+           ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+           ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+           NULL},
+       [IOS_QUEUES] = {
+           ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
+           ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,
+           ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,
+           ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
+           ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
+           NULL},
+       [IOS_RQ_HISTO] = {
+           ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO,
+           ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO,
+           ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO,
+           ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO,
+           ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO,
+           ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO,
+           ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO,
+           ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO,
+           ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO,
+           ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO,
+           NULL},
+};
+
+
+/*
+ * Given a cb->cb_flags with a histogram bit set, return the iostat_type.
+ * Right now, only one histo bit is ever set at one time, so we can
+ * just do a highbit64(a)
+ */
+#define        IOS_HISTO_IDX(a)        (highbit64(a & IOS_ANYHISTO_M) - 1)
 
 typedef struct zpool_command {
        const char      *name;
@@ -255,7 +312,8 @@ get_usage(zpool_help_t idx) {
                    "[-R root] [-F [-n]]\n"
                    "\t    <pool | id> [newpool]\n"));
        case HELP_IOSTAT:
-               return (gettext("\tiostat [-T d | u] [-ghHLpPvy] [[-lq]|-w]\n"
+               return (gettext("\tiostat [-T d | u] [-ghHLpPvy] "
+                   "[[-lq]|[-r|-w]]\n"
                    "\t    [[pool ...]|[pool vdev ...]|[vdev ...]] "
                    "[interval [count]]\n"));
        case HELP_LABELCLEAR:
@@ -2531,6 +2589,9 @@ static const name_and_columns_t iostat_top_labels[][IOSTAT_MAX_LABELS] =
            {NULL}},
        [IOS_L_HISTO] = {{"total_wait", 2}, {"disk_wait", 2},
            {"sync_queue", 2}, {"async_queue", 2}, {NULL}},
+       [IOS_RQ_HISTO] = {{"sync_read", 2}, {"sync_write", 2},
+           {"async_read", 2}, {"async_write", 2}, {"scrub", 2}, {NULL}},
+
 };
 
 /* Shorthand - if "columns" field not set, default to 1 column */
@@ -2544,6 +2605,13 @@ static const name_and_columns_t iostat_bottom_labels[][IOSTAT_MAX_LABELS] =
            {"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}},
        [IOS_L_HISTO] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
            {"write"}, {"read"}, {"write"}, {"scrub"}, {NULL}},
+       [IOS_RQ_HISTO] = {{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"},
+           {"ind"}, {"agg"}, {"ind"}, {"agg"}, {NULL}},
+};
+
+static const char *histo_to_title[] = {
+       [IOS_L_HISTO] = "latency",
+       [IOS_RQ_HISTO] = "req_size",
 };
 
 /*
@@ -2562,6 +2630,25 @@ label_array_len(const name_and_columns_t *labels)
        return (i);
 }
 
+/*
+ * Return the number of strings in a null-terminated string array.
+ * For example:
+ *
+ *     const char *foo[] = {"bar", "baz", NULL};
+ *
+ * returns 2
+ */
+static uint64_t
+str_array_len(const char *array[])
+{
+       uint64_t i = 0;
+       while (array[i])
+               i++;
+
+       return (i);
+}
+
+
 /*
  * Return a default column width for default/latency/queue columns. This does
  * not include histograms, which have their columns autosized.
@@ -2673,14 +2760,22 @@ print_iostat_dashes(iostat_cbdata_t *cb, unsigned int force_column_width,
        uint64_t f;
        int idx;
        const name_and_columns_t *labels;
+       const char *title;
+
+
+       if (cb->cb_flags & IOS_ANYHISTO_M) {
+               title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)];
+       } else if (cb->cb_vdev_names_count) {
+               title = "vdev";
+       } else  {
+               title = "pool";
+       }
+
+       namewidth = MAX(MAX(strlen(title), cb->cb_namewidth),
+           name ? strlen(name) : 0);
 
-       if (cb->cb_flags & IOS_L_HISTO_M)
-               namewidth = MAX(cb->cb_namewidth, strlen("latency"));
-       else
-               namewidth = cb->cb_namewidth;
 
        if (name) {
-               namewidth = MAX(cb->cb_namewidth, strlen(name));
                printf("%-*s", namewidth, name);
        } else {
                for (i = 0; i < namewidth; i++)
@@ -2727,22 +2822,28 @@ print_iostat_header_impl(iostat_cbdata_t *cb, unsigned int force_column_width,
     const char *histo_vdev_name)
 {
        unsigned int namewidth;
-       uint64_t flags = cb->cb_flags;
+       const char *title;
 
-       if (flags & IOS_L_HISTO_M)
-               namewidth = MAX(cb->cb_namewidth, strlen("latency"));
-       else
-               namewidth = cb->cb_namewidth;
+       if (cb->cb_flags & IOS_ANYHISTO_M) {
+               title = histo_to_title[IOS_HISTO_IDX(cb->cb_flags)];
+       } else if (cb->cb_vdev_names_count) {
+               title = "vdev";
+       } else  {
+               title = "pool";
+       }
 
-       if (flags & IOS_L_HISTO_M)
+       namewidth = MAX(MAX(strlen(title), cb->cb_namewidth),
+           histo_vdev_name ? strlen(histo_vdev_name) : 0);
+
+       if (histo_vdev_name)
                printf("%-*s", namewidth, histo_vdev_name);
        else
                printf("%*s", namewidth, "");
 
+
        print_iostat_labels(cb, force_column_width, iostat_top_labels);
 
-       printf("%-*s", namewidth, flags & IOS_L_HISTO_M ? "latency" :
-           cb->cb_vdev_names_count ? "vdev" : "pool");
+       printf("%-*s", namewidth, title);
 
        print_iostat_labels(cb, force_column_width, iostat_bottom_labels);
 
@@ -2918,6 +3019,7 @@ print_iostat_histo(struct stat_array *nva, unsigned int len,
        uint64_t val;
        enum zfs_nicenum_format format;
        unsigned int buckets;
+       unsigned int start_bucket;
 
        if (cb->cb_literal)
                format = ZFS_NICENUM_RAW;
@@ -2927,12 +3029,25 @@ print_iostat_histo(struct stat_array *nva, unsigned int len,
        /* All these histos are the same size, so just use nva[0].count */
        buckets = nva[0].count;
 
-       for (j = 0; j < buckets; j++) {
-               /* Ending range of this bucket */
-               val = (1UL << (j + 1)) - 1;
+       if (cb->cb_flags & IOS_RQ_HISTO_M) {
+               /* Start at 512 - req size should never be lower than this */
+               start_bucket = 9;
+       } else {
+               start_bucket = 0;
+       }
 
+       for (j = start_bucket; j < buckets; j++) {
                /* Print histogram bucket label */
-               zfs_nicetime(val, buf, sizeof (buf));
+               if (cb->cb_flags & IOS_L_HISTO_M) {
+                       /* Ending range of this bucket */
+                       val = (1UL << (j + 1)) - 1;
+                       zfs_nicetime(val, buf, sizeof (buf));
+               } else {
+                       /* Request size (starting range of bucket) */
+                       val = (1UL << j);
+                       zfs_nicenum(val, buf, sizeof (buf));
+               }
+
                if (cb->cb_scripted)
                        printf("%llu", (u_longlong_t) val);
                else
@@ -2962,30 +3077,29 @@ print_iostat_histos(iostat_cbdata_t *cb, nvlist_t *oldnv,
        unsigned int column_width;
        unsigned int namewidth;
        unsigned int entire_width;
-
-       const char *names[] = {
-               ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
-               ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
-               ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
-               ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
-               ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
-               ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
-               ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
-               ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
-               ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
-       };
+       enum iostat_type type;
        struct stat_array *nva;
-       nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), oldnv, newnv);
+       const char **names;
+       unsigned int names_len;
+
+       /* What type of histo are we? */
+       type = IOS_HISTO_IDX(cb->cb_flags);
+
+       /* Get NULL-terminated array of nvlist names for our histo */
+       names = vsx_type_to_nvlist[type];
+       names_len = str_array_len(names); /* num of names */
+
+       nva = calc_and_alloc_stats_ex(names, names_len, oldnv, newnv);
 
        if (cb->cb_literal) {
                column_width = MAX(5,
-                   (unsigned int) log10(stat_histo_max(nva,
-                   ARRAY_SIZE(names))) + 1);
+                   (unsigned int) log10(stat_histo_max(nva, names_len)) + 1);
        } else {
                column_width = 5;
        }
 
-       namewidth = MAX(cb->cb_namewidth, strlen("latency"));
+       namewidth = MAX(cb->cb_namewidth,
+           strlen(histo_to_title[IOS_HISTO_IDX(cb->cb_flags)]));
 
        /*
         * Calculate the entire line width of what we're printing.  The
@@ -2998,17 +3112,17 @@ print_iostat_histos(iostat_cbdata_t *cb, nvlist_t *oldnv,
        /*      |__________|  <--- entire_width         */
        /*                                              */
        entire_width = namewidth + (column_width + 2) *
-           label_array_len(iostat_bottom_labels[IOS_L_HISTO]);
+           label_array_len(iostat_bottom_labels[type]);
 
        if (cb->cb_scripted)
                printf("%s\n", name);
        else
                print_iostat_header_impl(cb, column_width, name);
 
-       print_iostat_histo(nva, ARRAY_SIZE(names), cb, column_width,
+       print_iostat_histo(nva, names_len, cb, column_width,
            namewidth, scale);
 
-       free_calc_stats(nva, ARRAY_SIZE(names));
+       free_calc_stats(nva, names_len);
        if (!cb->cb_scripted)
                print_solid_separator(entire_width);
 }
@@ -3219,7 +3333,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
         * Print the vdev name unless it's is a histogram.  Histograms
         * display the vdev name in the header itself.
         */
-       if (!(cb->cb_flags & IOS_L_HISTO_M)) {
+       if (!(cb->cb_flags & IOS_ANYHISTO_M)) {
                if (cb->cb_scripted) {
                        printf("%s", name);
                } else {
@@ -3234,7 +3348,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
 
        /* Calculate our scaling factor */
        tdelta = newvs->vs_timestamp - oldvs->vs_timestamp;
-       if ((oldvs->vs_timestamp == 0) && (cb->cb_flags & IOS_L_HISTO_M)) {
+       if ((oldvs->vs_timestamp == 0) && (cb->cb_flags & IOS_ANYHISTO_M)) {
                /*
                 * If we specify printing histograms with no time interval, then
                 * print the histogram numbers over the entire lifetime of the
@@ -3256,12 +3370,12 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
                print_iostat_latency(cb, oldnv, newnv, scale);
        if (cb->cb_flags & IOS_QUEUES_M)
                print_iostat_queues(cb, oldnv, newnv, scale);
-       if (cb->cb_flags & IOS_L_HISTO_M) {
+       if (cb->cb_flags & IOS_ANYHISTO_M) {
                printf("\n");
                print_iostat_histos(cb, oldnv, newnv, scale, name);
        }
 
-       if (!(cb->cb_flags & IOS_L_HISTO_M))
+       if (!(cb->cb_flags & IOS_ANYHISTO_M))
                printf("\n");
 
        free(calcvs);
@@ -3303,7 +3417,7 @@ children:
         */
 
        if (num_logs(newnv) > 0) {
-               if ((!(cb->cb_flags & IOS_L_HISTO_M)) && !cb->cb_scripted &&
+               if ((!(cb->cb_flags & IOS_ANYHISTO_M)) && !cb->cb_scripted &&
                    !cb->cb_vdev_names) {
                        print_iostat_dashes(cb, 0, "logs");
                }
@@ -3337,7 +3451,7 @@ children:
                return (ret);
 
        if (children > 0) {
-               if ((!(cb->cb_flags & IOS_L_HISTO_M)) && !cb->cb_scripted &&
+               if ((!(cb->cb_flags & IOS_ANYHISTO_M)) && !cb->cb_scripted &&
                    !cb->cb_vdev_names) {
                        print_iostat_dashes(cb, 0, "cache");
                }
@@ -3399,9 +3513,10 @@ print_iostat(zpool_handle_t *zhp, void *data)
 
        ret = print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot,
                                                                        cb, 0);
-       if ((ret != 0) && !(cb->cb_flags & IOS_L_HISTO_M) && !cb->cb_scripted &&
-           cb->cb_verbose && !cb->cb_vdev_names_count)
-                               print_iostat_separator(cb);
+       if ((ret != 0) && !(cb->cb_flags & IOS_ANYHISTO_M) &&
+           !cb->cb_scripted && cb->cb_verbose && !cb->cb_vdev_names_count) {
+               print_iostat_separator(cb);
+       }
 
        return (ret);
 }
@@ -3553,37 +3668,6 @@ get_stat_flags_cb(zpool_handle_t *zhp, void *data)
        uint64_t flags = 0;
        int i, j;
 
-       /*
-        * Lookup table for extended iostat flags to nvlist names.
-        * Basically a list of all the nvpairs a flag requires.
-        */
-       static const char *vsx_type_to_nvlist[IOS_COUNT][10] = {
-               [IOS_L_HISTO] = {
-                   ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
-                   NULL},
-               [IOS_LATENCY] = {
-                   ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
-                   ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
-                   NULL},
-               [IOS_QUEUES] = {
-                   ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
-                   ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,
-                   ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,
-                   ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
-                   ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
-                   NULL}
-       };
-
        config = zpool_get_config(zhp, NULL);
        verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
            &nvroot) == 0);
@@ -3818,7 +3902,7 @@ fsleep(float sec) {
 
 
 /*
- * zpool iostat [-ghHLpPvy] [[-lq]-w] [-n name] [-T d|u]
+ * zpool iostat [-ghHLpPvy] [[-lq]|[-r|-w]] [-n name] [-T d|u]
  *             [[ pool ...]|[pool vdev ...]|[vdev ...]]
  *             [interval [count]]
  *
@@ -3832,7 +3916,8 @@ fsleep(float sec) {
  *             by a single tab.
  *     -l      Display average latency
  *     -q      Display queue depths
- *     -w      Display histograms
+ *     -w      Display latency histograms
+ *     -r      Display request size histogram
  *     -T      Display a timestamp in date(1) or Unix format
  *
  * This command can be tricky because we want to be able to deal with pool
@@ -3851,7 +3936,7 @@ zpool_do_iostat(int argc, char **argv)
        unsigned long count = 0;
        zpool_list_t *list;
        boolean_t verbose = B_FALSE;
-       boolean_t latency = B_FALSE, histo = B_FALSE;
+       boolean_t latency = B_FALSE, l_histo = B_FALSE, rq_histo = B_FALSE;
        boolean_t queues = B_FALSE, parsable = B_FALSE, scripted = B_FALSE;
        boolean_t omit_since_boot = B_FALSE;
        boolean_t guid = B_FALSE;
@@ -3861,12 +3946,12 @@ zpool_do_iostat(int argc, char **argv)
 
        /* Used for printing error message */
        const char flag_to_arg[] = {[IOS_LATENCY] = 'l', [IOS_QUEUES] = 'q',
-           [IOS_L_HISTO] = 'w'};
+           [IOS_L_HISTO] = 'w', [IOS_RQ_HISTO] = 'r'};
 
        uint64_t unsupported_flags;
 
        /* check options */
-       while ((c = getopt(argc, argv, "gLPT:vyhplqwH")) != -1) {
+       while ((c = getopt(argc, argv, "gLPT:vyhplqrwH")) != -1) {
                switch (c) {
                case 'g':
                        guid = B_TRUE;
@@ -3896,7 +3981,10 @@ zpool_do_iostat(int argc, char **argv)
                        scripted = B_TRUE;
                        break;
                case 'w':
-                       histo = B_TRUE;
+                       l_histo = B_TRUE;
+                       break;
+               case 'r':
+                       rq_histo = B_TRUE;
                        break;
                case 'y':
                        omit_since_boot = B_TRUE;
@@ -3997,10 +4085,18 @@ zpool_do_iostat(int argc, char **argv)
                return (1);
        }
 
-       if (histo && (queues || latency)) {
+       if ((l_histo || rq_histo) && (queues || latency)) {
+               pool_list_free(list);
+               (void) fprintf(stderr,
+                   gettext("[-r|-w] isn't allowed with [-q|-l]\n"));
+               usage(B_FALSE);
+               return (1);
+       }
+
+       if (l_histo && rq_histo) {
                pool_list_free(list);
                (void) fprintf(stderr,
-                   gettext("-w isn't allowed with [-q|-l]\n"));
+                   gettext("Only one of [-r|-w] can be passed at a time\n"));
                usage(B_FALSE);
                return (1);
        }
@@ -4010,13 +4106,15 @@ zpool_do_iostat(int argc, char **argv)
         */
        cb.cb_list = list;
 
-       if (histo) {
+       if (l_histo) {
                /*
                 * Histograms tables look out of place when you try to display
                 * them with the other stats, so make a rule that you can only
                 * print histograms by themselves.
                 */
                cb.cb_flags = IOS_L_HISTO_M;
+       } else if (rq_histo) {
+               cb.cb_flags = IOS_RQ_HISTO_M;
        } else {
                cb.cb_flags = IOS_DEFAULT_M;
                if (latency)
@@ -4088,7 +4186,7 @@ zpool_do_iostat(int argc, char **argv)
                         */
                        if (((++cb.cb_iteration == 1 && !skip) ||
                            (skip != verbose)) &&
-                           (!(cb.cb_flags & IOS_L_HISTO_M)) &&
+                           (!(cb.cb_flags & IOS_ANYHISTO_M)) &&
                            !cb.cb_scripted)
                                print_iostat_header(&cb);
 
@@ -4108,8 +4206,8 @@ zpool_do_iostat(int argc, char **argv)
                         * we also want an ending separator.
                         */
                        if (((npools > 1 && !verbose &&
-                           !(cb.cb_flags & IOS_L_HISTO_M)) ||
-                           (!(cb.cb_flags & IOS_L_HISTO_M) &&
+                           !(cb.cb_flags & IOS_ANYHISTO_M)) ||
+                           (!(cb.cb_flags & IOS_ANYHISTO_M) &&
                            cb.cb_vdev_names_count)) &&
                            !cb.cb_scripted) {
                                print_iostat_separator(&cb);
index 65dba125c94129fa746f95569223aadcf0e0b6cf..9de50e9a273ffdeb0adca89bfb4a16aae3e87dfc 100644 (file)
@@ -558,7 +558,17 @@ typedef struct zpool_rewind_policy {
 #define        ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO     "vdev_async_w_lat_histo"
 #define        ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO       "vdev_scrub_histo"
 
-
+/* Request size histograms */
+#define        ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO      "vdev_sync_ind_r_histo"
+#define        ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO      "vdev_sync_ind_w_histo"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO     "vdev_async_ind_r_histo"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO     "vdev_async_ind_w_histo"
+#define        ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO       "vdev_ind_scrub_histo"
+#define        ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO      "vdev_sync_agg_r_histo"
+#define        ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO      "vdev_sync_agg_w_histo"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO     "vdev_async_agg_r_histo"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO     "vdev_async_agg_w_histo"
+#define        ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO       "vdev_agg_scrub_histo"
 
 #define        ZPOOL_CONFIG_WHOLE_DISK         "whole_disk"
 #define        ZPOOL_CONFIG_ERRCOUNT           "error_count"
@@ -824,20 +834,33 @@ typedef struct vdev_stat_ex {
         * 2^37 nanoseconds = 134s. Timeouts will probably start kicking in
         * before this.
         */
-#define        VDEV_HISTO_BUCKETS 37
+#define        VDEV_L_HISTO_BUCKETS 37         /* Latency histo buckets */
+#define        VDEV_RQ_HISTO_BUCKETS 25        /* Request size histo buckets */
+
 
        /* Amount of time in ZIO queue (ns) */
        uint64_t vsx_queue_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
-           [VDEV_HISTO_BUCKETS];
+           [VDEV_L_HISTO_BUCKETS];
 
        /* Total ZIO latency (ns).  Includes queuing and disk access time */
-       uint64_t vsx_total_histo[ZIO_TYPES][VDEV_HISTO_BUCKETS];
+       uint64_t vsx_total_histo[ZIO_TYPES][VDEV_L_HISTO_BUCKETS];
 
        /* Amount of time to read/write the disk (ns) */
-       uint64_t vsx_disk_histo[ZIO_TYPES][VDEV_HISTO_BUCKETS];
+       uint64_t vsx_disk_histo[ZIO_TYPES][VDEV_L_HISTO_BUCKETS];
+
+       /* "lookup the bucket for a value" histogram macros */
+#define        HISTO(val, buckets) (val != 0 ? MIN(highbit64(val) - 1, \
+           buckets - 1) : 0)
+#define        L_HISTO(a) HISTO(a, VDEV_L_HISTO_BUCKETS)
+#define        RQ_HISTO(a) HISTO(a, VDEV_RQ_HISTO_BUCKETS)
+
+       /* Physical IO histogram */
+       uint64_t vsx_ind_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+           [VDEV_RQ_HISTO_BUCKETS];
 
-       /* "lookup the bucket for a value" macro */
-#define        HISTO(a) (a != 0 ? MIN(highbit64(a) - 1, VDEV_HISTO_BUCKETS - 1) : 0)
+       /* Delegated (aggregated) physical IO histogram */
+       uint64_t vsx_agg_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+           [VDEV_RQ_HISTO_BUCKETS];
 
 } vdev_stat_ex_t;
 
index eb35b74c02e0bcad54ce34428a4d1c7b4d3e120d..80402c55ec6368ffb8687d32f848908a52df41ec 100644 (file)
@@ -95,7 +95,7 @@ zpool \- configures ZFS storage pools
 
 .LP
 .nf
-\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [\fB-G\fR|[\fB-lq\fR]]
+\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [[\fB-lq\fR]|[\fB-r\fR|\fB-w\fR]]
      [[\fIpool\fR ...]|[\fIpool vdev\fR ...]|[\fIvdev\fR ...]] [\fIinterval\fR[\fIcount\fR]]\fR
 
 .fi
@@ -1510,7 +1510,7 @@ Scan using the default search path, the libblkid cache will not be consulted.  A
 .sp
 .ne 2
 .na
-\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [\fB-w\fR|[\fB-lq\fR]] [[\fIpool\fR ...]|[\fIpool vdev\fR ...]|[\fIvdev\fR ...]] [\fIinterval\fR[\fIcount\fR]]\fR
+\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [[\fB-lq\fR]|[\fB-r\fR|\fB-w\fR]] [[\fIpool\fR ...]|[\fIpool vdev\fR ...]|[\fIvdev\fR ...]] [\fIinterval\fR[\fIcount\fR]]\fR
 
 .ad
 .sp .6
@@ -1582,6 +1582,19 @@ Display numbers in parsable (exact) values.  Time values are in nanoseconds.
 Display full paths for vdevs instead of only the last component of the path.  This can be used in conjunction with the \fB-L\fR flag.
 .RE
 
+.sp
+.ne 2
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 12n
+Print request size histograms for the leaf ZIOs.  This includes histograms of
+individual ZIOs ("ind") and aggregate ZIOs ("agg").  These stats can be useful
+for seeing how well the ZFS IO aggregator is working.  Do not confuse these
+request size stats with the block layer requests; it's possible ZIOs can
+be broken up before being sent to the block device.
+.RE
+
 .sp
 .ne 2
 .na
index 13739017382ab0d02257b35d461323209063e792..607be3ce3b81a0eecf6deb9e6fe3daa706c62d94 100644 (file)
@@ -2784,21 +2784,30 @@ vdev_get_child_stat_ex(vdev_t *cvd, vdev_stat_ex_t *vsx, vdev_stat_ex_t *cvsx)
 {
        int t, b;
        for (t = 0; t < ZIO_TYPES; t++) {
-               for (b = 0; b < VDEV_HISTO_BUCKETS; b++) {
+               for (b = 0; b < ARRAY_SIZE(vsx->vsx_disk_histo[0]); b++)
                        vsx->vsx_disk_histo[t][b] += cvsx->vsx_disk_histo[t][b];
+
+               for (b = 0; b < ARRAY_SIZE(vsx->vsx_total_histo[0]); b++) {
                        vsx->vsx_total_histo[t][b] +=
                            cvsx->vsx_total_histo[t][b];
                }
        }
 
        for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) {
-               for (b = 0; b < VDEV_HISTO_BUCKETS; b++) {
+               for (b = 0; b < ARRAY_SIZE(vsx->vsx_queue_histo[0]); b++) {
                        vsx->vsx_queue_histo[t][b] +=
                            cvsx->vsx_queue_histo[t][b];
                }
                vsx->vsx_active_queue[t] += cvsx->vsx_active_queue[t];
                vsx->vsx_pend_queue[t] += cvsx->vsx_pend_queue[t];
+
+               for (b = 0; b < ARRAY_SIZE(vsx->vsx_ind_histo[0]); b++)
+                       vsx->vsx_ind_histo[t][b] += cvsx->vsx_ind_histo[t][b];
+
+               for (b = 0; b < ARRAY_SIZE(vsx->vsx_agg_histo[0]); b++)
+                       vsx->vsx_agg_histo[t][b] += cvsx->vsx_agg_histo[t][b];
        }
+
 }
 
 /*
@@ -2974,13 +2983,21 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
                        vs->vs_ops[type]++;
                        vs->vs_bytes[type] += psize;
 
+                       if (flags & ZIO_FLAG_DELEGATED) {
+                               vsx->vsx_agg_histo[zio->io_priority]
+                                   [RQ_HISTO(zio->io_size)]++;
+                       } else {
+                               vsx->vsx_ind_histo[zio->io_priority]
+                                   [RQ_HISTO(zio->io_size)]++;
+                       }
+
                        if (zio->io_delta && zio->io_delay) {
                                vsx->vsx_queue_histo[zio->io_priority]
-                                   [HISTO(zio->io_delta - zio->io_delay)]++;
+                                   [L_HISTO(zio->io_delta - zio->io_delay)]++;
                                vsx->vsx_disk_histo[type]
-                                   [HISTO(zio->io_delay)]++;
+                                   [L_HISTO(zio->io_delay)]++;
                                vsx->vsx_total_histo[type]
-                                   [HISTO(zio->io_delta)]++;
+                                   [L_HISTO(zio->io_delta)]++;
                        }
                }
 
index 1400aee7b7579f9a1f3a02a968e049d87995ac6a..59bce1370923b8e333d982e5aaa2a189c6b27bd2 100644 (file)
@@ -302,6 +302,47 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
            vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB],
            ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB]));
 
+       /* Request sizes */
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO,
+           vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_READ],
+           ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_READ]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO,
+           vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_WRITE],
+           ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_WRITE]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO,
+           vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_READ],
+           ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_READ]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO,
+           vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_WRITE],
+           ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_WRITE]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO,
+           vsx->vsx_ind_histo[ZIO_PRIORITY_SCRUB],
+           ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SCRUB]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO,
+           vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_READ],
+           ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_READ]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO,
+           vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_WRITE],
+           ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_WRITE]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO,
+           vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_READ],
+           ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_READ]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO,
+           vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_WRITE],
+           ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_WRITE]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO,
+           vsx->vsx_agg_histo[ZIO_PRIORITY_SCRUB],
+           ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SCRUB]));
+
        /* Add extended stats nvlist to main nvlist */
        fnvlist_add_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, nvx);
 
index ae1e5a1523c2db7d08d7b73bef1ddb9be2a10401..097be9e7449e61eb6334aa35fa9819a9d649c481 100755 (executable)
@@ -52,7 +52,8 @@ fi
 
 set -A args "" "-?" "-f" "nonexistpool" "$TESTPOOL/$TESTFS" \
        "$testpool 0" "$testpool -1" "$testpool 1 0" \
-       "$testpool 0 0" "$testpool -wl" "$testpool -wq"
+       "$testpool 0 0" "$testpool -wl" "$testpool -wq" "$testpool -wr" \
+       "$testpool -rq" "$testpool -lr"
 
 log_assert "Executing 'zpool iostat' with bad options fails"
 
index f8bd8e3db69c084bd03ffef17a7a0c918d61a19c..0119a7061f326169312a7b8f434d06cbb4eca409 100755 (executable)
@@ -36,7 +36,7 @@
 #
 # DESCRIPTION:
 # Executing 'zpool iostat' command with various combinations of extended
-# stats (-vqL), parsable/script options (-pH), and misc lists of pools
+# stats (-lqwr), parsable/script options (-pH), and misc lists of pools
 # and vdevs.
 #
 # STRATEGY:
@@ -59,7 +59,9 @@ set -A args "" "-v" "-q" "-l" "-lq $TESTPOOL" "-ql ${DISKS[0]} ${DISKS[1]}" \
        "-wp $TESTPOOL" \
        "-qlH $TESTPOOL ${DISKS[0]}" \
        "-vpH ${DISKS[0]}" \
-       "-wpH ${DISKS[0]}"
+       "-wpH ${DISKS[0]}" \
+       "-r ${DISKS[0]}" \
+       "-rpH ${DISKS[0]}"
 
 log_assert "Executing 'zpool iostat' with extended stat options succeeds"
 log_note "testpool: $TESTPOOL, disks $DISKS"