From: Matthew Ahrens Date: Wed, 17 Sep 2014 06:59:43 +0000 (+0200) Subject: Illumos 5176 - lock contention on godfather zio X-Git-Tag: zfs-0.7.12~1647 X-Git-Url: https://git.proxmox.com/?p=mirror_zfs.git;a=commitdiff_plain;h=e022864d19ee124c88f6fb9018e4b2d2cec76a08 Illumos 5176 - lock contention on godfather zio 5176 lock contention on godfather zio Reviewed by: Adam Leventhal Reviewed by: Alex Reece Reviewed by: Christopher Siden Reviewed by: George Wilson Reviewed by: Richard Elling Reviewed by: Bayard Bell Approved by: Garrett D'Amore References: https://www.illumos.org/issues/5176 https://github.com/illumos/illumos-gate/commit/6f834bc Porting notes: Under Linux, max_ncpus is defined as num_possible_cpus(). This is the largest number of CPU ids which might be available during the lifetime of the system boot. This value can be larger than the number of present CPUs if CONFIG_HOTPLUG_CPU is defined. Ported by: Turbo Fredriksson Signed-off-by: Brian Behlendorf Closes #2711 --- diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 4b6e73e20..5f9770984 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -2565,7 +2565,7 @@ dump_block_stats(spa_t *spa) uint64_t norm_alloc, norm_space, total_alloc, total_found; int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; boolean_t leaks = B_FALSE; - int e; + int e, c; bp_embedded_type_t i; (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", @@ -2614,10 +2614,12 @@ dump_block_stats(spa_t *spa) * all async I/Os to complete. 
*/ if (dump_opt['c']) { - (void) zio_wait(spa->spa_async_zio_root); - spa->spa_async_zio_root = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | - ZIO_FLAG_GODFATHER); + for (c = 0; c < max_ncpus; c++) { + (void) zio_wait(spa->spa_async_zio_root[c]); + spa->spa_async_zio_root[c] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } } if (zcb.zcb_haderrors) { diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index cd6aeef47..1cb535b9f 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -204,7 +204,8 @@ struct spa { uint64_t spa_failmode; /* failure mode for the pool */ uint64_t spa_delegation; /* delegation on/off */ list_t spa_config_list; /* previous cache file(s) */ - zio_t *spa_async_zio_root; /* root of all async I/O */ + /* per-CPU array of root of async I/O: */ + zio_t **spa_async_zio_root; zio_t *spa_suspend_zio_root; /* root of all suspended I/O */ kmutex_t spa_suspend_lock; /* protects suspend_zio_root */ kcondvar_t spa_suspend_cv; /* notification of resume */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index d96bda3fa..1b1bf7a5c 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1252,7 +1252,9 @@ spa_unload(spa_t *spa) * Wait for any outstanding async I/O to complete. 
*/ if (spa->spa_async_zio_root != NULL) { - (void) zio_wait(spa->spa_async_zio_root); + for (i = 0; i < max_ncpus; i++) + (void) zio_wait(spa->spa_async_zio_root[i]); + kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); spa->spa_async_zio_root = NULL; } @@ -2165,7 +2167,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, uberblock_t *ub = &spa->spa_uberblock; uint64_t children, config_cache_txg = spa->spa_config_txg; int orig_mode = spa->spa_mode; - int parse; + int parse, i; uint64_t obj; boolean_t missing_feat_write = B_FALSE; @@ -2189,8 +2191,13 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, /* * Create "The Godfather" zio to hold all async IOs */ - spa->spa_async_zio_root = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); + spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), + KM_SLEEP); + for (i = 0; i < max_ncpus; i++) { + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } /* * Parse the configuration into a vdev tree. We explicitly set the @@ -3495,7 +3502,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, uint64_t version, obj; boolean_t has_features; nvpair_t *elem; - int c; + int c, i; char *poolname; nvlist_t *nvl; @@ -3556,8 +3563,13 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, /* * Create "The Godfather" zio to hold all async IOs */ - spa->spa_async_zio_root = zio_root(spa, NULL, NULL, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); + spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), + KM_SLEEP); + for (i = 0; i < max_ncpus; i++) { + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } /* * Create the root vdev. 
diff --git a/module/zfs/zio.c b/module/zfs/zio.c index e4f1271d3..844b909fb 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1474,7 +1474,7 @@ zio_nowait(zio_t *zio) */ spa_t *spa = zio->io_spa; - zio_add_child(spa->spa_async_zio_root, zio); + zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio); } __zio_execute(zio);