* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+ * Copyright (c) 2023, Klara Inc.
*/
#include <sys/zfs_context.h>
+#include <sys/zfs_chksum.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/fs/zfs.h>
#include <sys/metaslab_impl.h>
#include <sys/arc.h>
+#include <sys/brt.h>
#include <sys/ddt.h>
#include <sys/kstat.h>
#include "zfs_prop.h"
static avl_tree_t spa_namespace_avl;
kmutex_t spa_namespace_lock;
static kcondvar_t spa_namespace_cv;
-int spa_max_replication_override = SPA_DVAS_PER_BP;
+static const int spa_max_replication_override = SPA_DVAS_PER_BP;
static kmutex_t spa_spare_lock;
static avl_tree_t spa_spare_avl;
static kmutex_t spa_l2cache_lock;
static avl_tree_t spa_l2cache_avl;
-kmem_cache_t *spa_buffer_pool;
spa_mode_t spa_mode_global = SPA_MODE_UNINIT;
#ifdef ZFS_DEBUG
* has not completed in zfs_deadman_synctime_ms is considered "hung", resulting
* in one of three behaviors controlled by zfs_deadman_failmode.
*/
-unsigned long zfs_deadman_synctime_ms = 600000UL;
+uint64_t zfs_deadman_synctime_ms = 600000UL; /* 10 min. */
/*
* This value controls the maximum amount of time zio_wait() will block for an
* outstanding I/O. By default this is 300 seconds, at which point the "hung"
* behavior will be applied as described for zfs_deadman_synctime_ms.
*/
-unsigned long zfs_deadman_ziotime_ms = 300000UL;
+uint64_t zfs_deadman_ziotime_ms = 300000UL; /* 5 min. */
/*
* Check time in milliseconds. This defines the frequency at which we check
* for hung I/O.
*/
-unsigned long zfs_deadman_checktime_ms = 60000UL;
+uint64_t zfs_deadman_checktime_ms = 60000UL; /* 1 min. */
/*
* By default the deadman is enabled.
*/
-int zfs_deadman_enabled = 1;
+int zfs_deadman_enabled = B_TRUE;
/*
* Controls the behavior of the deadman when it detects a "hung" I/O.
* wait     - Wait for the "hung" I/O to complete (default)
* continue - Attempt to recover from a "hung" I/O
* panic    - Panic the system
*/
-char *zfs_deadman_failmode = "wait";
+const char *zfs_deadman_failmode = "wait";
/*
* The worst case is single-sector max-parity RAID-Z blocks, for which
* the worst-case inflation is:
* (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24
*/
-int spa_asize_inflation = 24;
+uint_t spa_asize_inflation = 24;
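+
+/*
+ * For reference: with VDEV_RAIDZ_MAXPARITY == 3 and SPA_DVAS_PER_BP == 3
+ * (their current values), that works out to (3 + 1) * 3 * 2 == 24, the
+ * default above.
+ */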
/*
* Normally, we don't allow the last 3.2% (1/(2^spa_slop_shift)) of space in
*
* See also the comments in zfs_space_check_t.
*/
-int spa_slop_shift = 5;
-uint64_t spa_min_slop = 128ULL * 1024 * 1024;
-uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024;
-int spa_allocators = 4;
+uint_t spa_slop_shift = 5;
+static const uint64_t spa_min_slop = 128ULL * 1024 * 1024;
+static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024;
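+
+/*
+ * With the default spa_slop_shift of 5 the slop works out to 1/32 (~3.1%)
+ * of pool capacity; spa_max_slop (128GB) caps it for very large pools and
+ * spa_min_slop (128MB) provides the corresponding floor for small ones.
+ */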
+/*
+ * Number of allocators to use, per spa instance
+ */
+static int spa_num_allocators = 4;
+
+/*
+ * Spa active allocator.
+ * Valid values are zfs_active_allocator=<dynamic|cursor|new-dynamic>.
+ */
+const char *zfs_active_allocator = "dynamic";
-/*PRINTFLIKE2*/
void
spa_load_failed(spa_t *spa, const char *fmt, ...)
{
spa->spa_trust_config ? "trusted" : "untrusted", buf);
}
-/*PRINTFLIKE2*/
void
spa_load_note(spa_t *spa, const char *fmt, ...)
{
zfs_dbgmsg("spa_load(%s, config %s): %s", spa->spa_name,
spa->spa_trust_config ? "trusted" : "untrusted", buf);
+
+ spa_import_progress_set_notes_nolog(spa, "%s", buf);
}
/*
* By default dedup and user data indirects land in the special class
*/
-int zfs_ddt_data_is_special = B_TRUE;
-int zfs_user_indirect_is_special = B_TRUE;
+static int zfs_ddt_data_is_special = B_TRUE;
+static int zfs_user_indirect_is_special = B_TRUE;
/*
* The percentage of special class final space reserved for metadata only.
* Once we allocate 100 - zfs_special_class_metadata_reserve_pct we only
* let metadata into the class.
*/
-int zfs_special_class_metadata_reserve_pct = 25;
+static uint_t zfs_special_class_metadata_reserve_pct = 25;
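+
+/*
+ * For example, with the default of 25 above, once the special class is 75%
+ * full only metadata allocations are admitted to it.
+ */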
/*
* ==========================================================================
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
- zfs_refcount_create_untracked(&scl->scl_count);
scl->scl_writer = NULL;
scl->scl_write_wanted = 0;
+ scl->scl_count = 0;
}
}
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_destroy(&scl->scl_lock);
cv_destroy(&scl->scl_cv);
- zfs_refcount_destroy(&scl->scl_count);
ASSERT(scl->scl_writer == NULL);
ASSERT(scl->scl_write_wanted == 0);
+ ASSERT(scl->scl_count == 0);
}
}
int
-spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
+spa_config_tryenter(spa_t *spa, int locks, const void *tag, krw_t rw)
{
for (int i = 0; i < SCL_LOCKS; i++) {
spa_config_lock_t *scl = &spa->spa_config_lock[i];
}
} else {
ASSERT(scl->scl_writer != curthread);
- if (!zfs_refcount_is_zero(&scl->scl_count)) {
+ if (scl->scl_count != 0) {
mutex_exit(&scl->scl_lock);
spa_config_exit(spa, locks & ((1 << i) - 1),
tag);
}
scl->scl_writer = curthread;
}
- (void) zfs_refcount_add(&scl->scl_count, tag);
+ scl->scl_count++;
mutex_exit(&scl->scl_lock);
}
return (1);
}
-void
-spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
+static void
+spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw,
+ int mmp_flag)
{
+ (void) tag;
int wlocks_held = 0;
ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
continue;
mutex_enter(&scl->scl_lock);
if (rw == RW_READER) {
- while (scl->scl_writer || scl->scl_write_wanted) {
+ while (scl->scl_writer ||
+ (!mmp_flag && scl->scl_write_wanted)) {
cv_wait(&scl->scl_cv, &scl->scl_lock);
}
} else {
ASSERT(scl->scl_writer != curthread);
- while (!zfs_refcount_is_zero(&scl->scl_count)) {
+ while (scl->scl_count != 0) {
scl->scl_write_wanted++;
cv_wait(&scl->scl_cv, &scl->scl_lock);
scl->scl_write_wanted--;
}
scl->scl_writer = curthread;
}
- (void) zfs_refcount_add(&scl->scl_count, tag);
+ scl->scl_count++;
mutex_exit(&scl->scl_lock);
}
ASSERT3U(wlocks_held, <=, locks);
}
+void
+spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
+{
+ spa_config_enter_impl(spa, locks, tag, rw, 0);
+}
+
+/*
+ * spa_config_enter_mmp() allows the MMP thread to cut in front of
+ * outstanding write lock requests. This is needed since MMP updates are
+ * time-sensitive and failure to service them promptly will result in a
+ * suspended pool. This pool suspension has been seen in practice when there is
+ * a single disk in a pool that is responding slowly and presumably about to
+ * fail.
+ */
+
+void
+spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, krw_t rw)
+{
+ spa_config_enter_impl(spa, locks, tag, rw, 1);
+}
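+
+/*
+ * A sketch of the intended use, assuming the MMP uberblock write path is
+ * the caller:
+ *
+ *	spa_config_enter_mmp(spa, SCL_STATE, FTAG, RW_READER);
+ *	... issue the MMP uberblock write ...
+ *	spa_config_exit(spa, SCL_STATE, FTAG);
+ */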
+
void
spa_config_exit(spa_t *spa, int locks, const void *tag)
{
+ (void) tag;
for (int i = SCL_LOCKS - 1; i >= 0; i--) {
spa_config_lock_t *scl = &spa->spa_config_lock[i];
if (!(locks & (1 << i)))
continue;
mutex_enter(&scl->scl_lock);
- ASSERT(!zfs_refcount_is_zero(&scl->scl_count));
- if (zfs_refcount_remove(&scl->scl_count, tag) == 0) {
+ ASSERT(scl->scl_count > 0);
+ if (--scl->scl_count == 0) {
ASSERT(scl->scl_writer == NULL ||
scl->scl_writer == curthread);
scl->scl_writer = NULL; /* OK in either case */
spa_config_lock_t *scl = &spa->spa_config_lock[i];
if (!(locks & (1 << i)))
continue;
- if ((rw == RW_READER &&
- !zfs_refcount_is_zero(&scl->scl_count)) ||
+ if ((rw == RW_READER && scl->scl_count != 0) ||
(rw == RW_WRITER && scl->scl_writer == curthread))
locks_held |= 1 << i;
}
zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
(gethrtime() - spa->spa_sync_starttime) / NANOSEC,
- ++spa->spa_deadman_calls);
+ (u_longlong_t)++spa->spa_deadman_calls);
if (zfs_deadman_enabled)
vdev_deadman(spa->spa_root_vdev, FTAG);
spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);
spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms);
spa_set_deadman_failmode(spa, zfs_deadman_failmode);
+ spa_set_allocator(spa, zfs_active_allocator);
zfs_refcount_create(&spa->spa_refcount);
spa_config_lock_init(spa);
if (altroot)
spa->spa_root = spa_strdup(altroot);
- spa->spa_alloc_count = spa_allocators;
- spa->spa_alloc_locks = kmem_zalloc(spa->spa_alloc_count *
- sizeof (kmutex_t), KM_SLEEP);
- spa->spa_alloc_trees = kmem_zalloc(spa->spa_alloc_count *
- sizeof (avl_tree_t), KM_SLEEP);
+ /* Do not allow more allocators than CPUs. */
+ spa->spa_alloc_count = MIN(MAX(spa_num_allocators, 1), boot_ncpus);
+
+ spa->spa_allocs = kmem_zalloc(spa->spa_alloc_count *
+ sizeof (spa_alloc_t), KM_SLEEP);
for (int i = 0; i < spa->spa_alloc_count; i++) {
- mutex_init(&spa->spa_alloc_locks[i], NULL, MUTEX_DEFAULT, NULL);
- avl_create(&spa->spa_alloc_trees[i], zio_bookmark_compare,
- sizeof (zio_t), offsetof(zio_t, io_alloc_node));
+ mutex_init(&spa->spa_allocs[i].spaa_lock, NULL, MUTEX_DEFAULT,
+ NULL);
+ avl_create(&spa->spa_allocs[i].spaa_tree, zio_bookmark_compare,
+ sizeof (zio_t), offsetof(zio_t, io_queue_node.a));
}
+
avl_create(&spa->spa_metaslabs_by_flushed, metaslab_sort_by_flushed,
sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node));
avl_create(&spa->spa_sm_logs_by_txg, spa_log_sm_sort_by_txg,
spa->spa_min_ashift = INT_MAX;
spa->spa_max_ashift = 0;
spa->spa_min_alloc = INT_MAX;
+ spa->spa_gcd_alloc = INT_MAX;
/* Reset cached value */
spa->spa_dedup_dspace = ~0ULL;
if (spa->spa_root)
spa_strfree(spa->spa_root);
- while ((dp = list_head(&spa->spa_config_list)) != NULL) {
- list_remove(&spa->spa_config_list, dp);
+ while ((dp = list_remove_head(&spa->spa_config_list)) != NULL) {
if (dp->scd_path != NULL)
spa_strfree(dp->scd_path);
kmem_free(dp, sizeof (spa_config_dirent_t));
}
for (int i = 0; i < spa->spa_alloc_count; i++) {
- avl_destroy(&spa->spa_alloc_trees[i]);
- mutex_destroy(&spa->spa_alloc_locks[i]);
+ avl_destroy(&spa->spa_allocs[i].spaa_tree);
+ mutex_destroy(&spa->spa_allocs[i].spaa_lock);
}
- kmem_free(spa->spa_alloc_locks, spa->spa_alloc_count *
- sizeof (kmutex_t));
- kmem_free(spa->spa_alloc_trees, spa->spa_alloc_count *
- sizeof (avl_tree_t));
+ kmem_free(spa->spa_allocs, spa->spa_alloc_count *
+ sizeof (spa_alloc_t));
avl_destroy(&spa->spa_metaslabs_by_flushed);
avl_destroy(&spa->spa_sm_logs_by_txg);
* have the namespace lock held.
*/
void
-spa_open_ref(spa_t *spa, void *tag)
+spa_open_ref(spa_t *spa, const void *tag)
{
ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
MUTEX_HELD(&spa_namespace_lock));
* have the namespace lock held.
*/
void
-spa_close(spa_t *spa, void *tag)
+spa_close(spa_t *spa, const void *tag)
{
ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref ||
MUTEX_HELD(&spa_namespace_lock));
* so the asserts in spa_close() do not apply.
*/
void
-spa_async_close(spa_t *spa, void *tag)
+spa_async_close(spa_t *spa, const void *tag)
{
(void) zfs_refcount_remove(&spa->spa_refcount, tag);
}
* of multiple transactions without releasing the spa_namespace_lock.
*/
void
-spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
+spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
+ const char *tag)
{
ASSERT(MUTEX_HELD(&spa_namespace_lock));
* If the config changed, update the config cache.
*/
if (config_changed)
- spa_write_cachefile(spa, B_FALSE, B_TRUE);
+ spa_write_cachefile(spa, B_FALSE, B_TRUE, B_TRUE);
}
/*
*/
if (config_changed) {
mutex_enter(&spa_namespace_lock);
- spa_write_cachefile(spa, B_FALSE, B_TRUE);
+ spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
mutex_exit(&spa_namespace_lock);
}
len = strlen(s);
new = kmem_alloc(len + 1, KM_SLEEP);
- bcopy(s, new, len);
- new[len] = '\0';
+ memcpy(new, s, len + 1);
return (new);
}
kmem_free(s, strlen(s) + 1);
}
-uint64_t
-spa_get_random(uint64_t range)
-{
- uint64_t r;
-
- ASSERT(range != 0);
-
- if (range == 1)
- return (0);
-
- (void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));
-
- return (r % range);
-}
-
uint64_t
spa_generate_guid(spa_t *spa)
{
- uint64_t guid = spa_get_random(-1ULL);
+ uint64_t guid;
if (spa != NULL) {
- while (guid == 0 || spa_guid_exists(spa_guid(spa), guid))
- guid = spa_get_random(-1ULL);
+ do {
+ (void) random_get_pseudo_bytes((void *)&guid,
+ sizeof (guid));
+ } while (guid == 0 || spa_guid_exists(spa_guid(spa), guid));
} else {
- while (guid == 0 || spa_guid_exists(guid, 0))
- guid = spa_get_random(-1ULL);
+ do {
+ (void) random_get_pseudo_bytes((void *)&guid,
+ sizeof (guid));
+ } while (guid == 0 || spa_guid_exists(guid, 0));
}
return (guid);
snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
{
char type[256];
- char *checksum = NULL;
- char *compress = NULL;
+ const char *checksum = NULL;
+ const char *compress = NULL;
if (bp != NULL) {
if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
}
- SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum,
+ SNPRINTF_BLKPTR(kmem_scnprintf, ' ', buf, buflen, bp, type, checksum,
compress);
}
if (spa->spa_root == NULL)
buf[0] = '\0';
else
- (void) strncpy(buf, spa->spa_root, buflen);
+ (void) strlcpy(buf, spa->spa_root, buflen);
}
-int
+uint32_t
spa_sync_pass(spa_t *spa)
{
return (spa->spa_sync_pass);
uint64_t
spa_get_slop_space(spa_t *spa)
{
- uint64_t space = spa_get_dspace(spa);
- uint64_t slop = MIN(space >> spa_slop_shift, spa_max_slop);
+ uint64_t space = 0;
+ uint64_t slop = 0;
+
+ /*
+ * Make sure spa_dedup_dspace has been set.
+ */
+ if (spa->spa_dedup_dspace == ~0ULL)
+ spa_update_dspace(spa);
+
+ /*
+ * spa_get_dspace() includes the space only logically "used" by
+ * deduplicated data; since it is not useful to reserve more space as
+ * the amount of deduplicated data grows, we subtract it out here.
+ */
+ space = spa_get_dspace(spa) - spa->spa_dedup_dspace;
+ slop = MIN(space >> spa_slop_shift, spa_max_slop);
/*
* Subtract the embedded log space, but no more than half the (3.2%)
spa_update_dspace(spa_t *spa)
{
spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) +
- ddt_get_dedup_dspace(spa);
- if (spa->spa_vdev_removal != NULL) {
+ ddt_get_dedup_dspace(spa) + brt_get_dspace(spa);
+ if (spa->spa_nonallocating_dspace > 0) {
/*
- * We can't allocate from the removing device, so subtract
- * its size if it was included in dspace (i.e. if this is a
- * normal-class vdev, not special/dedup). This prevents the
- * DMU/DSL from filling up the (now smaller) pool while we
- * are in the middle of removing the device.
+ * Subtract the space provided by all non-allocating vdevs that
+ * contribute to dspace. If a file is overwritten, its old
+ * blocks are freed and new blocks are allocated. If there are
+ * no snapshots of the file, the available space should remain
+ * the same. The old blocks could be freed from the
+ * non-allocating vdev, but the new blocks must be allocated on
+ * other (allocating) vdevs. By reserving the entire size of
+ * the non-allocating vdevs (including allocated space), we
+ * ensure that there will be enough space on the allocating
+ * vdevs for this file overwrite to succeed.
*
* Note that the DMU/DSL doesn't actually know or care
* how much space is allocated (it does its own tracking
* of how much space has been logically used). So it
* doesn't matter that the data we are moving may be
- * allocated twice (on the old device and the new
- * device).
+ * allocated twice (on the old device and the new device).
*/
- spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
- vdev_t *vd =
- vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id);
- if (vd->vdev_mg->mg_class == spa_normal_class(spa)) {
- spa->spa_dspace -= spa_deflate(spa) ?
- vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
- }
- spa_config_exit(spa, SCL_VDEV, FTAG);
+ ASSERT3U(spa->spa_dspace, >=, spa->spa_nonallocating_dspace);
+ spa->spa_dspace -= spa->spa_nonallocating_dspace;
}
}
uint64_t pool_guid; /* unique id for updates */
char *pool_name;
spa_load_state_t spa_load_state;
+ char *spa_load_notes;
uint64_t mmp_sec_remaining; /* MMP activity check */
uint64_t spa_load_max_txg; /* rewind txg */
procfs_list_node_t smh_node;
static int
spa_import_progress_show_header(struct seq_file *f)
{
- seq_printf(f, "%-20s %-14s %-14s %-12s %s\n", "pool_guid",
+ seq_printf(f, "%-20s %-14s %-14s %-12s %-16s %s\n", "pool_guid",
"load_state", "multihost_secs", "max_txg",
- "pool_name");
+ "pool_name", "notes");
return (0);
}
{
spa_import_progress_t *sip = (spa_import_progress_t *)data;
- seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n",
+ seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %-16s %s\n",
(u_longlong_t)sip->pool_guid, (u_longlong_t)sip->spa_load_state,
(u_longlong_t)sip->mmp_sec_remaining,
(u_longlong_t)sip->spa_load_max_txg,
- (sip->pool_name ? sip->pool_name : "-"));
+ (sip->pool_name ? sip->pool_name : "-"),
+ (sip->spa_load_notes ? sip->spa_load_notes : "-"));
return (0);
}
sip = list_remove_head(&shl->procfs_list.pl_list);
if (sip->pool_name)
spa_strfree(sip->pool_name);
+ if (sip->spa_load_notes)
+ kmem_strfree(sip->spa_load_notes);
kmem_free(sip, sizeof (spa_import_progress_t));
shl->size--;
}
sip = list_prev(&shl->procfs_list.pl_list, sip)) {
if (sip->pool_guid == pool_guid) {
sip->spa_load_state = load_state;
+ if (sip->spa_load_notes != NULL) {
+ kmem_strfree(sip->spa_load_notes);
+ sip->spa_load_notes = NULL;
+ }
error = 0;
break;
}
return (error);
}
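+
+/*
+ * Replace the "notes" string for the import progress entry matching this
+ * spa, freeing any previous note; when log_dbgmsg is set the note is also
+ * echoed to the debug log. Callers use the wrappers below.
+ */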
+static void
+spa_import_progress_set_notes_impl(spa_t *spa, boolean_t log_dbgmsg,
+ const char *fmt, va_list adx)
+{
+ spa_history_list_t *shl = spa_import_progress_list;
+ spa_import_progress_t *sip;
+ uint64_t pool_guid = spa_guid(spa);
+
+ if (shl->size == 0)
+ return;
+
+ char *notes = kmem_vasprintf(fmt, adx);
+
+ mutex_enter(&shl->procfs_list.pl_lock);
+ for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL;
+ sip = list_prev(&shl->procfs_list.pl_list, sip)) {
+ if (sip->pool_guid == pool_guid) {
+ if (sip->spa_load_notes != NULL) {
+ kmem_strfree(sip->spa_load_notes);
+ sip->spa_load_notes = NULL;
+ }
+ sip->spa_load_notes = notes;
+ if (log_dbgmsg)
+ zfs_dbgmsg("'%s' %s", sip->pool_name, notes);
+ notes = NULL;
+ break;
+ }
+ }
+ mutex_exit(&shl->procfs_list.pl_lock);
+ if (notes != NULL)
+ kmem_strfree(notes);
+}
+
+void
+spa_import_progress_set_notes(spa_t *spa, const char *fmt, ...)
+{
+ va_list adx;
+
+ va_start(adx, fmt);
+ spa_import_progress_set_notes_impl(spa, B_TRUE, fmt, adx);
+ va_end(adx);
+}
+
+void
+spa_import_progress_set_notes_nolog(spa_t *spa, const char *fmt, ...)
+{
+ va_list adx;
+
+ va_start(adx, fmt);
+ spa_import_progress_set_notes_impl(spa, B_FALSE, fmt, adx);
+ va_end(adx);
+}
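+
+/*
+ * Note: spa_load_note() uses the _nolog variant above because it has
+ * already written the same message to the debug log itself.
+ */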
+
int
spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg)
{
{
spa_history_list_t *shl = spa_import_progress_list;
spa_import_progress_t *sip;
- char *poolname = NULL;
+ const char *poolname = NULL;
sip = kmem_zalloc(sizeof (spa_import_progress_t), KM_SLEEP);
sip->pool_guid = spa_guid(spa);
poolname = spa_name(spa);
sip->pool_name = spa_strdup(poolname);
sip->spa_load_state = spa_load_state(spa);
+ sip->spa_load_notes = NULL;
mutex_enter(&shl->procfs_list.pl_lock);
procfs_list_add(&shl->procfs_list, sip);
if (sip->pool_guid == pool_guid) {
if (sip->pool_name)
spa_strfree(sip->pool_name);
+ if (sip->spa_load_notes)
+ kmem_strfree(sip->spa_load_notes);
list_remove(&shl->procfs_list.pl_list, sip);
shl->size--;
kmem_free(sip, sizeof (spa_import_progress_t));
unique_init();
zfs_btree_init();
metaslab_stat_init();
+ brt_init();
ddt_init();
zio_init();
dmu_init();
zil_init();
- vdev_cache_stat_init();
vdev_mirror_stat_init();
vdev_raidz_math_init();
vdev_file_init();
zfs_prop_init();
+ chksum_init();
zpool_prop_init();
zpool_feature_init();
spa_config_load();
+ vdev_prop_init();
l2arc_start();
scan_init();
qat_init();
spa_evict_all();
vdev_file_fini();
- vdev_cache_stat_fini();
vdev_mirror_stat_fini();
vdev_raidz_math_fini();
+ chksum_fini();
zil_fini();
dmu_fini();
zio_fini();
ddt_fini();
+ brt_fini();
metaslab_stat_fini();
zfs_btree_fini();
unique_fini();
spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start;
else
spa->spa_scan_pass_scrub_pause = 0;
+
+ if (dsl_errorscrub_is_paused(spa->spa_dsl_pool->dp_scan))
+ spa->spa_scan_pass_errorscrub_pause = spa->spa_scan_pass_start;
+ else
+ spa->spa_scan_pass_errorscrub_pause = 0;
+
spa->spa_scan_pass_scrub_spent_paused = 0;
spa->spa_scan_pass_exam = 0;
spa->spa_scan_pass_issued = 0;
- vdev_scan_stat_init(spa->spa_root_vdev);
+
+ /* error scrub stats */
+ spa->spa_scan_pass_errorscrub_spent_paused = 0;
}
/*
{
dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL;
- if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE)
+ if (scn == NULL || (scn->scn_phys.scn_func == POOL_SCAN_NONE &&
+ scn->errorscrub_phys.dep_func == POOL_SCAN_NONE))
return (SET_ERROR(ENOENT));
- bzero(ps, sizeof (pool_scan_stat_t));
+
+ memset(ps, 0, sizeof (pool_scan_stat_t));
/* data stored on disk */
ps->pss_func = scn->scn_phys.scn_func;
ps->pss_end_time = scn->scn_phys.scn_end_time;
ps->pss_to_examine = scn->scn_phys.scn_to_examine;
ps->pss_examined = scn->scn_phys.scn_examined;
- ps->pss_to_process = scn->scn_phys.scn_to_process;
+ ps->pss_skipped = scn->scn_phys.scn_skipped;
ps->pss_processed = scn->scn_phys.scn_processed;
ps->pss_errors = scn->scn_phys.scn_errors;
ps->pss_issued =
scn->scn_issued_before_pass + spa->spa_scan_pass_issued;
+ /* error scrub data stored on disk */
+ ps->pss_error_scrub_func = scn->errorscrub_phys.dep_func;
+ ps->pss_error_scrub_state = scn->errorscrub_phys.dep_state;
+ ps->pss_error_scrub_start = scn->errorscrub_phys.dep_start_time;
+ ps->pss_error_scrub_end = scn->errorscrub_phys.dep_end_time;
+ ps->pss_error_scrub_examined = scn->errorscrub_phys.dep_examined;
+ ps->pss_error_scrub_to_be_examined =
+ scn->errorscrub_phys.dep_to_examine;
+
+ /* error scrub data not stored on disk */
+ ps->pss_pass_error_scrub_pause = spa->spa_scan_pass_errorscrub_pause;
+
return (0);
}
vdev_state_t state = rvd->vdev_state;
vdev_aux_t aux = rvd->vdev_stat.vs_aux;
- if (spa_suspended(spa) &&
- (spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE))
+ if (spa_suspended(spa))
return ("SUSPENDED");
switch (state) {
EXPORT_SYMBOL(spa_guid_exists);
EXPORT_SYMBOL(spa_strdup);
EXPORT_SYMBOL(spa_strfree);
-EXPORT_SYMBOL(spa_get_random);
EXPORT_SYMBOL(spa_generate_guid);
EXPORT_SYMBOL(snprintf_blkptr);
EXPORT_SYMBOL(spa_freeze);
ZFS_MODULE_PARAM(zfs, zfs_, free_leak_on_eio, INT, ZMOD_RW,
"Set to ignore IO errors during free and permanently leak the space");
-ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, ULONG, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, U64, ZMOD_RW,
"Dead I/O check interval in milliseconds");
ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, enabled, INT, ZMOD_RW,
"Enable deadman timer");
-ZFS_MODULE_PARAM(zfs_spa, spa_, asize_inflation, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs_spa, spa_, asize_inflation, UINT, ZMOD_RW,
"SPA size estimate multiplication factor");
ZFS_MODULE_PARAM(zfs, zfs_, ddt_data_is_special, INT, ZMOD_RW,
"Failmode for deadman timer");
ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, synctime_ms,
- param_set_deadman_synctime, param_get_ulong, ZMOD_RW,
+ param_set_deadman_synctime, spl_param_get_u64, ZMOD_RW,
"Pool sync expiration time in milliseconds");
ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, ziotime_ms,
- param_set_deadman_ziotime, param_get_ulong, ZMOD_RW,
+ param_set_deadman_ziotime, spl_param_get_u64, ZMOD_RW,
"IO expiration time in milliseconds");
-ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, INT, ZMOD_RW,
+ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, UINT, ZMOD_RW,
"Small file blocks in special vdevs depends on this much "
"free space available");
/* END CSTYLED */
ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, slop_shift, param_set_slop_shift,
- param_get_int, ZMOD_RW, "Reserved free space in pool");
+ param_get_uint, ZMOD_RW, "Reserved free space in pool");
+
+ZFS_MODULE_PARAM(zfs, spa_, num_allocators, INT, ZMOD_RW,
+ "Number of allocators per spa, capped by ncpus");