*/
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 */
#include <sys/zfs_context.h>
static avl_tree_t spa_namespace_avl;
kmutex_t spa_namespace_lock;
static kcondvar_t spa_namespace_cv;
-static int spa_active_count;
int spa_max_replication_override = SPA_DVAS_PER_BP;
static kmutex_t spa_spare_lock;
*/
int spa_asize_inflation = 24;
+/*
+ * Normally, we don't allow the last 3.2% (1/(2^spa_slop_shift)) of space in
+ * the pool to be consumed. This ensures that we don't run the pool
+ * completely out of space, due to unaccounted changes (e.g. to the MOS).
+ * It also limits the worst-case time to allocate space. If we have
+ * less than this amount of free space, most ZPL operations (e.g. write,
+ * create) will return ENOSPC.
+ *
+ * Certain operations (e.g. file removal, most administrative actions) can
+ * use half the slop space. They will only return ENOSPC if less than half
+ * the slop space is free. Typically, once the pool has less than the slop
+ * space free, the user will use these operations to free up space in the pool.
+ * These are the operations that call dsl_pool_adjustedsize() with the netfree
+ * argument set to TRUE.
+ *
+ * A very restricted set of operations are always permitted, regardless of
+ * the amount of free space. These are the operations that call
+ * dsl_sync_task(ZFS_SPACE_CHECK_NONE), e.g. "zfs destroy". If these
+ * operations result in a net increase in the amount of space used,
+ * it is possible to run the pool completely out of space, causing it to
+ * be permanently read-only.
+ *
+ * See also the comments in zfs_space_check_t.
+ */
+int spa_slop_shift = 5;
+
/*
* ==========================================================================
* SPA config locking
mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&spa->spa_evicting_os_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
/*
* Set the alternate root, if there is one.
*/
- if (altroot) {
+ if (altroot)
spa->spa_root = spa_strdup(altroot);
- spa_active_count++;
- }
/*
* Every pool starts with the default cachefile
spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
+ spa->spa_min_ashift = INT_MAX;
+ spa->spa_max_ashift = 0;
+
/*
* As a pool is being created, treat all features as disabled by
* setting SPA_FEATURE_DISABLED for all entries in the feature
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
+ ASSERT3U(refcount_count(&spa->spa_refcount), ==, 0);
nvlist_free(spa->spa_config_splitting);
avl_remove(&spa_namespace_avl, spa);
cv_broadcast(&spa_namespace_cv);
- if (spa->spa_root) {
+ if (spa->spa_root)
spa_strfree(spa->spa_root);
- spa_active_count--;
- }
while ((dp = list_head(&spa->spa_config_list)) != NULL) {
list_remove(&spa->spa_config_list, dp);
bplist_destroy(&spa->spa_free_bplist[t]);
cv_destroy(&spa->spa_async_cv);
+ cv_destroy(&spa->spa_evicting_os_cv);
cv_destroy(&spa->spa_proc_cv);
cv_destroy(&spa->spa_scrub_io_cv);
cv_destroy(&spa->spa_suspend_cv);
mutex_destroy(&spa->spa_async_lock);
mutex_destroy(&spa->spa_errlist_lock);
mutex_destroy(&spa->spa_errlog_lock);
+ mutex_destroy(&spa->spa_evicting_os_lock);
mutex_destroy(&spa->spa_history_lock);
mutex_destroy(&spa->spa_proc_lock);
mutex_destroy(&spa->spa_props_lock);
(void) refcount_remove(&spa->spa_refcount, tag);
}
+/*
+ * Remove a reference to the given spa_t held by a dsl dir that is
+ * being asynchronously released. Async releases occur from a taskq
+ * performing eviction of dsl datasets and dirs. The namespace lock
+ * isn't held and the hold by the object being evicted may contribute to
+ * spa_minref (e.g. dataset or directory released during pool export),
+ * so the asserts in spa_close() do not apply.
+ */
+void
+spa_async_close(spa_t *spa, void *tag)
+{
+ (void) refcount_remove(&spa->spa_refcount, tag);
+}
+
/*
* Check to see if the spa refcount is zero. Must be called with
* spa_namespace_lock held. We really compare against spa_minref, which is the
return (lsize * spa_asize_inflation);
}
+/*
+ * Return the amount of slop space in bytes. It is 1/32 of the pool (3.2%),
+ * or at least 32MB.
+ *
+ * See the comment above spa_slop_shift for details.
+ */
+uint64_t
+spa_get_slop_space(spa_t *spa) {
+ uint64_t space = spa_get_dspace(spa);
+ return (MAX(space >> spa_slop_shift, SPA_MINDEVSIZE >> 1));
+}
+
uint64_t
spa_get_dspace(spa_t *spa)
{
return (spa->spa_log_class);
}
+void
+spa_evicting_os_register(spa_t *spa, objset_t *os)
+{
+ mutex_enter(&spa->spa_evicting_os_lock);
+ list_insert_head(&spa->spa_evicting_os_list, os);
+ mutex_exit(&spa->spa_evicting_os_lock);
+}
+
+void
+spa_evicting_os_deregister(spa_t *spa, objset_t *os)
+{
+ mutex_enter(&spa->spa_evicting_os_lock);
+ list_remove(&spa->spa_evicting_os_list, os);
+ cv_broadcast(&spa->spa_evicting_os_cv);
+ mutex_exit(&spa->spa_evicting_os_lock);
+}
+
+void
+spa_evicting_os_wait(spa_t *spa)
+{
+ mutex_enter(&spa->spa_evicting_os_lock);
+ while (!list_is_empty(&spa->spa_evicting_os_list))
+ cv_wait(&spa->spa_evicting_os_cv, &spa->spa_evicting_os_lock);
+ mutex_exit(&spa->spa_evicting_os_lock);
+
+ dmu_buf_user_evict_wait();
+}
+
int
spa_max_replication(spa_t *spa)
{
dmu_init();
zil_init();
vdev_cache_stat_init();
- vdev_file_init();
zfs_prop_init();
zpool_prop_init();
zpool_feature_init();
spa_evict_all();
- vdev_file_fini();
vdev_cache_stat_fini();
zil_fini();
dmu_fini();
return (spa->spa_debug);
}
+int
+spa_maxblocksize(spa_t *spa)
+{
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS))
+ return (SPA_MAXBLOCKSIZE);
+ else
+ return (SPA_OLD_MAXBLOCKSIZE);
+}
+
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Namespace manipulation */
EXPORT_SYMBOL(spa_lookup);
EXPORT_SYMBOL(spa_bootfs);
EXPORT_SYMBOL(spa_delegation);
EXPORT_SYMBOL(spa_meta_objset);
+EXPORT_SYMBOL(spa_maxblocksize);
/* Miscellaneous support routines */
EXPORT_SYMBOL(spa_rename);
module_param(spa_asize_inflation, int, 0644);
MODULE_PARM_DESC(spa_asize_inflation,
"SPA size estimate multiplication factor");
+
+module_param(spa_slop_shift, int, 0644);
+MODULE_PARM_DESC(spa_slop_shift, "Reserved free space in pool");
#endif