*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
#include <sys/spa.h>
#include <sys/fs/zfs.h>
#include <sys/vdev_impl.h>
#include <sys/zfs_ioctl.h>
-#include <sys/utsname.h>
#include <sys/systeminfo.h>
#include <sys/sunddi.h>
+#include <sys/zfeature.h>
#ifdef _KERNEL
#include <sys/kobj.h>
#include <sys/zone.h>
* This can be overridden in userland to preserve an alternate namespace for
* userland pools when doing testing.
*/
-const char *spa_config_path = ZPOOL_CACHE;
+char *spa_config_path = ZPOOL_CACHE;
+int zfs_autoimport_disable = 1;
/*
* Called when the module is first loaded, this routine loads the configuration
void *buf = NULL;
nvlist_t *nvlist, *child;
nvpair_t *nvpair;
- spa_t *spa;
char *pathname;
struct _buf *file;
uint64_t fsize;
+#ifdef _KERNEL
+ if (zfs_autoimport_disable)
+ return;
+#endif
+
/*
* Open the configuration file.
*/
mutex_enter(&spa_namespace_lock);
nvpair = NULL;
while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {
-
if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
continue;
if (spa_lookup(nvpair_name(nvpair)) != NULL)
continue;
- spa = spa_add(nvpair_name(nvpair), NULL);
-
- /*
- * We blindly duplicate the configuration here. If it's
- * invalid, we will catch it when the pool is first opened.
- */
- VERIFY(nvlist_dup(child, &spa->spa_config, 0) == 0);
+ (void) spa_add(nvpair_name(nvpair), child, NULL);
}
mutex_exit(&spa_namespace_lock);
*/
VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0);
- buf = kmem_alloc(buflen, KM_SLEEP);
+ buf = vmem_alloc(buflen, KM_SLEEP);
temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR,
(void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
}
(void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
- VN_RELE(vp);
}
(void) vn_remove(temp, UIO_SYSSPACE, RMFILE);
- kmem_free(buf, buflen);
+ vmem_free(buf, buflen);
kmem_free(temp, MAXPATHLEN);
}
/*
* Synchronize pool configuration to disk. This must be called with the
- * namespace lock held.
+ * namespace lock held. Synchronizing the pool cache is typically done after
+ * the configuration has been synced to the MOS. This exposes a window where
+ * the MOS config will have been updated but the cache file has not. If
+ * the system were to crash at that instant then the cached config may not
+ * contain the correct information to open the pool and an explicit import
+ * would be required.
*/
void
spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
{
spa_config_dirent_t *dp, *tdp;
nvlist_t *nvl;
+ char *pool_name;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
- if (rootdir == NULL)
+ if (rootdir == NULL || !(spa_mode_global & FWRITE))
return;
/*
*/
nvl = NULL;
while ((spa = spa_next(spa)) != NULL) {
- if (spa == target && removing)
+ /*
+ * Skip over our own pool if we're about to remove
+ * ourselves from the spa namespace or any pool that
+ * is readonly. Since we cannot guarantee that a
+ * readonly pool would successfully import upon reboot,
+ * we don't allow them to be written to the cache file.
+ */
+ if ((spa == target && removing) ||
+ !spa_writeable(spa))
continue;
mutex_enter(&spa->spa_props_lock);
VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
KM_SLEEP) == 0);
- VERIFY(nvlist_add_nvlist(nvl, spa->spa_name,
+ if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
+ VERIFY0(nvlist_lookup_string(spa->spa_config,
+ ZPOOL_CONFIG_POOL_NAME, &pool_name));
+ } else
+ pool_name = spa_name(spa);
+
+ VERIFY(nvlist_add_nvlist(nvl, pool_name,
spa->spa_config) == 0);
mutex_exit(&spa->spa_props_lock);
}
spa_config_generation++;
if (postsysevent)
- spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
+ spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
}
/*
/*
* Generate the pool's configuration based on the current in-core state.
+ *
* We infer whether to generate a complete config or just one top-level config
* based on whether vd is the root vdev.
*/
vdev_t *rvd = spa->spa_root_vdev;
unsigned long hostid = 0;
boolean_t locked = B_FALSE;
+ uint64_t split_guid;
+ char *pool_name;
if (vd == NULL) {
vd = rvd;
if (txg == -1ULL)
txg = spa->spa_config_txg;
+ /*
+ * Originally, users had to handle spa namespace collisions by either
+ * exporting the already imported pool or by specifying a new name for
+ * the pool with a conflicting name. In the case of root pools from
+ * virtual guests, neither approach to collision resolution is
+ * reasonable. This is addressed by extending the new name syntax with
+ * an option to specify that the new name is temporary. When specified,
+ * ZFS_IMPORT_TEMP_NAME will be set in spa->spa_import_flags to tell us
+ * to use the previous name, which we do below.
+ */
+ if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
+ VERIFY0(nvlist_lookup_string(spa->spa_config,
+ ZPOOL_CONFIG_POOL_NAME, &pool_name));
+ } else
+ pool_name = spa_name(spa);
+
VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
spa_version(spa)) == 0);
VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
- spa_name(spa)) == 0);
+ pool_name) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
spa_state(spa)) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
txg) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
spa_guid(spa)) == 0);
+ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
+ spa->spa_errata) == 0);
+ VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
+ ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);
+
+
#ifdef _KERNEL
hostid = zone_get_hostid(NULL);
#else /* _KERNEL */
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
hostid) == 0);
}
- VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
- utsname.nodename) == 0);
+ VERIFY0(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
+ utsname()->nodename));
if (vd != rvd) {
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
1ULL) == 0);
vd = vd->vdev_top; /* label contains top config */
+ } else {
+ /*
+ * Only add the (potentially large) split information
+ * in the mos config, and not in the vdev labels
+ */
+ if (spa->spa_config_splitting != NULL)
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
+ spa->spa_config_splitting) == 0);
+ }
+
+ /*
+ * Add the top-level config. We even add this on pools which
+ * don't support holes in the namespace.
+ */
+ vdev_top_config_generate(spa, config);
+
+ /*
+ * If we're splitting, record the original pool's guid.
+ */
+ if (spa->spa_config_splitting != NULL &&
+ nvlist_lookup_uint64(spa->spa_config_splitting,
+ ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
+ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
+ split_guid) == 0);
}
- nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
+ nvroot = vdev_config_generate(spa, vd, getstats, 0);
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
nvlist_free(nvroot);
+ /*
+ * Store what's necessary for reading the MOS in the label.
+ */
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
+ spa->spa_label_features) == 0);
+
+ if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
+ ddt_histogram_t *ddh;
+ ddt_stat_t *dds;
+ ddt_object_t *ddo;
+
+ ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
+ ddt_get_dedup_histogram(spa, ddh);
+ VERIFY(nvlist_add_uint64_array(config,
+ ZPOOL_CONFIG_DDT_HISTOGRAM,
+ (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
+ kmem_free(ddh, sizeof (ddt_histogram_t));
+
+ ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
+ ddt_get_dedup_object_stats(spa, ddo);
+ VERIFY(nvlist_add_uint64_array(config,
+ ZPOOL_CONFIG_DDT_OBJ_STATS,
+ (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
+ kmem_free(ddo, sizeof (ddt_object_t));
+
+ dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
+ ddt_get_dedup_stats(spa, dds);
+ VERIFY(nvlist_add_uint64_array(config,
+ ZPOOL_CONFIG_DDT_STATS,
+ (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
+ kmem_free(dds, sizeof (ddt_stat_t));
+ }
+
if (locked)
spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
return (config);
}
-/*
- * For a pool that's not currently a booting rootpool, update all disk labels,
- * generate a fresh config based on the current in-core state, and sync the
- * global config cache.
- */
-void
-spa_config_update(spa_t *spa, int what)
-{
- spa_config_update_common(spa, what, FALSE);
-}
-
/*
* Update all disk labels, generate a fresh config based on the current
* in-core state, and sync the global config cache (do not sync the config
* cache if this is a booting rootpool).
*/
void
-spa_config_update_common(spa_t *spa, int what, boolean_t isroot)
+spa_config_update(spa_t *spa, int what)
{
vdev_t *rvd = spa->spa_root_vdev;
uint64_t txg;
/*
* Update the global config cache to reflect the new mosconfig.
*/
- if (!isroot)
+ if (!spa->spa_is_root)
spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL);
if (what == SPA_CONFIG_UPDATE_POOL)
- spa_config_update_common(spa, SPA_CONFIG_UPDATE_VDEVS, isroot);
+ spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(spa_config_sync);
+EXPORT_SYMBOL(spa_config_load);
+EXPORT_SYMBOL(spa_all_configs);
+EXPORT_SYMBOL(spa_config_set);
+EXPORT_SYMBOL(spa_config_generate);
+EXPORT_SYMBOL(spa_config_update);
+
+module_param(spa_config_path, charp, 0444);
+MODULE_PARM_DESC(spa_config_path, "SPA config file (/etc/zfs/zpool.cache)");
+
+module_param(zfs_autoimport_disable, int, 0644);
+MODULE_PARM_DESC(zfs_autoimport_disable, "Disable pool import at module load");
+
+#endif