git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/spa_config.c
OpenZFS 7614, 9064 - zfs device evacuation/removal
[mirror_zfs.git] / module / zfs / spa_config.c
index 831eca7f5e98cd10ed3d49acd3fa174dc2e2c19a..4e9fd6c575ff5f50aabbbda088874f478568ac5c 100644 (file)
@@ -22,7 +22,8 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
  */
 
 #include <sys/spa.h>
@@ -54,7 +55,7 @@
  * configuration information.  When the module loads, we read this information
  * from /etc/zfs/zpool.cache and populate the SPA namespace.  This namespace is
  * maintained independently in spa.c.  Whenever the namespace is modified, or
- * the configuration of a pool is changed, we call spa_config_sync(), which
+ * the configuration of a pool is changed, we call spa_write_cachefile(), which
  * walks through all the active pools and writes the configuration to disk.
  */
 
@@ -129,7 +130,7 @@ spa_config_load(void)
                if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
                        continue;
 
-               VERIFY(nvpair_value_nvlist(nvpair, &child) == 0);
+               child = fnvpair_value_nvlist(nvpair);
 
                if (spa_lookup(nvpair_name(nvpair)) != NULL)
                        continue;
@@ -146,6 +147,26 @@ out:
        kobj_close_file(file);
 }
 
+static int /* result of vn_open() (Linux kernel) or of vn_remove() (otherwise) */
+spa_config_remove(spa_config_dirent_t *dp) /* discards the cache file named by dp->scd_path */
+{
+#if defined(__linux__) && defined(_KERNEL) /* Linux kernel build: the file is reopened, not unlinked */
+       int error, flags = FWRITE | FTRUNC; /* write access plus FTRUNC - presumably empties the file on open */
+       uio_seg_t seg = UIO_SYSSPACE;
+       vnode_t *vp;
+
+       error = vn_open(dp->scd_path, seg, flags, 0644, &vp, 0, 0);
+       if (error == 0) {
+               (void) VOP_FSYNC(vp, FSYNC, kcred, NULL); /* best effort: result deliberately ignored */
+               (void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL); /* result ignored as well */
+       }
+
+       return (error); /* only the vn_open() status reaches the caller */
+#else /* all other builds: remove the file by path */
+       return (vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE));
+#endif
+}
+
 static int
 spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 {
@@ -160,27 +181,25 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
         * If the nvlist is empty (NULL), then remove the old cachefile.
         */
        if (nvl == NULL) {
-               err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
+               err = spa_config_remove(dp);
+               if (err == ENOENT)
+                       err = 0;
+
                return (err);
        }
 
        /*
         * Pack the configuration into a buffer.
         */
-       VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0);
-
-       buf = vmem_alloc(buflen, KM_SLEEP);
+       buf = fnvlist_pack(nvl, &buflen);
        temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 
-       VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR,
-           KM_SLEEP) == 0);
-
 #if defined(__linux__) && defined(_KERNEL)
        /*
         * Write the configuration to disk.  Due to the complexity involved
-        * in performing a rename from within the kernel the file is truncated
-        * and overwritten in place.  In the event of an error the file is
-        * unlinked to make sure we always have a consistent view of the data.
+        * in performing a rename and remove from within the kernel the file
+        * is instead truncated and overwritten in place.  This way we always
+        * have a consistent view of the data or a zero length file.
         */
        err = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0);
        if (err == 0) {
@@ -190,9 +209,8 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
                        err = VOP_FSYNC(vp, FSYNC, kcred, NULL);
 
                (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
-
                if (err)
-                       (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
+                       (void) spa_config_remove(dp);
        }
 #else
        /*
@@ -211,13 +229,12 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
                if (err == 0)
                        err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
                (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
-               VN_RELE(vp);
        }
 
        (void) vn_remove(temp, UIO_SYSSPACE, RMFILE);
 #endif
 
-       vmem_free(buf, buflen);
+       fnvlist_pack_free(buf, buflen);
        kmem_free(temp, MAXPATHLEN);
        return (err);
 }
@@ -228,11 +245,11 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
  * the configuration has been synced to the MOS. This exposes a window where
  * the MOS config will have been updated but the cache file has not. If
  * the system were to crash at that instant then the cached config may not
- * contain the correct information to open the pool and an explicity import
+ * contain the correct information to open the pool and an explicit import
  * would be required.
  */
 void
-spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
+spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
 {
        spa_config_dirent_t *dp, *tdp;
        nvlist_t *nvl;
@@ -276,6 +293,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
                        mutex_enter(&spa->spa_props_lock);
                        tdp = list_head(&spa->spa_config_list);
                        if (spa->spa_config == NULL ||
+                           tdp == NULL ||
                            tdp->scd_path == NULL ||
                            strcmp(tdp->scd_path, dp->scd_path) != 0) {
                                mutex_exit(&spa->spa_props_lock);
@@ -283,17 +301,15 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
                        }
 
                        if (nvl == NULL)
-                               VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
-                                   KM_SLEEP) == 0);
+                               nvl = fnvlist_alloc();
 
-                       if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
-                               VERIFY0(nvlist_lookup_string(spa->spa_config,
-                                       ZPOOL_CONFIG_POOL_NAME, &pool_name));
-                       else
+                       if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME)
+                               pool_name = fnvlist_lookup_string(
+                                   spa->spa_config, ZPOOL_CONFIG_POOL_NAME);
+                       else
                                pool_name = spa_name(spa);
 
-                       VERIFY(nvlist_add_nvlist(nvl, pool_name,
-                           spa->spa_config) == 0);
+                       fnvlist_add_nvlist(nvl, pool_name, spa->spa_config);
                        mutex_exit(&spa->spa_props_lock);
                }
 
@@ -311,7 +327,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
                 */
                if (target->spa_ccw_fail_time == 0) {
                        zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
-                           target, NULL, NULL, 0, 0);
+                           target, NULL, NULL, NULL, 0, 0);
                }
                target->spa_ccw_fail_time = gethrtime();
                spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
@@ -337,7 +353,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
        spa_config_generation++;
 
        if (postsysevent)
-               spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
+               spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC);
 }
 
 /*
@@ -355,15 +371,15 @@ spa_all_configs(uint64_t *generation)
        if (*generation == spa_config_generation)
                return (NULL);
 
-       VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+       pools = fnvlist_alloc();
 
        mutex_enter(&spa_namespace_lock);
        while ((spa = spa_next(spa)) != NULL) {
                if (INGLOBALZONE(curproc) ||
                    zone_dataset_visible(spa_name(spa), NULL)) {
                        mutex_enter(&spa->spa_props_lock);
-                       VERIFY(nvlist_add_nvlist(pools, spa_name(spa),
-                           spa->spa_config) == 0);
+                       fnvlist_add_nvlist(pools, spa_name(spa),
+                           spa->spa_config);
                        mutex_exit(&spa->spa_props_lock);
                }
        }
@@ -377,8 +393,7 @@ void
 spa_config_set(spa_t *spa, nvlist_t *config)
 {
        mutex_enter(&spa->spa_props_lock);
-       if (spa->spa_config != NULL)
-               nvlist_free(spa->spa_config);
+       nvlist_free(spa->spa_config);
        spa->spa_config = config;
        mutex_exit(&spa->spa_props_lock);
 }
@@ -426,55 +441,39 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
         */
        if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
                VERIFY0(nvlist_lookup_string(spa->spa_config,
-                       ZPOOL_CONFIG_POOL_NAME, &pool_name));
+                   ZPOOL_CONFIG_POOL_NAME, &pool_name));
        } else
                pool_name = spa_name(spa);
 
-       VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
-           spa_version(spa)) == 0);
-       VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
-           pool_name) == 0);
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-           spa_state(spa)) == 0);
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
-           txg) == 0);
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
-           spa_guid(spa)) == 0);
-       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
-           spa->spa_errata) == 0);
-       VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
-           ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);
-
-
-#ifdef _KERNEL
-       hostid = zone_get_hostid(NULL);
-#else  /* _KERNEL */
-       /*
-        * We're emulating the system's hostid in userland, so we can't use
-        * zone_get_hostid().
-        */
-       (void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
-#endif /* _KERNEL */
-       if (hostid != 0) {
-               VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
-                   hostid) == 0);
-       }
-       VERIFY0(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
-           utsname()->nodename));
+       config = fnvlist_alloc();
 
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa));
+       fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, pool_name);
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa));
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg);
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa));
+       fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata);
+       if (spa->spa_comment != NULL)
+               fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT,
+                   spa->spa_comment);
+
+       hostid = spa_get_hostid();
+       if (hostid != 0)
+               fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid);
+       fnvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname()->nodename);
+
+       int config_gen_flags = 0;
        if (vd != rvd) {
-               VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
-                   vd->vdev_top->vdev_guid) == 0);
-               VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
-                   vd->vdev_guid) == 0);
+               fnvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
+                   vd->vdev_top->vdev_guid);
+               fnvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
+                   vd->vdev_guid);
                if (vd->vdev_isspare)
-                       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
-                           1ULL) == 0);
+                       fnvlist_add_uint64(config,
+                           ZPOOL_CONFIG_IS_SPARE, 1ULL);
                if (vd->vdev_islog)
-                       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
-                           1ULL) == 0);
+                       fnvlist_add_uint64(config,
+                           ZPOOL_CONFIG_IS_LOG, 1ULL);
                vd = vd->vdev_top;              /* label contains top config */
        } else {
                /*
@@ -482,8 +481,12 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
                 * in the mos config, and not in the vdev labels
                 */
                if (spa->spa_config_splitting != NULL)
-                       VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
-                           spa->spa_config_splitting) == 0);
+                       fnvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
+                           spa->spa_config_splitting);
+
+               fnvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS);
+
+               config_gen_flags |= VDEV_CONFIG_MOS;
        }
 
        /*
@@ -498,19 +501,18 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
        if (spa->spa_config_splitting != NULL &&
            nvlist_lookup_uint64(spa->spa_config_splitting,
            ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
-               VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
-                   split_guid) == 0);
+               fnvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, split_guid);
        }
 
-       nvroot = vdev_config_generate(spa, vd, getstats, 0);
-       VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
+       nvroot = vdev_config_generate(spa, vd, getstats, config_gen_flags);
+       fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot);
        nvlist_free(nvroot);
 
        /*
         * Store what's necessary for reading the MOS in the label.
         */
-       VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
-           spa->spa_label_features) == 0);
+       fnvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
+           spa->spa_label_features);
 
        if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
                ddt_histogram_t *ddh;
@@ -519,23 +521,23 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
 
                ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
                ddt_get_dedup_histogram(spa, ddh);
-               VERIFY(nvlist_add_uint64_array(config,
+               fnvlist_add_uint64_array(config,
                    ZPOOL_CONFIG_DDT_HISTOGRAM,
-                   (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
+                   (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t));
                kmem_free(ddh, sizeof (ddt_histogram_t));
 
                ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
                ddt_get_dedup_object_stats(spa, ddo);
-               VERIFY(nvlist_add_uint64_array(config,
+               fnvlist_add_uint64_array(config,
                    ZPOOL_CONFIG_DDT_OBJ_STATS,
-                   (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
+                   (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t));
                kmem_free(ddo, sizeof (ddt_object_t));
 
                dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
                ddt_get_dedup_stats(spa, dds);
-               VERIFY(nvlist_add_uint64_array(config,
+               fnvlist_add_uint64_array(config,
                    ZPOOL_CONFIG_DDT_STATS,
-                   (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
+                   (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t));
                kmem_free(dds, sizeof (ddt_stat_t));
        }
 
@@ -588,15 +590,16 @@ spa_config_update(spa_t *spa, int what)
        /*
         * Update the global config cache to reflect the new mosconfig.
         */
-       if (!spa->spa_is_root)
-               spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL);
+       if (!spa->spa_is_root) {
+               spa_write_cachefile(spa, B_FALSE,
+                   what != SPA_CONFIG_UPDATE_POOL);
+       }
 
        if (what == SPA_CONFIG_UPDATE_POOL)
                spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
 }
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
-EXPORT_SYMBOL(spa_config_sync);
 EXPORT_SYMBOL(spa_config_load);
 EXPORT_SYMBOL(spa_all_configs);
 EXPORT_SYMBOL(spa_config_set);