]> git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/spa.c
OpenZFS 9102 - zfs should be able to initialize storage devices
[mirror_zfs.git] / module / zfs / spa.c
index f0683b0b84612521a6467765dcf64322b832c8f7..622be75f9454995d19b1b56ce0bf15628b051872 100644 (file)
@@ -56,6 +56,7 @@
 #include <sys/vdev_removal.h>
 #include <sys/vdev_indirect_mapping.h>
 #include <sys/vdev_indirect_births.h>
+#include <sys/vdev_initialize.h>
 #include <sys/vdev_disk.h>
 #include <sys/metaslab.h>
 #include <sys/metaslab_impl.h>
@@ -434,8 +435,9 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
 
                                dp = spa_get_dsl(spa);
                                dsl_pool_config_enter(dp, FTAG);
-                               if ((err = dsl_dataset_hold_obj(dp,
-                                   za.za_first_integer, FTAG, &ds))) {
+                               err = dsl_dataset_hold_obj(dp,
+                                   za.za_first_integer, FTAG, &ds);
+                               if (err != 0) {
                                        dsl_pool_config_exit(dp, FTAG);
                                        break;
                                }
@@ -601,7 +603,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
                                }
 
                                error = dmu_objset_hold(strval, FTAG, &os);
-                               if (error)
+                               if (error != 0)
                                        break;
 
                                /*
@@ -1218,8 +1220,10 @@ spa_activate(spa_t *spa, int mode)
                spa_create_zio_taskqs(spa);
        }
 
-       for (size_t i = 0; i < TXG_SIZE; i++)
-               spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, 0);
+       for (size_t i = 0; i < TXG_SIZE; i++) {
+               spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL,
+                   ZIO_FLAG_CANFAIL);
+       }
 
        list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
            offsetof(vdev_t, vdev_config_dirty_node));
@@ -1437,6 +1441,11 @@ spa_unload(spa_t *spa)
         */
        spa_async_suspend(spa);
 
+       if (spa->spa_root_vdev) {
+               vdev_initialize_stop_all(spa->spa_root_vdev,
+                   VDEV_INITIALIZE_ACTIVE);
+       }
+
        /*
         * Stop syncing.
         */
@@ -1452,10 +1461,10 @@ spa_unload(spa_t *spa)
         * calling taskq_wait(mg_taskq).
         */
        if (spa->spa_root_vdev != NULL) {
-               spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+               spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
                for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++)
                        vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]);
-               spa_config_exit(spa, SCL_ALL, FTAG);
+               spa_config_exit(spa, SCL_ALL, spa);
        }
 
        if (spa->spa_mmp.mmp_thread)
@@ -1492,7 +1501,7 @@ spa_unload(spa_t *spa)
 
        bpobj_close(&spa->spa_deferred_bpobj);
 
-       spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+       spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
 
        /*
         * Close all vdevs.
@@ -1554,7 +1563,7 @@ spa_unload(spa_t *spa)
                spa->spa_comment = NULL;
        }
 
-       spa_config_exit(spa, SCL_ALL, FTAG);
+       spa_config_exit(spa, SCL_ALL, spa);
 }
 
 /*
@@ -4246,6 +4255,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                 */
                dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
 
+               spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+               vdev_initialize_restart(spa->spa_root_vdev);
+               spa_config_exit(spa, SCL_CONFIG, FTAG);
        }
 
        spa_load_note(spa, "LOADED");
@@ -5653,6 +5665,18 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
                        return (SET_ERROR(EXDEV));
                }
 
+               /*
+                * We're about to export or destroy this pool. Make sure
+                * we stop all initialization activity here before we
+                * set the spa_final_txg. This will ensure that all
+                * dirty data resulting from the initialization is
+                * committed to disk before we unload the pool.
+                */
+               if (spa->spa_root_vdev != NULL) {
+                       vdev_initialize_stop_all(spa->spa_root_vdev,
+                           VDEV_INITIALIZE_ACTIVE);
+               }
+
                /*
                 * We want this to be reflected on every label,
                 * so mark them all dirty.  spa_unload() will do the
@@ -6357,6 +6381,86 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
        return (error);
 }
 
+int
+spa_vdev_initialize(spa_t *spa, uint64_t guid, uint64_t cmd_type)
+{
+       /*
+        * Start, cancel or suspend initialization of the leaf vdev with
+        * the given guid, as selected by cmd_type. The namespace lock is
+        * held throughout to prevent any changes to the pool meanwhile;
+        * the config and state locks are held so that we can properly
+        * assess the vdev state before we commit to the operation.
+        */
+       mutex_enter(&spa_namespace_lock);
+       spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
+
+       /* Look up the vdev; it must be a writeable, concrete leaf. */
+       vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE);
+       if (vd == NULL || vd->vdev_detached) {
+               spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(ENODEV));
+       } else if (!vd->vdev_ops->vdev_op_leaf || !vdev_is_concrete(vd)) {
+               spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(EINVAL));
+       } else if (!vdev_writeable(vd)) {
+               spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(EROFS));
+       }
+       mutex_enter(&vd->vdev_initialize_lock);
+       spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+
+       /*
+        * When we activate an initialize action we check to see
+        * if the vdev_initialize_thread is NULL. We do this instead
+        * of using the vdev_initialize_state since there might be
+        * a previous initialization process which has completed but
+        * whose thread has not yet exited.
+        */
+       if (cmd_type == POOL_INITIALIZE_DO &&
+           (vd->vdev_initialize_thread != NULL ||
+           vd->vdev_top->vdev_removing)) {
+               mutex_exit(&vd->vdev_initialize_lock);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(EBUSY));
+       } else if (cmd_type == POOL_INITIALIZE_CANCEL &&
+           (vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE &&
+           vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED)) {
+               mutex_exit(&vd->vdev_initialize_lock);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(ESRCH));
+       } else if (cmd_type == POOL_INITIALIZE_SUSPEND &&
+           vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
+               mutex_exit(&vd->vdev_initialize_lock);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(ESRCH));
+       }
+
+       switch (cmd_type) {
+       case POOL_INITIALIZE_DO:
+               vdev_initialize(vd);
+               break;
+       case POOL_INITIALIZE_CANCEL:
+               vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED);
+               break;
+       case POOL_INITIALIZE_SUSPEND:
+               vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED);
+               break;
+       default:
+               panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
+       }
+       mutex_exit(&vd->vdev_initialize_lock);
+
+       /* Wait for the initializing state to sync out. */
+       txg_wait_synced(spa->spa_dsl_pool, 0);
+       mutex_exit(&spa_namespace_lock);
+
+       return (0);
+}
+
+
 /*
  * Split a set of devices from their mirrors, and create a new pool from them.
  */
@@ -6565,6 +6669,19 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
        spa_activate(newspa, spa_mode_global);
        spa_async_suspend(newspa);
 
+       for (c = 0; c < children; c++) {
+               if (vml[c] != NULL) {
+                       /*
+                        * Temporarily stop the initializing activity. We set
+                        * the state to ACTIVE so that we know to resume
+                        * the initializing once the split has completed.
+                        */
+                       mutex_enter(&vml[c]->vdev_initialize_lock);
+                       vdev_initialize_stop(vml[c], VDEV_INITIALIZE_ACTIVE);
+                       mutex_exit(&vml[c]->vdev_initialize_lock);
+               }
+       }
+
        newspa->spa_config_source = SPA_CONFIG_SRC_SPLIT;
 
        /* create the new pool from the disks of the original pool */
@@ -6652,6 +6769,10 @@ out:
                if (vml[c] != NULL)
                        vml[c]->vdev_offline = B_FALSE;
        }
+
+       /* restart initializing disks as necessary */
+       spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART);
+
        vdev_reopen(spa->spa_root_vdev);
 
        nvlist_free(spa->spa_config_splitting);
@@ -7025,6 +7146,14 @@ spa_async_thread(void *arg)
            !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
                dsl_resilver_restart(dp, 0);
 
+       if (tasks & SPA_ASYNC_INITIALIZE_RESTART) {
+               mutex_enter(&spa_namespace_lock);
+               spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+               vdev_initialize_restart(spa->spa_root_vdev);
+               spa_config_exit(spa, SCL_CONFIG, FTAG);
+               mutex_exit(&spa_namespace_lock);
+       }
+
        /*
         * Let the world know that we're done.
         */
@@ -7677,8 +7806,9 @@ spa_sync(spa_t *spa, uint64_t txg)
         * Wait for i/os issued in open context that need to complete
         * before this txg syncs.
         */
-       VERIFY0(zio_wait(spa->spa_txg_zio[txg & TXG_MASK]));
-       spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL, 0);
+       (void) zio_wait(spa->spa_txg_zio[txg & TXG_MASK]);
+       spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL,
+           ZIO_FLAG_CANFAIL);
 
        /*
         * Lock out configuration changes.
@@ -7983,7 +8113,8 @@ spa_sync(spa_t *spa, uint64_t txg)
        /*
         * Update usable space statistics.
         */
-       while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))))
+       while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
+           != NULL)
                vdev_sync_done(vd, txg);
 
        spa_update_dspace(spa);