+ nvlist_free(mos_config);
+
+ spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
+
+ error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object,
+ B_FALSE);
+ if (error && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ if (error == 0) {
+ uint64_t autoreplace;
+
+ spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
+ spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace);
+ spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
+ spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
+ spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
+ spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost);
+ spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
+ &spa->spa_dedup_ditto);
+
+ spa->spa_autoreplace = (autoreplace != 0);
+ }
+
+ /*
+ * If we are importing a pool with missing top-level vdevs,
+ * we enforce that the pool doesn't panic or get suspended on
+ * error since the likelihood of missing data is extremely high.
+ */
+ if (spa->spa_missing_tvds > 0 &&
+ spa->spa_failmode != ZIO_FAILURE_MODE_CONTINUE &&
+ spa->spa_load_state != SPA_LOAD_TRYIMPORT) {
+ spa_load_note(spa, "forcing failmode to 'continue' "
+ "as some top level vdevs are missing");
+ spa->spa_failmode = ZIO_FAILURE_MODE_CONTINUE;
+ }
+
+ return (0);
+}
+
+static int
+spa_ld_open_aux_vdevs(spa_t *spa, spa_import_type_t type)
+{
+ int error = 0;
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ /*
+ * If we're assembling the pool from the split-off vdevs of
+ * an existing pool, we don't want to attach the spares & cache
+ * devices.
+ */
+
+ /*
+ * Load any hot spares for this pool.
+ */
+ error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object,
+ B_FALSE);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {
+ ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
+ if (load_nvlist(spa, spa->spa_spares.sav_object,
+ &spa->spa_spares.sav_config) != 0) {
+ spa_load_failed(spa, "error loading spares nvlist");
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ spa_load_spares(spa);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ } else if (error == 0) {
+ spa->spa_spares.sav_sync = B_TRUE;
+ }
+
+ /*
+ * Load any level 2 ARC devices for this pool.
+ */
+ error = spa_dir_prop(spa, DMU_POOL_L2CACHE,
+ &spa->spa_l2cache.sav_object, B_FALSE);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {
+ ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
+ if (load_nvlist(spa, spa->spa_l2cache.sav_object,
+ &spa->spa_l2cache.sav_config) != 0) {
+ spa_load_failed(spa, "error loading l2cache nvlist");
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ spa_load_l2cache(spa);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ } else if (error == 0) {
+ spa->spa_l2cache.sav_sync = B_TRUE;
+ }
+
+ return (0);
+}
+
+static int
+spa_ld_load_vdev_metadata(spa_t *spa)
+{
+ int error = 0;
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ /*
+ * If the 'multihost' property is set, then never allow a pool to
+ * be imported when the system hostid is zero. The exception to
+ * this rule is zdb which is always allowed to access pools.
+ */
+ if (spa_multihost(spa) && spa_get_hostid() == 0 &&
+ (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) {
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID);
+ return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO));
+ }
+
+ /*
+ * If the 'autoreplace' property is set, then post a resource notifying
+ * the ZFS DE that it should not issue any faults for unopenable
+ * devices. We also iterate over the vdevs, and post a sysevent for any
+ * unopenable vdevs so that the normal autoreplace handler can take
+ * over.
+ */
+ if (spa->spa_autoreplace && spa->spa_load_state != SPA_LOAD_TRYIMPORT) {
+ spa_check_removed(spa->spa_root_vdev);
+ /*
+ * For the import case, this is done in spa_import(), because
+ * at this point we're using the spare definitions from
+ * the MOS config, not necessarily from the userland config.
+ */
+ if (spa->spa_load_state != SPA_LOAD_IMPORT) {
+ spa_aux_check_removed(&spa->spa_spares);
+ spa_aux_check_removed(&spa->spa_l2cache);
+ }
+ }
+
+ /*
+ * Load the vdev metadata such as metaslabs, DTLs, spacemap object, etc.
+ */
+ error = vdev_load(rvd);
+ if (error != 0) {
+ spa_load_failed(spa, "vdev_load failed [error=%d]", error);
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error));
+ }
+
+ /*
+ * Propagate the leaf DTLs we just loaded all the way up the vdev tree.
+ */
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+
+ return (0);
+}
+
+static int
+spa_ld_load_dedup_tables(spa_t *spa)
+{
+ int error = 0;
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ error = ddt_load(spa);
+ if (error != 0) {
+ spa_load_failed(spa, "ddt_load failed [error=%d]", error);
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
+ return (0);
+}
+
+static int
+spa_ld_verify_logs(spa_t *spa, spa_import_type_t type, char **ereport)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ if (type != SPA_IMPORT_ASSEMBLE && spa_writeable(spa)) {
+ boolean_t missing = spa_check_logs(spa);
+ if (missing) {
+ if (spa->spa_missing_tvds != 0) {
+ spa_load_note(spa, "spa_check_logs failed "
+ "so dropping the logs");
+ } else {
+ *ereport = FM_EREPORT_ZFS_LOG_REPLAY;
+ spa_load_failed(spa, "spa_check_logs failed");
+ return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG,
+ ENXIO));
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+spa_ld_verify_pool_data(spa_t *spa)
+{
+ int error = 0;
+ vdev_t *rvd = spa->spa_root_vdev;
+
+ /*
+ * We've successfully opened the pool, verify that we're ready
+ * to start pushing transactions.
+ */
+ if (spa->spa_load_state != SPA_LOAD_TRYIMPORT) {
+ error = spa_load_verify(spa);
+ if (error != 0) {
+ spa_load_failed(spa, "spa_load_verify failed "
+ "[error=%d]", error);
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
+ error));
+ }
+ }
+
+ return (0);
+}
+
+static void
+spa_ld_claim_log_blocks(spa_t *spa)
+{
+ dmu_tx_t *tx;
+ dsl_pool_t *dp = spa_get_dsl(spa);
+
+ /*
+ * Claim log blocks that haven't been committed yet.
+ * This must all happen in a single txg.
+ * Note: spa_claim_max_txg is updated by spa_claim_notify(),
+ * invoked from zil_claim_log_block()'s i/o done callback.
+ * Price of rollback is that we abandon the log.
+ */
+ spa->spa_claiming = B_TRUE;
+
+ tx = dmu_tx_create_assigned(dp, spa_first_txg(spa));
+ (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+ zil_claim, tx, DS_FIND_CHILDREN);
+ dmu_tx_commit(tx);
+
+ spa->spa_claiming = B_FALSE;
+
+ spa_set_log_state(spa, SPA_LOG_GOOD);
+}
+
+static void
+spa_ld_check_for_config_update(spa_t *spa, uint64_t config_cache_txg,
+ boolean_t update_config_cache)
+{
+ vdev_t *rvd = spa->spa_root_vdev;
+ int need_update = B_FALSE;
+
+ /*
+ * If the config cache is stale, or we have uninitialized
+ * metaslabs (see spa_vdev_add()), then update the config.
+ *
+ * If this is a verbatim import, trust the current
+ * in-core spa_config and update the disk labels.
+ */
+ if (update_config_cache || config_cache_txg != spa->spa_config_txg ||
+ spa->spa_load_state == SPA_LOAD_IMPORT ||
+ spa->spa_load_state == SPA_LOAD_RECOVER ||
+ (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
+ need_update = B_TRUE;
+
+ for (int c = 0; c < rvd->vdev_children; c++)
+ if (rvd->vdev_child[c]->vdev_ms_array == 0)
+ need_update = B_TRUE;
+
+ /*
+ * Update the config cache asychronously in case we're the
+ * root pool, in which case the config cache isn't writable yet.
+ */
+ if (need_update)
+ spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+}
+
+static void
+spa_ld_prepare_for_reload(spa_t *spa)
+{
+ int mode = spa->spa_mode;
+ int async_suspended = spa->spa_async_suspended;
+
+ spa_unload(spa);
+ spa_deactivate(spa);
+ spa_activate(spa, mode);
+
+ /*
+ * We save the value of spa_async_suspended as it gets reset to 0 by
+ * spa_unload(). We want to restore it back to the original value before
+ * returning as we might be calling spa_async_resume() later.
+ */
+ spa->spa_async_suspended = async_suspended;
+}
+
+static int
+spa_ld_read_checkpoint_txg(spa_t *spa)
+{
+ uberblock_t checkpoint;
+ int error = 0;
+
+ ASSERT0(spa->spa_checkpoint_txg);
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
+ sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
+
+ if (error == ENOENT)
+ return (0);
+
+ if (error != 0)
+ return (error);
+
+ ASSERT3U(checkpoint.ub_txg, !=, 0);
+ ASSERT3U(checkpoint.ub_checkpoint_txg, !=, 0);
+ ASSERT3U(checkpoint.ub_timestamp, !=, 0);
+ spa->spa_checkpoint_txg = checkpoint.ub_txg;
+ spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp;
+
+ return (0);
+}
+
+static int
+spa_ld_mos_init(spa_t *spa, spa_import_type_t type)
+{
+ int error = 0;
+
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);
+
+ /*
+ * Never trust the config that is provided unless we are assembling
+ * a pool following a split.
+ * This means don't trust blkptrs and the vdev tree in general. This
+ * also effectively puts the spa in read-only mode since
+ * spa_writeable() checks for spa_trust_config to be true.
+ * We will later load a trusted config from the MOS.
+ */
+ if (type != SPA_IMPORT_ASSEMBLE)
+ spa->spa_trust_config = B_FALSE;
+
+ /*
+ * Parse the config provided to create a vdev tree.
+ */
+ error = spa_ld_parse_config(spa, type);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Now that we have the vdev tree, try to open each vdev. This involves
+ * opening the underlying physical device, retrieving its geometry and
+ * probing the vdev with a dummy I/O. The state of each vdev will be set
+ * based on the success of those operations. After this we'll be ready
+ * to read from the vdevs.
+ */
+ error = spa_ld_open_vdevs(spa);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Read the label of each vdev and make sure that the GUIDs stored
+ * there match the GUIDs in the config provided.
+ * If we're assembling a new pool that's been split off from an
+ * existing pool, the labels haven't yet been updated so we skip
+ * validation for now.
+ */
+ if (type != SPA_IMPORT_ASSEMBLE) {
+ error = spa_ld_validate_vdevs(spa);
+ if (error != 0)
+ return (error);
+ }
+
+ /*
+ * Read all vdev labels to find the best uberblock (i.e. latest,
+ * unless spa_load_max_txg is set) and store it in spa_uberblock. We
+ * get the list of features required to read blkptrs in the MOS from
+ * the vdev label with the best uberblock and verify that our version
+ * of zfs supports them all.
+ */
+ error = spa_ld_select_uberblock(spa, type);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Pass that uberblock to the dsl_pool layer which will open the root
+ * blkptr. This blkptr points to the latest version of the MOS and will
+ * allow us to read its contents.
+ */
+ error = spa_ld_open_rootbp(spa);
+ if (error != 0)
+ return (error);
+
+ return (0);
+}
+
+static int
+spa_ld_checkpoint_rewind(spa_t *spa)
+{
+ uberblock_t checkpoint;
+ int error = 0;
+
+ ASSERT(MUTEX_HELD(&spa_namespace_lock));
+ ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
+
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
+ sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
+
+ if (error != 0) {
+ spa_load_failed(spa, "unable to retrieve checkpointed "
+ "uberblock from the MOS config [error=%d]", error);
+
+ if (error == ENOENT)
+ error = ZFS_ERR_NO_CHECKPOINT;
+
+ return (error);
+ }
+
+ ASSERT3U(checkpoint.ub_txg, <, spa->spa_uberblock.ub_txg);
+ ASSERT3U(checkpoint.ub_txg, ==, checkpoint.ub_checkpoint_txg);