*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
#include <sys/dsl_scan.h>
typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
static scan_cb_t dsl_scan_scrub_cb;
-static dsl_syncfunc_t dsl_scan_cancel_sync;
+static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
scn = dp->dp_scan = kmem_zalloc(sizeof (dsl_scan_t), KM_SLEEP);
scn->scn_dp = dp;
+ /*
+ * It's possible that we're resuming a scan after a reboot so
+ * make sure that the scan_async_destroying flag is initialized
+ * appropriately.
+ */
+ ASSERT(!scn->scn_async_destroying);
+ scn->scn_async_destroying = spa_feature_is_active(dp->dp_spa,
+ &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]);
+
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
"scrub_func", sizeof (uint64_t), 1, &f);
if (err == 0) {
/* ARGSUSED */
static int
-dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
{
- dsl_scan_t *scn = arg1;
+ dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state == DSS_SCANNING)
- return (EBUSY);
+ return (SET_ERROR(EBUSY));
return (0);
}
-/* ARGSUSED */
static void
-dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
{
- dsl_scan_t *scn = arg1;
- pool_scan_func_t *funcp = arg2;
+ dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
+ pool_scan_func_t *funcp = arg;
dmu_object_type_t ot = 0;
dsl_pool_t *dp = scn->scn_dp;
spa_t *spa = dp->dp_spa;
scn->scn_phys.scn_errors = 0;
scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
scn->scn_restart_txg = 0;
+ scn->scn_done_txg = 0;
spa_scan_stat_init(spa);
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
if (vdev_resilver_needed(spa->spa_root_vdev,
&scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
- spa_event_notify(spa, NULL, FM_EREPORT_ZFS_RESILVER_START);
+ spa_event_notify(spa, NULL,
+ FM_EREPORT_ZFS_RESILVER_START);
} else {
- spa_event_notify(spa, NULL, FM_EREPORT_ZFS_SCRUB_START);
+ spa_event_notify(spa, NULL,
+ FM_EREPORT_ZFS_SCRUB_START);
}
spa->spa_scrub_started = B_TRUE;
/* ARGSUSED */
static int
-dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_cancel_check(void *arg, dmu_tx_t *tx)
{
- dsl_scan_t *scn = arg1;
+ dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state != DSS_SCANNING)
- return (ENOENT);
+ return (SET_ERROR(ENOENT));
return (0);
}
/* ARGSUSED */
static void
-dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
+dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx)
{
- dsl_scan_t *scn = arg1;
+ dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
dsl_scan_done(scn, B_FALSE, tx);
dsl_scan_sync_state(scn, tx);
int
dsl_scan_cancel(dsl_pool_t *dp)
{
- boolean_t complete = B_FALSE;
- int err;
-
- err = dsl_sync_task_do(dp, dsl_scan_cancel_check,
- dsl_scan_cancel_sync, dp->dp_scan, &complete, 3);
- return (err);
+ return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check,
+ dsl_scan_cancel_sync, NULL, 3));
}
static void dsl_scan_visitbp(blkptr_t *bp,
static void
dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
{
- VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset,
+ VERIFY0(zap_update(scn->scn_dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
&scn->scn_phys, tx));
zfs_resilver_min_time_ms : zfs_scan_min_time_ms;
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
- (elapsed_nanosecs / MICROSEC > mintime &&
+ (NSEC2MSEC(elapsed_nanosecs) > mintime &&
txg_sync_waiting(scn->scn_dp)) ||
spa_shutting_down(scn->scn_dp->dp_spa)) {
if (zb) {
* Don't scan it now unless we need to because something
* under it was modified.
*/
- if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) {
+ if (BP_PHYSICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_max_txg) {
scan_funcs[scn->scn_phys.scn_func](dp, bp, zb);
}
if (buf)
(void) arc_buf_remove_ref(buf, &buf);
out:
- kmem_free(bp_toread, sizeof(blkptr_t));
+ kmem_free(bp_toread, sizeof (blkptr_t));
}
static void
/* ARGSUSED */
static int
-enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
struct enqueue_clones_arg *eca = arg;
dsl_dataset_t *ds;
int err;
- dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
- err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj)
+ return (0);
+
+ err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
- if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
- while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
- dsl_dataset_t *prev;
- err = dsl_dataset_hold_obj(dp,
- ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
+ while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
+ dsl_dataset_t *prev;
+ err = dsl_dataset_hold_obj(dp,
+ ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
- dsl_dataset_rele(ds, FTAG);
- if (err)
- return (err);
- ds = prev;
- }
- VERIFY(zap_add_int_key(dp->dp_meta_objset,
- scn->scn_phys.scn_queue_obj, ds->ds_object,
- ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
+ dsl_dataset_rele(ds, FTAG);
+ if (err)
+ return (err);
+ ds = prev;
}
+ VERIFY(zap_add_int_key(dp->dp_meta_objset,
+ scn->scn_phys.scn_queue_obj, ds->ds_object,
+ ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
dsl_dataset_rele(ds, FTAG);
return (0);
}
}
if (usenext) {
- VERIFY(zap_join_key(dp->dp_meta_objset,
+ VERIFY0(zap_join_key(dp->dp_meta_objset,
ds->ds_phys->ds_next_clones_obj,
scn->scn_phys.scn_queue_obj,
- ds->ds_phys->ds_creation_txg, tx) == 0);
+ ds->ds_phys->ds_creation_txg, tx));
} else {
struct enqueue_clones_arg eca;
eca.tx = tx;
eca.originobj = ds->ds_object;
- (void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
- NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
+ VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+ enqueue_clones_cb, &eca, DS_FIND_CHILDREN));
}
}
/* ARGSUSED */
static int
-enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
+enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
int err;
- dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
- err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
+ err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
if (ddp->ddp_phys_birth == 0 ||
- ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg)
+ ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
continue;
ddt_bp_create(checksum, ddk, ddp, &bp);
return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
- VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
- NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
+ VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
+ enqueue_cb, tx, DS_FIND_CHILDREN));
} else {
dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx);
* bookmark so we don't think that we're still trying to resume.
*/
bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_t));
- zc = kmem_alloc(sizeof(zap_cursor_t), KM_PUSHPAGE);
- za = kmem_alloc(sizeof(zap_attribute_t), KM_PUSHPAGE);
+ zc = kmem_alloc(sizeof (zap_cursor_t), KM_PUSHPAGE);
+ za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE);
/* keep pulling things out of the zap-object-as-queue */
while (zap_cursor_init(zc, dp->dp_meta_objset,
}
zap_cursor_fini(zc);
out:
- kmem_free(za, sizeof(zap_attribute_t));
- kmem_free(zc, sizeof(zap_cursor_t));
+ kmem_free(za, sizeof (zap_attribute_t));
+ kmem_free(zc, sizeof (zap_cursor_t));
}
static boolean_t
{
uint64_t elapsed_nanosecs;
+ if (zfs_recover)
+ return (B_FALSE);
+
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
- (elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms &&
+ (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms &&
txg_sync_waiting(scn->scn_dp)) ||
spa_shutting_down(scn->scn_dp->dp_spa));
}
if (!scn->scn_is_bptree ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
if (dsl_scan_free_should_pause(scn))
- return (ERESTART);
+ return (SET_ERROR(ERESTART));
}
zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
if (spa_shutting_down(spa))
return (B_FALSE);
- if (scn->scn_phys.scn_state == DSS_SCANNING)
+ if (scn->scn_phys.scn_state == DSS_SCANNING ||
+ scn->scn_async_destroying)
return (B_TRUE);
- if (spa_feature_is_active(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
- return (B_TRUE);
- }
if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
&used, &comp, &uncomp);
func = POOL_SCAN_RESILVER;
zfs_dbgmsg("restarting scan func=%u txg=%llu",
func, tx->tx_txg);
- dsl_scan_setup_sync(scn, &func, tx);
+ dsl_scan_setup_sync(&func, tx);
}
if (!dsl_scan_active(scn) ||
if (err == 0 && spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+ ASSERT(scn->scn_async_destroying);
scn->scn_is_bptree = B_TRUE;
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_MUSTSUCCEED);
err = bptree_iterate(dp->dp_meta_objset,
dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
scn, tx);
- VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
- if (err != 0)
- return;
-
- /* disable async destroy feature */
- spa_feature_decr(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
- ASSERT(!spa_feature_is_active(spa,
- &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
- VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_BPTREE_OBJ, tx));
- VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
- dp->dp_bptree_obj, tx));
- dp->dp_bptree_obj = 0;
+ VERIFY0(zio_wait(scn->scn_zio_root));
+
+ if (err == 0) {
+ zfeature_info_t *feat = &spa_feature_table
+ [SPA_FEATURE_ASYNC_DESTROY];
+ /* finished; deactivate async destroy feature */
+ spa_feature_decr(spa, feat, tx);
+ ASSERT(!spa_feature_is_active(spa, feat));
+ VERIFY0(zap_remove(dp->dp_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_BPTREE_OBJ, tx));
+ VERIFY0(bptree_free(dp->dp_meta_objset,
+ dp->dp_bptree_obj, tx));
+ dp->dp_bptree_obj = 0;
+ scn->scn_async_destroying = B_FALSE;
+ }
}
if (scn->scn_visited_this_txg) {
zfs_dbgmsg("freed %llu blocks in %llums from "
"free_bpobj/bptree txg %llu",
(longlong_t)scn->scn_visited_this_txg,
(longlong_t)
- (gethrtime() - scn->scn_sync_start_time) / MICROSEC,
+ NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
(longlong_t)tx->tx_txg);
scn->scn_visited_this_txg = 0;
/*
if (scn->scn_phys.scn_state != DSS_SCANNING)
return;
+ if (scn->scn_done_txg == tx->tx_txg) {
+ ASSERT(!scn->scn_pausing);
+ /* finished with scan. */
+ zfs_dbgmsg("txg %llu scan complete", tx->tx_txg);
+ dsl_scan_done(scn, B_TRUE, tx);
+ ASSERT3U(spa->spa_scrub_inflight, ==, 0);
+ dsl_scan_sync_state(scn, tx);
+ return;
+ }
+
if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
scn->scn_phys.scn_ddt_class_max) {
zfs_dbgmsg("doing scan sync txg %llu; "
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_CANFAIL);
+ dsl_pool_config_enter(dp, FTAG);
dsl_scan_visit(scn, tx);
+ dsl_pool_config_exit(dp, FTAG);
(void) zio_wait(scn->scn_zio_root);
scn->scn_zio_root = NULL;
zfs_dbgmsg("visited %llu blocks in %llums",
(longlong_t)scn->scn_visited_this_txg,
- (longlong_t)(gethrtime() - scn->scn_sync_start_time) / MICROSEC);
+ (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
if (!scn->scn_pausing) {
- /* finished with scan. */
- zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg);
- dsl_scan_done(scn, B_TRUE, tx);
+ scn->scn_done_txg = tx->tx_txg + 1;
+ zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu",
+ tx->tx_txg, scn->scn_done_txg);
}
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp);
boolean_t needs_io = B_FALSE;
int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
- int zio_priority = 0;
int scan_delay = 0;
int d;
ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn));
if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) {
zio_flags |= ZIO_FLAG_SCRUB;
- zio_priority = ZIO_PRIORITY_SCRUB;
needs_io = B_TRUE;
scan_delay = zfs_scrub_delay;
- } else if (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) {
+ } else {
+ ASSERT3U(scn->scn_phys.scn_func, ==, POOL_SCAN_RESILVER);
zio_flags |= ZIO_FLAG_RESILVER;
- zio_priority = ZIO_PRIORITY_RESILVER;
needs_io = B_FALSE;
scan_delay = zfs_resilver_delay;
}
delay(scan_delay);
zio_nowait(zio_read(NULL, spa, bp, data, size,
- dsl_scan_scrub_done, NULL, zio_priority,
+ dsl_scan_scrub_done, NULL, ZIO_PRIORITY_SCRUB,
zio_flags, zb));
}
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
- return (dsl_sync_task_do(dp, dsl_scan_setup_check,
- dsl_scan_setup_sync, dp->dp_scan, &func, 0));
+ return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
+ dsl_scan_setup_sync, &func, 0));
}
#if defined(_KERNEL) && defined(HAVE_SPL)