module/zfs/dsl_destroy.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25  * Copyright (c) 2013 by Joyent, Inc. All rights reserved.
  26  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  27  */
  28
  29 #include <sys/zfs_context.h>
  30 #include <sys/dsl_userhold.h>
  31 #include <sys/dsl_dataset.h>
  32 #include <sys/dsl_synctask.h>
  33 #include <sys/dmu_tx.h>
  34 #include <sys/dsl_pool.h>
  35 #include <sys/dsl_dir.h>
  36 #include <sys/dmu_traverse.h>
  37 #include <sys/dsl_scan.h>
  38 #include <sys/dmu_objset.h>
  39 #include <sys/zap.h>
  40 #include <sys/zfeature.h>
  41 #include <sys/zfs_ioctl.h>
  42 #include <sys/dsl_deleg.h>
  43 #include <sys/dmu_impl.h>
  44 #include <sys/zvol.h>
  45
  46 typedef struct dmu_snapshots_destroy_arg {
  47         nvlist_t *dsda_snaps;
  48         nvlist_t *dsda_successful_snaps;
  49         boolean_t dsda_defer;
  50         nvlist_t *dsda_errlist;
  51 } dmu_snapshots_destroy_arg_t;
  52
  53 int
  54 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
  55 {
  56         if (!ds->ds_is_snapshot)
  57                 return (SET_ERROR(EINVAL));
  58
  59         if (dsl_dataset_long_held(ds))
  60                 return (SET_ERROR(EBUSY));
  61
  62         /*
  63          * Only allow deferred destroy on pools that support it.
  64          * NOTE: deferred destroy is only supported on snapshots.
  65          */
  66         if (defer) {
  67                 if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
  68                     SPA_VERSION_USERREFS)
  69                         return (SET_ERROR(ENOTSUP));
  70                 return (0);
  71         }
  72
  73         /*
  74          * If this snapshot has an elevated user reference count,
  75          * we can't destroy it yet.
  76          */
  77         if (ds->ds_userrefs > 0)
  78                 return (SET_ERROR(EBUSY));
  79
  80         /*
  81          * Can't delete a branch point.
  82          */
  83         if (dsl_dataset_phys(ds)->ds_num_children > 1)
  84                 return (SET_ERROR(EEXIST));
  85
  86         return (0);
  87 }
  88
  89 static int
  90 dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
  91 {
  92         dmu_snapshots_destroy_arg_t *dsda = arg;
  93         dsl_pool_t *dp = dmu_tx_pool(tx);
  94         nvpair_t *pair;
  95         int error = 0;
  96
  97         if (!dmu_tx_is_syncing(tx))
  98                 return (0);
  99
 100         for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
 101             pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
 102                 dsl_dataset_t *ds;
 103
 104                 error = dsl_dataset_hold(dp, nvpair_name(pair),
 105                     FTAG, &ds);
 106
 107                 /*
 108                  * If the snapshot does not exist, silently ignore it
 109                  * (it's "already destroyed").
 110                  */
 111                 if (error == ENOENT)
 112                         continue;
 113
 114                 if (error == 0) {
 115                         error = dsl_destroy_snapshot_check_impl(ds,
 116                             dsda->dsda_defer);
 117                         dsl_dataset_rele(ds, FTAG);
 118                 }
 119
 120                 if (error == 0) {
 121                         fnvlist_add_boolean(dsda->dsda_successful_snaps,
 122                             nvpair_name(pair));
 123                 } else {
 124                         fnvlist_add_int32(dsda->dsda_errlist,
 125                             nvpair_name(pair), error);
 126                 }
 127         }
 128
 129         pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
 130         if (pair != NULL)
 131                 return (fnvpair_value_int32(pair));
 132
 133         return (0);
 134 }
 135
 136 struct process_old_arg {
 137         dsl_dataset_t *ds;
 138         dsl_dataset_t *ds_prev;
 139         boolean_t after_branch_point;
 140         zio_t *pio;
 141         uint64_t used, comp, uncomp;
 142 };
 143
 144 static int
 145 process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 146 {
 147         struct process_old_arg *poa = arg;
 148         dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
 149
 150         ASSERT(!BP_IS_HOLE(bp));
 151
 152         if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
 153                 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
 154                 if (poa->ds_prev && !poa->after_branch_point &&
 155                     bp->blk_birth >
 156                     dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
 157                         dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
 158                             bp_get_dsize_sync(dp->dp_spa, bp);
 159                 }
 160         } else {
 161                 poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
 162                 poa->comp += BP_GET_PSIZE(bp);
 163                 poa->uncomp += BP_GET_UCSIZE(bp);
 164                 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
 165         }
 166         return (0);
 167 }
 168
 169 static void
 170 process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
 171     dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
 172 {
 173         struct process_old_arg poa = { 0 };
 174         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 175         objset_t *mos = dp->dp_meta_objset;
 176         uint64_t deadlist_obj;
 177
 178         ASSERT(ds->ds_deadlist.dl_oldfmt);
 179         ASSERT(ds_next->ds_deadlist.dl_oldfmt);
 180
 181         poa.ds = ds;
 182         poa.ds_prev = ds_prev;
 183         poa.after_branch_point = after_branch_point;
 184         poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 185         VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
 186             process_old_cb, &poa, tx));
 187         VERIFY0(zio_wait(poa.pio));
 188         ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes);
 189
 190         /* change snapused */
 191         dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
 192             -poa.used, -poa.comp, -poa.uncomp, tx);
 193
 194         /* swap next's deadlist to our deadlist */
 195         dsl_deadlist_close(&ds->ds_deadlist);
 196         dsl_deadlist_close(&ds_next->ds_deadlist);
 197         deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
 198         dsl_dataset_phys(ds)->ds_deadlist_obj =
 199             dsl_dataset_phys(ds_next)->ds_deadlist_obj;
 200         dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj;
 201         dsl_deadlist_open(&ds->ds_deadlist, mos,
 202             dsl_dataset_phys(ds)->ds_deadlist_obj);
 203         dsl_deadlist_open(&ds_next->ds_deadlist, mos,
 204             dsl_dataset_phys(ds_next)->ds_deadlist_obj);
 205 }
 206
 207 static void
 208 dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
 209 {
 210         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 211         zap_cursor_t *zc;
 212         zap_attribute_t *za;
 213
 214         /*
 215          * If it is the old version, dd_clones doesn't exist so we can't
 216          * find the clones, but dsl_deadlist_remove_key() is a no-op so it
 217          * doesn't matter.
 218          */
 219         if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0)
 220                 return;
 221
 222         zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
 223         za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
 224
 225         for (zap_cursor_init(zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones);
 226             zap_cursor_retrieve(zc, za) == 0;
 227             zap_cursor_advance(zc)) {
 228                 dsl_dataset_t *clone;
 229
 230                 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
 231                     za->za_first_integer, FTAG, &clone));
 232                 if (clone->ds_dir->dd_origin_txg > mintxg) {
 233                         dsl_deadlist_remove_key(&clone->ds_deadlist,
 234                             mintxg, tx);
 235                         dsl_dataset_remove_clones_key(clone, mintxg, tx);
 236                 }
 237                 dsl_dataset_rele(clone, FTAG);
 238         }
 239         zap_cursor_fini(zc);
 240
 241         kmem_free(za, sizeof (zap_attribute_t));
 242         kmem_free(zc, sizeof (zap_cursor_t));
 243 }
 244
 245 void
 246 dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
 247 {
 248         spa_feature_t f;
 249         int after_branch_point = FALSE;
 250         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 251         objset_t *mos = dp->dp_meta_objset;
 252         dsl_dataset_t *ds_prev = NULL;
 253         uint64_t obj, old_unique, used = 0, comp = 0, uncomp = 0;
 254         dsl_dataset_t *ds_next, *ds_head, *hds;
 255
 256
 257         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 258         rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 259         ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
 260         rrw_exit(&ds->ds_bp_rwlock, FTAG);
 261         ASSERT(refcount_is_zero(&ds->ds_longholds));
 262
 263         if (defer &&
 264             (ds->ds_userrefs > 0 ||
 265             dsl_dataset_phys(ds)->ds_num_children > 1)) {
 266                 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
 267                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 268                 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
 269                 spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
 270                 return;
 271         }
 272
 273         ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
 274
 275         /* We need to log before removing it from the namespace. */
 276         spa_history_log_internal_ds(ds, "destroy", tx, "");
 277
 278         dsl_scan_ds_destroyed(ds, tx);
 279
 280         obj = ds->ds_object;
 281
 282         for (f = 0; f < SPA_FEATURES; f++) {
 283                 if (ds->ds_feature_inuse[f]) {
 284                         dsl_dataset_deactivate_feature(obj, f, tx);
 285                         ds->ds_feature_inuse[f] = B_FALSE;
 286                 }
 287         }
 288         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
 289                 ASSERT3P(ds->ds_prev, ==, NULL);
 290                 VERIFY0(dsl_dataset_hold_obj(dp,
 291                     dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
 292                 after_branch_point =
 293                     (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);
 294
 295                 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
 296                 if (after_branch_point &&
 297                     dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
 298                         dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
 299                         if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
 300                                 VERIFY0(zap_add_int(mos,
 301                                     dsl_dataset_phys(ds_prev)->
 302                                     ds_next_clones_obj,
 303                                     dsl_dataset_phys(ds)->ds_next_snap_obj,
 304                                     tx));
 305                         }
 306                 }
 307                 if (!after_branch_point) {
 308                         dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
 309                             dsl_dataset_phys(ds)->ds_next_snap_obj;
 310                 }
 311         }
 312
 313         VERIFY0(dsl_dataset_hold_obj(dp,
 314             dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next));
 315         ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj);
 316
 317         old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes;
 318
 319         dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
 320         dsl_dataset_phys(ds_next)->ds_prev_snap_obj =
 321             dsl_dataset_phys(ds)->ds_prev_snap_obj;
 322         dsl_dataset_phys(ds_next)->ds_prev_snap_txg =
 323             dsl_dataset_phys(ds)->ds_prev_snap_txg;
 324         ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
 325             ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0);
 326
 327         if (ds_next->ds_deadlist.dl_oldfmt) {
 328                 process_old_deadlist(ds, ds_prev, ds_next,
 329                     after_branch_point, tx);
 330         } else {
 331                 /* Adjust prev's unique space. */
 332                 if (ds_prev && !after_branch_point) {
 333                         dsl_deadlist_space_range(&ds_next->ds_deadlist,
 334                             dsl_dataset_phys(ds_prev)->ds_prev_snap_txg,
 335                             dsl_dataset_phys(ds)->ds_prev_snap_txg,
 336                             &used, &comp, &uncomp);
 337                         dsl_dataset_phys(ds_prev)->ds_unique_bytes += used;
 338                 }
 339
 340                 /* Adjust snapused. */
 341                 dsl_deadlist_space_range(&ds_next->ds_deadlist,
 342                     dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX,
 343                     &used, &comp, &uncomp);
 344                 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
 345                     -used, -comp, -uncomp, tx);
 346
 347                 /* Move blocks to be freed to pool's free list. */
 348                 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
 349                     &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg,
 350                     tx);
 351                 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
 352                     DD_USED_HEAD, used, comp, uncomp, tx);
 353
 354                 /* Merge our deadlist into next's and free it. */
 355                 dsl_deadlist_merge(&ds_next->ds_deadlist,
 356                     dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
 357         }
 358         dsl_deadlist_close(&ds->ds_deadlist);
 359         dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
 360         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 361         dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
 362
 363         /* Collapse range in clone heads */
 364         dsl_dataset_remove_clones_key(ds,
 365             dsl_dataset_phys(ds)->ds_creation_txg, tx);
 366
 367         if (ds_next->ds_is_snapshot) {
 368                 dsl_dataset_t *ds_nextnext;
 369
 370                 /*
 371                  * Update next's unique to include blocks which
 372                  * were previously shared by only this snapshot
 373                  * and it.  Those blocks will be born after the
 374                  * prev snap and before this snap, and will have
 375                  * died after the next snap and before the one
 376                  * after that (ie. be on the snap after next's
 377                  * deadlist).
 378                  */
 379                 VERIFY0(dsl_dataset_hold_obj(dp,
 380                     dsl_dataset_phys(ds_next)->ds_next_snap_obj,
 381                     FTAG, &ds_nextnext));
 382                 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
 383                     dsl_dataset_phys(ds)->ds_prev_snap_txg,
 384                     dsl_dataset_phys(ds)->ds_creation_txg,
 385                     &used, &comp, &uncomp);
 386                 dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
 387                 dsl_dataset_rele(ds_nextnext, FTAG);
 388                 ASSERT3P(ds_next->ds_prev, ==, NULL);
 389
 390                 /* Collapse range in this head. */
 391                 VERIFY0(dsl_dataset_hold_obj(dp,
 392                     dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
 393                 dsl_deadlist_remove_key(&hds->ds_deadlist,
 394                     dsl_dataset_phys(ds)->ds_creation_txg, tx);
 395                 dsl_dataset_rele(hds, FTAG);
 396
 397         } else {
 398                 ASSERT3P(ds_next->ds_prev, ==, ds);
 399                 dsl_dataset_rele(ds_next->ds_prev, ds_next);
 400                 ds_next->ds_prev = NULL;
 401                 if (ds_prev) {
 402                         VERIFY0(dsl_dataset_hold_obj(dp,
 403                             dsl_dataset_phys(ds)->ds_prev_snap_obj,
 404                             ds_next, &ds_next->ds_prev));
 405                 }
 406
 407                 dsl_dataset_recalc_head_uniq(ds_next);
 408
 409                 /*
 410                  * Reduce the amount of our unconsumed refreservation
 411                  * being charged to our parent by the amount of
 412                  * new unique data we have gained.
 413                  */
 414                 if (old_unique < ds_next->ds_reserved) {
 415                         int64_t mrsdelta;
 416                         uint64_t new_unique =
 417                             dsl_dataset_phys(ds_next)->ds_unique_bytes;
 418
 419                         ASSERT(old_unique <= new_unique);
 420                         mrsdelta = MIN(new_unique - old_unique,
 421                             ds_next->ds_reserved - old_unique);
 422                         dsl_dir_diduse_space(ds->ds_dir,
 423                             DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
 424                 }
 425         }
 426         dsl_dataset_rele(ds_next, FTAG);
 427
 428         /*
 429          * This must be done after the dsl_traverse(), because it will
 430          * re-open the objset.
 431          */
 432         if (ds->ds_objset) {
 433                 dmu_objset_evict(ds->ds_objset);
 434                 ds->ds_objset = NULL;
 435         }
 436
 437         /* remove from snapshot namespace */
 438         ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
 439         VERIFY0(dsl_dataset_hold_obj(dp,
 440             dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
 441         VERIFY0(dsl_dataset_get_snapname(ds));
 442 #ifdef ZFS_DEBUG
 443         {
 444                 uint64_t val;
 445                 int err;
 446
 447                 err = dsl_dataset_snap_lookup(ds_head,
 448                     ds->ds_snapname, &val);
 449                 ASSERT0(err);
 450                 ASSERT3U(val, ==, obj);
 451         }
 452 #endif
 453         VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE));
 454         dsl_dataset_rele(ds_head, FTAG);
 455
 456         if (ds_prev != NULL)
 457                 dsl_dataset_rele(ds_prev, FTAG);
 458
 459         spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
 460
 461         if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
 462                 ASSERTV(uint64_t count);
 463                 ASSERT0(zap_count(mos,
 464                     dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
 465                     count == 0);
 466                 VERIFY0(dmu_object_free(mos,
 467                     dsl_dataset_phys(ds)->ds_next_clones_obj, tx));
 468         }
 469         if (dsl_dataset_phys(ds)->ds_props_obj != 0)
 470                 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj,
 471                     tx));
 472         if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
 473                 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
 474                     tx));
 475         dsl_dir_rele(ds->ds_dir, ds);
 476         ds->ds_dir = NULL;
 477         dmu_object_free_zapified(mos, obj, tx);
 478 }
 479
 480 static void
 481 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
 482 {
 483         dmu_snapshots_destroy_arg_t *dsda = arg;
 484         dsl_pool_t *dp = dmu_tx_pool(tx);
 485         nvpair_t *pair;
 486
 487         for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
 488             pair != NULL;
 489             pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
 490                 dsl_dataset_t *ds;
 491
 492                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 493
 494                 dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
 495                 zvol_remove_minors(dp->dp_spa, nvpair_name(pair), B_TRUE);
 496                 dsl_dataset_rele(ds, FTAG);
 497         }
 498 }
 499
 500 /*
 501  * The semantics of this function are described in the comment above
 502  * lzc_destroy_snaps().  To summarize:
 503  *
 504  * The snapshots must all be in the same pool.
 505  *
 506  * Snapshots that don't exist will be silently ignored (considered to be
 507  * "already deleted").
 508  *
 509  * On success, all snaps will be destroyed and this will return 0.
 510  * On failure, no snaps will be destroyed, the errlist will be filled in,
 511  * and this will return an errno.
 512  */
 513 int
 514 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
 515     nvlist_t *errlist)
 516 {
 517         dmu_snapshots_destroy_arg_t dsda;
 518         int error;
 519         nvpair_t *pair;
 520
 521         pair = nvlist_next_nvpair(snaps, NULL);
 522         if (pair == NULL)
 523                 return (0);
 524
 525         dsda.dsda_snaps = snaps;
 526         VERIFY0(nvlist_alloc(&dsda.dsda_successful_snaps,
 527             NV_UNIQUE_NAME, KM_SLEEP));
 528         dsda.dsda_defer = defer;
 529         dsda.dsda_errlist = errlist;
 530
 531         error = dsl_sync_task(nvpair_name(pair),
 532             dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
 533             &dsda, 0, ZFS_SPACE_CHECK_NONE);
 534         fnvlist_free(dsda.dsda_successful_snaps);
 535
 536         return (error);
 537 }
 538
 539 int
 540 dsl_destroy_snapshot(const char *name, boolean_t defer)
 541 {
 542         int error;
 543         nvlist_t *nvl = fnvlist_alloc();
 544         nvlist_t *errlist = fnvlist_alloc();
 545
 546         fnvlist_add_boolean(nvl, name);
 547         error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
 548         fnvlist_free(errlist);
 549         fnvlist_free(nvl);
 550         return (error);
 551 }
 552
 553 struct killarg {
 554         dsl_dataset_t *ds;
 555         dmu_tx_t *tx;
 556 };
 557
 558 /* ARGSUSED */
 559 static int
 560 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 561     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 562 {
 563         struct killarg *ka = arg;
 564         dmu_tx_t *tx = ka->tx;
 565
 566         if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
 567                 return (0);
 568
 569         if (zb->zb_level == ZB_ZIL_LEVEL) {
 570                 ASSERT(zilog != NULL);
 571                 /*
 572                  * It's a block in the intent log.  It has no
 573                  * accounting, so just free it.
 574                  */
 575                 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
 576         } else {
 577                 ASSERT(zilog == NULL);
 578                 ASSERT3U(bp->blk_birth, >,
 579                     dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
 580                 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
 581         }
 582
 583         return (0);
 584 }
 585
 586 static void
 587 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
 588 {
 589         struct killarg ka;
 590
 591         /*
 592          * Free everything that we point to (that's born after
 593          * the previous snapshot, if we are a clone)
 594          *
 595          * NB: this should be very quick, because we already
 596          * freed all the objects in open context.
 597          */
 598         ka.ds = ds;
 599         ka.tx = tx;
 600         VERIFY0(traverse_dataset(ds,
 601             dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST |
 602             TRAVERSE_NO_DECRYPT, kill_blkptr, &ka));
 603         ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
 604             dsl_dataset_phys(ds)->ds_unique_bytes == 0);
 605 }
 606
 607 typedef struct dsl_destroy_head_arg {
 608         const char *ddha_name;
 609 } dsl_destroy_head_arg_t;
 610
 611 int
 612 dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
 613 {
 614         int error;
 615         uint64_t count;
 616         objset_t *mos;
 617
 618         ASSERT(!ds->ds_is_snapshot);
 619         if (ds->ds_is_snapshot)
 620                 return (SET_ERROR(EINVAL));
 621
 622         if (refcount_count(&ds->ds_longholds) != expected_holds)
 623                 return (SET_ERROR(EBUSY));
 624
 625         mos = ds->ds_dir->dd_pool->dp_meta_objset;
 626
 627         /*
 628          * Can't delete a head dataset if there are snapshots of it.
 629          * (Except if the only snapshots are from the branch we cloned
 630          * from.)
 631          */
 632         if (ds->ds_prev != NULL &&
 633             dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object)
 634                 return (SET_ERROR(EBUSY));
 635
 636         /*
 637          * Can't delete if there are children of this fs.
 638          */
 639         error = zap_count(mos,
 640             dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count);
 641         if (error != 0)
 642                 return (error);
 643         if (count != 0)
 644                 return (SET_ERROR(EEXIST));
 645
 646         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
 647             dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
 648             ds->ds_prev->ds_userrefs == 0) {
 649                 /* We need to remove the origin snapshot as well. */
 650                 if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
 651                         return (SET_ERROR(EBUSY));
 652         }
 653         return (0);
 654 }
 655
 656 static int
 657 dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
 658 {
 659         dsl_destroy_head_arg_t *ddha = arg;
 660         dsl_pool_t *dp = dmu_tx_pool(tx);
 661         dsl_dataset_t *ds;
 662         int error;
 663
 664         error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
 665         if (error != 0)
 666                 return (error);
 667
 668         error = dsl_destroy_head_check_impl(ds, 0);
 669         dsl_dataset_rele(ds, FTAG);
 670         return (error);
 671 }
 672
 673 static void
 674 dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
 675 {
 676         dsl_dir_t *dd;
 677         dsl_pool_t *dp = dmu_tx_pool(tx);
 678         objset_t *mos = dp->dp_meta_objset;
 679         dd_used_t t;
 680
 681         ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));
 682
 683         VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
 684
 685         ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);
 686
 687         /*
 688          * Decrement the filesystem count for all parent filesystems.
 689          *
 690          * When we receive an incremental stream into a filesystem that already
 691          * exists, a temporary clone is created.  We never count this temporary
 692          * clone, whose name begins with a '%'.
 693          */
 694         if (dd->dd_myname[0] != '%' && dd->dd_parent != NULL)
 695                 dsl_fs_ss_count_adjust(dd->dd_parent, -1,
 696                     DD_FIELD_FILESYSTEM_COUNT, tx);
 697
 698         /*
 699          * Remove our reservation. The impl() routine avoids setting the
 700          * actual property, which would require the (already destroyed) ds.
 701          */
 702         dsl_dir_set_reservation_sync_impl(dd, 0, tx);
 703
 704         ASSERT0(dsl_dir_phys(dd)->dd_used_bytes);
 705         ASSERT0(dsl_dir_phys(dd)->dd_reserved);
 706         for (t = 0; t < DD_USED_NUM; t++)
 707                 ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);
 708
 709         if (dd->dd_crypto_obj != 0) {
 710                 dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx);
 711                 (void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
 712         }
 713
 714         VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
 715         VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
 716         VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
 717         VERIFY0(zap_remove(mos,
 718             dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
 719             dd->dd_myname, tx));
 720
 721         dsl_dir_rele(dd, FTAG);
 722         dmu_object_free_zapified(mos, ddobj, tx);
 723 }
 724
 725 void
 726 dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
 727 {
 728         dsl_pool_t *dp = dmu_tx_pool(tx);
 729         spa_feature_t f;
 730         objset_t *mos = dp->dp_meta_objset;
 731         uint64_t obj, ddobj, prevobj = 0;
 732         boolean_t rmorigin;
 733         objset_t *os;
 734
 735         ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
 736         ASSERT(ds->ds_prev == NULL ||
 737             dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
 738         rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 739         ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
 740         rrw_exit(&ds->ds_bp_rwlock, FTAG);
 741         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 742
 743         /* We need to log before removing it from the namespace. */
 744         spa_history_log_internal_ds(ds, "destroy", tx, "");
 745
 746         rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
 747             DS_IS_DEFER_DESTROY(ds->ds_prev) &&
 748             dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
 749             ds->ds_prev->ds_userrefs == 0);
 750
 751         /* Remove our reservation. */
 752         if (ds->ds_reserved != 0) {
 753                 dsl_dataset_set_refreservation_sync_impl(ds,
 754                     (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
 755                     0, tx);
 756                 ASSERT0(ds->ds_reserved);
 757         }
 758
 759         obj = ds->ds_object;
 760
 761         for (f = 0; f < SPA_FEATURES; f++) {
 762                 if (ds->ds_feature_inuse[f]) {
 763                         dsl_dataset_deactivate_feature(obj, f, tx);
 764                         ds->ds_feature_inuse[f] = B_FALSE;
 765                 }
 766         }
 767
 768         dsl_scan_ds_destroyed(ds, tx);
 769
 770         if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
 771                 /* This is a clone */
 772                 ASSERT(ds->ds_prev != NULL);
 773                 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=,
 774                     obj);
 775                 ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);
 776
 777                 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
 778                 if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) {
 779                         dsl_dataset_remove_from_next_clones(ds->ds_prev,
 780                             obj, tx);
 781                 }
 782
 783                 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1);
 784                 dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
 785         }
 786
 787         /*
 788          * Destroy the deadlist.  Unless it's a clone, the
 789          * deadlist should be empty.  (If it's a clone, it's
 790          * safe to ignore the deadlist contents.)
 791          */
 792         dsl_deadlist_close(&ds->ds_deadlist);
 793         dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
 794         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 795         dsl_dataset_phys(ds)->ds_deadlist_obj = 0;
 796
 797         VERIFY0(dmu_objset_from_ds(ds, &os));
 798
 799         if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
 800                 old_synchronous_dataset_destroy(ds, tx);
 801         } else {
 802                 /*
 803                  * Move the bptree into the pool's list of trees to
 804                  * clean up and update space accounting information.
 805                  */
 806                 uint64_t used, comp, uncomp;
 807
 808                 zil_destroy_sync(dmu_objset_zil(os), tx);
 809
 810                 if (!spa_feature_is_active(dp->dp_spa,
 811                     SPA_FEATURE_ASYNC_DESTROY)) {
 812                         dsl_scan_t *scn = dp->dp_scan;
 813                         spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
 814                             tx);
 815                         dp->dp_bptree_obj = bptree_alloc(mos, tx);
 816                         VERIFY0(zap_add(mos,
 817                             DMU_POOL_DIRECTORY_OBJECT,
 818                             DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
 819                             &dp->dp_bptree_obj, tx));
 820                         ASSERT(!scn->scn_async_destroying);
 821                         scn->scn_async_destroying = B_TRUE;
 822                 }
 823
 824                 used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
 825                 comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
 826                 uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;
 827
 828                 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
 829                     dsl_dataset_phys(ds)->ds_unique_bytes == used);
 830
 831                 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 832                 bptree_add(mos, dp->dp_bptree_obj,
 833                     &dsl_dataset_phys(ds)->ds_bp,
 834                     dsl_dataset_phys(ds)->ds_prev_snap_txg,
 835                     used, comp, uncomp, tx);
 836                 rrw_exit(&ds->ds_bp_rwlock, FTAG);
 837                 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
 838                     -used, -comp, -uncomp, tx);
 839                 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
 840                     used, comp, uncomp, tx);
 841         }
 842
 843         if (ds->ds_prev != NULL) {
 844                 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
 845                         VERIFY0(zap_remove_int(mos,
 846                             dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones,
 847                             ds->ds_object, tx));
 848                 }
 849                 prevobj = ds->ds_prev->ds_object;
 850                 dsl_dataset_rele(ds->ds_prev, ds);
 851                 ds->ds_prev = NULL;
 852         }
 853
 854         /*
 855          * This must be done after the dsl_traverse(), because it will
 856          * re-open the objset.
 857          */
 858         if (ds->ds_objset) {
 859                 dmu_objset_evict(ds->ds_objset);
 860                 ds->ds_objset = NULL;
 861         }
 862
 863         /* Erase the link in the dir */
 864         dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
 865         dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0;
 866         ddobj = ds->ds_dir->dd_object;
 867         ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0);
 868         VERIFY0(zap_destroy(mos,
 869             dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));
 870
 871         if (ds->ds_bookmarks != 0) {
 872                 VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
 873                 spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
 874         }
 875
 876         spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
 877
 878         ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj);
 879         ASSERT0(dsl_dataset_phys(ds)->ds_props_obj);
 880         ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj);
 881         dsl_dir_rele(ds->ds_dir, ds);
 882         ds->ds_dir = NULL;
 883         dmu_object_free_zapified(mos, obj, tx);
 884
 885         dsl_dir_destroy_sync(ddobj, tx);
 886
 887         if (rmorigin) {
 888                 dsl_dataset_t *prev;
 889                 VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
 890                 dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
 891                 dsl_dataset_rele(prev, FTAG);
 892         }
 893 }
 894
 895 static void
 896 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
 897 {
 898         dsl_destroy_head_arg_t *ddha = arg;
 899         dsl_pool_t *dp = dmu_tx_pool(tx);
 900         dsl_dataset_t *ds;
 901
 902         VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
 903         dsl_destroy_head_sync_impl(ds, tx);
 904         zvol_remove_minors(dp->dp_spa, ddha->ddha_name, B_TRUE);
 905         dsl_dataset_rele(ds, FTAG);
 906 }
 907
 908 static void
 909 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
 910 {
 911         dsl_destroy_head_arg_t *ddha = arg;
 912         dsl_pool_t *dp = dmu_tx_pool(tx);
 913         dsl_dataset_t *ds;
 914
 915         VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
 916
 917         /* Mark it as inconsistent on-disk, in case we crash */
 918         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 919         dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
 920
 921         spa_history_log_internal_ds(ds, "destroy begin", tx, "");
 922         dsl_dataset_rele(ds, FTAG);
 923 }
 924
 925 int
 926 dsl_destroy_head(const char *name)
 927 {
 928         dsl_destroy_head_arg_t ddha;
 929         int error;
 930         spa_t *spa;
 931         boolean_t isenabled;
 932
 933 #ifdef _KERNEL
 934         zfs_destroy_unmount_origin(name);
 935 #endif
 936
 937         error = spa_open(name, &spa, FTAG);
 938         if (error != 0)
 939                 return (error);
 940         isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
 941         spa_close(spa, FTAG);
 942
 943         ddha.ddha_name = name;
 944
 945         if (!isenabled) {
 946                 objset_t *os;
 947
 948                 error = dsl_sync_task(name, dsl_destroy_head_check,
 949                     dsl_destroy_head_begin_sync, &ddha,
 950                     0, ZFS_SPACE_CHECK_NONE);
 951                 if (error != 0)
 952                         return (error);
 953
 954                 /*
 955                  * Head deletion is processed in one txg on old pools;
 956                  * remove the objects from open context so that the txg sync
 957                  * is not too long.
 958                  */
 959                 error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE,
 960                     FTAG, &os);
 961                 if (error == 0) {
 962                         uint64_t obj;
 963                         uint64_t prev_snap_txg =
 964                             dsl_dataset_phys(dmu_objset_ds(os))->
 965                             ds_prev_snap_txg;
 966                         for (obj = 0; error == 0;
 967                             error = dmu_object_next(os, &obj, FALSE,
 968                             prev_snap_txg))
 969                                 (void) dmu_free_long_object(os, obj);
 970                         /* sync out all frees */
 971                         txg_wait_synced(dmu_objset_pool(os), 0);
 972                         dmu_objset_disown(os, B_FALSE, FTAG);
 973                 }
 974         }
 975
 976         return (dsl_sync_task(name, dsl_destroy_head_check,
 977             dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_NONE));
 978 }
 979
 980 /*
 981  * Note, this function is used as the callback for dmu_objset_find().  We
 982  * always return 0 so that we will continue to find and process
 983  * inconsistent datasets, even if we encounter an error trying to
 984  * process one of them.
 985  */
 986 /* ARGSUSED */
 987 int
 988 dsl_destroy_inconsistent(const char *dsname, void *arg)
 989 {
 990         objset_t *os;
 991
 992         if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
 993                 boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));
 994
 995                 /*
 996                  * If the dataset is inconsistent because a resumable receive
 997                  * has failed, then do not destroy it.
 998                  */
 999                 if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
1000                         need_destroy = B_FALSE;
1001
1002                 dmu_objset_rele(os, FTAG);
1003                 if (need_destroy)
1004                         (void) dsl_destroy_head(dsname);
1005         }
1006         return (0);
1007 }
1008
1009
/*
 * Symbols exported when both _KERNEL and HAVE_SPL are defined
 * (listed alphabetically).
 */
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dsl_dataset_user_hold_check_one);
EXPORT_SYMBOL(dsl_dataset_user_release_tmp);
EXPORT_SYMBOL(dsl_destroy_head);
EXPORT_SYMBOL(dsl_destroy_head_check_impl);
EXPORT_SYMBOL(dsl_destroy_head_sync_impl);
EXPORT_SYMBOL(dsl_destroy_inconsistent);
EXPORT_SYMBOL(dsl_destroy_snapshot_sync_impl);
#endif