module/zfs/dsl_pool.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  26  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  27  */
  28
  29 #include <sys/dsl_pool.h>
  30 #include <sys/dsl_dataset.h>
  31 #include <sys/dsl_prop.h>
  32 #include <sys/dsl_dir.h>
  33 #include <sys/dsl_synctask.h>
  34 #include <sys/dsl_scan.h>
  35 #include <sys/dnode.h>
  36 #include <sys/dmu_tx.h>
  37 #include <sys/dmu_objset.h>
  38 #include <sys/arc.h>
  39 #include <sys/zap.h>
  40 #include <sys/zio.h>
  41 #include <sys/zfs_context.h>
  42 #include <sys/fs/zfs.h>
  43 #include <sys/zfs_znode.h>
  44 #include <sys/spa_impl.h>
  45 #include <sys/dsl_deadlist.h>
  46 #include <sys/bptree.h>
  47 #include <sys/zfeature.h>
  48 #include <sys/zil_impl.h>
  49 #include <sys/dsl_userhold.h>
  50 #include <sys/trace_txg.h>
  51
  52 /*
  53  * ZFS Write Throttle
  54  * ------------------
  55  *
  56  * ZFS must limit the rate of incoming writes to the rate at which it is able
  57  * to sync data modifications to the backend storage. Throttling by too much
  58  * creates an artificial limit; throttling by too little can only be sustained
  59  * for short periods and would lead to highly lumpy performance. On a per-pool
  60  * basis, ZFS tracks the amount of modified (dirty) data. As operations change
  61  * data, the amount of dirty data increases; as ZFS syncs out data, the amount
  62  * of dirty data decreases. When the amount of dirty data exceeds a
  63  * predetermined threshold further modifications are blocked until the amount
  64  * of dirty data decreases (as data is synced out).
  65  *
  66  * The limit on dirty data is tunable, and should be adjusted according to
  67  * both the IO capacity and available memory of the system. The larger the
  68  * window, the more ZFS is able to aggregate and amortize metadata (and data)
  69  * changes. However, memory is a limited resource, and allowing for more dirty
  70  * data comes at the cost of keeping other useful data in memory (for example
  71  * ZFS data cached by the ARC).
  72  *
  73  * Implementation
  74  *
  75  * As buffers are modified dsl_pool_willuse_space() increments both the per-
  76  * txg (dp_dirty_pertxg[]) and poolwide (dp_dirty_total) accounting of
  77  * dirty space used; dsl_pool_dirty_space() decrements those values as data
  78  * is synced out from dsl_pool_sync(). While only the poolwide value is
  79  * relevant, the per-txg value is useful for debugging. The tunable
  80  * zfs_dirty_data_max determines the dirty space limit. Once that value is
  81  * exceeded, new writes are halted until space frees up.
  82  *
  83  * The zfs_dirty_data_sync tunable dictates the threshold at which we
  84  * ensure that there is a txg syncing (see the comment in txg.c for a full
  85  * description of transaction group stages).
  86  *
  87  * The IO scheduler uses both the dirty space limit and current amount of
  88  * dirty data as inputs. Those values affect the number of concurrent IOs ZFS
  89  * issues. See the comment in vdev_queue.c for details of the IO scheduler.
  90  *
  91  * The delay is also calculated based on the amount of dirty data.  See the
  92  * comment above dmu_tx_delay() for details.
  93  */
  94
  95 /*
  96  * zfs_dirty_data_max will be set to zfs_dirty_data_max_percent% of all memory,
  97  * capped at zfs_dirty_data_max_max.  It can also be overridden with a module
  98  * parameter.
  99  */
 100 unsigned long zfs_dirty_data_max = 0;
 101 unsigned long zfs_dirty_data_max_max = 0;
 102 int zfs_dirty_data_max_percent = 10;
 103 int zfs_dirty_data_max_max_percent = 25;
 104
 105 /*
 106  * If there is at least this much dirty data, push out a txg.
 107  */
 108 unsigned long zfs_dirty_data_sync = 64 * 1024 * 1024;
 109
 110 /*
 111  * Once there is this amount of dirty data, the dmu_tx_delay() will kick in
 112  * and delay each transaction.
 113  * This value should be >= zfs_vdev_async_write_active_max_dirty_percent.
 114  */
 115 int zfs_delay_min_dirty_percent = 60;
 116
 117 /*
 118  * This controls how quickly the delay approaches infinity.
 119  * Larger values cause it to delay more for a given amount of dirty data.
 120  * Therefore larger values will cause there to be less dirty data for a
 121  * given throughput.
 122  *
 123  * For the smoothest delay, this value should be about 1 billion divided
 124  * by the maximum number of operations per second.  This will smoothly
 125  * handle between 10x and 1/10th this number.
 126  *
 127  * Note: zfs_delay_scale * zfs_dirty_data_max must be < 2^64, due to the
 128  * multiply in dmu_tx_delay().
 129  */
 130 unsigned long zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
 131
 132 hrtime_t zfs_throttle_delay = MSEC2NSEC(10);
 133 hrtime_t zfs_throttle_resolution = MSEC2NSEC(10);
 134
 135 /*
 136  * This determines the number of threads used by the dp_sync_taskq.
 137  */
 138 int zfs_sync_taskq_batch_pct = 75;
 139
 140 int
 141 dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
 142 {
 143         uint64_t obj;
 144         int err;
 145
 146         err = zap_lookup(dp->dp_meta_objset,
 147             dsl_dir_phys(dp->dp_root_dir)->dd_child_dir_zapobj,
 148             name, sizeof (obj), 1, &obj);
 149         if (err)
 150                 return (err);
 151
 152         return (dsl_dir_hold_obj(dp, obj, name, dp, ddp));
 153 }
 154
 155 static dsl_pool_t *
 156 dsl_pool_open_impl(spa_t *spa, uint64_t txg)
 157 {
 158         dsl_pool_t *dp;
 159         blkptr_t *bp = spa_get_rootblkptr(spa);
 160
 161         dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
 162         dp->dp_spa = spa;
 163         dp->dp_meta_rootbp = *bp;
 164         rrw_init(&dp->dp_config_rwlock, B_TRUE);
 165         txg_init(dp, txg);
 166
 167         txg_list_create(&dp->dp_dirty_datasets,
 168             offsetof(dsl_dataset_t, ds_dirty_link));
 169         txg_list_create(&dp->dp_dirty_zilogs,
 170             offsetof(zilog_t, zl_dirty_link));
 171         txg_list_create(&dp->dp_dirty_dirs,
 172             offsetof(dsl_dir_t, dd_dirty_link));
 173         txg_list_create(&dp->dp_sync_tasks,
 174             offsetof(dsl_sync_task_t, dst_node));
 175
 176         dp->dp_sync_taskq = taskq_create("dp_sync_taskq",
 177             zfs_sync_taskq_batch_pct, minclsyspri, 1, INT_MAX,
 178             TASKQ_THREADS_CPU_PCT);
 179
 180         mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
 181         cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
 182
 183         dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
 184             max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 185
 186         return (dp);
 187 }
 188
 189 int
 190 dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
 191 {
 192         int err;
 193         dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
 194
 195         /*
 196          * Initialize the caller's dsl_pool_t structure before we actually open
 197          * the meta objset.  This is done because a self-healing write zio may
 198          * be issued as part of dmu_objset_open_impl() and the spa needs its
 199          * dsl_pool_t initialized in order to handle the write.
 200          */
 201         *dpp = dp;
 202
 203         err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
 204             &dp->dp_meta_objset);
 205         if (err != 0) {
 206                 dsl_pool_close(dp);
 207                 *dpp = NULL;
 208         }
 209
 210         return (err);
 211 }
 212
 213 int
 214 dsl_pool_open(dsl_pool_t *dp)
 215 {
 216         int err;
 217         dsl_dir_t *dd;
 218         dsl_dataset_t *ds;
 219         uint64_t obj;
 220
 221         rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
 222         err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 223             DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
 224             &dp->dp_root_dir_obj);
 225         if (err)
 226                 goto out;
 227
 228         err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
 229             NULL, dp, &dp->dp_root_dir);
 230         if (err)
 231                 goto out;
 232
 233         err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir);
 234         if (err)
 235                 goto out;
 236
 237         if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) {
 238                 err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd);
 239                 if (err)
 240                         goto out;
 241                 err = dsl_dataset_hold_obj(dp,
 242                     dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds);
 243                 if (err == 0) {
 244                         err = dsl_dataset_hold_obj(dp,
 245                             dsl_dataset_phys(ds)->ds_prev_snap_obj, dp,
 246                             &dp->dp_origin_snap);
 247                         dsl_dataset_rele(ds, FTAG);
 248                 }
 249                 dsl_dir_rele(dd, dp);
 250                 if (err)
 251                         goto out;
 252         }
 253
 254         if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
 255                 err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
 256                     &dp->dp_free_dir);
 257                 if (err)
 258                         goto out;
 259
 260                 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 261                     DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
 262                 if (err)
 263                         goto out;
 264                 VERIFY0(bpobj_open(&dp->dp_free_bpobj,
 265                     dp->dp_meta_objset, obj));
 266         }
 267
 268         /*
 269          * Note: errors ignored, because the leak dir will not exist if we
 270          * have not encountered a leak yet.
 271          */
 272         (void) dsl_pool_open_special_dir(dp, LEAK_DIR_NAME,
 273             &dp->dp_leak_dir);
 274
 275         if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) {
 276                 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 277                     DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
 278                     &dp->dp_bptree_obj);
 279                 if (err != 0)
 280                         goto out;
 281         }
 282
 283         if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMPTY_BPOBJ)) {
 284                 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 285                     DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1,
 286                     &dp->dp_empty_bpobj);
 287                 if (err != 0)
 288                         goto out;
 289         }
 290
 291         err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 292             DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
 293             &dp->dp_tmp_userrefs_obj);
 294         if (err == ENOENT)
 295                 err = 0;
 296         if (err)
 297                 goto out;
 298
 299         err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
 300
 301 out:
 302         rrw_exit(&dp->dp_config_rwlock, FTAG);
 303         return (err);
 304 }
 305
 306 void
 307 dsl_pool_close(dsl_pool_t *dp)
 308 {
 309         /*
 310          * Drop our references from dsl_pool_open().
 311          *
 312          * Since we held the origin_snap from "syncing" context (which
 313          * includes pool-opening context), it actually only got a "ref"
 314          * and not a hold, so just drop that here.
 315          */
 316         if (dp->dp_origin_snap)
 317                 dsl_dataset_rele(dp->dp_origin_snap, dp);
 318         if (dp->dp_mos_dir)
 319                 dsl_dir_rele(dp->dp_mos_dir, dp);
 320         if (dp->dp_free_dir)
 321                 dsl_dir_rele(dp->dp_free_dir, dp);
 322         if (dp->dp_leak_dir)
 323                 dsl_dir_rele(dp->dp_leak_dir, dp);
 324         if (dp->dp_root_dir)
 325                 dsl_dir_rele(dp->dp_root_dir, dp);
 326
 327         bpobj_close(&dp->dp_free_bpobj);
 328
 329         /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */
 330         if (dp->dp_meta_objset)
 331                 dmu_objset_evict(dp->dp_meta_objset);
 332
 333         txg_list_destroy(&dp->dp_dirty_datasets);
 334         txg_list_destroy(&dp->dp_dirty_zilogs);
 335         txg_list_destroy(&dp->dp_sync_tasks);
 336         txg_list_destroy(&dp->dp_dirty_dirs);
 337
 338         taskq_destroy(dp->dp_sync_taskq);
 339
 340         /*
 341          * We can't set retry to TRUE since we're explicitly specifying
 342          * a spa to flush. This is good enough; any missed buffers for
 343          * this spa won't cause trouble, and they'll eventually fall
 344          * out of the ARC just like any other unused buffer.
 345          */
 346         arc_flush(dp->dp_spa, FALSE);
 347
 348         txg_fini(dp);
 349         dsl_scan_fini(dp);
 350         dmu_buf_user_evict_wait();
 351
 352         rrw_destroy(&dp->dp_config_rwlock);
 353         mutex_destroy(&dp->dp_lock);
 354         taskq_destroy(dp->dp_iput_taskq);
 355         if (dp->dp_blkstats)
 356                 vmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
 357         kmem_free(dp, sizeof (dsl_pool_t));
 358 }
 359
 360 dsl_pool_t *
 361 dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
 362 {
 363         int err;
 364         dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
 365         dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
 366         objset_t *os;
 367         dsl_dataset_t *ds;
 368         uint64_t obj;
 369
 370         rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
 371
 372         /* create and open the MOS (meta-objset) */
 373         dp->dp_meta_objset = dmu_objset_create_impl(spa,
 374             NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
 375
 376         /* create the pool directory */
 377         err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 378             DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
 379         ASSERT0(err);
 380
 381         /* Initialize scan structures */
 382         VERIFY0(dsl_scan_init(dp, txg));
 383
 384         /* create and open the root dir */
 385         dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
 386         VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
 387             NULL, dp, &dp->dp_root_dir));
 388
 389         /* create and open the meta-objset dir */
 390         (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx);
 391         VERIFY0(dsl_pool_open_special_dir(dp,
 392             MOS_DIR_NAME, &dp->dp_mos_dir));
 393
 394         if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
 395                 /* create and open the free dir */
 396                 (void) dsl_dir_create_sync(dp, dp->dp_root_dir,
 397                     FREE_DIR_NAME, tx);
 398                 VERIFY0(dsl_pool_open_special_dir(dp,
 399                     FREE_DIR_NAME, &dp->dp_free_dir));
 400
 401                 /* create and open the free_bplist */
 402                 obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx);
 403                 VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 404                     DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
 405                 VERIFY0(bpobj_open(&dp->dp_free_bpobj,
 406                     dp->dp_meta_objset, obj));
 407         }
 408
 409         if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB)
 410                 dsl_pool_create_origin(dp, tx);
 411
 412         /* create the root dataset */
 413         obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
 414
 415         /* create the root objset */
 416         VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
 417         rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 418         VERIFY(NULL != (os = dmu_objset_create_impl(dp->dp_spa, ds,
 419             dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx)));
 420         rrw_exit(&ds->ds_bp_rwlock, FTAG);
 421 #ifdef _KERNEL
 422         zfs_create_fs(os, kcred, zplprops, tx);
 423 #endif
 424         dsl_dataset_rele(ds, FTAG);
 425
 426         dmu_tx_commit(tx);
 427
 428         rrw_exit(&dp->dp_config_rwlock, FTAG);
 429
 430         return (dp);
 431 }
 432
 433 /*
 434  * Account for the meta-objset space in its placeholder dsl_dir.
 435  */
 436 void
 437 dsl_pool_mos_diduse_space(dsl_pool_t *dp,
 438     int64_t used, int64_t comp, int64_t uncomp)
 439 {
 440         ASSERT3U(comp, ==, uncomp); /* it's all metadata */
 441         mutex_enter(&dp->dp_lock);
 442         dp->dp_mos_used_delta += used;
 443         dp->dp_mos_compressed_delta += comp;
 444         dp->dp_mos_uncompressed_delta += uncomp;
 445         mutex_exit(&dp->dp_lock);
 446 }
 447
 448 static void
 449 dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx)
 450 {
 451         zio_t *zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 452         dmu_objset_sync(dp->dp_meta_objset, zio, tx);
 453         VERIFY0(zio_wait(zio));
 454         dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
 455         spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
 456 }
 457
 458 static void
 459 dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
 460 {
 461         ASSERT(MUTEX_HELD(&dp->dp_lock));
 462
 463         if (delta < 0)
 464                 ASSERT3U(-delta, <=, dp->dp_dirty_total);
 465
 466         dp->dp_dirty_total += delta;
 467
 468         /*
 469          * Note: we signal even when increasing dp_dirty_total.
 470          * This ensures forward progress -- each thread wakes the next waiter.
 471          */
 472         if (dp->dp_dirty_total <= zfs_dirty_data_max)
 473                 cv_signal(&dp->dp_spaceavail_cv);
 474 }
 475
 476 void
 477 dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
 478 {
 479         zio_t *zio;
 480         dmu_tx_t *tx;
 481         dsl_dir_t *dd;
 482         dsl_dataset_t *ds;
 483         objset_t *mos = dp->dp_meta_objset;
 484         list_t synced_datasets;
 485
 486         list_create(&synced_datasets, sizeof (dsl_dataset_t),
 487             offsetof(dsl_dataset_t, ds_synced_link));
 488
 489         tx = dmu_tx_create_assigned(dp, txg);
 490
 491         /*
 492          * Write out all dirty blocks of dirty datasets.
 493          */
 494         zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 495         while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
 496                 /*
 497                  * We must not sync any non-MOS datasets twice, because
 498                  * we may have taken a snapshot of them.  However, we
 499                  * may sync newly-created datasets on pass 2.
 500                  */
 501                 ASSERT(!list_link_active(&ds->ds_synced_link));
 502                 list_insert_tail(&synced_datasets, ds);
 503                 dsl_dataset_sync(ds, zio, tx);
 504         }
 505         VERIFY0(zio_wait(zio));
 506
 507         /*
 508          * We have written all of the accounted dirty data, so our
 509          * dp_space_towrite should now be zero.  However, some seldom-used
 510          * code paths do not adhere to this (e.g. dbuf_undirty(), also
 511          * rounding error in dbuf_write_physdone).
 512          * Shore up the accounting of any dirtied space now.
 513          */
 514         dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);
 515
 516         /*
 517          * Update the long range free counter after
 518          * we're done syncing user data
 519          */
 520         mutex_enter(&dp->dp_lock);
 521         ASSERT(spa_sync_pass(dp->dp_spa) == 1 ||
 522             dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] == 0);
 523         dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] = 0;
 524         mutex_exit(&dp->dp_lock);
 525
 526         /*
 527          * After the data blocks have been written (ensured by the zio_wait()
 528          * above), update the user/group space accounting.  This happens
 529          * in tasks dispatched to dp_sync_taskq, so wait for them before
 530          * continuing.
 531          */
 532         for (ds = list_head(&synced_datasets); ds != NULL;
 533             ds = list_next(&synced_datasets, ds)) {
 534                 dmu_objset_do_userquota_updates(ds->ds_objset, tx);
 535         }
 536         taskq_wait(dp->dp_sync_taskq);
 537
 538         /*
 539          * Sync the datasets again to push out the changes due to
 540          * userspace updates.  This must be done before we process the
 541          * sync tasks, so that any snapshots will have the correct
 542          * user accounting information (and we won't get confused
 543          * about which blocks are part of the snapshot).
 544          */
 545         zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 546         while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
 547                 ASSERT(list_link_active(&ds->ds_synced_link));
 548                 dmu_buf_rele(ds->ds_dbuf, ds);
 549                 dsl_dataset_sync(ds, zio, tx);
 550         }
 551         VERIFY0(zio_wait(zio));
 552
 553         /*
 554          * Now that the datasets have been completely synced, we can
 555          * clean up our in-memory structures accumulated while syncing:
 556          *
 557          *  - move dead blocks from the pending deadlist to the on-disk deadlist
 558          *  - release hold from dsl_dataset_dirty()
 559          */
 560         while ((ds = list_remove_head(&synced_datasets)) != NULL) {
 561                 dsl_dataset_sync_done(ds, tx);
 562         }
 563
 564         while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) {
 565                 dsl_dir_sync(dd, tx);
 566         }
 567
 568         /*
 569          * The MOS's space is accounted for in the pool/$MOS
 570          * (dp_mos_dir).  We can't modify the mos while we're syncing
 571          * it, so we remember the deltas and apply them here.
 572          */
 573         if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 ||
 574             dp->dp_mos_uncompressed_delta != 0) {
 575                 dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD,
 576                     dp->dp_mos_used_delta,
 577                     dp->dp_mos_compressed_delta,
 578                     dp->dp_mos_uncompressed_delta, tx);
 579                 dp->dp_mos_used_delta = 0;
 580                 dp->dp_mos_compressed_delta = 0;
 581                 dp->dp_mos_uncompressed_delta = 0;
 582         }
 583
 584         if (!multilist_is_empty(mos->os_dirty_dnodes[txg & TXG_MASK])) {
 585                 dsl_pool_sync_mos(dp, tx);
 586         }
 587
 588         /*
 589          * If we modify a dataset in the same txg that we want to destroy it,
 590          * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it.
 591          * dsl_dir_destroy_check() will fail if there are unexpected holds.
 592          * Therefore, we want to sync the MOS (thus syncing the dd_dbuf
 593          * and clearing the hold on it) before we process the sync_tasks.
 594          * The MOS data dirtied by the sync_tasks will be synced on the next
 595          * pass.
 596          */
 597         if (!txg_list_empty(&dp->dp_sync_tasks, txg)) {
 598                 dsl_sync_task_t *dst;
 599                 /*
 600                  * No more sync tasks should have been added while we
 601                  * were syncing.
 602                  */
 603                 ASSERT3U(spa_sync_pass(dp->dp_spa), ==, 1);
 604                 while ((dst = txg_list_remove(&dp->dp_sync_tasks, txg)) != NULL)
 605                         dsl_sync_task_sync(dst, tx);
 606         }
 607
 608         dmu_tx_commit(tx);
 609
 610         DTRACE_PROBE2(dsl_pool_sync__done, dsl_pool_t *dp, dp, uint64_t, txg);
 611 }
 612
 613 void
 614 dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
 615 {
 616         zilog_t *zilog;
 617
 618         while ((zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg))) {
 619                 dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
 620                 zil_clean(zilog, txg);
 621                 ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
 622                 dmu_buf_rele(ds->ds_dbuf, zilog);
 623         }
 624         ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
 625 }
 626
 627 /*
 628  * TRUE if the current thread is the tx_sync_thread or if we
 629  * are being called from SPA context during pool initialization.
 630  */
 631 int
 632 dsl_pool_sync_context(dsl_pool_t *dp)
 633 {
 634         return (curthread == dp->dp_tx.tx_sync_thread ||
 635             spa_is_initializing(dp->dp_spa) ||
 636             taskq_member(dp->dp_sync_taskq, curthread));
 637 }
 638
 639 uint64_t
 640 dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree)
 641 {
 642         uint64_t space, resv;
 643
 644         /*
 645          * If we're trying to assess whether it's OK to do a free,
 646          * cut the reservation in half to allow forward progress
 647          * (e.g. make it possible to rm(1) files from a full pool).
 648          */
 649         space = spa_get_dspace(dp->dp_spa);
 650         resv = spa_get_slop_space(dp->dp_spa);
 651         if (netfree)
 652                 resv >>= 1;
 653
 654         return (space - resv);
 655 }
 656
 657 boolean_t
 658 dsl_pool_need_dirty_delay(dsl_pool_t *dp)
 659 {
 660         uint64_t delay_min_bytes =
 661             zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
 662         boolean_t rv;
 663
 664         mutex_enter(&dp->dp_lock);
 665         if (dp->dp_dirty_total > zfs_dirty_data_sync)
 666                 txg_kick(dp);
 667         rv = (dp->dp_dirty_total > delay_min_bytes);
 668         mutex_exit(&dp->dp_lock);
 669         return (rv);
 670 }
 671
 672 void
 673 dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
 674 {
 675         if (space > 0) {
 676                 mutex_enter(&dp->dp_lock);
 677                 dp->dp_dirty_pertxg[tx->tx_txg & TXG_MASK] += space;
 678                 dsl_pool_dirty_delta(dp, space);
 679                 mutex_exit(&dp->dp_lock);
 680         }
 681 }
 682
 683 void
 684 dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg)
 685 {
 686         ASSERT3S(space, >=, 0);
 687         if (space == 0)
 688                 return;
 689
 690         mutex_enter(&dp->dp_lock);
 691         if (dp->dp_dirty_pertxg[txg & TXG_MASK] < space) {
 692                 /* XXX writing something we didn't dirty? */
 693                 space = dp->dp_dirty_pertxg[txg & TXG_MASK];
 694         }
 695         ASSERT3U(dp->dp_dirty_pertxg[txg & TXG_MASK], >=, space);
 696         dp->dp_dirty_pertxg[txg & TXG_MASK] -= space;
 697         ASSERT3U(dp->dp_dirty_total, >=, space);
 698         dsl_pool_dirty_delta(dp, -space);
 699         mutex_exit(&dp->dp_lock);
 700 }
 701
 702 /* ARGSUSED */
 703 static int
 704 upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
 705 {
 706         dmu_tx_t *tx = arg;
 707         dsl_dataset_t *ds, *prev = NULL;
 708         int err;
 709
 710         err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
 711         if (err)
 712                 return (err);
 713
 714         while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
 715                 err = dsl_dataset_hold_obj(dp,
 716                     dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev);
 717                 if (err) {
 718                         dsl_dataset_rele(ds, FTAG);
 719                         return (err);
 720                 }
 721
 722                 if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object)
 723                         break;
 724                 dsl_dataset_rele(ds, FTAG);
 725                 ds = prev;
 726                 prev = NULL;
 727         }
 728
 729         if (prev == NULL) {
 730                 prev = dp->dp_origin_snap;
 731
 732                 /*
 733                  * The $ORIGIN can't have any data, or the accounting
 734                  * will be wrong.
 735                  */
 736                 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 737                 ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth);
 738                 rrw_exit(&ds->ds_bp_rwlock, FTAG);
 739
 740                 /* The origin doesn't get attached to itself */
 741                 if (ds->ds_object == prev->ds_object) {
 742                         dsl_dataset_rele(ds, FTAG);
 743                         return (0);
 744                 }
 745
 746                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 747                 dsl_dataset_phys(ds)->ds_prev_snap_obj = prev->ds_object;
 748                 dsl_dataset_phys(ds)->ds_prev_snap_txg =
 749                     dsl_dataset_phys(prev)->ds_creation_txg;
 750
 751                 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
 752                 dsl_dir_phys(ds->ds_dir)->dd_origin_obj = prev->ds_object;
 753
 754                 dmu_buf_will_dirty(prev->ds_dbuf, tx);
 755                 dsl_dataset_phys(prev)->ds_num_children++;
 756
 757                 if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) {
 758                         ASSERT(ds->ds_prev == NULL);
 759                         VERIFY0(dsl_dataset_hold_obj(dp,
 760                             dsl_dataset_phys(ds)->ds_prev_snap_obj,
 761                             ds, &ds->ds_prev));
 762                 }
 763         }
 764
 765         ASSERT3U(dsl_dir_phys(ds->ds_dir)->dd_origin_obj, ==, prev->ds_object);
 766         ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_obj, ==, prev->ds_object);
 767
 768         if (dsl_dataset_phys(prev)->ds_next_clones_obj == 0) {
 769                 dmu_buf_will_dirty(prev->ds_dbuf, tx);
 770                 dsl_dataset_phys(prev)->ds_next_clones_obj =
 771                     zap_create(dp->dp_meta_objset,
 772                     DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
 773         }
 774         VERIFY0(zap_add_int(dp->dp_meta_objset,
 775             dsl_dataset_phys(prev)->ds_next_clones_obj, ds->ds_object, tx));
 776
 777         dsl_dataset_rele(ds, FTAG);
 778         if (prev != dp->dp_origin_snap)
 779                 dsl_dataset_rele(prev, FTAG);
 780         return (0);
 781 }
 782
 783 void
 784 dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
 785 {
 786         ASSERT(dmu_tx_is_syncing(tx));
 787         ASSERT(dp->dp_origin_snap != NULL);
 788
 789         VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb,
 790             tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE));
 791 }
 792
 793 /* ARGSUSED */
 794 static int
 795 upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 796 {
 797         dmu_tx_t *tx = arg;
 798         objset_t *mos = dp->dp_meta_objset;
 799
 800         if (dsl_dir_phys(ds->ds_dir)->dd_origin_obj != 0) {
 801                 dsl_dataset_t *origin;
 802
 803                 VERIFY0(dsl_dataset_hold_obj(dp,
 804                     dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &origin));
 805
 806                 if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) {
 807                         dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
 808                         dsl_dir_phys(origin->ds_dir)->dd_clones =
 809                             zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE,
 810                             0, tx);
 811                 }
 812
 813                 VERIFY0(zap_add_int(dp->dp_meta_objset,
 814                     dsl_dir_phys(origin->ds_dir)->dd_clones,
 815                     ds->ds_object, tx));
 816
 817                 dsl_dataset_rele(origin, FTAG);
 818         }
 819         return (0);
 820 }
 821
 822 void
 823 dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
 824 {
 825         uint64_t obj;
 826
 827         ASSERT(dmu_tx_is_syncing(tx));
 828
 829         (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx);
 830         VERIFY0(dsl_pool_open_special_dir(dp,
 831             FREE_DIR_NAME, &dp->dp_free_dir));
 832
 833         /*
 834          * We can't use bpobj_alloc(), because spa_version() still
 835          * returns the old version, and we need a new-version bpobj with
 836          * subobj support.  So call dmu_object_alloc() directly.
 837          */
 838         obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
 839             SPA_OLD_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
 840         VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 841             DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
 842         VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj));
 843
 844         VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
 845             upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE));
 846 }
 847
 848 void
 849 dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
 850 {
 851         uint64_t dsobj;
 852         dsl_dataset_t *ds;
 853
 854         ASSERT(dmu_tx_is_syncing(tx));
 855         ASSERT(dp->dp_origin_snap == NULL);
 856         ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER));
 857
 858         /* create the origin dir, ds, & snap-ds */
 859         dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
 860             NULL, 0, kcred, tx);
 861         VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
 862         dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
 863         VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj,
 864             dp, &dp->dp_origin_snap));
 865         dsl_dataset_rele(ds, FTAG);
 866 }
 867
 868 taskq_t *
 869 dsl_pool_iput_taskq(dsl_pool_t *dp)
 870 {
 871         return (dp->dp_iput_taskq);
 872 }
 873
 874 /*
 875  * Walk through the pool-wide zap object of temporary snapshot user holds
 876  * and release them.
 877  */
 878 void
 879 dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
 880 {
 881         zap_attribute_t za;
 882         zap_cursor_t zc;
 883         objset_t *mos = dp->dp_meta_objset;
 884         uint64_t zapobj = dp->dp_tmp_userrefs_obj;
 885         nvlist_t *holds;
 886
 887         if (zapobj == 0)
 888                 return;
 889         ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
 890
 891         holds = fnvlist_alloc();
 892
 893         for (zap_cursor_init(&zc, mos, zapobj);
 894             zap_cursor_retrieve(&zc, &za) == 0;
 895             zap_cursor_advance(&zc)) {
 896                 char *htag;
 897                 nvlist_t *tags;
 898
 899                 htag = strchr(za.za_name, '-');
 900                 *htag = '\0';
 901                 ++htag;
 902                 if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) {
 903                         tags = fnvlist_alloc();
 904                         fnvlist_add_boolean(tags, htag);
 905                         fnvlist_add_nvlist(holds, za.za_name, tags);
 906                         fnvlist_free(tags);
 907                 } else {
 908                         fnvlist_add_boolean(tags, htag);
 909                 }
 910         }
 911         dsl_dataset_user_release_tmp(dp, holds);
 912         fnvlist_free(holds);
 913         zap_cursor_fini(&zc);
 914 }
 915
 916 /*
 917  * Create the pool-wide zap object for storing temporary snapshot holds.
 918  */
 919 void
 920 dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
 921 {
 922         objset_t *mos = dp->dp_meta_objset;
 923
 924         ASSERT(dp->dp_tmp_userrefs_obj == 0);
 925         ASSERT(dmu_tx_is_syncing(tx));
 926
 927         dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
 928             DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx);
 929 }
 930
 931 static int
 932 dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
 933     const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding)
 934 {
 935         objset_t *mos = dp->dp_meta_objset;
 936         uint64_t zapobj = dp->dp_tmp_userrefs_obj;
 937         char *name;
 938         int error;
 939
 940         ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
 941         ASSERT(dmu_tx_is_syncing(tx));
 942
 943         /*
 944          * If the pool was created prior to SPA_VERSION_USERREFS, the
 945          * zap object for temporary holds might not exist yet.
 946          */
 947         if (zapobj == 0) {
 948                 if (holding) {
 949                         dsl_pool_user_hold_create_obj(dp, tx);
 950                         zapobj = dp->dp_tmp_userrefs_obj;
 951                 } else {
 952                         return (SET_ERROR(ENOENT));
 953                 }
 954         }
 955
 956         name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag);
 957         if (holding)
 958                 error = zap_add(mos, zapobj, name, 8, 1, &now, tx);
 959         else
 960                 error = zap_remove(mos, zapobj, name, tx);
 961         strfree(name);
 962
 963         return (error);
 964 }
 965
 966 /*
 967  * Add a temporary hold for the given dataset object and tag.
 968  */
 969 int
 970 dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
 971     uint64_t now, dmu_tx_t *tx)
 972 {
 973         return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE));
 974 }
 975
 976 /*
 977  * Release a temporary hold for the given dataset object and tag.
 978  */
 979 int
 980 dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
 981     dmu_tx_t *tx)
 982 {
 983         return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0,
 984             tx, B_FALSE));
 985 }
 986
 987 /*
 988  * DSL Pool Configuration Lock
 989  *
 990  * The dp_config_rwlock protects against changes to DSL state (e.g. dataset
 991  * creation / destruction / rename / property setting).  It must be held for
 992  * read to hold a dataset or dsl_dir.  I.e. you must call
 993  * dsl_pool_config_enter() or dsl_pool_hold() before calling
 994  * dsl_{dataset,dir}_hold{_obj}.  In most circumstances, the dp_config_rwlock
 995  * must be held continuously until all datasets and dsl_dirs are released.
 996  *
 997  * The only exception to this rule is that if a "long hold" is placed on
 998  * a dataset, then the dp_config_rwlock may be dropped while the dataset
 999  * is still held.  The long hold will prevent the dataset from being
1000  * destroyed -- the destroy will fail with EBUSY.  A long hold can be
1001  * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset
1002  * (by calling dsl_{dataset,objset}_{try}own{_obj}).
1003  *
1004  * Legitimate long-holders (including owners) should be long-running, cancelable
1005  * tasks that should cause "zfs destroy" to fail.  This includes DMU
1006  * consumers (i.e. a ZPL filesystem being mounted or ZVOL being open),
1007  * "zfs send", and "zfs diff".  There are several other long-holders whose
1008  * uses are suboptimal (e.g. "zfs promote", and zil_suspend()).
1009  *
1010  * The usual formula for long-holding would be:
1011  * dsl_pool_hold()
1012  * dsl_dataset_hold()
1013  * ... perform checks ...
1014  * dsl_dataset_long_hold()
1015  * dsl_pool_rele()
1016  * ... perform long-running task ...
1017  * dsl_dataset_long_rele()
1018  * dsl_dataset_rele()
1019  *
1020  * Note that when the long hold is released, the dataset is still held but
1021  * the pool is not held.  The dataset may change arbitrarily during this time
1022  * (e.g. it could be destroyed).  Therefore you shouldn't do anything to the
1023  * dataset except release it.
1024  *
1025  * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only
1026  * or modifying operations.
1027  *
1028  * Modifying operations should generally use dsl_sync_task().  The synctask
1029  * infrastructure enforces proper locking strategy with respect to the
1030  * dp_config_rwlock.  See the comment above dsl_sync_task() for details.
1031  *
1032  * Read-only operations will manually hold the pool, then the dataset, obtain
1033  * information from the dataset, then release the pool and dataset.
1034  * dmu_objset_{hold,rele}() are convenience routines that also do the pool
1035  * hold/rele.
1036  */
1037
1038 int
1039 dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp)
1040 {
1041         spa_t *spa;
1042         int error;
1043
1044         error = spa_open(name, &spa, tag);
1045         if (error == 0) {
1046                 *dp = spa_get_dsl(spa);
1047                 dsl_pool_config_enter(*dp, tag);
1048         }
1049         return (error);
1050 }
1051
1052 void
1053 dsl_pool_rele(dsl_pool_t *dp, void *tag)
1054 {
1055         dsl_pool_config_exit(dp, tag);
1056         spa_close(dp->dp_spa, tag);
1057 }
1058
1059 void
1060 dsl_pool_config_enter(dsl_pool_t *dp, void *tag)
1061 {
1062         /*
1063          * We use a "reentrant" reader-writer lock, but not reentrantly.
1064          *
1065          * The rrwlock can (with the track_all flag) track all reading threads,
1066          * which is very useful for debugging which code path failed to release
1067          * the lock, and for verifying that the *current* thread does hold
1068          * the lock.
1069          *
1070          * (Unlike a rwlock, which knows that N threads hold it for
1071          * read, but not *which* threads, so rw_held(RW_READER) returns TRUE
1072          * if any thread holds it for read, even if this thread doesn't).
1073          */
1074         ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER));
1075         rrw_enter(&dp->dp_config_rwlock, RW_READER, tag);
1076 }
1077
1078 void
1079 dsl_pool_config_enter_prio(dsl_pool_t *dp, void *tag)
1080 {
1081         ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER));
1082         rrw_enter_read_prio(&dp->dp_config_rwlock, tag);
1083 }
1084
1085 void
1086 dsl_pool_config_exit(dsl_pool_t *dp, void *tag)
1087 {
1088         rrw_exit(&dp->dp_config_rwlock, tag);
1089 }
1090
1091 boolean_t
1092 dsl_pool_config_held(dsl_pool_t *dp)
1093 {
1094         return (RRW_LOCK_HELD(&dp->dp_config_rwlock));
1095 }
1096
1097 boolean_t
1098 dsl_pool_config_held_writer(dsl_pool_t *dp)
1099 {
1100         return (RRW_WRITE_HELD(&dp->dp_config_rwlock));
1101 }
1102
1103 #if defined(_KERNEL) && defined(HAVE_SPL)
1104 EXPORT_SYMBOL(dsl_pool_config_enter);
1105 EXPORT_SYMBOL(dsl_pool_config_exit);
1106
1107 /* BEGIN CSTYLED */
1108 /* zfs_dirty_data_max_percent only applied at module load in arc_init(). */
1109 module_param(zfs_dirty_data_max_percent, int, 0444);
1110 MODULE_PARM_DESC(zfs_dirty_data_max_percent, "percent of ram can be dirty");
1111
1112 /* zfs_dirty_data_max_max_percent only applied at module load in arc_init(). */
1113 module_param(zfs_dirty_data_max_max_percent, int, 0444);
1114 MODULE_PARM_DESC(zfs_dirty_data_max_max_percent,
1115         "zfs_dirty_data_max upper bound as % of RAM");
1116
1117 module_param(zfs_delay_min_dirty_percent, int, 0644);
1118 MODULE_PARM_DESC(zfs_delay_min_dirty_percent, "transaction delay threshold");
1119
1120 module_param(zfs_dirty_data_max, ulong, 0644);
1121 MODULE_PARM_DESC(zfs_dirty_data_max, "determines the dirty space limit");
1122
1123 /* zfs_dirty_data_max_max only applied at module load in arc_init(). */
1124 module_param(zfs_dirty_data_max_max, ulong, 0444);
1125 MODULE_PARM_DESC(zfs_dirty_data_max_max,
1126         "zfs_dirty_data_max upper bound in bytes");
1127
1128 module_param(zfs_dirty_data_sync, ulong, 0644);
1129 MODULE_PARM_DESC(zfs_dirty_data_sync, "sync txg when this much dirty data");
1130
1131 module_param(zfs_delay_scale, ulong, 0644);
1132 MODULE_PARM_DESC(zfs_delay_scale, "how quickly delay approaches infinity");
1133
1134 module_param(zfs_sync_taskq_batch_pct, int, 0644);
1135 MODULE_PARM_DESC(zfs_sync_taskq_batch_pct,
1136         "max percent of CPUs that are used to sync dirty data");
1137 /* END CSTYLED */
1138 #endif