4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
28 #include <sys/spa_impl.h>
31 #include <sys/vdev_impl.h>
32 #include <sys/zfs_context.h>
33 #include <sys/callb.h>
36 * Multi-Modifier Protection (MMP) attempts to prevent a user from importing
37 * or opening a pool on more than one host at a time. In particular, it
38 * prevents "zpool import -f" on a host from succeeding while the pool is
39 * already imported on another host. There are many other ways in which a
40 * device could be used by two hosts for different purposes at the same time
41 * resulting in pool damage. This implementation does not attempt to detect
44 * MMP operates by ensuring there are frequent visible changes on disk (a
45 * "heartbeat") at all times. And by altering the import process to check
46 * for these changes and failing the import when they are detected. This
47 * functionality is enabled by setting the 'multihost' pool property to on.
49 * Uberblocks written by the txg_sync thread always go into the first
50 * (N-MMP_BLOCKS_PER_LABEL) slots, the remaining slots are reserved for MMP.
51 * They are used to hold uberblocks which are exactly the same as the last
52 * synced uberblock except that the ub_timestamp is frequently updated.
53 * Like all other uberblocks, the slot is written with an embedded checksum,
54 * and slots with invalid checksums are ignored. This provides the
55 * "heartbeat", with no risk of overwriting good uberblocks that must be
56 * preserved, e.g. previous txgs and associated block pointers.
58 * Two optional fields are added to uberblock structure: ub_mmp_magic and
59 * ub_mmp_delay. The magic field allows zfs to tell whether ub_mmp_delay is
60 * valid. The delay field is a decaying average of the amount of time between
61 * completion of successive MMP writes, in nanoseconds. It is used to predict
62 * how long the import must wait to detect activity in the pool, before
63 * concluding it is not in use.
65 * During import an activity test may now be performed to determine if
66 * the pool is in use. The activity test is typically required if the
67 * ZPOOL_CONFIG_HOSTID does not match the system hostid, the pool state is
68 * POOL_STATE_ACTIVE, and the pool is not a root pool.
70 * The activity test finds the "best" uberblock (highest txg & timestamp),
71 * waits some time, and then finds the "best" uberblock again. If the txg
72 * and timestamp in both "best" uberblocks do not match, the pool is in use
73 * by another host and the import fails. Since the granularity of the
74 * timestamp is in seconds this activity test must take a bare minimum of one
75 * second. In order to assure the accuracy of the activity test, the default
76 * values result in an activity test duration of 10x the mmp write interval.
78 * The "zpool import" activity test can be expected to take a minimum time of
79 * zfs_multihost_import_intervals * zfs_multihost_interval milliseconds. If the
80 * "best" uberblock has a valid ub_mmp_delay field, then the duration of the
81 * test may take longer if MMP writes were occurring less frequently than
82 * expected. Additionally, the duration is then extended by a random 25% to
83 * attempt to detect simultaneous imports. For example, if both partner
84 * hosts are rebooted at the same time and automatically attempt to import the
89 * Used to control the frequency of mmp writes which are performed when the
90 * 'multihost' pool property is on. This is one factor used to determine the
91 * length of the activity check during import.
93 * The mmp write period is zfs_multihost_interval / leaf-vdevs milliseconds.
94 * This means that on average an mmp write will be issued for each leaf vdev
95 * every zfs_multihost_interval milliseconds. In practice, the observed period
96 * can vary with the I/O load and this observed value is the delay which is
97 * stored in the uberblock. The minimum allowed value is 100 ms.
99 ulong_t zfs_multihost_interval
= MMP_DEFAULT_INTERVAL
;
102 * Used to control the duration of the activity test on import. Smaller values
103 * of zfs_multihost_import_intervals will reduce the import time but increase
104 * the risk of failing to detect an active pool. The total activity check time
105 * is never allowed to drop below one second. A value of 0 is ignored and
106 * treated as if it was set to 1.
108 uint_t zfs_multihost_import_intervals
= MMP_DEFAULT_IMPORT_INTERVALS
;
111 * Controls the behavior of the pool when mmp write failures are detected.
113 * When zfs_multihost_fail_intervals = 0 then mmp write failures are ignored.
114 * The failures will still be reported to the ZED which depending on its
115 * configuration may take action such as suspending the pool or taking a
118 * When zfs_multihost_fail_intervals > 0 then sequential mmp write failures will
119 * cause the pool to be suspended. This occurs when
120 * zfs_multihost_fail_intervals * zfs_multihost_interval milliseconds have
121 * passed since the last successful mmp write. This guarantees the activity
122 * test will see mmp writes if the
125 uint_t zfs_multihost_fail_intervals
= MMP_DEFAULT_FAIL_INTERVALS
;
127 char *mmp_tag
= "mmp_write_uberblock";
128 static void mmp_thread(void *arg
);
133 mmp_thread_t
*mmp
= &spa
->spa_mmp
;
135 mutex_init(&mmp
->mmp_thread_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
136 cv_init(&mmp
->mmp_thread_cv
, NULL
, CV_DEFAULT
, NULL
);
137 mutex_init(&mmp
->mmp_io_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
138 mmp
->mmp_kstat_id
= 1;
144 mmp_thread_t
*mmp
= &spa
->spa_mmp
;
146 mutex_destroy(&mmp
->mmp_thread_lock
);
147 cv_destroy(&mmp
->mmp_thread_cv
);
148 mutex_destroy(&mmp
->mmp_io_lock
);
152 mmp_thread_enter(mmp_thread_t
*mmp
, callb_cpr_t
*cpr
)
154 CALLB_CPR_INIT(cpr
, &mmp
->mmp_thread_lock
, callb_generic_cpr
, FTAG
);
155 mutex_enter(&mmp
->mmp_thread_lock
);
159 mmp_thread_exit(mmp_thread_t
*mmp
, kthread_t
**mpp
, callb_cpr_t
*cpr
)
161 ASSERT(*mpp
!= NULL
);
163 cv_broadcast(&mmp
->mmp_thread_cv
);
164 CALLB_CPR_EXIT(cpr
); /* drops &mmp->mmp_thread_lock */
169 mmp_thread_start(spa_t
*spa
)
171 mmp_thread_t
*mmp
= &spa
->spa_mmp
;
173 if (spa_writeable(spa
)) {
174 mutex_enter(&mmp
->mmp_thread_lock
);
175 if (!mmp
->mmp_thread
) {
176 dprintf("mmp_thread_start pool %s\n",
178 mmp
->mmp_thread
= thread_create(NULL
, 0, mmp_thread
,
179 spa
, 0, &p0
, TS_RUN
, defclsyspri
);
181 mutex_exit(&mmp
->mmp_thread_lock
);
186 mmp_thread_stop(spa_t
*spa
)
188 mmp_thread_t
*mmp
= &spa
->spa_mmp
;
190 mutex_enter(&mmp
->mmp_thread_lock
);
191 mmp
->mmp_thread_exiting
= 1;
192 cv_broadcast(&mmp
->mmp_thread_cv
);
194 while (mmp
->mmp_thread
) {
195 cv_wait(&mmp
->mmp_thread_cv
, &mmp
->mmp_thread_lock
);
197 mutex_exit(&mmp
->mmp_thread_lock
);
199 ASSERT(mmp
->mmp_thread
== NULL
);
200 mmp
->mmp_thread_exiting
= 0;
203 typedef enum mmp_vdev_state_flag
{
204 MMP_FAIL_NOT_WRITABLE
= (1 << 0),
205 MMP_FAIL_WRITE_PENDING
= (1 << 1),
206 } mmp_vdev_state_flag_t
;
209 mmp_random_leaf_impl(vdev_t
*vd
, int *fail_mask
)
213 if (!vdev_writeable(vd
)) {
214 *fail_mask
|= MMP_FAIL_NOT_WRITABLE
;
218 if (vd
->vdev_ops
->vdev_op_leaf
) {
221 if (vd
->vdev_mmp_pending
!= 0) {
222 *fail_mask
|= MMP_FAIL_WRITE_PENDING
;
231 child_idx
= spa_get_random(vd
->vdev_children
);
232 for (int offset
= vd
->vdev_children
; offset
> 0; offset
--) {
234 vdev_t
*child
= vd
->vdev_child
[(child_idx
+ offset
) %
237 leaf
= mmp_random_leaf_impl(child
, fail_mask
);
246 * Find a leaf vdev to write an MMP block to. It must not have an outstanding
247 * mmp write (if so a new write will also likely block). If there is no usable
248 * leaf in the tree rooted at in_vd, a nonzero error value is returned, and
249 * *out_vd is unchanged.
251 * The error value returned is a bit field.
253 * MMP_FAIL_WRITE_PENDING
254 * If set, one or more leaf vdevs are writeable, but have an MMP write which has
257 * MMP_FAIL_NOT_WRITABLE
258 * If set, one or more vdevs are not writeable. The children of those vdevs
261 * Assuming in_vd points to a tree, a random subtree will be chosen to start.
262 * That subtree, and successive ones, will be walked until a usable leaf has
263 * been found, or all subtrees have been examined (except that the children of
264 * un-writeable vdevs are not examined).
266 * If the leaf vdevs in the tree are healthy, the distribution of returned leaf
267 * vdevs will be even. If there are unhealthy leaves, the following leaves
268 * (child_index % index_children) will be chosen more often.
272 mmp_random_leaf(vdev_t
*in_vd
, vdev_t
**out_vd
)
275 vdev_t
*vd
= mmp_random_leaf_impl(in_vd
, &error_mask
);
284 mmp_write_done(zio_t
*zio
)
286 spa_t
*spa
= zio
->io_spa
;
287 vdev_t
*vd
= zio
->io_vd
;
288 mmp_thread_t
*mts
= zio
->io_private
;
290 mutex_enter(&mts
->mmp_io_lock
);
291 uint64_t mmp_kstat_id
= vd
->vdev_mmp_kstat_id
;
292 hrtime_t mmp_write_duration
= gethrtime() - vd
->vdev_mmp_pending
;
298 * Mmp writes are queued on a fixed schedule, but under many
299 * circumstances, such as a busy device or faulty hardware,
300 * the writes will complete at variable, much longer,
301 * intervals. In these cases, another node checking for
302 * activity must wait longer to account for these delays.
304 * The mmp_delay is calculated as a decaying average of the interval
305 * between completed mmp writes. This is used to predict how long
306 * the import must wait to detect activity in the pool, before
307 * concluding it is not in use.
309 * Do not set mmp_delay if the multihost property is not on,
310 * so as not to trigger an activity check on import.
312 if (spa_multihost(spa
)) {
313 hrtime_t delay
= gethrtime() - mts
->mmp_last_write
;
315 if (delay
> mts
->mmp_delay
)
316 mts
->mmp_delay
= delay
;
318 mts
->mmp_delay
= (delay
+ mts
->mmp_delay
* 127) /
323 mts
->mmp_last_write
= gethrtime();
326 vd
->vdev_mmp_pending
= 0;
327 vd
->vdev_mmp_kstat_id
= 0;
329 mutex_exit(&mts
->mmp_io_lock
);
330 spa_config_exit(spa
, SCL_STATE
, mmp_tag
);
332 spa_mmp_history_set(spa
, mmp_kstat_id
, zio
->io_error
,
335 abd_free(zio
->io_abd
);
339 * When the uberblock on-disk is updated by a spa_sync,
340 * creating a new "best" uberblock, update the one stored
341 * in the mmp thread state, used for mmp writes.
344 mmp_update_uberblock(spa_t
*spa
, uberblock_t
*ub
)
346 mmp_thread_t
*mmp
= &spa
->spa_mmp
;
348 mutex_enter(&mmp
->mmp_io_lock
);
350 mmp
->mmp_ub
.ub_timestamp
= gethrestime_sec();
351 mutex_exit(&mmp
->mmp_io_lock
);
355 * Choose a random vdev, label, and MMP block, and write over it
356 * with a copy of the last-synced uberblock, whose timestamp
357 * has been updated to reflect that the pool is in use.
360 mmp_write_uberblock(spa_t
*spa
)
362 int flags
= ZIO_FLAG_CONFIG_WRITER
| ZIO_FLAG_CANFAIL
;
363 mmp_thread_t
*mmp
= &spa
->spa_mmp
;
369 hrtime_t lock_acquire_time
= gethrtime();
370 spa_config_enter(spa
, SCL_STATE
, mmp_tag
, RW_READER
);
371 lock_acquire_time
= gethrtime() - lock_acquire_time
;
372 if (lock_acquire_time
> (MSEC2NSEC(MMP_MIN_INTERVAL
) / 10))
373 zfs_dbgmsg("SCL_STATE acquisition took %llu ns\n",
374 (u_longlong_t
)lock_acquire_time
);
376 error
= mmp_random_leaf(spa
->spa_root_vdev
, &vd
);
378 mutex_enter(&mmp
->mmp_io_lock
);
381 * spa_mmp_history has two types of entries:
382 * Issued MMP write: records time issued, error status, etc.
383 * Skipped MMP write: an MMP write could not be issued because no
384 * suitable leaf vdev was available. See comment above struct
385 * spa_mmp_history for details.
389 if (mmp
->mmp_skip_error
== error
) {
390 spa_mmp_history_set_skip(spa
, mmp
->mmp_kstat_id
- 1);
392 mmp
->mmp_skip_error
= error
;
393 spa_mmp_history_add(spa
, mmp
->mmp_ub
.ub_txg
,
394 gethrestime_sec(), mmp
->mmp_delay
, NULL
, 0,
395 mmp
->mmp_kstat_id
++, error
);
397 mutex_exit(&mmp
->mmp_io_lock
);
398 spa_config_exit(spa
, SCL_STATE
, FTAG
);
402 mmp
->mmp_skip_error
= 0;
404 if (mmp
->mmp_zio_root
== NULL
)
405 mmp
->mmp_zio_root
= zio_root(spa
, NULL
, NULL
,
406 flags
| ZIO_FLAG_GODFATHER
);
409 ub
->ub_timestamp
= gethrestime_sec();
410 ub
->ub_mmp_magic
= MMP_MAGIC
;
411 ub
->ub_mmp_delay
= mmp
->mmp_delay
;
412 vd
->vdev_mmp_pending
= gethrtime();
413 vd
->vdev_mmp_kstat_id
= mmp
->mmp_kstat_id
;
415 zio_t
*zio
= zio_null(mmp
->mmp_zio_root
, spa
, NULL
, NULL
, NULL
, flags
);
416 abd_t
*ub_abd
= abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd
), B_TRUE
);
417 abd_zero(ub_abd
, VDEV_UBERBLOCK_SIZE(vd
));
418 abd_copy_from_buf(ub_abd
, ub
, sizeof (uberblock_t
));
421 mutex_exit(&mmp
->mmp_io_lock
);
423 offset
= VDEV_UBERBLOCK_OFFSET(vd
, VDEV_UBERBLOCK_COUNT(vd
) -
424 MMP_BLOCKS_PER_LABEL
+ spa_get_random(MMP_BLOCKS_PER_LABEL
));
426 label
= spa_get_random(VDEV_LABELS
);
427 vdev_label_write(zio
, vd
, label
, ub_abd
, offset
,
428 VDEV_UBERBLOCK_SIZE(vd
), mmp_write_done
, mmp
,
429 flags
| ZIO_FLAG_DONT_PROPAGATE
);
431 (void) spa_mmp_history_add(spa
, ub
->ub_txg
, ub
->ub_timestamp
,
432 ub
->ub_mmp_delay
, vd
, label
, vd
->vdev_mmp_kstat_id
, 0);
438 mmp_thread(void *arg
)
440 spa_t
*spa
= (spa_t
*)arg
;
441 mmp_thread_t
*mmp
= &spa
->spa_mmp
;
442 boolean_t last_spa_suspended
= spa_suspended(spa
);
443 boolean_t last_spa_multihost
= spa_multihost(spa
);
445 hrtime_t max_fail_ns
= zfs_multihost_fail_intervals
*
446 MSEC2NSEC(MAX(zfs_multihost_interval
, MMP_MIN_INTERVAL
));
448 mmp_thread_enter(mmp
, &cpr
);
451 * The mmp_write_done() function calculates mmp_delay based on the
452 * prior value of mmp_delay and the elapsed time since the last write.
453 * For the first mmp write, there is no "last write", so we start
454 * with fake, but reasonable, default non-zero values.
456 mmp
->mmp_delay
= MSEC2NSEC(MAX(zfs_multihost_interval
,
457 MMP_MIN_INTERVAL
)) / MAX(vdev_count_leaves(spa
), 1);
458 mmp
->mmp_last_write
= gethrtime() - mmp
->mmp_delay
;
460 while (!mmp
->mmp_thread_exiting
) {
461 uint64_t mmp_fail_intervals
= zfs_multihost_fail_intervals
;
462 uint64_t mmp_interval
= MSEC2NSEC(
463 MAX(zfs_multihost_interval
, MMP_MIN_INTERVAL
));
464 boolean_t suspended
= spa_suspended(spa
);
465 boolean_t multihost
= spa_multihost(spa
);
466 hrtime_t start
, next_time
;
470 next_time
= start
+ mmp_interval
/
471 MAX(vdev_count_leaves(spa
), 1);
473 next_time
= start
+ MSEC2NSEC(MMP_DEFAULT_INTERVAL
);
477 * MMP off => on, or suspended => !suspended:
478 * No writes occurred recently. Update mmp_last_write to give
479 * us some time to try.
481 if ((!last_spa_multihost
&& multihost
) ||
482 (last_spa_suspended
&& !suspended
)) {
483 mutex_enter(&mmp
->mmp_io_lock
);
484 mmp
->mmp_last_write
= gethrtime();
485 mutex_exit(&mmp
->mmp_io_lock
);
490 * mmp_delay == 0 tells importing node to skip activity check.
492 if (last_spa_multihost
&& !multihost
) {
493 mutex_enter(&mmp
->mmp_io_lock
);
495 mutex_exit(&mmp
->mmp_io_lock
);
497 last_spa_multihost
= multihost
;
498 last_spa_suspended
= suspended
;
501 * Smooth max_fail_ns when its factors are decreased, because
502 * making (max_fail_ns < mmp_interval) results in the pool being
503 * immediately suspended before writes can occur at the new
506 if ((mmp_interval
* mmp_fail_intervals
) < max_fail_ns
) {
507 max_fail_ns
= ((31 * max_fail_ns
) + (mmp_interval
*
508 mmp_fail_intervals
)) / 32;
510 max_fail_ns
= mmp_interval
* mmp_fail_intervals
;
514 * Suspend the pool if no MMP write has succeeded in over
515 * mmp_interval * mmp_fail_intervals nanoseconds.
517 if (!suspended
&& mmp_fail_intervals
&& multihost
&&
518 (start
- mmp
->mmp_last_write
) > max_fail_ns
) {
519 cmn_err(CE_WARN
, "MMP writes to pool '%s' have not "
520 "succeeded in over %llus; suspending pool",
522 NSEC2SEC(start
- mmp
->mmp_last_write
));
523 zio_suspend(spa
, NULL
, ZIO_SUSPEND_MMP
);
526 if (multihost
&& !suspended
)
527 mmp_write_uberblock(spa
);
529 CALLB_CPR_SAFE_BEGIN(&cpr
);
530 (void) cv_timedwait_sig_hires(&mmp
->mmp_thread_cv
,
531 &mmp
->mmp_thread_lock
, next_time
, USEC2NSEC(1),
532 CALLOUT_FLAG_ABSOLUTE
);
533 CALLB_CPR_SAFE_END(&cpr
, &mmp
->mmp_thread_lock
);
536 /* Outstanding writes are allowed to complete. */
537 if (mmp
->mmp_zio_root
)
538 zio_wait(mmp
->mmp_zio_root
);
540 mmp
->mmp_zio_root
= NULL
;
541 mmp_thread_exit(mmp
, &mmp
->mmp_thread
, &cpr
);
545 * Signal the MMP thread to wake it, when it is sleeping on
546 * its cv. Used when some module parameter has changed and
547 * we want the thread to know about it.
548 * Only signal if the pool is active and mmp thread is
549 * running, otherwise there is no thread to wake.
552 mmp_signal_thread(spa_t
*spa
)
554 mmp_thread_t
*mmp
= &spa
->spa_mmp
;
556 mutex_enter(&mmp
->mmp_thread_lock
);
558 cv_broadcast(&mmp
->mmp_thread_cv
);
559 mutex_exit(&mmp
->mmp_thread_lock
);
563 mmp_signal_all_threads(void)
567 mutex_enter(&spa_namespace_lock
);
568 while ((spa
= spa_next(spa
))) {
569 if (spa
->spa_state
== POOL_STATE_ACTIVE
)
570 mmp_signal_thread(spa
);
572 mutex_exit(&spa_namespace_lock
);
575 #if defined(_KERNEL) && defined(HAVE_SPL)
576 #include <linux/mod_compat.h>
579 param_set_multihost_interval(const char *val
, zfs_kernel_param_t
*kp
)
583 ret
= param_set_ulong(val
, kp
);
587 mmp_signal_all_threads();
593 module_param(zfs_multihost_fail_intervals
, uint
, 0644);
594 MODULE_PARM_DESC(zfs_multihost_fail_intervals
,
595 "Max allowed period without a successful mmp write");
597 module_param_call(zfs_multihost_interval
, param_set_multihost_interval
,
598 param_get_ulong
, &zfs_multihost_interval
, 0644);
599 MODULE_PARM_DESC(zfs_multihost_interval
,
600 "Milliseconds between mmp writes to each leaf");
602 module_param(zfs_multihost_import_intervals
, uint
, 0644);
603 MODULE_PARM_DESC(zfs_multihost_import_intervals
,
604 "Number of zfs_multihost_interval periods to wait for activity");