*/
/*
+ * SPA: Storage Pool Allocator
+ *
* This file contains all the routines used when modifying on-disk SPA state.
* This includes opening, importing, destroying, exporting a pool, and syncing a
* pool.
typedef enum zti_modes {
ZTI_MODE_FIXED, /* value is # of threads (min 1) */
- ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */
ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
ZTI_MODE_NULL, /* don't create a taskq */
ZTI_NMODES
char **ereport);
static void spa_vdev_resilver_done(spa_t *spa);
-uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */
+uint_t zio_taskq_batch_pct = 75; /* % of cpus in pset for batch taskqs */
id_t zio_taskq_psrset_bind = PS_NONE;
boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
uint_t zio_taskq_basedc = 80; /* base duty cycle */
err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_PUSHPAGE);
if (err)
- return err;
+ return (err);
mutex_enter(&spa->spa_props_lock);
break;
}
- if ((error = dmu_objset_hold(strval,FTAG,&os)))
+ error = dmu_objset_hold(strval, FTAG, &os);
+ if (error)
break;
/* Must be ZPL and not gzip compressed. */
int error;
uint64_t guid;
+ mutex_enter(&spa->spa_vdev_top_lock);
mutex_enter(&spa_namespace_lock);
guid = spa_generate_guid(NULL);
}
mutex_exit(&spa_namespace_lock);
+ mutex_exit(&spa->spa_vdev_top_lock);
return (error);
}
tqs->stqs_count = count;
tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
- for (i = 0; i < count; i++) {
- taskq_t *tq;
-
- switch (mode) {
- case ZTI_MODE_FIXED:
- ASSERT3U(value, >=, 1);
- value = MAX(value, 1);
- break;
+ switch (mode) {
+ case ZTI_MODE_FIXED:
+ ASSERT3U(value, >=, 1);
+ value = MAX(value, 1);
+ break;
- case ZTI_MODE_BATCH:
- batch = B_TRUE;
- flags |= TASKQ_THREADS_CPU_PCT;
- value = zio_taskq_batch_pct;
- break;
+ case ZTI_MODE_BATCH:
+ batch = B_TRUE;
+ flags |= TASKQ_THREADS_CPU_PCT;
+ value = zio_taskq_batch_pct;
+ break;
- case ZTI_MODE_ONLINE_PERCENT:
- flags |= TASKQ_THREADS_CPU_PCT;
- break;
+ default:
+ panic("unrecognized mode for %s_%s taskq (%u:%u) in "
+ "spa_activate()",
+ zio_type_name[t], zio_taskq_types[q], mode, value);
+ break;
+ }
- default:
- panic("unrecognized mode for %s_%s taskq (%u:%u) in "
- "spa_activate()",
- zio_type_name[t], zio_taskq_types[q], mode, value);
- break;
- }
+ for (i = 0; i < count; i++) {
+ taskq_t *tq;
if (count > 1) {
(void) snprintf(name, sizeof (name), "%s_%s_%u",
tq = taskq_create_sysdc(name, value, 50, INT_MAX,
spa->spa_proc, zio_taskq_basedc, flags);
} else {
- tq = taskq_create_proc(name, value, maxclsyspri, 50,
+ pri_t pri = maxclsyspri;
+ /*
+ * The write issue taskq can be extremely CPU
+ * intensive. Run it at slightly lower priority
+ * than the other taskqs.
+ */
+ if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
+ pri--;
+
+ tq = taskq_create_proc(name, value, pri, 50,
INT_MAX, spa->spa_proc, flags);
}
hostid != myhostid) {
nvlist_free(nvconfig);
cmn_err(CE_WARN, "pool '%s' could not be "
- "loaded as it was last accessed by "
- "another system (host: %s hostid: 0x%lx). "
- "See: http://zfsonlinux.org/msg/ZFS-8000-EY",
+ "loaded as it was last accessed by another "
+ "system (host: %s hostid: 0x%lx). See: "
+ "http://zfsonlinux.org/msg/ZFS-8000-EY",
spa_name(spa), hostname,
(unsigned long)hostid);
return (SET_ERROR(EBADF));
if (dsl_dsobj_to_dsname(spa_name(spa),
spa->spa_bootfs, tmpname) == 0) {
char *cp;
- char *dsname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
+ char *dsname;
+
+ dsname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE);
cp = strchr(tmpname, '/');
if (cp == NULL) {
}
/* mark the device being resilvered */
- newvd->vdev_resilvering = B_TRUE;
+ newvd->vdev_resilver_txg = txg;
/*
* If the parent is not a mirror, or if we're replacing, insert the new
/*
* Detach a device from a mirror or replacing vdev.
+ *
* If 'replace_done' is specified, only detach if the parent
* is a replacing vdev.
*/
if (pvd->vdev_ops == &vdev_spare_ops)
cvd->vdev_unspare = B_FALSE;
vdev_remove_parent(cvd);
- cvd->vdev_resilvering = B_FALSE;
}
* the spa_vdev_config_[enter/exit] functions which allow us to
* grab and release the spa_config_lock while still holding the namespace
* lock. During each step the configuration is synced out.
- */
-
-/*
- * Remove a device from the pool. Currently, this supports removing only hot
- * spares, slogs, and level 2 ARC devices.
+ *
+ * Currently, this supports removing only hot spares, slogs, and level 2 ARC
+ * devices.
*/
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
/*
* Find any device that's done replacing, or a vdev marked 'unspare' that's
- * current spared, so we can detach it.
+ * currently spared, so we can detach it.
*/
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
sguid = ppvd->vdev_child[1]->vdev_guid;
}
+ ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd));
+
spa_config_exit(spa, SCL_ALL, FTAG);
if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
return;
return (0);
}
+/*
+ * Sync out the frees recorded on 'bpl'.
+ *
+ * Every entry of 'bpl' is handed to spa_free_sync_cb() together with a zio
+ * obtained from zio_root() and the transaction 'tx'.  The function returns
+ * only after zio_wait() on that zio has returned; a non-zero result from the
+ * wait is fatal.
+ *
+ * Note: this simple function is not inlined to make it easier to dtrace the
+ * amount of time spent syncing frees.
+ */
+static void
+spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx)
+{
+	zio_t *zio = zio_root(spa, NULL, NULL, 0);
+
+	bplist_iterate(bpl, spa_free_sync_cb, zio, tx);
+	/* Same check, same macro, as spa_sync_deferred_frees(). */
+	VERIFY0(zio_wait(zio));
+}
+
+/*
+ * Sync out the pool's deferred frees.
+ *
+ * Walks spa->spa_deferred_bpobj with bpobj_iterate(), handing each entry to
+ * spa_free_sync_cb() along with a zio obtained from zio_root() and the
+ * transaction 'tx', then waits on that zio.  Both the iteration and the wait
+ * are VERIFY'd to return 0.
+ *
+ * Note: this simple function is not inlined to make it easier to dtrace the
+ * amount of time spent syncing deferred frees.
+ */
+static void
+spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx)
+{
+ zio_t *zio = zio_root(spa, NULL, NULL, 0);
+ VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
+ spa_free_sync_cb, zio, tx), ==, 0);
+ VERIFY0(zio_wait(zio));
+}
+
static void
spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
if (sav->sav_count == 0) {
VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
} else {
- list = kmem_alloc(sav->sav_count * sizeof (void *), KM_PUSHPAGE);
+ list = kmem_alloc(sav->sav_count*sizeof (void *), KM_PUSHPAGE);
for (i = 0; i < sav->sav_count; i++)
list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
B_FALSE, VDEV_CONFIG_L2CACHE);
{
dsl_pool_t *dp = spa->spa_dsl_pool;
objset_t *mos = spa->spa_meta_objset;
- bpobj_t *defer_bpo = &spa->spa_deferred_bpobj;
bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd;
!txg_list_empty(&dp->dp_sync_tasks, txg) ||
((dsl_scan_active(dp->dp_scan) ||
txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
- zio_t *zio = zio_root(spa, NULL, NULL, 0);
- VERIFY3U(bpobj_iterate(defer_bpo,
- spa_free_sync_cb, zio, tx), ==, 0);
- VERIFY0(zio_wait(zio));
+ spa_sync_deferred_frees(spa, tx);
}
/*
dsl_pool_sync(dp, txg);
if (pass < zfs_sync_pass_deferred_free) {
- zio_t *zio = zio_root(spa, NULL, NULL, 0);
- bplist_iterate(free_bpl, spa_free_sync_cb,
- zio, tx);
- VERIFY(zio_wait(zio) == 0);
+ spa_sync_frees(spa, free_bpl, tx);
} else {
bplist_iterate(free_bpl, bpobj_enqueue_cb,
- defer_bpo, tx);
+ &spa->spa_deferred_bpobj, tx);
}
ddt_sync(spa, txg);