* the io_error. If this was not a speculative zio, create an ereport.
*/
if (ret == ECKSUM) {
- ret = SET_ERROR(EIO);
+ zio->io_error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, &zio->io_bookmark, zio, 0, 0);
}
+/*
+ * Check whether this zio has outstanding children of the types named in
+ * "childbits" (a ZIO_CHILD_*_BIT bitmask) that have not yet reached the
+ * given wait point (ZIO_WAIT_READY or ZIO_WAIT_DONE).  If so, back the
+ * pipeline up one stage, record the counter being waited on in
+ * zio->io_stall, and return B_TRUE so the caller stops the pipeline;
+ * presumably the zio is re-dispatched when that count drains to zero —
+ * confirm against the zio_notify_parent() path.
+ */
static boolean_t
-zio_wait_for_children(zio_t *zio, enum zio_child child, enum zio_wait_type wait)
+zio_wait_for_children(zio_t *zio, uint8_t childbits, enum zio_wait_type wait)
{
-	uint64_t *countp = &zio->io_children[child][wait];
	boolean_t waiting = B_FALSE;
	mutex_enter(&zio->io_lock);
	ASSERT(zio->io_stall == NULL);
+	/*
+	 * Scan every child type; callers may now wait on several types in
+	 * a single call (e.g. LOGICAL | GANG) instead of making one call
+	 * per type.  Stall on the first type still holding children.
+	 */
	for (int c = 0; c < ZIO_CHILD_TYPES; c++) {
		if (!(ZIO_CHILD_BIT_IS_SET(childbits, c)))
			continue;

		uint64_t *countp = &zio->io_children[c][wait];
		if (*countp != 0) {
+			/* Rewind one stage so this stage re-executes later. */
			zio->io_stage >>= 1;
			ASSERT3U(zio->io_stage, !=, ZIO_STAGE_OPEN);
			zio->io_stall = countp;
			waiting = B_TRUE;
			break;
		}
	}
	mutex_exit(&zio->io_lock);
-
	return (waiting);
}
zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
{
+ zfs_blkptr_verify(spa, bp);
+
/*
* The check for EMBEDDED is a performance optimization. We
* process the free here (by ignoring it) rather than
{
zio_t *zio;
- dprintf_bp(bp, "claiming in txg %llu", txg);
+ zfs_blkptr_verify(spa, bp);
if (BP_IS_EMBEDDED(bp))
return (zio_null(pio, spa, NULL, NULL, NULL, 0));
enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
zio_t *zio;
- ASSERT(vd->vdev_parent ==
- (pio->io_vd ? pio->io_vd : pio->io_spa->spa_root_vdev));
+ /*
+ * vdev child I/Os do not propagate their error to the parent.
+ * Therefore, for correct operation the caller *must* check for
+ * and handle the error in the child i/o's done callback.
+ * The only exceptions are i/os that we don't care about
+ * (OPTIONAL or REPAIR).
+ */
+ ASSERT((flags & ZIO_FLAG_OPTIONAL) || (flags & ZIO_FLAG_IO_REPAIR) ||
+ done != NULL);
+
+ /*
+ * In the common case, where the parent zio was to a normal vdev,
+ * the child zio must be to a child vdev of that vdev. Otherwise,
+ * the child zio must be to a top-level vdev.
+ */
+ if (pio->io_vd != NULL && pio->io_vd->vdev_ops != &vdev_indirect_ops) {
+ ASSERT3P(vd->vdev_parent, ==, pio->io_vd);
+ } else {
+ ASSERT3P(vd, ==, vd->vdev_top);
+ }
if (type == ZIO_TYPE_READ && bp != NULL) {
/*
pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
}
- if (vd->vdev_children == 0)
+ if (vd->vdev_ops->vdev_op_leaf) {
+ ASSERT0(vd->vdev_children);
offset += VDEV_LABEL_START_SIZE;
+ }
- flags |= ZIO_VDEV_CHILD_FLAGS(pio) | ZIO_FLAG_DONT_PROPAGATE;
+ flags |= ZIO_VDEV_CHILD_FLAGS(pio);
/*
* If we've decided to do a repair, the write is not speculative --
uint64_t psize =
BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
+ ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
+
if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
zio->io_child_type == ZIO_CHILD_LOGICAL &&
!(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
abd_return_buf_copy(zio->io_abd, data, psize);
} else {
ASSERT(!BP_IS_EMBEDDED(bp));
+ ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
}
if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
* If our children haven't all reached the ready stage,
* wait for them and then repeat this pipeline stage.
*/
- if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
- zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
+ if (zio_wait_for_children(zio, ZIO_CHILD_LOGICAL_BIT |
+ ZIO_CHILD_GANG_BIT, ZIO_WAIT_READY)) {
return (ZIO_PIPELINE_STOP);
+ }
if (!IO_IS_ALLOCATING(zio))
return (ZIO_PIPELINE_CONTINUE);
*bp = zio->io_bp_orig;
zio->io_pipeline = zio->io_orig_pipeline;
+ } else if ((zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) != 0 &&
+ zp->zp_type == DMU_OT_DNODE) {
+ /*
+ * The DMU actually relies on the zio layer's compression
+ * to free metadnode blocks that have had all contained
+ * dnodes freed. As a result, even when doing a raw
+ * receive, we must check whether the block can be compressed
+ * to a hole.
+ */
+ psize = zio_compress_data(ZIO_COMPRESS_EMPTY,
+ zio->io_abd, NULL, lsize);
+ if (psize == 0)
+ compress = ZIO_COMPRESS_OFF;
} else {
ASSERT3U(psize, !=, 0);
-
}
/*
zio->io_pipeline = ZIO_DDT_FREE_PIPELINE;
}
+ ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
+
return (ZIO_PIPELINE_CONTINUE);
}
}
void
-zio_suspend(spa_t *spa, zio_t *zio)
+zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
{
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
fm_panic("Pool '%s' has encountered an uncorrectable I/O "
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_GODFATHER);
- spa->spa_suspended = B_TRUE;
+ spa->spa_suspended = reason;
if (zio != NULL) {
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
* Reexecute all previously suspended i/o.
*/
mutex_enter(&spa->spa_suspend_lock);
- spa->spa_suspended = B_FALSE;
+ spa->spa_suspended = ZIO_SUSPEND_NONE;
cv_broadcast(&spa->spa_suspend_cv);
pio = spa->spa_suspend_zio_root;
spa->spa_suspend_zio_root = NULL;
{
blkptr_t *bp = zio->io_bp;
- if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE))
+ if (zio_wait_for_children(zio, ZIO_CHILD_GANG_BIT, ZIO_WAIT_DONE)) {
return (ZIO_PIPELINE_STOP);
+ }
ASSERT(BP_IS_GANG(bp) && zio->io_gang_leader == zio);
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
{
blkptr_t *bp = zio->io_bp;
- if (zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE))
+ if (zio_wait_for_children(zio, ZIO_CHILD_DDT_BIT, ZIO_WAIT_DONE)) {
return (ZIO_PIPELINE_STOP);
+ }
ASSERT(BP_GET_DEDUP(bp));
ASSERT(BP_GET_PSIZE(bp) == zio->io_size);
}
ASSERT3P(zio->io_logical, !=, zio);
+ if (zio->io_type == ZIO_TYPE_WRITE && zio->io_vd->vdev_removing) {
+ /*
+ * Note: the code can handle other kinds of writes,
+ * but we don't expect them.
+ */
+ ASSERT(zio->io_flags &
+ (ZIO_FLAG_PHYSICAL | ZIO_FLAG_SELF_HEAL |
+ ZIO_FLAG_INDUCE_DAMAGE));
+ }
align = 1ULL << vd->vdev_top->vdev_ashift;
vdev_ops_t *ops = vd ? vd->vdev_ops : &vdev_mirror_ops;
boolean_t unexpected_error = B_FALSE;
- if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE))
+ if (zio_wait_for_children(zio, ZIO_CHILD_VDEV_BIT, ZIO_WAIT_DONE)) {
return (ZIO_PIPELINE_STOP);
+ }
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
{
vdev_t *vd = zio->io_vd;
- if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE))
+ if (zio_wait_for_children(zio, ZIO_CHILD_VDEV_BIT, ZIO_WAIT_DONE)) {
return (ZIO_PIPELINE_STOP);
+ }
if (vd == NULL && !(zio->io_flags & ZIO_FLAG_CONFIG_WRITER))
spa_config_exit(zio->io_spa, SCL_ZIO, zio);
zio_t *pio, *pio_next;
zio_link_t *zl = NULL;
- if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
- zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY))
+ if (zio_wait_for_children(zio, ZIO_CHILD_GANG_BIT | ZIO_CHILD_DDT_BIT,
+ ZIO_WAIT_READY)) {
return (ZIO_PIPELINE_STOP);
+ }
if (zio->io_ready) {
ASSERT(IO_IS_ALLOCATING(zio));
* If our children haven't all completed,
* wait for them and then repeat this pipeline stage.
*/
- if (zio_wait_for_children(zio, ZIO_CHILD_VDEV, ZIO_WAIT_DONE) ||
- zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_DONE) ||
- zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_DONE) ||
- zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE))
+ if (zio_wait_for_children(zio, ZIO_CHILD_ALL_BITS, ZIO_WAIT_DONE)) {
return (ZIO_PIPELINE_STOP);
+ }
/*
* If the allocation throttle is enabled, then update the accounting.
* We'd fail again if we reexecuted now, so suspend
* until conditions improve (e.g. device comes online).
*/
- zio_suspend(zio->io_spa, zio);
+ zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
} else {
/*
* Reexecution is potentially a huge amount of work.