*/
int zfs_dmu_offset_next_sync = 0;
+/*
+ * This can be used for testing, to ensure that certain actions happen
+ * while in the middle of a remap (which might otherwise complete too
+ * quickly).
+ */
+int zfs_object_remap_one_indirect_delay_ticks = 0;
+
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{ DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" },
{ DMU_BSWAP_ZAP, TRUE, FALSE, "object directory" },
{ DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" },
{ DMU_BSWAP_ZAP, TRUE, FALSE, "DSL dataset next clones"},
{ DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group used" },
- { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group quota" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group/project used" },
+ { DMU_BSWAP_ZAP, TRUE, TRUE, "ZFS user/group/project quota"},
{ DMU_BSWAP_ZAP, TRUE, FALSE, "snapshot refcount tags"},
{ DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" },
{ DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" },
}
if (nblks != 0) {
- int i;
-
blkid = dbuf_whichblock(dn, level, offset);
- for (i = 0; i < nblks; i++)
+ for (int i = 0; i < nblks; i++)
dbuf_prefetch(dn, level, blkid + i, pri, 0);
}
/* bytes of data covered by a level-1 indirect block */
uint64_t iblkrange =
dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT);
- uint64_t blks;
ASSERT3U(minimum, <=, *start);
}
ASSERT(ISP2(iblkrange));
- for (blks = 0; *start > minimum && blks < maxblks; blks++) {
+ for (uint64_t blks = 0; *start > minimum && blks < maxblks; blks++) {
int err;
/*
int err;
uint64_t dirty_frees_threshold;
dsl_pool_t *dp = dmu_objset_pool(os);
- int t;
if (dn == NULL)
return (SET_ERROR(EINVAL));
chunk_len = chunk_end - chunk_begin;
mutex_enter(&dp->dp_lock);
- for (t = 0; t < TXG_SIZE; t++) {
+ for (int t = 0; t < TXG_SIZE; t++) {
long_free_dirty_all_txgs +=
dp->dp_long_free_dirty_pertxg[t];
}
while (dr != NULL && dr->dr_txg > tx->tx_txg)
dr = dr->dr_next;
- if (dr != NULL && dr->dr_txg == tx->tx_txg)
+ if (dr != NULL && dr->dr_txg == tx->tx_txg) {
dr->dt.dl.dr_raw = B_TRUE;
+ dn->dn_objset->os_next_write_raw
+ [tx->tx_txg & TXG_MASK] = B_TRUE;
+ }
}
dmu_tx_commit(tx);
dmu_tx_mark_netfree(tx);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err == 0) {
- err = dmu_object_free(os, object, tx);
- if (err == 0 && raw)
- VERIFY0(dmu_object_dirty_raw(os, object, tx));
+ if (raw)
+ err = dmu_object_dirty_raw(os, object, tx);
+ if (err == 0)
+ err = dmu_object_free(os, object, tx);
dmu_tx_commit(tx);
} else {
if (err)
return (err);
ASSERT(offset < UINT64_MAX);
- ASSERT(size == -1ULL || size <= UINT64_MAX - offset);
+ ASSERT(size == DMU_OBJECT_END || size <= UINT64_MAX - offset);
dnode_free_range(dn, offset, size, tx);
dnode_rele(dn, FTAG);
return (0);
dmu_buf_rele_array(dbp, numbufs, FTAG);
}
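+/*
+ * Helper for dmu_object_remap_indirects(): dirty the L1 indirect block
+ * covering 'offset' if it has not been rewritten since the last device
+ * removal and dbuf_can_remap() indicates remapping is still possible,
+ * so that its L0 block pointers are remapped when it is written out.
+ */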
+static int
+dmu_object_remap_one_indirect(objset_t *os, dnode_t *dn,
+ uint64_t last_removal_txg, uint64_t offset)
+{
+ uint64_t l1blkid = dbuf_whichblock(dn, 1, offset);
+ int err = 0;
+
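+ /*
+ * dn_struct_rwlock must be held to look up the L1 dbuf and to
+ * safely examine its block pointer.
+ */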
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ dmu_buf_impl_t *dbuf = dbuf_hold_level(dn, 1, l1blkid, FTAG);
+ ASSERT3P(dbuf, !=, NULL);
+
+ /*
+ * If the block hasn't been written yet, this default will ensure
+ * we don't try to remap it.
+ */
+ uint64_t birth = UINT64_MAX;
+ ASSERT3U(last_removal_txg, !=, UINT64_MAX);
+ if (dbuf->db_blkptr != NULL)
+ birth = dbuf->db_blkptr->blk_birth;
+ rw_exit(&dn->dn_struct_rwlock);
+
+ /*
+ * If this L1 was already written after the last removal, then we've
+ * already tried to remap it.
+ */
+ if (birth <= last_removal_txg &&
+ dbuf_read(dbuf, NULL, DB_RF_MUST_SUCCEED) == 0 &&
+ dbuf_can_remap(dbuf)) {
+ dmu_tx_t *tx = dmu_tx_create(os);
+ dmu_tx_hold_remap_l1indirect(tx, dn->dn_object);
+ err = dmu_tx_assign(tx, TXG_WAIT);
+ if (err == 0) {
+ (void) dbuf_dirty(dbuf, tx);
+ dmu_tx_commit(tx);
+ } else {
+ dmu_tx_abort(tx);
+ }
+ }
+
+ dbuf_rele(dbuf, FTAG);
+
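+ /* Test-only throttle; see zfs_object_remap_one_indirect_delay_ticks. */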
+ delay(zfs_object_remap_one_indirect_delay_ticks);
+
+ return (err);
+}
+
+/*
+ * Remap all blockpointers in the object, if possible, so that they reference
+ * only concrete vdevs.
+ *
+ * To do this, iterate over the L0 blockpointers and remap any that reference
+ * an indirect vdev. Note that we only examine L0 blockpointers; since we
+ * cannot guarantee that we can remap all blockpointers anyway (due to split
+ * blocks), we do not want to make the code unnecessarily complicated to
+ * catch the unlikely case that there is an L1 block on an indirect vdev that
+ * contains no indirect blockpointers.
+ */
+int
+dmu_object_remap_indirects(objset_t *os, uint64_t object,
+ uint64_t last_removal_txg)
+{
+ uint64_t offset, l1span;
+ int err;
+ dnode_t *dn;
+
+ err = dnode_hold(os, object, FTAG, &dn);
+ if (err != 0) {
+ return (err);
+ }
+
+ if (dn->dn_nlevels <= 1) {
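+ /* Bail out early if the caller has been interrupted. */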
+ if (issig(JUSTLOOKING) && issig(FORREAL)) {
+ err = SET_ERROR(EINTR);
+ }
+
+ /*
+ * If the dnode has no indirect blocks, we cannot dirty them.
+ * We still want to remap the blkptr(s) in the dnode if
+ * appropriate, so mark the dnode dirty.
+ */
+ if (err == 0 && dnode_needs_remap(dn)) {
+ dmu_tx_t *tx = dmu_tx_create(os);
+ dmu_tx_hold_bonus(tx, dn->dn_object);
+ if ((err = dmu_tx_assign(tx, TXG_WAIT)) == 0) {
+ dnode_setdirty(dn, tx);
+ dmu_tx_commit(tx);
+ } else {
+ dmu_tx_abort(tx);
+ }
+ }
+
+ dnode_rele(dn, FTAG);
+ return (err);
+ }
+
+ offset = 0;
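+ /* Bytes of data covered by one level-1 indirect block. */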
+ l1span = 1ULL << (dn->dn_indblkshift - SPA_BLKPTRSHIFT +
+ dn->dn_datablkshift);
+ /*
+ * Find the next L1 indirect that is not a hole.
+ */
+ while (dnode_next_offset(dn, 0, &offset, 2, 1, 0) == 0) {
+ if (issig(JUSTLOOKING) && issig(FORREAL)) {
+ err = SET_ERROR(EINTR);
+ break;
+ }
+ if ((err = dmu_object_remap_one_indirect(os, dn,
+ last_removal_txg, offset)) != 0) {
+ break;
+ }
+ offset += l1span;
+ }
+
+ dnode_rele(dn, FTAG);
+ return (err);
+}
+
void
dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_tx_t *tx)
arc_buf_destroy(buf, FTAG);
}
-void
-dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, const uint8_t *salt,
- const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
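+/*
+ * Convert the metadnode block containing object 'firstobj' into its raw
+ * (encrypted) form in the ARC, using the provided byteorder, salt, IV,
+ * and MAC.
+ */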
+int
+dmu_convert_mdn_block_to_raw(objset_t *os, uint64_t firstobj,
+ boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
+ const uint8_t *mac, dmu_tx_t *tx)
{
- dmu_object_type_t type;
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
- uint64_t dsobj = dmu_objset_id(db->db_objset);
+ int ret;
+ dmu_buf_t *handle = NULL;
+ dmu_buf_impl_t *db = NULL;
+ uint64_t offset = firstobj * DNODE_MIN_SIZE;
+ uint64_t dsobj = dmu_objset_id(os);
- ASSERT3P(db->db_buf, !=, NULL);
- ASSERT3U(dsobj, !=, 0);
+ ret = dmu_buf_hold_by_dnode(DMU_META_DNODE(os), offset, FTAG, &handle,
+ DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
+ if (ret != 0)
+ return (ret);
dmu_buf_will_change_crypt_params(handle, tx);
- DB_DNODE_ENTER(db);
- type = DB_DNODE(db)->dn_type;
- DB_DNODE_EXIT(db);
+ db = (dmu_buf_impl_t *)handle;
+ ASSERT3P(db->db_buf, !=, NULL);
+ ASSERT3U(dsobj, !=, 0);
/*
* This technically violates the assumption the dmu code makes
* that dnode blocks are only released in syncing context.
*/
(void) arc_release(db->db_buf, db);
- arc_convert_to_raw(db->db_buf, dsobj, byteorder, type, salt, iv, mac);
+ arc_convert_to_raw(db->db_buf, dsobj, byteorder, DMU_OT_DNODE,
+ salt, iv, mac);
+
+ dmu_buf_rele(handle, FTAG);
+
+ return (0);
}
void
return (SET_ERROR(EIO));
}
+ /*
+ * In order to prevent the zgd's lwb from being freed prior to
+ * dmu_sync_late_arrival_done() being called, we have to ensure
+ * the lwb's "max txg" takes this tx's txg into account.
+ */
+ zil_lwb_add_txg(zgd->zgd_lwb, dmu_tx_get_txg(tx));
+
dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP);
dsa->dsa_dr = NULL;
dsa->dsa_done = done;
return (err);
}
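+/*
+ * Update the in-core maximum block id for this object, via
+ * dnode_new_blkid() under dn_struct_rwlock.
+ */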
+int
+dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
+ dmu_tx_t *tx)
+{
+ dnode_t *dn;
+ int err;
+
+ err = dnode_hold(os, object, FTAG, &dn);
+ if (err)
+ return (err);
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ dnode_new_blkid(dn, maxblkid, tx, B_FALSE);
+ rw_exit(&dn->dn_struct_rwlock);
+ dnode_rele(dn, FTAG);
+ return (0);
+}
+
void
dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
dmu_tx_t *tx)
return (err);
}
-int zfs_mdcomp_disable = 0;
-
/*
* When the "redundant_metadata" property is set to "most", only indirect
* blocks of this level and higher will have an additional ditto block.
* 3. all other level 0 blocks
*/
if (ismd) {
- if (zfs_mdcomp_disable) {
- compress = ZIO_COMPRESS_EMPTY;
- } else {
- /*
- * XXX -- we should design a compression algorithm
- * that specializes in arrays of bps.
- */
- compress = zio_compress_select(os->os_spa,
- ZIO_COMPRESS_ON, ZIO_COMPRESS_ON);
- }
+ /*
+ * XXX -- we should design a compression algorithm
+ * that specializes in arrays of bps.
+ */
+ compress = zio_compress_select(os->os_spa,
+ ZIO_COMPRESS_ON, ZIO_COMPRESS_ON);
/*
* Metadata always gets checksummed. If the data
dedup = B_FALSE;
}
- if (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)
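+ /*
+ * Only level-0 dnode and objset blocks are written uncompressed;
+ * their indirect blocks are compressed like other metadata.
+ */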
+ if (level <= 0 &&
+ (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)) {
compress = ZIO_COMPRESS_EMPTY;
+ }
}
zp->zp_compress = compress;
/*
* Check if dnode is dirty
*/
- if (dn->dn_dirtyctx != DN_UNDIRTIED) {
- for (i = 0; i < TXG_SIZE; i++) {
- if (!list_is_empty(&dn->dn_dirty_records[i])) {
- clean = B_FALSE;
- break;
- }
+ for (i = 0; i < TXG_SIZE; i++) {
+ if (multilist_link_active(&dn->dn_dirty_link[i])) {
+ clean = B_FALSE;
+ break;
}
}
__dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
{
dnode_phys_t *dnp = dn->dn_phys;
- int i;
doi->doi_data_block_size = dn->dn_datablksz;
doi->doi_metadata_block_size = dn->dn_indblkshift ?
doi->doi_physical_blocks_512 = (DN_USED_BYTES(dnp) + 256) >> 9;
doi->doi_max_offset = (dn->dn_maxblkid + 1) * dn->dn_datablksz;
doi->doi_fill_count = 0;
- for (i = 0; i < dnp->dn_nblkptr; i++)
+ for (int i = 0; i < dnp->dn_nblkptr; i++)
doi->doi_fill_count += BP_GET_FILL(&dnp->dn_blkptr[i]);
}
EXPORT_SYMBOL(dmu_object_dnsize_from_db);
EXPORT_SYMBOL(dmu_object_set_nlevels);
EXPORT_SYMBOL(dmu_object_set_blocksize);
+EXPORT_SYMBOL(dmu_object_set_maxblkid);
EXPORT_SYMBOL(dmu_object_set_checksum);
EXPORT_SYMBOL(dmu_object_set_compress);
EXPORT_SYMBOL(dmu_write_policy);
EXPORT_SYMBOL(dmu_ot);
/* BEGIN CSTYLED */
-module_param(zfs_mdcomp_disable, int, 0644);
-MODULE_PARM_DESC(zfs_mdcomp_disable, "Disable meta data compression");
-
module_param(zfs_nopwrite_enabled, int, 0644);
MODULE_PARM_DESC(zfs_nopwrite_enabled, "Enable NOP writes");