]> git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/dbuf.c
OpenZFS 6513 - partially filled holes lose birth time
[mirror_zfs.git] / module / zfs / dbuf.c
index fd51d59a8edb6f3921ae1e36134beaf75dd2e57b..126748994d534cab4019820b22aee7a3e6703954 100644 (file)
@@ -543,13 +543,50 @@ dbuf_verify(dmu_buf_impl_t *db)
                 * If the blkptr isn't set but they have nonzero data,
                 * it had better be dirty, otherwise we'll lose that
                 * data when we evict this buffer.
+                *
+                * There is an exception to this rule for indirect blocks; in
+                * this case, if the indirect block is a hole, we fill in a few
+                * fields on each of the child blocks (importantly, birth time)
+                * to prevent hole birth times from being lost when you
+                * partially fill in a hole.
                 */
                if (db->db_dirtycnt == 0) {
-                       ASSERTV(uint64_t *buf = db->db.db_data);
-                       int i;
+                       if (db->db_level == 0) {
+                               uint64_t *buf = db->db.db_data;
+                               int i;
 
-                       for (i = 0; i < db->db.db_size >> 3; i++) {
-                               ASSERT(buf[i] == 0);
+                               for (i = 0; i < db->db.db_size >> 3; i++) {
+                                       ASSERT(buf[i] == 0);
+                               }
+                       } else {
+                               int i;
+                               blkptr_t *bps = db->db.db_data;
+                               ASSERT3U(1 << DB_DNODE(db)->dn_indblkshift, ==,
+                                   db->db.db_size);
+                               /*
+                                * We want to verify that all the blkptrs in the
+                                * indirect block are holes, but we may have
+                                * automatically set up a few fields for them.
+                                * We iterate through each blkptr and verify
+                                * they only have those fields set.
+                                */
+                               for (i = 0;
+                                   i < db->db.db_size / sizeof (blkptr_t);
+                                   i++) {
+                                       blkptr_t *bp = &bps[i];
+                                       ASSERT(ZIO_CHECKSUM_IS_ZERO(
+                                           &bp->blk_cksum));
+                                       ASSERT(
+                                           DVA_IS_EMPTY(&bp->blk_dva[0]) &&
+                                           DVA_IS_EMPTY(&bp->blk_dva[1]) &&
+                                           DVA_IS_EMPTY(&bp->blk_dva[2]));
+                                       ASSERT0(bp->blk_fill);
+                                       ASSERT0(bp->blk_pad[0]);
+                                       ASSERT0(bp->blk_pad[1]);
+                                       ASSERT(!BP_IS_EMBEDDED(bp));
+                                       ASSERT(BP_IS_HOLE(bp));
+                                       ASSERT0(bp->blk_phys_birth);
+                               }
                        }
                }
        }
@@ -718,10 +755,32 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
            BP_IS_HOLE(db->db_blkptr)))) {
                arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 
-               DB_DNODE_EXIT(db);
                dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
                    db->db.db_size, db, type));
                bzero(db->db.db_data, db->db.db_size);
+
+               if (db->db_blkptr != NULL && db->db_level > 0 &&
+                   BP_IS_HOLE(db->db_blkptr) &&
+                   db->db_blkptr->blk_birth != 0) {
+                       blkptr_t *bps = db->db.db_data;
+                       int i;
+                       for (i = 0; i < ((1 <<
+                           DB_DNODE(db)->dn_indblkshift) / sizeof (blkptr_t));
+                           i++) {
+                               blkptr_t *bp = &bps[i];
+                               ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
+                                   1 << dn->dn_indblkshift);
+                               BP_SET_LSIZE(bp,
+                                   BP_GET_LEVEL(db->db_blkptr) == 1 ?
+                                   dn->dn_datablksz :
+                                   BP_GET_LSIZE(db->db_blkptr));
+                               BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr));
+                               BP_SET_LEVEL(bp,
+                                   BP_GET_LEVEL(db->db_blkptr) - 1);
+                               BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
+                       }
+               }
+               DB_DNODE_EXIT(db);
                db->db_state = DB_CACHED;
                mutex_exit(&db->db_mtx);
                return (0);
@@ -3094,6 +3153,45 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
        rw_exit(&dn->dn_struct_rwlock);
 }
 
+/* ARGSUSED */
+/*
+ * This function gets called just prior to running through the compression
+ * stage of the zio pipeline. If we're an indirect block composed of only
+ * holes, then we want this indirect to be compressed away to a hole. In
+ * order to do that we must zero out any information about the holes that
+ * this indirect points to before we try to compress it.
+ */
+static void
+dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
+{
+       dmu_buf_impl_t *db = vdb;
+       dnode_t *dn;
+       blkptr_t *bp;
+       uint64_t i;
+       int epbs;       /* log2 of the number of child blkptrs scanned below */
+
+       ASSERT3U(db->db_level, >, 0);
+       DB_DNODE_ENTER(db);
+       dn = DB_DNODE(db);
+       epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+
+       /* Scan the child blkptrs, stopping at the first one that is not a hole */
+       for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
+               if (!BP_IS_HOLE(bp))
+                       break;
+       }
+
+       /*
+        * If all the children are holes, then zero them all out so that
+        * we may get compressed away.
+        */
+       if (i == 1 << epbs) {
+               /* the scan ran to completion, so every child is a hole */
+               bzero(db->db.db_data, db->db.db_size);
+       }
+       DB_DNODE_EXIT(db);
+}
+
 /*
  * The SPA will call this callback several times for each zio - once
  * for every physical child i/o (zio->io_phys_children times).  This
@@ -3348,7 +3446,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
 
                dr->dr_zio = zio_write(zio, os->os_spa, txg,
                    &dr->dr_bp_copy, contents, db->db.db_size, &zp,
-                   dbuf_write_override_ready, NULL, dbuf_write_override_done,
+                   dbuf_write_override_ready, NULL, NULL,
+                   dbuf_write_override_done,
                    dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
                mutex_enter(&db->db_mtx);
                dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@@ -3359,14 +3458,26 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
                ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
                dr->dr_zio = zio_write(zio, os->os_spa, txg,
                    &dr->dr_bp_copy, NULL, db->db.db_size, &zp,
-                   dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
+                   dbuf_write_nofill_ready, NULL, NULL,
+                   dbuf_write_nofill_done, db,
                    ZIO_PRIORITY_ASYNC_WRITE,
                    ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
        } else {
+               arc_done_func_t *children_ready_cb = NULL;
                ASSERT(arc_released(data));
+
+               /*
+                * For indirect blocks, we want to set up the children
+                * ready callback so that we can properly handle an indirect
+                * block that only contains holes.
+                */
+               if (db->db_level != 0)
+                       children_ready_cb = dbuf_write_children_ready;
+
                dr->dr_zio = arc_write(zio, os->os_spa, txg,
                    &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
                    DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
+                   children_ready_cb,
                    dbuf_write_physdone, dbuf_write_done, db,
                    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
        }