git.proxmox.com Git - mirror_zfs.git/commitdiff
zap: reuse zap_leaf_t on dbuf reuse after shrink
author: Rob N <robn@despairlabs.com>
Sat, 25 May 2024 01:55:47 +0000 (11:55 +1000)
committer: GitHub <noreply@github.com>
Sat, 25 May 2024 01:55:47 +0000 (18:55 -0700)
If a shrink or truncate had recently freed a portion of the ZAP, the
dbuf could still be sitting on the dbuf cache waiting for eviction. If
it is then allocated for a new leaf before it can be evicted, the
zap_leaf_t is still attached as userdata, tripping the VERIFY.

Instead, just check for the userdata, and if we find it, reuse it.

Sponsored-by: Klara, Inc.
Sponsored-by: iXsystems, Inc.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #16157.
Closes #16204

module/zfs/zap.c

index 81dab80daf87098d9ed5703b01166a545e411689..03b76ea1b7bf6ebf8857f757228a6e867b584249 100644 (file)
@@ -425,20 +425,36 @@ zap_leaf_evict_sync(void *dbu)
 static zap_leaf_t *
 zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
 {
-       zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
-
        ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
 
-       rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL);
-       rw_enter(&l->l_rwlock, RW_WRITER);
-       l->l_blkid = zap_allocate_blocks(zap, 1);
-       l->l_dbuf = NULL;
+       uint64_t blkid = zap_allocate_blocks(zap, 1);
+       dmu_buf_t *db = NULL;
 
        VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
-           l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
+           blkid << FZAP_BLOCK_SHIFT(zap), NULL, &db,
            DMU_READ_NO_PREFETCH));
-       dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
-       VERIFY3P(NULL, ==, dmu_buf_set_user(l->l_dbuf, &l->l_dbu));
+
+       /*
+        * Create the leaf structure and stash it on the dbuf. If zap was
+        * recently shrunk or truncated, the dbuf might have been sitting in
+        * the cache waiting to be evicted, and so still have the old leaf
+        * attached to it. If so, just reuse it.
+        */
+       zap_leaf_t *l = dmu_buf_get_user(db);
+       if (l == NULL) {
+               l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
+               l->l_blkid = blkid;
+               l->l_dbuf = db;
+               rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL);
+               dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL,
+                   &l->l_dbuf);
+               dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
+       } else {
+               ASSERT3U(l->l_blkid, ==, blkid);
+               ASSERT3P(l->l_dbuf, ==, db);
+       }
+
+       rw_enter(&l->l_rwlock, RW_WRITER);
        dmu_buf_will_dirty(l->l_dbuf, tx);
 
        zap_leaf_init(l, zap->zap_normflags != 0);