]> git.proxmox.com Git - mirror_zfs.git/commitdiff
ZIL: Revert zl_lock scope reduction.
authorAlexander Motin <mav@FreeBSD.org>
Sat, 2 Sep 2023 00:13:52 +0000 (20:13 -0400)
committerGitHub <noreply@github.com>
Sat, 2 Sep 2023 00:13:52 +0000 (17:13 -0700)
While I have no reports of it, I suspect possible use-after-free
scenario when zil_commit_waiter() tries to dereference zcw_lwb
for lwb already freed by zil_sync(), while zcw_done is not set.
Extension of zl_lock scope as it was originally should block
zil_sync() from freeing the lwb, closing this race.

This reverts #14959 and couple chunks of #14841.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by: iXsystems, Inc.
Closes #15228

module/zfs/zil.c

index be3311b031cc6ca114591da4cf77402b6a7ca12b..297c6b65d4f2af964bc7baabb47b6904a17ebd01 100644 (file)
@@ -1411,15 +1411,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
        zilog_t *zilog = lwb->lwb_zilog;
        zil_commit_waiter_t *zcw;
        itx_t *itx;
-       uint64_t txg;
-       list_t itxs, waiters;
 
        spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
 
-       list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
-       list_create(&waiters, sizeof (zil_commit_waiter_t),
-           offsetof(zil_commit_waiter_t, zcw_node));
-
        hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
 
        mutex_enter(&zilog->zl_lock);
@@ -1428,6 +1422,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
 
        lwb->lwb_root_zio = NULL;
 
+       ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
+       lwb->lwb_state = LWB_STATE_FLUSH_DONE;
+
        if (zilog->zl_last_lwb_opened == lwb) {
                /*
                 * Remember the highest committed log sequence number
@@ -1438,22 +1435,13 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
                zilog->zl_commit_lr_seq = zilog->zl_lr_seq;
        }
 
-       list_move_tail(&itxs, &lwb->lwb_itxs);
-       list_move_tail(&waiters, &lwb->lwb_waiters);
-       txg = lwb->lwb_issued_txg;
-
-       ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
-       lwb->lwb_state = LWB_STATE_FLUSH_DONE;
-
-       mutex_exit(&zilog->zl_lock);
-
-       while ((itx = list_remove_head(&itxs)) != NULL)
+       while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
                zil_itx_destroy(itx);
-       list_destroy(&itxs);
 
-       while ((zcw = list_remove_head(&waiters)) != NULL) {
+       while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
                mutex_enter(&zcw->zcw_lock);
 
+               ASSERT3P(zcw->zcw_lwb, ==, lwb);
                zcw->zcw_lwb = NULL;
                /*
                 * We expect any ZIO errors from child ZIOs to have been
@@ -1478,7 +1466,11 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
 
                mutex_exit(&zcw->zcw_lock);
        }
-       list_destroy(&waiters);
+
+       uint64_t txg = lwb->lwb_issued_txg;
+
+       /* Once we drop the lock, lwb may be freed by zil_sync(). */
+       mutex_exit(&zilog->zl_lock);
 
        mutex_enter(&zilog->zl_lwb_io_lock);
        ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0);