]>
Commit | Line | Data |
---|---|---|
68be554e TL |
1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
2 | From: Robert Evans <rrevans@gmail.com> | |
3 | Date: Mon, 25 Mar 2024 17:56:49 -0400 | |
4 | Subject: [PATCH] Fix corruption caused by mmap flushing problems | |
5 | ||
6 | 1) Make mmap flushes synchronous. Linux may skip flushing dirty pages | |
7 | already in writeback unless a data-integrity sync is requested. | |
8 | ||
9 | 2) Change zfs_putpage to use TXG_WAIT. Otherwise dirty pages may be | |
10 | skipped due to DMU pushing back on TX assign. | |
11 | ||
12 | 3) Add missing mmap flush when doing block cloning. | |
13 | ||
14 | 4) While here, pass errors from putpage to writepage/writepages. | |
15 | ||
16 | This change fixes corruption edge cases, but unfortunately adds | |
17 | synchronous ZIL flushes for dirty mmap pages to llseek and bclone | |
18 | operations. It may be possible to avoid these sync writes later, | |
19 | but that would require trickier refactoring of the writeback code. | |
20 | ||
21 | Reviewed-by: Alexander Motin <mav@FreeBSD.org> | |
22 | Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> | |
23 | Signed-off-by: Robert Evans <evansr@google.com> | |
24 | Closes #15933 | |
25 | Closes #16019 | |
26 | --- | |
27 | module/os/linux/zfs/zfs_vnops_os.c | 5 +---- | |
28 | module/os/linux/zfs/zpl_file.c | 8 ++++---- | |
29 | module/zfs/zfs_vnops.c | 6 +++++- | |
30 | 3 files changed, 10 insertions(+), 9 deletions(-) | |
31 | ||
32 | diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c | |
33 | index c06a75662..7c473bc7e 100644 | |
34 | --- a/module/os/linux/zfs/zfs_vnops_os.c | |
35 | +++ b/module/os/linux/zfs/zfs_vnops_os.c | |
36 | @@ -3792,11 +3792,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, | |
37 | dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); | |
38 | zfs_sa_upgrade_txholds(tx, zp); | |
39 | ||
40 | - err = dmu_tx_assign(tx, TXG_NOWAIT); | |
41 | + err = dmu_tx_assign(tx, TXG_WAIT); | |
42 | if (err != 0) { | |
43 | - if (err == ERESTART) | |
44 | - dmu_tx_wait(tx); | |
45 | - | |
46 | dmu_tx_abort(tx); | |
47 | #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO | |
48 | filemap_dirty_folio(page_mapping(pp), page_folio(pp)); | |
49 | diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c | |
50 | index 3caa0fc6c..9dec52215 100644 | |
51 | --- a/module/os/linux/zfs/zpl_file.c | |
52 | +++ b/module/os/linux/zfs/zpl_file.c | |
53 | @@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) | |
54 | { | |
55 | boolean_t *for_sync = data; | |
56 | fstrans_cookie_t cookie; | |
57 | + int ret; | |
58 | ||
59 | ASSERT(PageLocked(pp)); | |
60 | ASSERT(!PageWriteback(pp)); | |
61 | ||
62 | cookie = spl_fstrans_mark(); | |
63 | - (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync); | |
64 | + ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync); | |
65 | spl_fstrans_unmark(cookie); | |
66 | ||
67 | - return (0); | |
68 | + return (ret); | |
69 | } | |
70 | ||
71 | #ifdef HAVE_WRITEPAGE_T_FOLIO | |
72 | static int | |
73 | zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data) | |
74 | { | |
75 | - (void) zpl_putpage(&pp->page, wbc, data); | |
76 | - return (0); | |
77 | + return (zpl_putpage(&pp->page, wbc, data)); | |
78 | } | |
79 | #endif | |
80 | ||
81 | diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c | |
82 | index 2b37834d5..7020f88ec 100644 | |
83 | --- a/module/zfs/zfs_vnops.c | |
84 | +++ b/module/zfs/zfs_vnops.c | |
85 | @@ -130,7 +130,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) | |
86 | ||
87 | /* Flush any mmap()'d data to disk */ | |
88 | if (zn_has_cached_data(zp, 0, file_sz - 1)) | |
89 | - zn_flush_cached_data(zp, B_FALSE); | |
90 | + zn_flush_cached_data(zp, B_TRUE); | |
91 | ||
92 | lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER); | |
93 | error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); | |
94 | @@ -1193,6 +1193,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, | |
95 | } | |
96 | } | |
97 | ||
98 | + /* Flush any mmap()'d data to disk */ | |
99 | + if (zn_has_cached_data(inzp, inoff, inoff + len - 1)) | |
100 | + zn_flush_cached_data(inzp, B_TRUE); | |
101 | + | |
102 | /* | |
103 | * Maintain predictable lock order. | |
104 | */ |