]>
Commit | Line | Data |
---|---|---|
36fd6e86 DW |
1 | /* |
2 | * Copyright (C) 2017 Oracle. All Rights Reserved. | |
3 | * | |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it would be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write the Free Software Foundation, | |
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | */ | |
20 | #include "xfs.h" | |
21 | #include "xfs_fs.h" | |
22 | #include "xfs_shared.h" | |
23 | #include "xfs_format.h" | |
24 | #include "xfs_trans_resv.h" | |
25 | #include "xfs_mount.h" | |
26 | #include "xfs_defer.h" | |
27 | #include "xfs_btree.h" | |
28 | #include "xfs_bit.h" | |
29 | #include "xfs_log_format.h" | |
30 | #include "xfs_trans.h" | |
31 | #include "xfs_sb.h" | |
32 | #include "xfs_inode.h" | |
33 | #include "xfs_alloc.h" | |
34 | #include "xfs_alloc_btree.h" | |
35 | #include "xfs_bmap.h" | |
36 | #include "xfs_bmap_btree.h" | |
37 | #include "xfs_ialloc.h" | |
38 | #include "xfs_ialloc_btree.h" | |
39 | #include "xfs_refcount.h" | |
40 | #include "xfs_refcount_btree.h" | |
41 | #include "xfs_rmap.h" | |
42 | #include "xfs_rmap_btree.h" | |
43 | #include "scrub/xfs_scrub.h" | |
44 | #include "scrub/scrub.h" | |
45 | #include "scrub/trace.h" | |
46 | ||
a5637186 DW |
47 | /* |
48 | * Online Scrub and Repair | |
49 | * | |
50 | * Traditionally, XFS (the kernel driver) did not know how to check or | |
51 | * repair on-disk data structures. That task was left to the xfs_check | |
52 | * and xfs_repair tools, both of which require taking the filesystem | |
53 | * offline for a thorough but time consuming examination. Online | |
54 | * scrub & repair, on the other hand, enables us to check the metadata | |
55 | * for obvious errors while carefully stepping around the filesystem's | |
56 | * ongoing operations, locking rules, etc. | |
57 | * | |
58 | * Given that most XFS metadata consist of records stored in a btree, | |
59 | * most of the checking functions iterate the btree blocks themselves | |
60 | * looking for irregularities. When a record block is encountered, each | |
61 | * record can be checked for obviously bad values. Record values can | |
62 | * also be cross-referenced against other btrees to look for potential | |
63 | * misunderstandings between pieces of metadata. | |
64 | * | |
65 | * It is expected that the checkers responsible for per-AG metadata | |
66 | * structures will lock the AG headers (AGI, AGF, AGFL), iterate the | |
67 | * metadata structure, and perform any relevant cross-referencing before | |
68 | * unlocking the AG and returning the results to userspace. These | |
69 | * scrubbers must not keep an AG locked for too long to avoid tying up | |
70 | * the block and inode allocators. | |
71 | * | |
72 | * Block maps and b-trees rooted in an inode present a special challenge | |
73 | * because they can involve extents from any AG. The general scrubber | |
74 | * structure of lock -> check -> xref -> unlock still holds, but AG | |
75 | * locking order rules /must/ be obeyed to avoid deadlocks. The | |
76 | * ordering rule, of course, is that we must lock in increasing AG | |
77 | * order. Helper functions are provided to track which AG headers we've | |
78 | * already locked. If we detect an imminent locking order violation, we | |
79 | * can signal a potential deadlock, in which case the scrubber can jump | |
80 | * out to the top level, lock all the AGs in order, and retry the scrub. | |
81 | * | |
82 | * For file data (directories, extended attributes, symlinks) scrub, we | |
83 | * can simply lock the inode and walk the data. For btree data | |
84 | * (directories and attributes) we follow the same btree-scrubbing | |
85 | * strategy outlined previously to check the records. | |
86 | * | |
87 | * We use a bit of trickery with transactions to avoid buffer deadlocks | |
88 | * if there is a cycle in the metadata. The basic problem is that | |
89 | * travelling down a btree involves locking the current buffer at each | |
90 | * tree level. If a pointer should somehow point back to a buffer that | |
91 | * we've already examined, we will deadlock due to the second buffer | |
92 | * locking attempt. Note however that grabbing a buffer in transaction | |
93 | * context links the locked buffer to the transaction. If we try to | |
94 | * re-grab the buffer in the context of the same transaction, we avoid | |
95 | * the second lock attempt and continue. Between the verifier and the | |
96 | * scrubber, something will notice that something is amiss and report | |
97 | * the corruption. Therefore, each scrubber will allocate an empty | |
98 | * transaction, attach buffers to it, and cancel the transaction at the | |
99 | * end of the scrub run. Cancelling a non-dirty transaction simply | |
100 | * unlocks the buffers. | |
101 | * | |
102 | * There are four pieces of data that scrub can communicate to | |
103 | * userspace. The first is the error code (errno), which can be used to | |
104 | * communicate operational errors in performing the scrub. There are | |
105 | * also three flags that can be set in the scrub context. If the data | |
106 | * structure itself is corrupt, the CORRUPT flag will be set. If | |
107 | * the metadata is correct but otherwise suboptimal, the PREEN flag | |
108 | * will be set. | |
109 | */ | |
110 | ||
111 | /* Scrub setup and teardown */ | |
112 | ||
113 | /* Free all the resources and finish the transactions. */ | |
114 | STATIC int | |
115 | xfs_scrub_teardown( | |
116 | struct xfs_scrub_context *sc, | |
117 | int error) | |
118 | { | |
119 | if (sc->tp) { | |
120 | xfs_trans_cancel(sc->tp); | |
121 | sc->tp = NULL; | |
122 | } | |
123 | return error; | |
124 | } | |
125 | ||
126 | /* Scrubbing dispatch. */ | |
127 | ||
128 | static const struct xfs_scrub_meta_ops meta_scrub_ops[] = { | |
129 | }; | |
130 | ||
131 | /* This isn't a stable feature, warn once per day. */ | |
132 | static inline void | |
133 | xfs_scrub_experimental_warning( | |
134 | struct xfs_mount *mp) | |
135 | { | |
136 | static struct ratelimit_state scrub_warning = RATELIMIT_STATE_INIT( | |
137 | "xfs_scrub_warning", 86400 * HZ, 1); | |
138 | ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE); | |
139 | ||
140 | if (__ratelimit(&scrub_warning)) | |
141 | xfs_alert(mp, | |
142 | "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); | |
143 | } | |
144 | ||
36fd6e86 DW |
145 | /* Dispatch metadata scrubbing. */ |
146 | int | |
147 | xfs_scrub_metadata( | |
148 | struct xfs_inode *ip, | |
149 | struct xfs_scrub_metadata *sm) | |
150 | { | |
a5637186 DW |
151 | struct xfs_scrub_context sc; |
152 | struct xfs_mount *mp = ip->i_mount; | |
153 | const struct xfs_scrub_meta_ops *ops; | |
154 | bool try_harder = false; | |
155 | int error = 0; | |
156 | ||
157 | trace_xfs_scrub_start(ip, sm, error); | |
158 | ||
159 | /* Forbidden if we are shut down or mounted norecovery. */ | |
160 | error = -ESHUTDOWN; | |
161 | if (XFS_FORCED_SHUTDOWN(mp)) | |
162 | goto out; | |
163 | error = -ENOTRECOVERABLE; | |
164 | if (mp->m_flags & XFS_MOUNT_NORECOVERY) | |
165 | goto out; | |
166 | ||
167 | /* Check our inputs. */ | |
168 | error = -EINVAL; | |
169 | sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; | |
170 | if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN) | |
171 | goto out; | |
172 | if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved))) | |
173 | goto out; | |
174 | ||
175 | /* Do we know about this type of metadata? */ | |
176 | error = -ENOENT; | |
177 | if (sm->sm_type >= XFS_SCRUB_TYPE_NR) | |
178 | goto out; | |
179 | ops = &meta_scrub_ops[sm->sm_type]; | |
180 | if (ops->scrub == NULL) | |
181 | goto out; | |
182 | ||
183 | /* | |
184 | * We won't scrub any filesystem that doesn't have the ability | |
185 | * to record unwritten extents. The option was made default in | |
186 | * 2003, removed from mkfs in 2007, and cannot be disabled in | |
187 | * v5, so if we find a filesystem without this flag it's either | |
188 | * really old or totally unsupported. Avoid it either way. | |
189 | * We also don't support v1-v3 filesystems, which aren't | |
190 | * mountable. | |
191 | */ | |
192 | error = -EOPNOTSUPP; | |
193 | if (!xfs_sb_version_hasextflgbit(&mp->m_sb)) | |
194 | goto out; | |
195 | ||
196 | /* Does this fs even support this type of metadata? */ | |
197 | error = -ENOENT; | |
198 | if (ops->has && !ops->has(&mp->m_sb)) | |
199 | goto out; | |
200 | ||
201 | /* We don't know how to repair anything yet. */ | |
202 | error = -EOPNOTSUPP; | |
203 | if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) | |
204 | goto out; | |
205 | ||
206 | xfs_scrub_experimental_warning(mp); | |
207 | ||
208 | retry_op: | |
209 | /* Set up for the operation. */ | |
210 | memset(&sc, 0, sizeof(sc)); | |
211 | sc.mp = ip->i_mount; | |
212 | sc.sm = sm; | |
213 | sc.ops = ops; | |
214 | sc.try_harder = try_harder; | |
215 | error = sc.ops->setup(&sc, ip); | |
216 | if (error) | |
217 | goto out_teardown; | |
218 | ||
219 | /* Scrub for errors. */ | |
220 | error = sc.ops->scrub(&sc); | |
221 | if (!try_harder && error == -EDEADLOCK) { | |
222 | /* | |
223 | * Scrubbers return -EDEADLOCK to mean 'try harder'. | |
224 | * Tear down everything we hold, then set up again with | |
225 | * preparation for worst-case scenarios. | |
226 | */ | |
227 | error = xfs_scrub_teardown(&sc, 0); | |
228 | if (error) | |
229 | goto out; | |
230 | try_harder = true; | |
231 | goto retry_op; | |
232 | } else if (error) | |
233 | goto out_teardown; | |
234 | ||
235 | if (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | | |
236 | XFS_SCRUB_OFLAG_XCORRUPT)) | |
237 | xfs_alert_ratelimited(mp, "Corruption detected during scrub."); | |
238 | ||
239 | out_teardown: | |
240 | error = xfs_scrub_teardown(&sc, error); | |
241 | out: | |
242 | trace_xfs_scrub_done(ip, sm, error); | |
243 | if (error == -EFSCORRUPTED || error == -EFSBADCRC) { | |
244 | sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; | |
245 | error = 0; | |
246 | } | |
247 | return error; | |
36fd6e86 | 248 | } |