Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* |
7b718769 NS | 2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
7b718769 NS | 3 | * All Rights Reserved. |
1da177e4 | 4 | * |
7b718769 NS | 5 | * This program is free software; you can redistribute it and/or |
7b718769 NS | 6 | * modify it under the terms of the GNU General Public License as |
1da177e4 LT | 7 | * published by the Free Software Foundation. |
1da177e4 LT | 8 | * |
7b718769 NS | 9 | * This program is distributed in the hope that it would be useful, |
7b718769 NS | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
7b718769 NS | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
7b718769 NS | 12 | * GNU General Public License for more details. |
1da177e4 | 13 | * |
7b718769 NS | 14 | * You should have received a copy of the GNU General Public License |
7b718769 NS | 15 | * along with this program; if not, write the Free Software Foundation, |
7b718769 NS | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
1da177e4 | 17 | */ |
1da177e4 | 18 | #include "xfs.h" |
a844f451 | 19 | #include "xfs_fs.h" |
1da177e4 | 20 | #include "xfs_types.h" |
a844f451 | 21 | #include "xfs_bit.h" |
1da177e4 | 22 | #include "xfs_log.h" |
a844f451 | 23 | #include "xfs_inum.h" |
1da177e4 LT | 24 | #include "xfs_trans.h" |
1da177e4 LT | 25 | #include "xfs_sb.h" |
a844f451 | 26 | #include "xfs_ag.h" |
1da177e4 LT | 27 | #include "xfs_dir2.h" |
1da177e4 LT | 28 | #include "xfs_dmapi.h" |
1da177e4 LT | 29 | #include "xfs_mount.h" |
a844f451 | 30 | #include "xfs_da_btree.h" |
1da177e4 LT | 31 | #include "xfs_bmap_btree.h" |
1da177e4 LT | 32 | #include "xfs_ialloc_btree.h" |
1da177e4 LT | 33 | #include "xfs_alloc_btree.h" |
1da177e4 | 34 | #include "xfs_dir2_sf.h" |
a844f451 | 35 | #include "xfs_attr_sf.h" |
1da177e4 | 36 | #include "xfs_dinode.h" |
1da177e4 | 37 | #include "xfs_inode.h" |
a844f451 NS | 38 | #include "xfs_inode_item.h" |
a844f451 NS | 39 | #include "xfs_btree.h" |
a844f451 NS | 40 | #include "xfs_alloc.h" |
a844f451 NS | 41 | #include "xfs_ialloc.h" |
a844f451 NS | 42 | #include "xfs_quota.h" |
1da177e4 LT | 43 | #include "xfs_error.h" |
1da177e4 LT | 44 | #include "xfs_bmap.h" |
1da177e4 | 45 | #include "xfs_rw.h" |
1da177e4 | 46 | #include "xfs_buf_item.h" |
a844f451 | 47 | #include "xfs_log_priv.h" |
1da177e4 | 48 | #include "xfs_dir2_trace.h" |
a844f451 | 49 | #include "xfs_extfree_item.h" |
1da177e4 LT | 50 | #include "xfs_acl.h" |
1da177e4 LT | 51 | #include "xfs_attr.h" |
1da177e4 LT | 52 | #include "xfs_clnt.h" |
2a82b8be DC | 53 | #include "xfs_mru_cache.h" |
2a82b8be DC | 54 | #include "xfs_filestream.h" |
e13a73f0 | 55 | #include "xfs_fsops.h" |
739bfb2a | 56 | #include "xfs_vnodeops.h" |
48c872a9 | 57 | #include "xfs_vfsops.h" |
43355099 | 58 | #include "xfs_utils.h" |
739bfb2a | 59 | |
1da177e4 | 60 | |
3c85c36c | 61 | STATIC void |
f898d6c0 CH | 62 | xfs_quiesce_fs( |
f898d6c0 CH | 63 | xfs_mount_t *mp) |
f898d6c0 CH | 64 | { |
f898d6c0 CH | 65 | int count = 0, pincount; |
3758dee9 | 66 | |
f898d6c0 CH | 67 | xfs_flush_buftarg(mp->m_ddev_targp, 0); |
f898d6c0 CH | 68 | xfs_finish_reclaim_all(mp, 0); |
f898d6c0 CH | 69 | |
f898d6c0 CH | 70 | /* This loop must run at least twice. |
f898d6c0 CH | 71 | * The first pass flushes most of the |
f898d6c0 CH | 72 | * metadata, but that in turn generates more |
f898d6c0 CH | 73 | * metadata (typically directory updates), |
f898d6c0 CH | 74 | * which must then be flushed and logged before |
f898d6c0 CH | 75 | * we can write the unmount record. |
3758dee9 | 76 | */ |
f898d6c0 | 77 | do { |
516b2e7c | 78 | xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL); |
f898d6c0 CH | 79 | pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); |
f898d6c0 CH | 80 | if (!pincount) { |
f898d6c0 CH | 81 | delay(50); |
f898d6c0 CH | 82 | count++; |
f898d6c0 CH | 83 | } |
f898d6c0 CH | 84 | } while (count < 2); |
f898d6c0 | 85 | } |
1da177e4 | 86 | |
516b2e7c DC | 87 | /* |
516b2e7c DC | 88 | * Second stage of a quiesce. The data is already synced, now we have to take |
516b2e7c DC | 89 | * care of the metadata. New transactions are already blocked, so we need to |
516b2e7c DC | 90 | * wait for any remaining transactions to drain out before proceeding. |
516b2e7c DC | 91 | */ |
9909c4aa | 92 | void |
516b2e7c DC | 93 | xfs_attr_quiesce( |
516b2e7c DC | 94 | xfs_mount_t *mp) |
516b2e7c DC | 95 | { |
e5720eec DC | 96 | int error = 0; |
e5720eec DC | 97 | |
516b2e7c DC | 98 | /* wait for all modifications to complete */ |
516b2e7c DC | 99 | while (atomic_read(&mp->m_active_trans) > 0) |
516b2e7c DC | 100 | delay(100); |
516b2e7c DC | 101 | |
516b2e7c DC | 102 | /* flush inodes and push all remaining buffers out to disk */ |
516b2e7c DC | 103 | xfs_quiesce_fs(mp); |
516b2e7c DC | 104 | |
516b2e7c DC | 105 | ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); |
516b2e7c DC | 106 | |
516b2e7c DC | 107 | /* Push the superblock and write an unmount record */ |
e5720eec DC | 108 | error = xfs_log_sbcount(mp, 1); |
e5720eec DC | 109 | if (error) |
e5720eec DC | 110 | xfs_fs_cmn_err(CE_WARN, mp, |
e5720eec DC | 111 | "xfs_attr_quiesce: failed to log sb changes. " |
e5720eec DC | 112 | "Frozen image may not be consistent."); |
516b2e7c DC | 113 | xfs_log_unmount_write(mp); |
516b2e7c DC | 114 | xfs_unmountfs_writesb(mp); |
516b2e7c DC | 115 | } |
516b2e7c DC | 116 | |
1da177e4 LT | 117 | /* |
1da177e4 LT | 118 | * xfs_unmount_flush implements a set of flush operations on special |
1da177e4 LT | 119 | * inodes, which are needed as a separate set of operations so that |
1da177e4 LT | 120 | * they can be called as part of the relocation process. |
1da177e4 LT | 121 | */ |
1da177e4 LT | 122 | int |
1da177e4 LT | 123 | xfs_unmount_flush( |
1da177e4 LT | 124 | xfs_mount_t *mp, /* Mount structure we are getting |
1da177e4 LT | 125 | rid of. */ |
1da177e4 LT | 126 | int relocation) /* Called from vfs relocation. */ |
1da177e4 LT | 127 | { |
1da177e4 LT | 128 | xfs_inode_t *rip = mp->m_rootip; |
1da177e4 LT | 129 | xfs_inode_t *rbmip; |
1da177e4 LT | 130 | xfs_inode_t *rsumip = NULL; |
67fcaa73 | 131 | bhv_vnode_t *rvp = XFS_ITOV(rip); |
1da177e4 LT | 132 | int error; |
1da177e4 LT | 133 | |
f7c66ce3 | 134 | xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); |
1da177e4 LT | 135 | xfs_iflock(rip); |
1da177e4 LT | 136 | |
1da177e4 LT | 137 | /* |
1da177e4 LT | 138 | * Flush out the real time inodes. |
1da177e4 LT | 139 | */ |
1da177e4 LT | 140 | if ((rbmip = mp->m_rbmip) != NULL) { |
1da177e4 LT | 141 | xfs_ilock(rbmip, XFS_ILOCK_EXCL); |
1da177e4 LT | 142 | xfs_iflock(rbmip); |
1da177e4 LT | 143 | error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC); |
1da177e4 LT | 144 | xfs_iunlock(rbmip, XFS_ILOCK_EXCL); |
1da177e4 LT | 145 | |
1da177e4 LT | 146 | if (error == EFSCORRUPTED) |
1da177e4 LT | 147 | goto fscorrupt_out; |
1da177e4 LT | 148 | |
1da177e4 LT | 149 | ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); |
1da177e4 LT | 150 | |
1da177e4 LT | 151 | rsumip = mp->m_rsumip; |
1da177e4 LT | 152 | xfs_ilock(rsumip, XFS_ILOCK_EXCL); |
1da177e4 LT | 153 | xfs_iflock(rsumip); |
1da177e4 LT | 154 | error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC); |
1da177e4 LT | 155 | xfs_iunlock(rsumip, XFS_ILOCK_EXCL); |
1da177e4 LT | 156 | |
1da177e4 LT | 157 | if (error == EFSCORRUPTED) |
1da177e4 LT | 158 | goto fscorrupt_out; |
1da177e4 LT | 159 | |
1da177e4 LT | 160 | ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); |
1da177e4 LT | 161 | } |
1da177e4 LT | 162 | |
1da177e4 LT | 163 | /* |
1da177e4 LT | 164 | * Synchronously flush root inode to disk |
1da177e4 LT | 165 | */ |
1da177e4 LT | 166 | error = xfs_iflush(rip, XFS_IFLUSH_SYNC); |
1da177e4 LT | 167 | if (error == EFSCORRUPTED) |
1da177e4 LT | 168 | goto fscorrupt_out2; |
1da177e4 LT | 169 | |
1da177e4 LT | 170 | if (vn_count(rvp) != 1 && !relocation) { |
1da177e4 LT | 171 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
1da177e4 LT | 172 | return XFS_ERROR(EBUSY); |
1da177e4 LT | 173 | } |
1da177e4 LT | 174 | |
1da177e4 LT | 175 | /* |
1da177e4 LT | 176 | * Release the dquots that the rootinode, rbmino and rsumino might be |
1da177e4 LT | 177 | * holding, and flush and purge the quota inodes. |
1da177e4 LT | 178 | */ |
1da177e4 LT | 179 | error = XFS_QM_UNMOUNT(mp); |
1da177e4 LT | 180 | if (error == EFSCORRUPTED) |
1da177e4 LT | 181 | goto fscorrupt_out2; |
1da177e4 LT | 182 | |
1da177e4 LT | 183 | if (rbmip) { |
43355099 CH | 184 | IRELE(rbmip); |
43355099 CH | 185 | IRELE(rsumip); |
1da177e4 LT | 186 | } |
1da177e4 LT | 187 | |
1da177e4 LT | 188 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
1da177e4 LT | 189 | return 0; |
1da177e4 LT | 190 | |
1da177e4 LT | 191 | fscorrupt_out: |
1da177e4 LT | 192 | xfs_ifunlock(rip); |
1da177e4 LT | 193 | |
1da177e4 LT | 194 | fscorrupt_out2: |
1da177e4 LT | 195 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
1da177e4 LT | 196 | |
1da177e4 LT | 197 | return XFS_ERROR(EFSCORRUPTED); |
1da177e4 LT | 198 | } |
1da177e4 LT | 199 | |
1da177e4 LT | 200 | /* |
1da177e4 LT | 201 | * xfs_sync flushes any pending I/O to file system vfsp. |
1da177e4 LT | 202 | * |
1da177e4 LT | 203 | * This routine is called by vfs_sync() to make sure that things make it |
1da177e4 LT | 204 | * out to disk eventually, on sync() system calls to flush out everything, |
1da177e4 LT | 205 | * and when the file system is unmounted. For the vfs_sync() case, all |
1da177e4 LT | 206 | * we really need to do is sync out the log to make all of our meta-data |
1da177e4 LT | 207 | * updates permanent (except for timestamps). For calls from pflushd(), |
1da177e4 LT | 208 | * dirty pages are kept moving by calling pdflush() on the inodes |
1da177e4 LT | 209 | * containing them. We also flush the inodes that we can lock without |
1da177e4 LT | 210 | * sleeping and the superblock if we can lock it without sleeping from |
1da177e4 LT | 211 | * vfs_sync() so that items at the tail of the log are always moving out. |
1da177e4 LT | 212 | * |
1da177e4 LT | 213 | * Flags: |
1da177e4 LT | 214 | * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want |
1da177e4 LT | 215 | * to sleep if we can help it. All we really need |
1da177e4 LT | 216 | * to do is ensure that the log is synced at least |
1da177e4 LT | 217 | * periodically. We also push the inodes and |
1da177e4 LT | 218 | * superblock if we can lock them without sleeping |
1da177e4 LT | 219 | * and they are not pinned. |
1da177e4 LT | 220 | * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not |
1da177e4 LT | 221 | * set, then we really want to lock each inode and flush |
1da177e4 LT | 222 | * it. |
1da177e4 LT | 223 | * SYNC_WAIT - All the flushes that take place in this call should |
1da177e4 LT | 224 | * be synchronous. |
1da177e4 LT | 225 | * SYNC_DELWRI - This tells us to push dirty pages associated with |
1da177e4 LT | 226 | * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to |
1da177e4 LT | 227 | * determine if they should be flushed sync, async, or |
1da177e4 LT | 228 | * delwri. |
1da177e4 LT | 229 | * SYNC_CLOSE - This flag is passed when the system is being |
c41564b5 | 230 | * unmounted. We should sync and invalidate everything. |
1da177e4 LT | 231 | * SYNC_FSDATA - This indicates that the caller would like to make |
1da177e4 LT | 232 | * sure the superblock is safe on disk. We can ensure |
c41564b5 | 233 | * this by simply making sure the log gets flushed |
1da177e4 LT | 234 | * if SYNC_BDFLUSH is set, and by actually writing it |
1da177e4 LT | 235 | * out otherwise. |
3c0dc77b DC | 236 | * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete |
3c0dc77b DC | 237 | * before we return (including direct I/O). Forms the drain |
3c0dc77b DC | 238 | * side of the write barrier needed to safely quiesce the |
3c0dc77b DC | 239 | * filesystem. |
1da177e4 LT | 240 | * |
1da177e4 LT | 241 | */ |
48c872a9 | 242 | int |
1da177e4 | 243 | xfs_sync( |
48c872a9 CH | 244 | xfs_mount_t *mp, |
48c872a9 CH | 245 | int flags) |
1da177e4 | 246 | { |
b09cc771 CH | 247 | int error; |
b09cc771 CH | 248 | |
b09cc771 CH | 249 | /* |
b09cc771 CH | 250 | * Get the Quota Manager to flush the dquots. |
b09cc771 CH | 251 | * |
b09cc771 CH | 252 | * If XFS quota support is not enabled or this filesystem |
b09cc771 CH | 253 | * instance does not use quotas, XFS_QM_DQSYNC will always |
b09cc771 CH | 254 | * return zero. |
b09cc771 CH | 255 | */ |
b09cc771 CH | 256 | error = XFS_QM_DQSYNC(mp, flags); |
b09cc771 CH | 257 | if (error) { |
b09cc771 CH | 258 | /* |
b09cc771 CH | 259 | * If we got an IO error, we will be shutting down. |
b09cc771 CH | 260 | * So, there's nothing more for us to do here. |
b09cc771 CH | 261 | */ |
b09cc771 CH | 262 | ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); |
b09cc771 CH | 263 | if (XFS_FORCED_SHUTDOWN(mp)) |
b09cc771 CH | 264 | return XFS_ERROR(error); |
b09cc771 CH | 265 | } |
1da177e4 | 266 | |
2a82b8be DC | 267 | if (flags & SYNC_IOWAIT) |
2a82b8be DC | 268 | xfs_filestream_flush(mp); |
2a82b8be DC | 269 | |
2823945f | 270 | return xfs_syncsub(mp, flags, NULL); |
1da177e4 LT | 271 | } |
1da177e4 LT | 272 | |
1da177e4 LT | 273 | /* |
1da177e4 LT | 274 | * xfs sync routine for internal use |
1da177e4 LT | 275 | * |
b83bd138 | 276 | * This routine supports all of the flags defined for the generic vfs_sync |
1f9b3b64 | 277 | * interface as explained above under xfs_sync. |
1da177e4 LT | 278 | * |
1da177e4 LT | 279 | */ |
ee34807a | 280 | int |
1da177e4 LT | 281 | xfs_sync_inodes( |
1da177e4 LT | 282 | xfs_mount_t *mp, |
1da177e4 LT | 283 | int flags, |
1da177e4 LT | 284 | int *bypassed) |
1da177e4 LT | 285 | { |
1da177e4 LT | 286 | xfs_inode_t *ip = NULL; |
67fcaa73 | 287 | bhv_vnode_t *vp = NULL; |
1da177e4 LT | 288 | int error; |
1da177e4 LT | 289 | int last_error; |
1da177e4 LT | 290 | uint64_t fflag; |
1da177e4 LT | 291 | uint lock_flags; |
1da177e4 LT | 292 | uint base_lock_flags; |
1da177e4 LT | 293 | boolean_t mount_locked; |
1da177e4 LT | 294 | boolean_t vnode_refed; |
1da177e4 LT | 295 | int preempt; |
1da177e4 LT | 296 | xfs_iptr_t *ipointer; |
1da177e4 LT | 297 | #ifdef DEBUG |
1da177e4 LT | 298 | boolean_t ipointer_in = B_FALSE; |
1da177e4 LT | 299 | |
1da177e4 LT | 300 | #define IPOINTER_SET ipointer_in = B_TRUE |
1da177e4 LT | 301 | #define IPOINTER_CLR ipointer_in = B_FALSE |
1da177e4 LT | 302 | #else |
1da177e4 LT | 303 | #define IPOINTER_SET |
1da177e4 LT | 304 | #define IPOINTER_CLR |
1da177e4 LT | 305 | #endif |
1da177e4 LT | 306 | |
1da177e4 LT | 307 | |
1da177e4 LT | 308 | /* Insert a marker record into the inode list after inode ip. The list |
1da177e4 LT | 309 | * must be locked when this is called. After the call the list will no |
1da177e4 LT | 310 | * longer be locked. |
1da177e4 LT | 311 | */ |
1da177e4 LT | 312 | #define IPOINTER_INSERT(ip, mp) { \ |
1da177e4 LT | 313 | ASSERT(ipointer_in == B_FALSE); \ |
1da177e4 LT | 314 | ipointer->ip_mnext = ip->i_mnext; \ |
1da177e4 LT | 315 | ipointer->ip_mprev = ip; \ |
1da177e4 LT | 316 | ip->i_mnext = (xfs_inode_t *)ipointer; \ |
1da177e4 LT | 317 | ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ |
1da177e4 LT | 318 | preempt = 0; \ |
1da177e4 LT | 319 | XFS_MOUNT_IUNLOCK(mp); \ |
1da177e4 LT | 320 | mount_locked = B_FALSE; \ |
1da177e4 LT | 321 | IPOINTER_SET; \ |
1da177e4 LT | 322 | } |
1da177e4 LT | 323 | |
1da177e4 LT | 324 | /* Remove the marker from the inode list. If the marker was the only item |
1da177e4 LT | 325 | * in the list then there are no remaining inodes and we should zero out |
1da177e4 LT | 326 | * the whole list. If we are the current head of the list then move the head |
1da177e4 LT | 327 | * past us. |
1da177e4 LT | 328 | */ |
1da177e4 LT | 329 | #define IPOINTER_REMOVE(ip, mp) { \ |
1da177e4 LT | 330 | ASSERT(ipointer_in == B_TRUE); \ |
1da177e4 LT | 331 | if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ |
1da177e4 LT | 332 | ip = ipointer->ip_mnext; \ |
1da177e4 LT | 333 | ip->i_mprev = ipointer->ip_mprev; \ |
1da177e4 LT | 334 | ipointer->ip_mprev->i_mnext = ip; \ |
1da177e4 LT | 335 | if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ |
1da177e4 LT | 336 | mp->m_inodes = ip; \ |
1da177e4 LT | 337 | } \ |
1da177e4 LT | 338 | } else { \ |
1da177e4 LT | 339 | ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ |
1da177e4 LT | 340 | mp->m_inodes = NULL; \ |
1da177e4 LT | 341 | ip = NULL; \ |
1da177e4 LT | 342 | } \ |
1da177e4 LT | 343 | IPOINTER_CLR; \ |
1da177e4 LT | 344 | } |
1da177e4 LT | 345 | |
1da177e4 LT | 346 | #define XFS_PREEMPT_MASK 0x7f |
1da177e4 LT | 347 | |
44866d39 LM | 348 | ASSERT(!(flags & SYNC_BDFLUSH)); |
44866d39 LM | 349 | |
1da177e4 LT | 350 | if (bypassed) |
1da177e4 LT | 351 | *bypassed = 0; |
bd186aa9 | 352 | if (mp->m_flags & XFS_MOUNT_RDONLY) |
1da177e4 LT | 353 | return 0; |
1da177e4 LT | 354 | error = 0; |
1da177e4 LT | 355 | last_error = 0; |
1da177e4 LT | 356 | preempt = 0; |
1da177e4 LT | 357 | |
1da177e4 LT | 358 | /* Allocate a reference marker */ |
1da177e4 LT | 359 | ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); |
1da177e4 LT | 360 | |
1da177e4 LT | 361 | fflag = XFS_B_ASYNC; /* default is don't wait */ |
44866d39 | 362 | if (flags & SYNC_DELWRI) |
1da177e4 LT | 363 | fflag = XFS_B_DELWRI; |
1da177e4 LT | 364 | if (flags & SYNC_WAIT) |
1da177e4 LT | 365 | fflag = 0; /* synchronous overrides all */ |
1da177e4 LT | 366 | |
1da177e4 LT | 367 | base_lock_flags = XFS_ILOCK_SHARED; |
1da177e4 LT | 368 | if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { |
1da177e4 LT | 369 | /* |
1da177e4 LT | 370 | * We need the I/O lock if we're going to call any of |
1da177e4 LT | 371 | * the flush/inval routines. |
1da177e4 LT | 372 | */ |
1da177e4 LT | 373 | base_lock_flags |= XFS_IOLOCK_SHARED; |
1da177e4 LT | 374 | } |
1da177e4 LT | 375 | |
1da177e4 LT | 376 | XFS_MOUNT_ILOCK(mp); |
1da177e4 LT | 377 | |
1da177e4 LT | 378 | ip = mp->m_inodes; |
1da177e4 LT | 379 | |
1da177e4 LT | 380 | mount_locked = B_TRUE; |
1da177e4 LT | 381 | vnode_refed = B_FALSE; |
1da177e4 LT | 382 | |
1da177e4 LT | 383 | IPOINTER_CLR; |
1da177e4 LT | 384 | |
1da177e4 LT | 385 | do { |
1da177e4 LT | 386 | ASSERT(ipointer_in == B_FALSE); |
1da177e4 LT | 387 | ASSERT(vnode_refed == B_FALSE); |
1da177e4 LT | 388 | |
1da177e4 LT | 389 | lock_flags = base_lock_flags; |
1da177e4 LT | 390 | |
1da177e4 LT | 391 | /* |
1da177e4 LT | 392 | * There were no inodes in the list, just break out |
1da177e4 LT | 393 | * of the loop. |
1da177e4 LT | 394 | */ |
1da177e4 LT | 395 | if (ip == NULL) { |
1da177e4 LT | 396 | break; |
1da177e4 LT | 397 | } |
1da177e4 LT | 398 | |
1da177e4 LT | 399 | /* |
1da177e4 LT | 400 | * We found another sync thread marker - skip it |
1da177e4 LT | 401 | */ |
1da177e4 LT | 402 | if (ip->i_mount == NULL) { |
1da177e4 LT | 403 | ip = ip->i_mnext; |
1da177e4 LT | 404 | continue; |
1da177e4 LT | 405 | } |
1da177e4 LT | 406 | |
1da177e4 LT | 407 | vp = XFS_ITOV_NULL(ip); |
1da177e4 LT | 408 | |
1da177e4 LT | 409 | /* |
1da177e4 LT | 410 | * If the vnode is gone then this is being torn down, |
1da177e4 LT | 411 | * call reclaim if it is flushed, else let regular flush |
1da177e4 LT | 412 | * code deal with it later in the loop. |
1da177e4 LT | 413 | */ |
1da177e4 LT | 414 | |
1da177e4 LT | 415 | if (vp == NULL) { |
1da177e4 LT | 416 | /* Skip ones already in reclaim */ |
1da177e4 LT | 417 | if (ip->i_flags & XFS_IRECLAIM) { |
1da177e4 LT | 418 | ip = ip->i_mnext; |
1da177e4 LT | 419 | continue; |
1da177e4 LT | 420 | } |
1da177e4 LT | 421 | if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { |
1da177e4 LT | 422 | ip = ip->i_mnext; |
1da177e4 LT | 423 | } else if ((xfs_ipincount(ip) == 0) && |
1da177e4 LT | 424 | xfs_iflock_nowait(ip)) { |
1da177e4 LT | 425 | IPOINTER_INSERT(ip, mp); |
1da177e4 LT | 426 | |
1da177e4 LT | 427 | xfs_finish_reclaim(ip, 1, |
1da177e4 LT | 428 | XFS_IFLUSH_DELWRI_ELSE_ASYNC); |
1da177e4 LT | 429 | |
1da177e4 LT | 430 | XFS_MOUNT_ILOCK(mp); |
1da177e4 LT | 431 | mount_locked = B_TRUE; |
1da177e4 LT | 432 | IPOINTER_REMOVE(ip, mp); |
1da177e4 LT | 433 | } else { |
1da177e4 LT | 434 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1da177e4 LT | 435 | ip = ip->i_mnext; |
1da177e4 LT | 436 | } |
1da177e4 LT | 437 | continue; |
1da177e4 LT | 438 | } |
1da177e4 LT | 439 | |
1da177e4 LT | 440 | if (VN_BAD(vp)) { |
1da177e4 LT | 441 | ip = ip->i_mnext; |
1da177e4 LT | 442 | continue; |
1da177e4 LT | 443 | } |
1da177e4 LT | 444 | |
1da177e4 LT | 445 | if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { |
1da177e4 LT | 446 | XFS_MOUNT_IUNLOCK(mp); |
f0e2d93c | 447 | kmem_free(ipointer); |
1da177e4 LT | 448 | return 0; |
1da177e4 LT | 449 | } |
1da177e4 LT | 450 | |
1da177e4 LT | 451 | /* |
1da177e4 LT | 452 | * Try to lock without sleeping. We're out of order with |
1da177e4 LT | 453 | * the inode list lock here, so if we fail we need to drop |
1da177e4 LT | 454 | * the mount lock and try again. If we're called from |
1da177e4 LT | 455 | * bdflush() here, then don't bother. |
1da177e4 LT | 456 | * |
1da177e4 LT | 457 | * The inode lock here actually coordinates with the |
1da177e4 LT | 458 | * almost spurious inode lock in xfs_ireclaim() to prevent |
1da177e4 LT | 459 | * the vnode we handle here without a reference from |
1da177e4 LT | 460 | * being freed while we reference it. If we lock the inode |
1da177e4 LT | 461 | * while it's on the mount list here, then the spurious inode |
1da177e4 LT | 462 | * lock in xfs_ireclaim() after the inode is pulled from |
1da177e4 LT | 463 | * the mount list will sleep until we release it here. |
1da177e4 LT | 464 | * This keeps the vnode from being freed while we reference |
cdb62687 | 465 | * it. |
1da177e4 LT | 466 | */ |
1da177e4 LT | 467 | if (xfs_ilock_nowait(ip, lock_flags) == 0) { |
44866d39 | 468 | if (vp == NULL) { |
1da177e4 LT | 469 | ip = ip->i_mnext; |
1da177e4 LT | 470 | continue; |
1da177e4 LT | 471 | } |
1da177e4 LT | 472 | |
cdb62687 | 473 | vp = vn_grab(vp); |
1da177e4 | 474 | if (vp == NULL) { |
cdb62687 | 475 | ip = ip->i_mnext; |
1da177e4 LT | 476 | continue; |
1da177e4 LT | 477 | } |
1da177e4 LT | 478 | |
cdb62687 | 479 | IPOINTER_INSERT(ip, mp); |
1da177e4 LT | 480 | xfs_ilock(ip, lock_flags); |
1da177e4 LT | 481 | |
1da177e4 LT | 482 | ASSERT(vp == XFS_ITOV(ip)); |
1da177e4 LT | 483 | ASSERT(ip->i_mount == mp); |
1da177e4 LT | 484 | |
1da177e4 LT | 485 | vnode_refed = B_TRUE; |
1da177e4 LT | 486 | } |
1da177e4 LT | 487 | |
1da177e4 LT | 488 | /* From here on in the loop we may have a marker record |
1da177e4 LT | 489 | * in the inode list. |
1da177e4 LT | 490 | */ |
1da177e4 LT | 491 | |
40095b64 DC | 492 | /* |
40095b64 DC | 493 | * If we have to flush data or wait for I/O completion |
40095b64 DC | 494 | * we need to drop the ilock that we currently hold. |
40095b64 DC | 495 | * If we need to drop the lock, insert a marker if we |
40095b64 DC | 496 | * have not already done so. |
40095b64 DC | 497 | */ |
40095b64 DC | 498 | if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || |
40095b64 DC | 499 | ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { |
40095b64 DC | 500 | if (mount_locked) { |
40095b64 DC | 501 | IPOINTER_INSERT(ip, mp); |
1da177e4 | 502 | } |
40095b64 | 503 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
1da177e4 | 504 | |
40095b64 DC | 505 | if (flags & SYNC_CLOSE) { |
40095b64 DC | 506 | /* Shutdown case. Flush and invalidate. */ |
40095b64 DC | 507 | if (XFS_FORCED_SHUTDOWN(mp)) |
739bfb2a CH | 508 | xfs_tosspages(ip, 0, -1, |
739bfb2a CH | 509 | FI_REMAPF); |
40095b64 | 510 | else |
739bfb2a CH | 511 | error = xfs_flushinval_pages(ip, |
739bfb2a CH | 512 | 0, -1, FI_REMAPF); |
40095b64 | 513 | } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { |
739bfb2a | 514 | error = xfs_flush_pages(ip, 0, |
67fcaa73 | 515 | -1, fflag, FI_NONE); |
1da177e4 LT | 516 | } |
1da177e4 LT | 517 | |
40095b64 DC | 518 | /* |
40095b64 DC | 519 | * When freezing, we need to wait to ensure all I/O (including direct |
40095b64 DC | 520 | * I/O) is complete to ensure no further data modification can take |
40095b64 DC | 521 | * place after this point. |
40095b64 DC | 522 | */ |
40095b64 DC | 523 | if (flags & SYNC_IOWAIT) |
b677c210 | 524 | vn_iowait(ip); |
40095b64 DC | 525 | |
40095b64 DC | 526 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
1da177e4 LT | 527 | } |
1da177e4 LT | 528 | |
44866d39 LM | 529 | if ((flags & SYNC_ATTR) && |
44866d39 LM | 530 | (ip->i_update_core || |
44866d39 LM | 531 | (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { |
44866d39 LM | 532 | if (mount_locked) |
44866d39 LM | 533 | IPOINTER_INSERT(ip, mp); |
1da177e4 | 534 | |
44866d39 LM | 535 | if (flags & SYNC_WAIT) { |
44866d39 LM | 536 | xfs_iflock(ip); |
44866d39 LM | 537 | error = xfs_iflush(ip, XFS_IFLUSH_SYNC); |
1da177e4 | 538 | |
44866d39 LM | 539 | /* |
44866d39 LM | 540 | * If we can't acquire the flush lock, then the inode |
44866d39 LM | 541 | * is already being flushed so don't bother waiting. |
44866d39 LM | 542 | * |
44866d39 LM | 543 | * If we can lock it then do a delwri flush so we can |
44866d39 LM | 544 | * combine multiple inode flushes in each disk write. |
44866d39 LM | 545 | */ |
44866d39 LM | 546 | } else if (xfs_iflock_nowait(ip)) { |
44866d39 LM | 547 | error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); |
44866d39 LM | 548 | } else if (bypassed) { |
44866d39 LM | 549 | (*bypassed)++; |
1da177e4 LT | 550 | } |
1da177e4 LT | 551 | } |
1da177e4 LT | 552 | |
1da177e4 LT | 553 | if (lock_flags != 0) { |
1da177e4 LT | 554 | xfs_iunlock(ip, lock_flags); |
1da177e4 LT | 555 | } |
1da177e4 LT | 556 | |
1da177e4 LT | 557 | if (vnode_refed) { |
1da177e4 LT | 558 | /* |
1da177e4 LT | 559 | * If we had to take a reference on the vnode |
1da177e4 LT | 560 | * above, then wait until after we've unlocked |
1da177e4 LT | 561 | * the inode to release the reference. This is |
1da177e4 LT | 562 | * because we can be already holding the inode |
43355099 | 563 | * lock when IRELE() calls xfs_inactive(). |
1da177e4 LT | 564 | * |
1da177e4 LT | 565 | * Make sure to drop the mount lock before calling |
43355099 | 566 | * IRELE() so that we don't trip over ourselves if |
1da177e4 LT | 567 | * we have to go for the mount lock again in the |
1da177e4 LT | 568 | * inactive code. |
1da177e4 LT | 569 | */ |
1da177e4 LT | 570 | if (mount_locked) { |
1da177e4 LT | 571 | IPOINTER_INSERT(ip, mp); |
1da177e4 LT | 572 | } |
1da177e4 LT | 573 | |
43355099 | 574 | IRELE(ip); |
1da177e4 LT | 575 | |
1da177e4 LT | 576 | vnode_refed = B_FALSE; |
1da177e4 LT | 577 | } |
1da177e4 LT | 578 | |
1da177e4 LT | 579 | if (error) { |
1da177e4 LT | 580 | last_error = error; |
1da177e4 LT | 581 | } |
1da177e4 LT | 582 | |
1da177e4 LT | 583 | /* |
1da177e4 LT | 584 | * bail out if the filesystem is corrupted. |
1da177e4 LT | 585 | */ |
1da177e4 LT | 586 | if (error == EFSCORRUPTED) { |
1da177e4 LT | 587 | if (!mount_locked) { |
1da177e4 LT | 588 | XFS_MOUNT_ILOCK(mp); |
1da177e4 LT | 589 | IPOINTER_REMOVE(ip, mp); |
1da177e4 LT | 590 | } |
1da177e4 LT | 591 | XFS_MOUNT_IUNLOCK(mp); |
1da177e4 LT | 592 | ASSERT(ipointer_in == B_FALSE); |
f0e2d93c | 593 | kmem_free(ipointer); |
1da177e4 LT | 594 | return XFS_ERROR(error); |
1da177e4 LT | 595 | } |
1da177e4 LT | 596 | |
1da177e4 LT | 597 | /* Let other threads have a chance at the mount lock |
1da177e4 LT | 598 | * if we have looped many times without dropping the |
1da177e4 LT | 599 | * lock. |
1da177e4 LT | 600 | */ |
1da177e4 LT | 601 | if ((++preempt & XFS_PREEMPT_MASK) == 0) { |
1da177e4 LT | 602 | if (mount_locked) { |
1da177e4 LT | 603 | IPOINTER_INSERT(ip, mp); |
1da177e4 LT | 604 | } |
1da177e4 LT | 605 | } |
1da177e4 LT | 606 | |
1da177e4 LT | 607 | if (mount_locked == B_FALSE) { |
1da177e4 LT | 608 | XFS_MOUNT_ILOCK(mp); |
1da177e4 LT | 609 | mount_locked = B_TRUE; |
1da177e4 LT | 610 | IPOINTER_REMOVE(ip, mp); |
1da177e4 LT | 611 | continue; |
1da177e4 LT | 612 | } |
1da177e4 LT | 613 | |
1da177e4 LT | 614 | ASSERT(ipointer_in == B_FALSE); |
1da177e4 LT | 615 | ip = ip->i_mnext; |
1da177e4 LT | 616 | |
1da177e4 LT | 617 | } while (ip != mp->m_inodes); |
1da177e4 LT | 618 | |
1da177e4 LT | 619 | XFS_MOUNT_IUNLOCK(mp); |
1da177e4 LT | 620 | |
1da177e4 LT | 621 | ASSERT(ipointer_in == B_FALSE); |
1da177e4 LT | 622 | |
f0e2d93c | 623 | kmem_free(ipointer); |
1da177e4 LT | 624 | return XFS_ERROR(last_error); |
1da177e4 LT | 625 | } |
1da177e4 LT | 626 | |
1da177e4 LT | 627 | /* |
1da177e4 LT | 628 | * xfs sync routine for internal use |
1da177e4 LT | 629 | * |
b83bd138 | 630 | * This routine supports all of the flags defined for the generic vfs_sync |
1f9b3b64 | 631 | * interface as explained above under xfs_sync. |
1da177e4 LT | 632 | * |
1da177e4 LT | 633 | */ |
1da177e4 LT | 634 | int |
1da177e4 LT | 635 | xfs_syncsub( |
1da177e4 LT | 636 | xfs_mount_t *mp, |
1da177e4 LT | 637 | int flags, |
1da177e4 LT | 638 | int *bypassed) |
1da177e4 LT | 639 | { |
1da177e4 LT | 640 | int error = 0; |
1da177e4 LT | 641 | int last_error = 0; |
1da177e4 LT | 642 | uint log_flags = XFS_LOG_FORCE; |
1da177e4 LT | 643 | xfs_buf_t *bp; |
1da177e4 LT | 644 | xfs_buf_log_item_t *bip; |
1da177e4 LT | 645 | |
1da177e4 LT | 646 | /* |
1da177e4 LT | 647 | * Sync out the log. This ensures that the log is periodically |
1da177e4 LT | 648 | * flushed even if there is not enough activity to fill it up. |
1da177e4 LT | 649 | */ |
1da177e4 LT | 650 | if (flags & SYNC_WAIT) |
1da177e4 LT | 651 | log_flags |= XFS_LOG_SYNC; |
1da177e4 LT | 652 | |
1da177e4 LT | 653 | xfs_log_force(mp, (xfs_lsn_t)0, log_flags); |
1da177e4 LT | 654 | |
1da177e4 LT | 655 | if (flags & (SYNC_ATTR|SYNC_DELWRI)) { |
1da177e4 LT | 656 | if (flags & SYNC_BDFLUSH) |
1da177e4 LT | 657 | xfs_finish_reclaim_all(mp, 1); |
1da177e4 LT | 658 | else |
1f9b3b64 | 659 | error = xfs_sync_inodes(mp, flags, bypassed); |
1da177e4 LT | 660 | } |
1da177e4 LT | 661 | |
1da177e4 LT | 662 | /* |
1da177e4 LT | 663 | * Flushing out dirty data above probably generated more |
1da177e4 LT | 664 | * log activity, so if this isn't vfs_sync() then flush |
1da177e4 LT | 665 | * the log again. |
1da177e4 LT | 666 | */ |
1da177e4 LT | 667 | if (flags & SYNC_DELWRI) { |
1da177e4 LT | 668 | xfs_log_force(mp, (xfs_lsn_t)0, log_flags); |
1da177e4 LT | 669 | } |
1da177e4 LT | 670 | |
1da177e4 LT | 671 | if (flags & SYNC_FSDATA) { |
1da177e4 LT | 672 | /* |
1da177e4 LT | 673 | * If this is vfs_sync() then only sync the superblock |
1da177e4 LT | 674 | * if we can lock it without sleeping and it is not pinned. |
1da177e4 LT | 675 | */ |
1da177e4 LT | 676 | if (flags & SYNC_BDFLUSH) { |
1da177e4 LT | 677 | bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); |
1da177e4 LT | 678 | if (bp != NULL) { |
1da177e4 LT | 679 | bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); |
1da177e4 LT | 680 | if ((bip != NULL) && |
1da177e4 LT | 681 | xfs_buf_item_dirty(bip)) { |
1da177e4 LT | 682 | if (!(XFS_BUF_ISPINNED(bp))) { |
1da177e4 LT | 683 | XFS_BUF_ASYNC(bp); |
1da177e4 LT | 684 | error = xfs_bwrite(mp, bp); |
1da177e4 LT | 685 | } else { |
1da177e4 LT | 686 | xfs_buf_relse(bp); |
1da177e4 LT | 687 | } |
1da177e4 LT | 688 | } else { |
1da177e4 LT | 689 | xfs_buf_relse(bp); |
1da177e4 LT | 690 | } |
1da177e4 LT | 691 | } |
1da177e4 LT | 692 | } else { |
1da177e4 LT | 693 | bp = xfs_getsb(mp, 0); |
1da177e4 LT | 694 | /* |
1da177e4 LT | 695 | * If the buffer is pinned then push on the log so |
1da177e4 LT | 696 | * we won't get stuck waiting in the write for |
1da177e4 LT | 697 | * someone, maybe ourselves, to flush the log. |
1da177e4 LT | 698 | * Even though we just pushed the log above, we |
1da177e4 LT | 699 | * did not have the superblock buffer locked at |
1da177e4 LT | 700 | * that point so it can become pinned in between |
1da177e4 LT | 701 | * there and here. |
1da177e4 LT | 702 | */ |
1da177e4 LT | 703 | if (XFS_BUF_ISPINNED(bp)) |
1da177e4 LT | 704 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); |
1da177e4 LT | 705 | if (flags & SYNC_WAIT) |
1da177e4 LT | 706 | XFS_BUF_UNASYNC(bp); |
1da177e4 LT | 707 | else |
1da177e4 LT | 708 | XFS_BUF_ASYNC(bp); |
1da177e4 LT | 709 | error = xfs_bwrite(mp, bp); |
1da177e4 LT | 710 | } |
1da177e4 LT | 711 | if (error) { |
1da177e4 LT | 712 | last_error = error; |
1da177e4 LT | 713 | } |
1da177e4 LT | 714 | } |
1da177e4 LT | 715 | |
1da177e4 LT | 716 | /* |
1da177e4 LT | 717 | * Now check to see if the log needs a "dummy" transaction. |
1da177e4 LT | 718 | */ |
1da177e4 LT | 719 | if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { |
1da177e4 LT | 720 | xfs_trans_t *tp; |
1da177e4 LT | 721 | xfs_inode_t *ip; |
1da177e4 LT | 722 | |
1da177e4 LT | 723 | /* |
1da177e4 LT | 724 | * Put a dummy transaction in the log to tell |
1da177e4 LT | 725 | * recovery that all others are OK. |
1da177e4 LT | 726 | */ |
1da177e4 LT | 727 | tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); |
1da177e4 LT | 728 | if ((error = xfs_trans_reserve(tp, 0, |
1da177e4 LT | 729 | XFS_ICHANGE_LOG_RES(mp), |
1da177e4 LT | 730 | 0, 0, 0))) { |
1da177e4 LT | 731 | xfs_trans_cancel(tp, 0); |
1da177e4 LT | 732 | return error; |
1da177e4 LT | 733 | } |
1da177e4 LT | 734 | |
1da177e4 LT | 735 | ip = mp->m_rootip; |
1da177e4 LT | 736 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
1da177e4 LT | 737 | |
1da177e4 LT | 738 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
1da177e4 LT | 739 | xfs_trans_ihold(tp, ip); |
1da177e4 LT | 740 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
1c72bf90 | 741 | error = xfs_trans_commit(tp, 0); |
1da177e4 LT | 742 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
1da177e4 LT | 743 | xfs_log_force(mp, (xfs_lsn_t)0, log_flags); |
1da177e4 LT | 744 | } |
1da177e4 LT | 745 | |
1da177e4 LT | 746 | /* |
1da177e4 LT | 747 | * When shutting down, we need to ensure that the AIL is pushed |
1da177e4 LT | 748 | * to disk or the filesystem can appear corrupt from the PROM. |
1da177e4 LT | 749 | */ |
1da177e4 LT | 750 | if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { |
1da177e4 LT | 751 | XFS_bflush(mp->m_ddev_targp); |
1da177e4 LT | 752 | if (mp->m_rtdev_targp) { |
1da177e4 LT | 753 | XFS_bflush(mp->m_rtdev_targp); |
1da177e4 LT | 754 | } |
1da177e4 LT | 755 | } |
1da177e4 LT | 756 | |
1da177e4 LT | 757 | return XFS_ERROR(last_error); |
1da177e4 LT | 758 | } |
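
The flag documentation above `xfs_sync()` (file lines 200-241) describes how callers combine the `SYNC_*` flags. The sketch below is illustrative only: the two helper functions and the exact flag combinations are assumptions drawn from that comment, not calls copied from this tree's real call sites, and the code assumes the XFS headers already included at the top of the file.

```c
/*
 * Hypothetical callers, sketched from the xfs_sync() flag comment above.
 * The real callers (e.g. elsewhere in xfs_vfsops.c) may combine the
 * flags differently.
 */

/* vfs_sync()-style call: never sleep, just keep the log and superblock moving. */
STATIC int
example_periodic_sync(
	xfs_mount_t	*mp)
{
	return xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
}

/*
 * Quiesce-style call: push dirty pages (SYNC_DELWRI), make every flush
 * synchronous (SYNC_WAIT), and drain all data I/O including direct I/O
 * (SYNC_IOWAIT) so no modification can slip in after the call returns.
 */
STATIC int
example_quiesce_data(
	xfs_mount_t	*mp)
{
	return xfs_sync(mp, SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT);
}
```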
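
The `IPOINTER_INSERT`/`IPOINTER_REMOVE` macros in `xfs_sync_inodes()` implement a marker-record walk: before the mount lock is dropped mid-traversal, a private marker node is spliced in after the current inode, and the walk resumes from the marker once the lock is retaken; other walkers' markers are simply skipped. The standalone user-space sketch below shows the same pattern on a generic circular doubly linked list. Every name in it is hypothetical, a pthread mutex stands in for the XFS mount lock, and it assumes the node used as the loop sentinel stays on the list (XFS instead compares against `mp->m_inodes`, which `IPOINTER_REMOVE` keeps up to date).

```c
/* Marker-record list walk, modelled on IPOINTER_INSERT/IPOINTER_REMOVE above. */
#include <pthread.h>
#include <stddef.h>

struct node {
	struct node	*next, *prev;
	int		is_marker;	/* real elements keep this clear */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Splice a marker in after @pos so the walk can find its place again. */
static void marker_insert_after(struct node *pos, struct node *marker)
{
	marker->next = pos->next;
	marker->prev = pos;
	pos->next->prev = marker;
	pos->next = marker;
}

/* Unlink the marker and return the element the walk should resume from. */
static struct node *marker_remove(struct node *marker)
{
	struct node *next = marker->next;

	marker->prev->next = next;
	next->prev = marker->prev;
	return next;
}

/*
 * Visit every element of a circular list, doing the slow work with the
 * list lock dropped. Markers left by other walkers are skipped, mirroring
 * the "another sync thread marker" check in xfs_sync_inodes().
 */
void walk_with_marker(struct node *head, void (*slow_work)(struct node *))
{
	struct node marker = { .is_marker = 1 };
	struct node *n;

	pthread_mutex_lock(&list_lock);
	n = head;
	do {
		if (n->is_marker) {		/* skip other walkers' markers */
			n = n->next;
			continue;
		}
		marker_insert_after(n, &marker);
		pthread_mutex_unlock(&list_lock);

		slow_work(n);			/* may sleep; list may change */

		pthread_mutex_lock(&list_lock);
		n = marker_remove(&marker);
	} while (n != head);
	pthread_mutex_unlock(&list_lock);
}
```

The marker buys the walker the right to sleep without losing its place while the list changes underneath it; the cost is that every walker must tolerate (and skip) foreign markers, which is exactly what the `ip->i_mount == NULL` check in the loop above does.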