xfs: update i_size after unwritten conversion in dio completion
fs/xfs/xfs_aops.c
1 /*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 #include "xfs.h"
19 #include "xfs_shared.h"
20 #include "xfs_format.h"
21 #include "xfs_log_format.h"
22 #include "xfs_trans_resv.h"
23 #include "xfs_mount.h"
24 #include "xfs_inode.h"
25 #include "xfs_trans.h"
26 #include "xfs_inode_item.h"
27 #include "xfs_alloc.h"
28 #include "xfs_error.h"
29 #include "xfs_iomap.h"
30 #include "xfs_trace.h"
31 #include "xfs_bmap.h"
32 #include "xfs_bmap_util.h"
33 #include "xfs_bmap_btree.h"
34 #include "xfs_reflink.h"
35 #include <linux/gfp.h>
36 #include <linux/mpage.h>
37 #include <linux/pagevec.h>
38 #include <linux/writeback.h>
39
40 /*
41 * structure owned by writepages passed to individual writepage calls
42 */
43 struct xfs_writepage_ctx {
44 struct xfs_bmbt_irec imap;
45 bool imap_valid;
46 unsigned int io_type;
47 struct xfs_ioend *ioend;
48 sector_t last_block;
49 };
50
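/*
 * Report whether any buffer head attached to @page is still in delalloc or
 * unwritten state.
 */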
51 void
52 xfs_count_page_state(
53 struct page *page,
54 int *delalloc,
55 int *unwritten)
56 {
57 struct buffer_head *bh, *head;
58
59 *delalloc = *unwritten = 0;
60
61 bh = head = page_buffers(page);
62 do {
63 if (buffer_unwritten(bh))
64 (*unwritten) = 1;
65 else if (buffer_delay(bh))
66 (*delalloc) = 1;
67 } while ((bh = bh->b_this_page) != head);
68 }
69
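/*
 * Return the block device backing the inode's data: the realtime device for
 * realtime inodes, the data device for everything else.
 */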
70 struct block_device *
71 xfs_find_bdev_for_inode(
72 struct inode *inode)
73 {
74 struct xfs_inode *ip = XFS_I(inode);
75 struct xfs_mount *mp = ip->i_mount;
76
77 if (XFS_IS_REALTIME_INODE(ip))
78 return mp->m_rtdev_targp->bt_bdev;
79 else
80 return mp->m_ddev_targp->bt_bdev;
81 }
82
83 /*
84 * We're now finished for good with this page. Update the page state via the
85 * associated buffer_heads, paying attention to the start and end offsets that
86 * we need to process on the page.
87 *
88 * Note that we open code the action in end_buffer_async_write here so that we
89 * only have to iterate over the buffers attached to the page once. This is not
 90  * only more efficient, but also ensures that we only call end_page_writeback
91 * at the end of the iteration, and thus avoids the pitfall of having the page
92 * and buffers potentially freed after every call to end_buffer_async_write.
93 */
94 static void
95 xfs_finish_page_writeback(
96 struct inode *inode,
97 struct bio_vec *bvec,
98 int error)
99 {
100 struct buffer_head *head = page_buffers(bvec->bv_page), *bh = head;
101 bool busy = false;
102 unsigned int off = 0;
103 unsigned long flags;
104
105 ASSERT(bvec->bv_offset < PAGE_SIZE);
106 ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
107 ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
108 ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
109
110 local_irq_save(flags);
111 bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
112 do {
113 if (off >= bvec->bv_offset &&
114 off < bvec->bv_offset + bvec->bv_len) {
115 ASSERT(buffer_async_write(bh));
116 ASSERT(bh->b_end_io == NULL);
117
118 if (error) {
119 mark_buffer_write_io_error(bh);
120 clear_buffer_uptodate(bh);
121 SetPageError(bvec->bv_page);
122 } else {
123 set_buffer_uptodate(bh);
124 }
125 clear_buffer_async_write(bh);
126 unlock_buffer(bh);
127 } else if (buffer_async_write(bh)) {
128 ASSERT(buffer_locked(bh));
129 busy = true;
130 }
131 off += bh->b_size;
132 } while ((bh = bh->b_this_page) != head);
133 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
134 local_irq_restore(flags);
135
136 if (!busy)
137 end_page_writeback(bvec->bv_page);
138 }
139
140 /*
141 * We're now finished for good with this ioend structure. Update the page
142 * state, release holds on bios, and finally free up memory. Do not use the
143 * ioend after this.
144 */
145 STATIC void
146 xfs_destroy_ioend(
147 struct xfs_ioend *ioend,
148 int error)
149 {
150 struct inode *inode = ioend->io_inode;
151 struct bio *bio = &ioend->io_inline_bio;
152 struct bio *last = ioend->io_bio, *next;
153 u64 start = bio->bi_iter.bi_sector;
154 bool quiet = bio_flagged(bio, BIO_QUIET);
155
156 for (bio = &ioend->io_inline_bio; bio; bio = next) {
157 struct bio_vec *bvec;
158 int i;
159
160 /*
161 * For the last bio, bi_private points to the ioend, so we
162 * need to explicitly end the iteration here.
163 */
164 if (bio == last)
165 next = NULL;
166 else
167 next = bio->bi_private;
168
169 /* walk each page on bio, ending page IO on them */
170 bio_for_each_segment_all(bvec, bio, i)
171 xfs_finish_page_writeback(inode, bvec, error);
172
173 bio_put(bio);
174 }
175
176 if (unlikely(error && !quiet)) {
177 xfs_err_ratelimited(XFS_I(inode)->i_mount,
178 "writeback error on sector %llu", start);
179 }
180 }
181
182 /*
183 * Fast and loose check if this write could update the on-disk inode size.
184 */
185 static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
186 {
187 return ioend->io_offset + ioend->io_size >
188 XFS_I(ioend->io_inode)->i_d.di_size;
189 }
190
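/*
 * Allocate the transaction that I/O completion will use to update the on-disk
 * file size. Freeze protection and the PF_MEMALLOC_NOFS state are handed over
 * to the completion thread along with it.
 */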
191 STATIC int
192 xfs_setfilesize_trans_alloc(
193 struct xfs_ioend *ioend)
194 {
195 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
196 struct xfs_trans *tp;
197 int error;
198
199 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
200 if (error)
201 return error;
202
203 ioend->io_append_trans = tp;
204
205 /*
206 * We may pass freeze protection with a transaction. So tell lockdep
207 * we released it.
208 */
209 __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
210 /*
211 * We hand off the transaction to the completion thread now, so
212 * clear the flag here.
213 */
214 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
215 return 0;
216 }
217
218 /*
219 * Update on-disk file size now that data has been written to disk.
220 */
221 STATIC int
222 __xfs_setfilesize(
223 struct xfs_inode *ip,
224 struct xfs_trans *tp,
225 xfs_off_t offset,
226 size_t size)
227 {
228 xfs_fsize_t isize;
229
230 xfs_ilock(ip, XFS_ILOCK_EXCL);
231 isize = xfs_new_eof(ip, offset + size);
232 if (!isize) {
233 xfs_iunlock(ip, XFS_ILOCK_EXCL);
234 xfs_trans_cancel(tp);
235 return 0;
236 }
237
238 trace_xfs_setfilesize(ip, offset, size);
239
240 ip->i_d.di_size = isize;
241 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
242 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
243
244 return xfs_trans_commit(tp);
245 }
246
247 int
248 xfs_setfilesize(
249 struct xfs_inode *ip,
250 xfs_off_t offset,
251 size_t size)
252 {
253 struct xfs_mount *mp = ip->i_mount;
254 struct xfs_trans *tp;
255 int error;
256
257 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
258 if (error)
259 return error;
260
261 return __xfs_setfilesize(ip, tp, offset, size);
262 }
263
264 STATIC int
265 xfs_setfilesize_ioend(
266 struct xfs_ioend *ioend,
267 int error)
268 {
269 struct xfs_inode *ip = XFS_I(ioend->io_inode);
270 struct xfs_trans *tp = ioend->io_append_trans;
271
272 /*
273 * The transaction may have been allocated in the I/O submission thread,
274 * thus we need to mark ourselves as being in a transaction manually.
275 * Similarly for freeze protection.
276 */
277 current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
278 __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
279
280 /* we abort the update if there was an IO error */
281 if (error) {
282 xfs_trans_cancel(tp);
283 return error;
284 }
285
286 return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
287 }
288
289 /*
290 * IO write completion.
291 */
292 STATIC void
293 xfs_end_io(
294 struct work_struct *work)
295 {
296 struct xfs_ioend *ioend =
297 container_of(work, struct xfs_ioend, io_work);
298 struct xfs_inode *ip = XFS_I(ioend->io_inode);
299 xfs_off_t offset = ioend->io_offset;
300 size_t size = ioend->io_size;
301 int error;
302
303 /*
304          * Just clean up the in-memory structures if the fs has been shut down.
305 */
306 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
307 error = -EIO;
308 goto done;
309 }
310
311 /*
312 * Clean up any COW blocks on an I/O error.
313 */
314 error = blk_status_to_errno(ioend->io_bio->bi_status);
315 if (unlikely(error)) {
316 switch (ioend->io_type) {
317 case XFS_IO_COW:
318 xfs_reflink_cancel_cow_range(ip, offset, size, true);
319 break;
320 }
321
322 goto done;
323 }
324
325 /*
326 * Success: commit the COW or unwritten blocks if needed.
327 */
328 switch (ioend->io_type) {
329 case XFS_IO_COW:
330 error = xfs_reflink_end_cow(ip, offset, size);
331 break;
332 case XFS_IO_UNWRITTEN:
333 /* writeback should never update isize */
334 error = xfs_iomap_write_unwritten(ip, offset, size, false);
335 break;
336 default:
337 ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
338 break;
339 }
340
341 done:
342 if (ioend->io_append_trans)
343 error = xfs_setfilesize_ioend(ioend, error);
344 xfs_destroy_ioend(ioend, error);
345 }
346
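/*
 * Bio completion handler: punt ioends that need unwritten extent or COW
 * conversion, or an on-disk size update, to a workqueue; everything else is
 * torn down directly here.
 */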
347 STATIC void
348 xfs_end_bio(
349 struct bio *bio)
350 {
351 struct xfs_ioend *ioend = bio->bi_private;
352 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
353
354 if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
355 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
356 else if (ioend->io_append_trans)
357 queue_work(mp->m_data_workqueue, &ioend->io_work);
358 else
359 xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
360 }
361
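/*
 * Look up the data fork extent mapping @offset for writeback. Delayed
 * allocations are converted to real extents here; COW writeback is mapped
 * elsewhere and must never reach this function.
 */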
362 STATIC int
363 xfs_map_blocks(
364 struct inode *inode,
365 loff_t offset,
366 struct xfs_bmbt_irec *imap,
367 int type)
368 {
369 struct xfs_inode *ip = XFS_I(inode);
370 struct xfs_mount *mp = ip->i_mount;
371 ssize_t count = i_blocksize(inode);
372 xfs_fileoff_t offset_fsb, end_fsb;
373 int error = 0;
374 int bmapi_flags = XFS_BMAPI_ENTIRE;
375 int nimaps = 1;
376
377 if (XFS_FORCED_SHUTDOWN(mp))
378 return -EIO;
379
380 ASSERT(type != XFS_IO_COW);
381 if (type == XFS_IO_UNWRITTEN)
382 bmapi_flags |= XFS_BMAPI_IGSTATE;
383
384 xfs_ilock(ip, XFS_ILOCK_SHARED);
385 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
386 (ip->i_df.if_flags & XFS_IFEXTENTS));
387 ASSERT(offset <= mp->m_super->s_maxbytes);
388
389 if (offset + count > mp->m_super->s_maxbytes)
390 count = mp->m_super->s_maxbytes - offset;
391 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
392 offset_fsb = XFS_B_TO_FSBT(mp, offset);
393 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
394 imap, &nimaps, bmapi_flags);
395 /*
396 * Truncate an overwrite extent if there's a pending CoW
397 * reservation before the end of this extent. This forces us
398 * to come back to writepage to take care of the CoW.
399 */
400 if (nimaps && type == XFS_IO_OVERWRITE)
401 xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
402 xfs_iunlock(ip, XFS_ILOCK_SHARED);
403
404 if (error)
405 return error;
406
407 if (type == XFS_IO_DELALLOC &&
408 (!nimaps || isnullstartblock(imap->br_startblock))) {
409 error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
410 imap);
411 if (!error)
412 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
413 return error;
414 }
415
416 #ifdef DEBUG
417 if (type == XFS_IO_UNWRITTEN) {
418 ASSERT(nimaps);
419 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
420 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
421 }
422 #endif
423 if (nimaps)
424 trace_xfs_map_blocks_found(ip, offset, count, type, imap);
425 return 0;
426 }
427
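/*
 * Does the cached extent mapping cover the block containing @offset?
 */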
428 STATIC bool
429 xfs_imap_valid(
430 struct inode *inode,
431 struct xfs_bmbt_irec *imap,
432 xfs_off_t offset)
433 {
434 offset >>= inode->i_blkbits;
435
436 return offset >= imap->br_startoff &&
437 offset < imap->br_startoff + imap->br_blockcount;
438 }
439
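/*
 * Mark a mapped, locked buffer head as under asynchronous write and clear its
 * dirty state.
 */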
440 STATIC void
441 xfs_start_buffer_writeback(
442 struct buffer_head *bh)
443 {
444 ASSERT(buffer_mapped(bh));
445 ASSERT(buffer_locked(bh));
446 ASSERT(!buffer_delay(bh));
447 ASSERT(!buffer_unwritten(bh));
448
449 bh->b_end_io = NULL;
450 set_buffer_async_write(bh);
451 set_buffer_uptodate(bh);
452 clear_buffer_dirty(bh);
453 }
454
455 STATIC void
456 xfs_start_page_writeback(
457 struct page *page,
458 int clear_dirty)
459 {
460 ASSERT(PageLocked(page));
461 ASSERT(!PageWriteback(page));
462
463 /*
464 * if the page was not fully cleaned, we need to ensure that the higher
465 * layers come back to it correctly. That means we need to keep the page
466 * dirty, and for WB_SYNC_ALL writeback we need to ensure the
467 * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
468 * write this page in this writeback sweep will be made.
469 */
470 if (clear_dirty) {
471 clear_page_dirty_for_io(page);
472 set_page_writeback(page);
473 } else
474 set_page_writeback_keepwrite(page);
475
476 unlock_page(page);
477 }
478
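/* Add the page range backing @bh to @bio; returns the number of bytes added. */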
479 static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
480 {
481 return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
482 }
483
484 /*
485 * Submit the bio for an ioend. We are passed an ioend with a bio attached to
486 * it, and we submit that bio. The ioend may be used for multiple bio
487 * submissions, so we only want to allocate an append transaction for the ioend
488 * once. In the case of multiple bio submission, each bio will take an IO
489 * reference to the ioend to ensure that the ioend completion is only done once
490 * all bios have been submitted and the ioend is really done.
491 *
492  * If @status is non-zero, it means that we have a situation where some part of
493  * the submission process has failed after we have marked pages for writeback
494 * and unlocked them. In this situation, we need to fail the bio and ioend
495 * rather than submit it to IO. This typically only happens on a filesystem
496 * shutdown.
497 */
498 STATIC int
499 xfs_submit_ioend(
500 struct writeback_control *wbc,
501 struct xfs_ioend *ioend,
502 int status)
503 {
504 /* Convert CoW extents to regular */
505 if (!status && ioend->io_type == XFS_IO_COW) {
506 status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
507 ioend->io_offset, ioend->io_size);
508 }
509
510 /* Reserve log space if we might write beyond the on-disk inode size. */
511 if (!status &&
512 ioend->io_type != XFS_IO_UNWRITTEN &&
513 xfs_ioend_is_append(ioend) &&
514 !ioend->io_append_trans)
515 status = xfs_setfilesize_trans_alloc(ioend);
516
517 ioend->io_bio->bi_private = ioend;
518 ioend->io_bio->bi_end_io = xfs_end_bio;
519 ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
520
521 /*
522 * If we are failing the IO now, just mark the ioend with an
523 * error and finish it. This will run IO completion immediately
524 * as there is only one reference to the ioend at this point in
525 * time.
526 */
527 if (status) {
528 ioend->io_bio->bi_status = errno_to_blk_status(status);
529 bio_endio(ioend->io_bio);
530 return status;
531 }
532
533 ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
534 submit_bio(ioend->io_bio);
535 return 0;
536 }
537
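/*
 * Point a freshly allocated bio at the disk sector and block device that
 * back @bh.
 */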
538 static void
539 xfs_init_bio_from_bh(
540 struct bio *bio,
541 struct buffer_head *bh)
542 {
543 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
544 bio->bi_bdev = bh->b_bdev;
545 }
546
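/*
 * Allocate a new ioend starting at @offset. The ioend is embedded in its
 * first bio, whose target sector and device come from @bh.
 */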
547 static struct xfs_ioend *
548 xfs_alloc_ioend(
549 struct inode *inode,
550 unsigned int type,
551 xfs_off_t offset,
552 struct buffer_head *bh)
553 {
554 struct xfs_ioend *ioend;
555 struct bio *bio;
556
557 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
558 xfs_init_bio_from_bh(bio, bh);
559
560 ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
561 INIT_LIST_HEAD(&ioend->io_list);
562 ioend->io_type = type;
563 ioend->io_inode = inode;
564 ioend->io_size = 0;
565 ioend->io_offset = offset;
566 INIT_WORK(&ioend->io_work, xfs_end_io);
567 ioend->io_append_trans = NULL;
568 ioend->io_bio = bio;
569 return ioend;
570 }
571
572 /*
573 * Allocate a new bio, and chain the old bio to the new one.
574 *
575  * Note that we have to perform the chaining in this unintuitive order
576 * so that the bi_private linkage is set up in the right direction for the
577 * traversal in xfs_destroy_ioend().
578 */
579 static void
580 xfs_chain_bio(
581 struct xfs_ioend *ioend,
582 struct writeback_control *wbc,
583 struct buffer_head *bh)
584 {
585 struct bio *new;
586
587 new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
588 xfs_init_bio_from_bh(new, bh);
589
590 bio_chain(ioend->io_bio, new);
591 bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
592 ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
593 ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
594 submit_bio(ioend->io_bio);
595 ioend->io_bio = new;
596 }
597
598 /*
599 * Test to see if we've been building up a completion structure for
600 * earlier buffers -- if so, we try to append to this ioend if we
601 * can, otherwise we finish off any current ioend and start another.
602  * Any ioend we finish off is queued on @iolist so that the caller can
603  * submit it once it has finished processing the dirty page.
604 */
605 STATIC void
606 xfs_add_to_ioend(
607 struct inode *inode,
608 struct buffer_head *bh,
609 xfs_off_t offset,
610 struct xfs_writepage_ctx *wpc,
611 struct writeback_control *wbc,
612 struct list_head *iolist)
613 {
614 if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
615 bh->b_blocknr != wpc->last_block + 1 ||
616 offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
617 if (wpc->ioend)
618 list_add(&wpc->ioend->io_list, iolist);
619 wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
620 }
621
622 /*
623 * If the buffer doesn't fit into the bio we need to allocate a new
624 * one. This shouldn't happen more than once for a given buffer.
625 */
626 while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
627 xfs_chain_bio(wpc->ioend, wbc, bh);
628
629 wpc->ioend->io_size += bh->b_size;
630 wpc->last_block = bh->b_blocknr;
631 xfs_start_buffer_writeback(bh);
632 }
633
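/*
 * Convert a file offset into the on-disk block number described by @imap and
 * store it in the buffer head.
 */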
634 STATIC void
635 xfs_map_buffer(
636 struct inode *inode,
637 struct buffer_head *bh,
638 struct xfs_bmbt_irec *imap,
639 xfs_off_t offset)
640 {
641 sector_t bn;
642 struct xfs_mount *m = XFS_I(inode)->i_mount;
643 xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
644 xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
645
646 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
647 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
648
649 bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
650 ((offset - iomap_offset) >> inode->i_blkbits);
651
652 ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
653
654 bh->b_blocknr = bn;
655 set_buffer_mapped(bh);
656 }
657
658 STATIC void
659 xfs_map_at_offset(
660 struct inode *inode,
661 struct buffer_head *bh,
662 struct xfs_bmbt_irec *imap,
663 xfs_off_t offset)
664 {
665 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
666 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
667
668 xfs_map_buffer(inode, bh, imap, offset);
669 set_buffer_mapped(bh);
670 clear_buffer_delay(bh);
671 clear_buffer_unwritten(bh);
672 }
673
674 /*
675 * Test if a given page contains at least one buffer of a given @type.
676 * If @check_all_buffers is true, then we walk all the buffers in the page to
677  * try to find one of the type passed in. If it is not set, then we only
678  * check the first buffer on the page for a match.
679 */
680 STATIC bool
681 xfs_check_page_type(
682 struct page *page,
683 unsigned int type,
684 bool check_all_buffers)
685 {
686 struct buffer_head *bh;
687 struct buffer_head *head;
688
689 if (PageWriteback(page))
690 return false;
691 if (!page->mapping)
692 return false;
693 if (!page_has_buffers(page))
694 return false;
695
696 bh = head = page_buffers(page);
697 do {
698 if (buffer_unwritten(bh)) {
699 if (type == XFS_IO_UNWRITTEN)
700 return true;
701 } else if (buffer_delay(bh)) {
702 if (type == XFS_IO_DELALLOC)
703 return true;
704 } else if (buffer_dirty(bh) && buffer_mapped(bh)) {
705 if (type == XFS_IO_OVERWRITE)
706 return true;
707 }
708
709 /* If we are only checking the first buffer, we are done now. */
710 if (!check_all_buffers)
711 break;
712 } while ((bh = bh->b_this_page) != head);
713
714 return false;
715 }
716
717 STATIC void
718 xfs_vm_invalidatepage(
719 struct page *page,
720 unsigned int offset,
721 unsigned int length)
722 {
723 trace_xfs_invalidatepage(page->mapping->host, page, offset,
724 length);
725 block_invalidatepage(page, offset, length);
726 }
727
728 /*
729 * If the page has delalloc buffers on it, we need to punch them out before we
730 * invalidate the page. If we don't, we leave a stale delalloc mapping on the
731 * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
732 * is done on that same region - the delalloc extent is returned when none is
733 * supposed to be there.
734 *
735 * We prevent this by truncating away the delalloc regions on the page before
736 * invalidating it. Because they are delalloc, we can do this without needing a
737 * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
738 * truncation without a transaction as there is no space left for block
739  * reservation (typically why we see an ENOSPC in writeback).
740 *
741 * This is not a performance critical path, so for now just do the punching a
742 * buffer head at a time.
743 */
744 STATIC void
745 xfs_aops_discard_page(
746 struct page *page)
747 {
748 struct inode *inode = page->mapping->host;
749 struct xfs_inode *ip = XFS_I(inode);
750 struct buffer_head *bh, *head;
751 loff_t offset = page_offset(page);
752
753 if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
754 goto out_invalidate;
755
756 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
757 goto out_invalidate;
758
759 xfs_alert(ip->i_mount,
760 "page discard on page %p, inode 0x%llx, offset %llu.",
761 page, ip->i_ino, offset);
762
763 xfs_ilock(ip, XFS_ILOCK_EXCL);
764 bh = head = page_buffers(page);
765 do {
766 int error;
767 xfs_fileoff_t start_fsb;
768
769 if (!buffer_delay(bh))
770 goto next_buffer;
771
772 start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
773 error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
774 if (error) {
775 /* something screwed, just bail */
776 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
777 xfs_alert(ip->i_mount,
778 "page discard unable to remove delalloc mapping.");
779 }
780 break;
781 }
782 next_buffer:
783 offset += i_blocksize(inode);
784
785 } while ((bh = bh->b_this_page) != head);
786
787 xfs_iunlock(ip, XFS_ILOCK_EXCL);
788 out_invalidate:
789 xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
790 return;
791 }
792
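/*
 * Check whether writeback at @offset has to use the COW fork and, if so, make
 * sure the COW mapping is backed by real blocks before we add the buffer to
 * an ioend.
 */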
793 static int
794 xfs_map_cow(
795 struct xfs_writepage_ctx *wpc,
796 struct inode *inode,
797 loff_t offset,
798 unsigned int *new_type)
799 {
800 struct xfs_inode *ip = XFS_I(inode);
801 struct xfs_bmbt_irec imap;
802 bool is_cow = false;
803 int error;
804
805 /*
806 * If we already have a valid COW mapping keep using it.
807 */
808 if (wpc->io_type == XFS_IO_COW) {
809 wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
810 if (wpc->imap_valid) {
811 *new_type = XFS_IO_COW;
812 return 0;
813 }
814 }
815
816 /*
817 * Else we need to check if there is a COW mapping at this offset.
818 */
819 xfs_ilock(ip, XFS_ILOCK_SHARED);
820 is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
821 xfs_iunlock(ip, XFS_ILOCK_SHARED);
822
823 if (!is_cow)
824 return 0;
825
826 /*
827 * And if the COW mapping has a delayed extent here we need to
828 * allocate real space for it now.
829 */
830 if (isnullstartblock(imap.br_startblock)) {
831 error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
832 &imap);
833 if (error)
834 return error;
835 }
836
837 wpc->io_type = *new_type = XFS_IO_COW;
838 wpc->imap_valid = true;
839 wpc->imap = imap;
840 return 0;
841 }
842
843 /*
844 * We implement an immediate ioend submission policy here to avoid needing to
845 * chain multiple ioends and hence nest mempool allocations which can violate
846 * forward progress guarantees we need to provide. The current ioend we are
847 * adding buffers to is cached on the writepage context, and if the new buffer
848 * does not append to the cached ioend it will create a new ioend and cache that
849 * instead.
850 *
851 * If a new ioend is created and cached, the old ioend is returned and queued
852 * locally for submission once the entire page is processed or an error has been
853 * detected. While ioends are submitted immediately after they are completed,
854 * batching optimisations are provided by higher level block plugging.
855 *
856 * At the end of a writeback pass, there will be a cached ioend remaining on the
857 * writepage context that the caller will need to submit.
858 */
859 static int
860 xfs_writepage_map(
861 struct xfs_writepage_ctx *wpc,
862 struct writeback_control *wbc,
863 struct inode *inode,
864 struct page *page,
865 loff_t offset,
866 uint64_t end_offset)
867 {
868 LIST_HEAD(submit_list);
869 struct xfs_ioend *ioend, *next;
870 struct buffer_head *bh, *head;
871 ssize_t len = i_blocksize(inode);
872 int error = 0;
873 int count = 0;
874 int uptodate = 1;
875 unsigned int new_type;
876
877 bh = head = page_buffers(page);
878 offset = page_offset(page);
879 do {
880 if (offset >= end_offset)
881 break;
882 if (!buffer_uptodate(bh))
883 uptodate = 0;
884
885 /*
886 * set_page_dirty dirties all buffers in a page, independent
887 * of their state. The dirty state however is entirely
888 * meaningless for holes (!mapped && uptodate), so skip
889 * buffers covering holes here.
890 */
891 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
892 wpc->imap_valid = false;
893 continue;
894 }
895
896 if (buffer_unwritten(bh))
897 new_type = XFS_IO_UNWRITTEN;
898 else if (buffer_delay(bh))
899 new_type = XFS_IO_DELALLOC;
900 else if (buffer_uptodate(bh))
901 new_type = XFS_IO_OVERWRITE;
902 else {
903 if (PageUptodate(page))
904 ASSERT(buffer_mapped(bh));
905 /*
906 * This buffer is not uptodate and will not be
907 * written to disk. Ensure that we will put any
908 * subsequent writeable buffers into a new
909 * ioend.
910 */
911 wpc->imap_valid = false;
912 continue;
913 }
914
915 if (xfs_is_reflink_inode(XFS_I(inode))) {
916 error = xfs_map_cow(wpc, inode, offset, &new_type);
917 if (error)
918 goto out;
919 }
920
921 if (wpc->io_type != new_type) {
922 wpc->io_type = new_type;
923 wpc->imap_valid = false;
924 }
925
926 if (wpc->imap_valid)
927 wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
928 offset);
929 if (!wpc->imap_valid) {
930 error = xfs_map_blocks(inode, offset, &wpc->imap,
931 wpc->io_type);
932 if (error)
933 goto out;
934 wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
935 offset);
936 }
937 if (wpc->imap_valid) {
938 lock_buffer(bh);
939 if (wpc->io_type != XFS_IO_OVERWRITE)
940 xfs_map_at_offset(inode, bh, &wpc->imap, offset);
941 xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
942 count++;
943 }
944
945 } while (offset += len, ((bh = bh->b_this_page) != head));
946
947 if (uptodate && bh == head)
948 SetPageUptodate(page);
949
950 ASSERT(wpc->ioend || list_empty(&submit_list));
951
952 out:
953 /*
954 * On error, we have to fail the ioend here because we have locked
955 * buffers in the ioend. If we don't do this, we'll deadlock
956 * invalidating the page as that tries to lock the buffers on the page.
957 * Also, because we may have set pages under writeback, we have to make
958 * sure we run IO completion to mark the error state of the IO
959 * appropriately, so we can't cancel the ioend directly here. That means
960 * we have to mark this page as under writeback if we included any
961 * buffers from it in the ioend chain so that completion treats it
962 * correctly.
963 *
964          * If we didn't include the page in the ioend, then on error we can
965          * simply discard and unlock it as there are no other users of the page
966          * or its buffers right now. The caller will still need to trigger
967 * submission of outstanding ioends on the writepage context so they are
968 * treated correctly on error.
969 */
970 if (count) {
971 xfs_start_page_writeback(page, !error);
972
973 /*
974 * Preserve the original error if there was one, otherwise catch
975 * submission errors here and propagate into subsequent ioend
976 * submissions.
977 */
978 list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
979 int error2;
980
981 list_del_init(&ioend->io_list);
982 error2 = xfs_submit_ioend(wbc, ioend, error);
983 if (error2 && !error)
984 error = error2;
985 }
986 } else if (error) {
987 xfs_aops_discard_page(page);
988 ClearPageUptodate(page);
989 unlock_page(page);
990 } else {
991 /*
992 * We can end up here with no error and nothing to write if we
993 * race with a partial page truncate on a sub-page block sized
994 * filesystem. In that case we need to mark the page clean.
995 */
996 xfs_start_page_writeback(page, 1);
997 end_page_writeback(page);
998 }
999
1000 mapping_set_error(page->mapping, error);
1001 return error;
1002 }
1003
1004 /*
1005 * Write out a dirty page.
1006 *
1007 * For delalloc space on the page we need to allocate space and flush it.
1008 * For unwritten space on the page we need to start the conversion to
1009 * regular allocated space.
1010 * For any other dirty buffer heads on the page we should flush them.
1011 */
1012 STATIC int
1013 xfs_do_writepage(
1014 struct page *page,
1015 struct writeback_control *wbc,
1016 void *data)
1017 {
1018 struct xfs_writepage_ctx *wpc = data;
1019 struct inode *inode = page->mapping->host;
1020 loff_t offset;
1021 uint64_t end_offset;
1022 pgoff_t end_index;
1023
1024 trace_xfs_writepage(inode, page, 0, 0);
1025
1026 ASSERT(page_has_buffers(page));
1027
1028 /*
1029 * Refuse to write the page out if we are called from reclaim context.
1030 *
1031 * This avoids stack overflows when called from deeply used stacks in
1032 * random callers for direct reclaim or memcg reclaim. We explicitly
1033 * allow reclaim from kswapd as the stack usage there is relatively low.
1034 *
1035 * This should never happen except in the case of a VM regression so
1036 * warn about it.
1037 */
1038 if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
1039 PF_MEMALLOC))
1040 goto redirty;
1041
1042 /*
1043 * Given that we do not allow direct reclaim to call us, we should
1044 * never be called while in a filesystem transaction.
1045 */
1046 if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
1047 goto redirty;
1048
1049 /*
1050 * Is this page beyond the end of the file?
1051 *
1052 * The page index is less than the end_index, adjust the end_offset
1053 * to the highest offset that this page should represent.
1054 * -----------------------------------------------------
1055 * | file mapping | <EOF> |
1056 * -----------------------------------------------------
1057 * | Page ... | Page N-2 | Page N-1 | Page N | |
1058 * ^--------------------------------^----------|--------
1059 * | desired writeback range | see else |
1060 * ---------------------------------^------------------|
1061 */
1062 offset = i_size_read(inode);
1063 end_index = offset >> PAGE_SHIFT;
1064 if (page->index < end_index)
1065 end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
1066 else {
1067 /*
1068 * Check whether the page to write out is beyond or straddles
1069 * i_size or not.
1070 * -------------------------------------------------------
1071 * | file mapping | <EOF> |
1072 * -------------------------------------------------------
1073 * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
1074 * ^--------------------------------^-----------|---------
1075 * | | Straddles |
1076 * ---------------------------------^-----------|--------|
1077 */
1078 unsigned offset_into_page = offset & (PAGE_SIZE - 1);
1079
1080 /*
1081 * Skip the page if it is fully outside i_size, e.g. due to a
1082 * truncate operation that is in progress. We must redirty the
1083 * page so that reclaim stops reclaiming it. Otherwise
1084 * xfs_vm_releasepage() is called on it and gets confused.
1085 *
1086                 * Note that end_index is an unsigned long; it would overflow
1087                 * if the given offset is greater than 16TB on a 32-bit system,
1088                 * so if we checked whether the page is fully outside i_size
1089                 * via "if (page->index >= end_index + 1)", "end_index + 1"
1090                 * would evaluate to 0. The page would then be redirtied
1091                 * and written out repeatedly, resulting in an infinite loop;
1092                 * the user program performing this operation would hang.
1093                 * Instead, we verify this situation by checking whether the
1094                 * page to write is totally beyond the i_size or whether its
1095                 * offset is just equal to the EOF.
1096 */
1097 if (page->index > end_index ||
1098 (page->index == end_index && offset_into_page == 0))
1099 goto redirty;
1100
1101 /*
1102 * The page straddles i_size. It must be zeroed out on each
1103 * and every writepage invocation because it may be mmapped.
1104 * "A file is mapped in multiples of the page size. For a file
1105 * that is not a multiple of the page size, the remaining
1106 * memory is zeroed when mapped, and writes to that region are
1107 * not written out to the file."
1108 */
1109 zero_user_segment(page, offset_into_page, PAGE_SIZE);
1110
1111 /* Adjust the end_offset to the end of file */
1112 end_offset = offset;
1113 }
1114
1115 return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
1116
1117 redirty:
1118 redirty_page_for_writepage(wbc, page);
1119 unlock_page(page);
1120 return 0;
1121 }
1122
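/*
 * ->writepage entry point: write out a single dirty page and submit any ioend
 * the writepage context built up.
 */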
1123 STATIC int
1124 xfs_vm_writepage(
1125 struct page *page,
1126 struct writeback_control *wbc)
1127 {
1128 struct xfs_writepage_ctx wpc = {
1129 .io_type = XFS_IO_INVALID,
1130 };
1131 int ret;
1132
1133 ret = xfs_do_writepage(page, wbc, &wpc);
1134 if (wpc.ioend)
1135 ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1136 return ret;
1137 }
1138
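/*
 * ->writepages entry point: DAX mappings are flushed through the DAX code;
 * everything else walks the dirty pages through xfs_do_writepage() with a
 * shared writepage context, then submits the final cached ioend.
 */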
1139 STATIC int
1140 xfs_vm_writepages(
1141 struct address_space *mapping,
1142 struct writeback_control *wbc)
1143 {
1144 struct xfs_writepage_ctx wpc = {
1145 .io_type = XFS_IO_INVALID,
1146 };
1147 int ret;
1148
1149 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
1150 if (dax_mapping(mapping))
1151 return dax_writeback_mapping_range(mapping,
1152 xfs_find_bdev_for_inode(mapping->host), wbc);
1153
1154 ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
1155 if (wpc.ioend)
1156 ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1157 return ret;
1158 }
1159
1160 /*
1161 * Called to move a page into cleanable state - and from there
1162 * to be released. The page should already be clean. We always
1163 * have buffer heads in this call.
1164 *
1165 * Returns 1 if the page is ok to release, 0 otherwise.
1166 */
1167 STATIC int
1168 xfs_vm_releasepage(
1169 struct page *page,
1170 gfp_t gfp_mask)
1171 {
1172 int delalloc, unwritten;
1173
1174 trace_xfs_releasepage(page->mapping->host, page, 0, 0);
1175
1176 /*
1177 * mm accommodates an old ext3 case where clean pages might not have had
1178 * the dirty bit cleared. Thus, it can send actual dirty pages to
1179 * ->releasepage() via shrink_active_list(). Conversely,
1180 * block_invalidatepage() can send pages that are still marked dirty
1181 * but otherwise have invalidated buffers.
1182 *
1183 * We want to release the latter to avoid unnecessary buildup of the
1184 * LRU, skip the former and warn if we've left any lingering
1185 * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
1186 * or unwritten buffers and warn if the page is not dirty. Otherwise
1187 * try to release the buffers.
1188 */
1189 xfs_count_page_state(page, &delalloc, &unwritten);
1190
1191 if (delalloc) {
1192 WARN_ON_ONCE(!PageDirty(page));
1193 return 0;
1194 }
1195 if (unwritten) {
1196 WARN_ON_ONCE(!PageDirty(page));
1197 return 0;
1198 }
1199
1200 return try_to_free_buffers(page);
1201 }
1202
1203 /*
1204  * If this is O_DIRECT or the mpage code calling, tell them how large the mapping
1205 * is, so that we can avoid repeated get_blocks calls.
1206 *
1207 * If the mapping spans EOF, then we have to break the mapping up as the mapping
1208 * for blocks beyond EOF must be marked new so that sub block regions can be
1209 * correctly zeroed. We can't do this for mappings within EOF unless the mapping
1210 * was just allocated or is unwritten, otherwise the callers would overwrite
1211 * existing data with zeros. Hence we have to split the mapping into a range up
1212 * to and including EOF, and a second mapping for beyond EOF.
1213 */
1214 static void
1215 xfs_map_trim_size(
1216 struct inode *inode,
1217 sector_t iblock,
1218 struct buffer_head *bh_result,
1219 struct xfs_bmbt_irec *imap,
1220 xfs_off_t offset,
1221 ssize_t size)
1222 {
1223 xfs_off_t mapping_size;
1224
1225 mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
1226 mapping_size <<= inode->i_blkbits;
1227
1228 ASSERT(mapping_size > 0);
1229 if (mapping_size > size)
1230 mapping_size = size;
1231 if (offset < i_size_read(inode) &&
1232 offset + mapping_size >= i_size_read(inode)) {
1233 /* limit mapping to block that spans EOF */
1234 mapping_size = roundup_64(i_size_read(inode) - offset,
1235 i_blocksize(inode));
1236 }
1237 if (mapping_size > LONG_MAX)
1238 mapping_size = LONG_MAX;
1239
1240 bh_result->b_size = mapping_size;
1241 }
1242
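/*
 * Read-only get_blocks callback used by the read and bmap paths. We never
 * allocate here, so @create must be zero.
 */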
1243 static int
1244 xfs_get_blocks(
1245 struct inode *inode,
1246 sector_t iblock,
1247 struct buffer_head *bh_result,
1248 int create)
1249 {
1250 struct xfs_inode *ip = XFS_I(inode);
1251 struct xfs_mount *mp = ip->i_mount;
1252 xfs_fileoff_t offset_fsb, end_fsb;
1253 int error = 0;
1254 int lockmode = 0;
1255 struct xfs_bmbt_irec imap;
1256 int nimaps = 1;
1257 xfs_off_t offset;
1258 ssize_t size;
1259
1260 BUG_ON(create);
1261
1262 if (XFS_FORCED_SHUTDOWN(mp))
1263 return -EIO;
1264
1265 offset = (xfs_off_t)iblock << inode->i_blkbits;
1266 ASSERT(bh_result->b_size >= i_blocksize(inode));
1267 size = bh_result->b_size;
1268
1269 if (offset >= i_size_read(inode))
1270 return 0;
1271
1272 /*
1273 * Direct I/O is usually done on preallocated files, so try getting
1274 * a block mapping without an exclusive lock first.
1275 */
1276 lockmode = xfs_ilock_data_map_shared(ip);
1277
1278 ASSERT(offset <= mp->m_super->s_maxbytes);
1279 if (offset + size > mp->m_super->s_maxbytes)
1280 size = mp->m_super->s_maxbytes - offset;
1281 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1282 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1283
1284 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
1285 &imap, &nimaps, XFS_BMAPI_ENTIRE);
1286 if (error)
1287 goto out_unlock;
1288
1289 if (nimaps) {
1290 trace_xfs_get_blocks_found(ip, offset, size,
1291 imap.br_state == XFS_EXT_UNWRITTEN ?
1292 XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
1293 xfs_iunlock(ip, lockmode);
1294 } else {
1295 trace_xfs_get_blocks_notfound(ip, offset, size);
1296 goto out_unlock;
1297 }
1298
1299 /* trim mapping down to size requested */
1300 xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
1301
1302 /*
1303 * For unwritten extents do not report a disk address in the buffered
1304 * read case (treat as if we're reading into a hole).
1305 */
1306 if (xfs_bmap_is_real_extent(&imap))
1307 xfs_map_buffer(inode, bh_result, &imap, offset);
1308
1309 /*
1310         * If this is a realtime file, data may be on a different device
1311         * to that pointed to from the buffer_head b_bdev currently.
1312 */
1313 bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1314 return 0;
1315
1316 out_unlock:
1317 xfs_iunlock(ip, lockmode);
1318 return error;
1319 }
1320
1321 STATIC ssize_t
1322 xfs_vm_direct_IO(
1323 struct kiocb *iocb,
1324 struct iov_iter *iter)
1325 {
1326 /*
1327 * We just need the method present so that open/fcntl allow direct I/O.
1328 */
1329 return -EINVAL;
1330 }
1331
1332 STATIC sector_t
1333 xfs_vm_bmap(
1334 struct address_space *mapping,
1335 sector_t block)
1336 {
1337 struct inode *inode = (struct inode *)mapping->host;
1338 struct xfs_inode *ip = XFS_I(inode);
1339
1340 trace_xfs_vm_bmap(XFS_I(inode));
1341
1342 /*
1343 * The swap code (ab-)uses ->bmap to get a block mapping and then
1344         * bypasses the file system for actual I/O. We really can't allow
1345         * that on reflink inodes, so we have to skip out here. And yes,
1346 * 0 is the magic code for a bmap error.
1347 *
1348 * Since we don't pass back blockdev info, we can't return bmap
1349 * information for rt files either.
1350 */
1351 if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
1352 return 0;
1353
1354 filemap_write_and_wait(mapping);
1355 return generic_block_bmap(mapping, block, xfs_get_blocks);
1356 }
1357
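/*
 * Buffered reads go through the generic mpage code, using xfs_get_blocks()
 * for the block mapping.
 */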
1358 STATIC int
1359 xfs_vm_readpage(
1360 struct file *unused,
1361 struct page *page)
1362 {
1363 trace_xfs_vm_readpage(page->mapping->host, 1);
1364 return mpage_readpage(page, xfs_get_blocks);
1365 }
1366
1367 STATIC int
1368 xfs_vm_readpages(
1369 struct file *unused,
1370 struct address_space *mapping,
1371 struct list_head *pages,
1372 unsigned nr_pages)
1373 {
1374 trace_xfs_vm_readpages(mapping->host, nr_pages);
1375 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1376 }
1377
1378 /*
1379 * This is basically a copy of __set_page_dirty_buffers() with one
1380 * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
1381 * dirty, we'll never be able to clean them because we don't write buffers
1382 * beyond EOF, and that means we can't invalidate pages that span EOF
1383 * that have been marked dirty. Further, the dirty state can leak into
1384 * the file interior if the file is extended, resulting in all sorts of
1385 * bad things happening as the state does not match the underlying data.
1386 *
1387 * XXX: this really indicates that bufferheads in XFS need to die. Warts like
1388 * this only exist because of bufferheads and how the generic code manages them.
1389 */
1390 STATIC int
1391 xfs_vm_set_page_dirty(
1392 struct page *page)
1393 {
1394 struct address_space *mapping = page->mapping;
1395 struct inode *inode = mapping->host;
1396 loff_t end_offset;
1397 loff_t offset;
1398 int newly_dirty;
1399
1400 if (unlikely(!mapping))
1401 return !TestSetPageDirty(page);
1402
1403 end_offset = i_size_read(inode);
1404 offset = page_offset(page);
1405
1406 spin_lock(&mapping->private_lock);
1407 if (page_has_buffers(page)) {
1408 struct buffer_head *head = page_buffers(page);
1409 struct buffer_head *bh = head;
1410
1411 do {
1412 if (offset < end_offset)
1413 set_buffer_dirty(bh);
1414 bh = bh->b_this_page;
1415 offset += i_blocksize(inode);
1416 } while (bh != head);
1417 }
1418 /*
1419 * Lock out page->mem_cgroup migration to keep PageDirty
1420 * synchronized with per-memcg dirty page counters.
1421 */
1422 lock_page_memcg(page);
1423 newly_dirty = !TestSetPageDirty(page);
1424 spin_unlock(&mapping->private_lock);
1425
1426 if (newly_dirty) {
1427 /* sigh - __set_page_dirty() is static, so copy it here, too */
1428 unsigned long flags;
1429
1430 spin_lock_irqsave(&mapping->tree_lock, flags);
1431 if (page->mapping) { /* Race with truncate? */
1432 WARN_ON_ONCE(!PageUptodate(page));
1433 account_page_dirtied(page, mapping);
1434 radix_tree_tag_set(&mapping->page_tree,
1435 page_index(page), PAGECACHE_TAG_DIRTY);
1436 }
1437 spin_unlock_irqrestore(&mapping->tree_lock, flags);
1438 }
1439 unlock_page_memcg(page);
1440 if (newly_dirty)
1441 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1442 return newly_dirty;
1443 }
1444
1445 const struct address_space_operations xfs_address_space_operations = {
1446 .readpage = xfs_vm_readpage,
1447 .readpages = xfs_vm_readpages,
1448 .writepage = xfs_vm_writepage,
1449 .writepages = xfs_vm_writepages,
1450 .set_page_dirty = xfs_vm_set_page_dirty,
1451 .releasepage = xfs_vm_releasepage,
1452 .invalidatepage = xfs_vm_invalidatepage,
1453 .bmap = xfs_vm_bmap,
1454 .direct_IO = xfs_vm_direct_IO,
1455 .migratepage = buffer_migrate_page,
1456 .is_partially_uptodate = block_is_partially_uptodate,
1457 .error_remove_page = generic_error_remove_page,
1458 };