// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blk-crypto.h>
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>
#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static struct kmem_cache *bio_entry_slab;
static mempool_t *bio_post_read_ctx_pool;
static struct bio_set f2fs_bioset;

#define	F2FS_BIO_POOL_SIZE	NR_CURSEG_TYPE
int __init f2fs_init_bioset(void)
{
	if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
					0, BIOSET_NEED_BVECS))
		return -ENOMEM;
	return 0;
}

void f2fs_destroy_bioset(void)
{
	bioset_exit(&f2fs_bioset);
}
static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask,
						unsigned int nr_iovecs)
{
	return bio_alloc_bioset(gfp_mask, nr_iovecs, &f2fs_bioset);
}

struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio)
{
	if (noio) {
		/* No failure on bio allocation */
		return __f2fs_bio_alloc(GFP_NOIO, npages);
	}

	if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
		f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO);
		return NULL;
	}

	return __f2fs_bio_alloc(GFP_KERNEL, npages);
}
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	if (f2fs_is_compressed_page(page))
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino == F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
			is_cold_data(page))
		return true;
	return false;
}
static enum count_type __read_io_type(struct page *page)
{
	struct address_space *mapping = page_file_mapping(page);

	if (mapping) {
		struct inode *inode = mapping->host;
		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

		if (inode->i_ino == F2FS_META_INO(sbi))
			return F2FS_RD_META;

		if (inode->i_ino == F2FS_NODE_INO(sbi))
			return F2FS_RD_NODE;
	}
	return F2FS_RD_DATA;
}
/* postprocessing steps for read bios */
enum bio_post_read_step {
	STEP_DECRYPT,
	STEP_DECOMPRESS_NOWQ,		/* handle normal cluster data inplace */
	STEP_DECOMPRESS,		/* handle compressed cluster data in workqueue */
	STEP_VERITY,
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct f2fs_sb_info *sbi;
	struct work_struct work;
	unsigned int enabled_steps;
};
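
/*
 * Note (added for clarity, not in the original source): enabled_steps is a
 * bitmask of bio_post_read_step values. For example, a read bio that needs
 * both decryption and fs-verity carries
 * (1 << STEP_DECRYPT) | (1 << STEP_VERITY), and bio_post_read_processing()
 * dispatches the remaining steps one after another.
 */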
static void __read_end_io(struct bio *bio, bool compr, bool verity)
{
	struct page *page;
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bv, bio, iter_all) {
		page = bv->bv_page;

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (compr && f2fs_is_compressed_page(page)) {
			f2fs_decompress_pages(bio, page, verity);
			continue;
		}
		if (verity)
			continue;
#endif

		/* PG_error was set if any post_read step failed */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
			/* will re-read again later */
			ClearPageError(page);
		} else {
			SetPageUptodate(page);
		}
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
		unlock_page(page);
	}
}
static void f2fs_release_read_bio(struct bio *bio);
static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
{
	if (!compr)
		__read_end_io(bio, false, verity);
	f2fs_release_read_bio(bio);
}

static void f2fs_decompress_bio(struct bio *bio, bool verity)
{
	__read_end_io(bio, true, verity);
}
static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
{
	fscrypt_decrypt_bio(ctx->bio);
}

static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
{
	f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
{
	f2fs_decompress_end_io(rpages, cluster_size, false, true);
}

static void f2fs_verify_bio(struct bio *bio)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bv, bio, iter_all) {
		struct page *page = bv->bv_page;
		struct decompress_io_ctx *dic;

		dic = (struct decompress_io_ctx *)page_private(page);

		if (dic) {
			if (atomic_dec_return(&dic->verity_pages))
				continue;
			f2fs_verify_pages(dic->rpages,
						dic->cluster_size);
			f2fs_free_dic(dic);
			continue;
		}

		if (bio->bi_status || PageError(page))
			goto clear_uptodate;

		if (fsverity_verify_page(page)) {
			SetPageUptodate(page);
			goto unlock;
		}
clear_uptodate:
		ClearPageUptodate(page);
		ClearPageError(page);
unlock:
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
		unlock_page(page);
	}
}
#endif
static void f2fs_verity_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);
	struct bio *bio = ctx->bio;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	unsigned int enabled_steps = ctx->enabled_steps;
#endif

	/*
	 * fsverity_verify_bio() may call readpages() again, and while verity
	 * will be disabled for this, decryption may still be needed, resulting
	 * in another bio_post_read_ctx being allocated. So to prevent
	 * deadlocks we need to release the current ctx to the mempool first.
	 * This assumes that verity is the last post-read step.
	 */
	mempool_free(ctx, bio_post_read_ctx_pool);
	bio->bi_private = NULL;

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* previous step is decompression */
	if (enabled_steps & (1 << STEP_DECOMPRESS)) {
		f2fs_verify_bio(bio);
		f2fs_release_read_bio(bio);
		return;
	}
#endif

	fsverity_verify_bio(bio);
	__f2fs_read_end_io(bio, false, false);
}
static void f2fs_post_read_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	if (ctx->enabled_steps & (1 << STEP_DECRYPT))
		f2fs_decrypt_work(ctx);

	if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
		f2fs_decompress_work(ctx);

	if (ctx->enabled_steps & (1 << STEP_VERITY)) {
		INIT_WORK(&ctx->work, f2fs_verity_work);
		fsverity_enqueue_verify_work(&ctx->work);
		return;
	}

	__f2fs_read_end_io(ctx->bio,
		ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
}
static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
						struct work_struct *work)
{
	queue_work(sbi->post_read_wq, work);
}
static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
	/*
	 * We use different work queues for decryption and for verity because
	 * verity may require reading metadata pages that need decryption, and
	 * we shouldn't recurse to the same workqueue.
	 */

	if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
		ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
		INIT_WORK(&ctx->work, f2fs_post_read_work);
		f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
		return;
	}

	if (ctx->enabled_steps & (1 << STEP_VERITY)) {
		INIT_WORK(&ctx->work, f2fs_verity_work);
		fsverity_enqueue_verify_work(&ctx->work);
		return;
	}

	__f2fs_read_end_io(ctx->bio, false, false);
}
static bool f2fs_bio_post_read_required(struct bio *bio)
{
	return bio->bi_private;
}
static void f2fs_read_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));

	if (time_to_inject(sbi, FAULT_READ_IO)) {
		f2fs_show_injection_info(sbi, FAULT_READ_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;

		bio_post_read_processing(ctx);
		return;
	}

	__f2fs_read_end_io(bio, false, false);
}
static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
		f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_finalize_bounce_page(&page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_is_compressed_page(page)) {
			f2fs_compress_write_end_io(bio, page);
			continue;
		}
#endif

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	if (f2fs_is_multi_device(sbi)) {
		for (i = 0; i < sbi->s_ndevs; i++) {
			if (FDEV(i).start_blk <= blk_addr &&
					FDEV(i).end_blk >= blk_addr) {
				blk_addr -= FDEV(i).start_blk;
				bdev = FDEV(i).bdev;
				break;
			}
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}
int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}
/*
 * Return true if pre_bio's bdev is the same as its target device.
 */
static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);

	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct bio *bio;

	bio = f2fs_bio_alloc(sbi, npages, true);

	f2fs_target_device(sbi, fio->new_blkaddr, bio);
	if (is_read_io(fio->op)) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
						fio->type, fio->temp);
	}
	if (fio->io_wbc)
		wbc_init_bio(fio->io_wbc, bio);

	return bio;
}
static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
				  pgoff_t first_idx,
				  const struct f2fs_io_info *fio,
				  gfp_t gfp_mask)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (!fio || !fio->encrypted_page)
		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
}

static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
				     pgoff_t next_idx,
				     const struct f2fs_io_info *fio)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (fio && fio->encrypted_page)
		return !bio_has_crypt_ctx(bio);

	return fscrypt_mergeable_bio(bio, inode, next_idx);
}
static inline void __submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	if (!is_read_io(bio_op(bio))) {
		unsigned int start;

		if (type != DATA && type != NODE)
			goto submit_io;

		if (f2fs_lfs_mode(sbi) && current->plug)
			blk_finish_plug(current->plug);

		if (!F2FS_IO_ALIGNED(sbi))
			goto submit_io;

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
		start %= F2FS_IO_SIZE(sbi);

		if (start == 0)
			goto submit_io;

		/* fill dummy pages */
		for (; start < F2FS_IO_SIZE(sbi); start++) {
			struct page *page =
				mempool_alloc(sbi->write_io_dummy,
					      GFP_NOIO | __GFP_NOFAIL);
			f2fs_bug_on(sbi, !page);

			zero_user_segment(page, 0, PAGE_SIZE);
			SetPagePrivate(page);
			set_page_private(page, DUMMY_WRITTEN_PAGE);
			lock_page(page);
			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				f2fs_bug_on(sbi, 1);
		}
		/*
		 * In the NODE case, we lose next block address chain. So, we
		 * need to do checkpoint in f2fs_sync_file.
		 */
		if (type == NODE)
			set_sbi_flag(sbi, SBI_NEED_CP);
	}
submit_io:
	if (is_read_io(bio_op(bio)))
		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
	else
		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	submit_bio(bio);
}
void f2fs_submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	__submit_bio(sbi, bio, type);
}
static void __attach_io_flag(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
	unsigned int io_flag, fua_flag, meta_flag;

	if (fio->type == DATA)
		io_flag = sbi->data_io_flag;
	else if (fio->type == NODE)
		io_flag = sbi->node_io_flag;
	else
		return;

	fua_flag = io_flag & temp_mask;
	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;

	/*
	 * data/node io flag bits per temp:
	 *      REQ_META     |      REQ_FUA      |
	 *    5 |    4 |   3 |    2 |    1 |   0 |
	 * Cold | Warm | Hot | Cold | Warm | Hot |
	 */
	if ((1 << fio->temp) & meta_flag)
		fio->op_flags |= REQ_META;
	if ((1 << fio->temp) & fua_flag)
		fio->op_flags |= REQ_FUA;
}
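
/*
 * Worked example (added, illustrative): with NR_TEMP_TYPE == 3, temp_mask is
 * 0b111. An io_flag of 0b001010 yields fua_flag = 0b010 and meta_flag =
 * 0b001, so WARM writes (bit 1) get REQ_FUA while HOT writes (bit 0) get
 * REQ_META; COLD writes are unaffected.
 */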
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	__attach_io_flag(fio);
	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);

	if (is_read_io(fio->op))
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
	else
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

	__submit_bio(io->sbi, io->bio, fio->type);
	io->bio = NULL;
}
static bool __has_merged_page(struct bio *bio, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (!bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *target = bvec->bv_page;

		if (fscrypt_is_bounce_page(target)) {
			target = fscrypt_pagecache_page(target);
			if (IS_ERR(target))
				continue;
		}
		if (f2fs_is_compressed_page(target)) {
			target = f2fs_compress_control_page(target);
			if (IS_ERR(target))
				continue;
		}

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}
static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.op = REQ_OP_WRITE;
		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;
	bool ret = true;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		if (!force)	{
			enum page_type btype = PAGE_TYPE_OF_BIO(type);
			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

			down_read(&io->io_rwsem);
			ret = __has_merged_page(io->bio, inode, page, ino);
			up_read(&io->io_rwsem);
		}
		if (ret)
			__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}
/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			fio->is_por ? META_POR : (__is_meta_io(fio) ?
			META_GENERIC : DATA_GENERIC_ENHANCE)))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio, 1);

	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
			       fio->page->index, fio, GFP_NOIO);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}

	if (fio->io_wbc && !is_read_io(fio->op))
		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);

	__attach_io_flag(fio);
	bio_set_op_attrs(bio, fio->op, fio->op_flags);

	inc_page_count(fio->sbi, is_read_io(fio->op) ?
			__read_io_type(page) : WB_DATA_TYPE(fio->page));

	__submit_bio(fio->sbi, bio, fio->type);
	return 0;
}
static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
				block_t last_blkaddr, block_t cur_blkaddr)
{
	if (unlikely(sbi->max_io_bytes &&
			bio->bi_iter.bi_size >= sbi->max_io_bytes))
		return false;
	if (last_blkaddr + 1 != cur_blkaddr)
		return false;
	return __same_bdev(sbi, cur_blkaddr, bio);
}
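
/*
 * Note (added): three conditions gate block merging here - the bio must stay
 * under the optional max_io_bytes cap, the new block must be physically
 * contiguous with the last one written into the bio, and both blocks must
 * resolve to the same underlying block device.
 */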
static bool io_type_is_mergeable(struct f2fs_bio_info *io,
						struct f2fs_io_info *fio)
{
	if (io->fio.op != fio->op)
		return false;
	return io->fio.op_flags == fio->op_flags;
}
static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
					struct f2fs_bio_info *io,
					struct f2fs_io_info *fio,
					block_t last_blkaddr,
					block_t cur_blkaddr)
{
	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
		unsigned int filled_blocks =
				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
		unsigned int io_size = F2FS_IO_SIZE(sbi);
		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;

		/* IOs in the bio are aligned and the remaining vector space is not enough */
		if (!(filled_blocks % io_size) && left_vecs < io_size)
			return false;
	}
	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
		return false;
	return io_type_is_mergeable(io, fio);
}
static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
				struct page *page, enum temp_type temp)
{
	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
	struct bio_entry *be;

	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
	be->bio = bio;
	bio_get(bio);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
		f2fs_bug_on(sbi, 1);

	down_write(&io->bio_list_lock);
	list_add_tail(&be->list, &io->bio_list);
	up_write(&io->bio_list_lock);
}
static void del_bio_entry(struct bio_entry *be)
{
	list_del(&be->list);
	kmem_cache_free(bio_entry_slab, be);
}
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
							struct page *page)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum temp_type temp;
	bool found = false;
	int ret = -EAGAIN;

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (be->bio != *bio)
				continue;

			found = true;

			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
							    *fio->last_block,
							    fio->new_blkaddr));
			if (f2fs_crypt_mergeable_bio(*bio,
					fio->page->mapping->host,
					fio->page->index, fio) &&
			    bio_add_page(*bio, page, PAGE_SIZE, 0) ==
					PAGE_SIZE) {
				ret = 0;
				break;
			}

			/* page can't be merged into bio; submit the bio */
			del_bio_entry(be);
			__submit_bio(sbi, *bio, DATA);
			break;
		}
		up_write(&io->bio_list_lock);
	}

	if (ret) {
		bio_put(*bio);
		*bio = NULL;
	}

	return ret;
}
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
					struct bio **bio, struct page *page)
{
	enum temp_type temp;
	bool found = false;
	struct bio *target = bio ? *bio : NULL;

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		if (list_empty(head))
			continue;

		down_read(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found)
				break;
		}
		up_read(&io->bio_list_lock);

		if (!found)
			continue;

		found = false;

		down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found) {
				target = be->bio;
				del_bio_entry(be);
				break;
			}
		}
		up_write(&io->bio_list_lock);
	}

	if (found)
		__submit_bio(sbi, target, DATA);
	if (bio && *bio) {
		bio_put(*bio);
		*bio = NULL;
	}
}
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio = *fio->bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
						fio->new_blkaddr))
		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
	if (!bio) {
		bio = __bio_alloc(fio, BIO_MAX_PAGES);
		__attach_io_flag(fio);
		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
				       fio->page->index, fio, GFP_NOIO);
		bio_set_op_attrs(bio, fio->op, fio->op_flags);

		add_bio_entry(fio->sbi, bio, page, fio->temp);
	} else {
		if (add_ipu_page(fio, &bio, page))
			goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);

	inc_page_count(fio->sbi, WB_DATA_TYPE(page));

	*fio->last_block = fio->new_blkaddr;
	*fio->bio = bio;

	return 0;
}
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	if (fio->encrypted_page)
		bio_page = fio->encrypted_page;
	else if (fio->compressed_page)
		bio_page = fio->compressed_page;
	else
		bio_page = fio->page;

	/* set submitted = true as a return value */
	fio->submitted = true;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio &&
	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
			      fio->new_blkaddr) ||
	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
				       bio_page->index, fio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if (F2FS_IO_ALIGNED(sbi) &&
				(fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = true;
			goto skip;
		}
		io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
				       bio_page->index, fio, GFP_NOIO);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;
	f2fs_trace_ios(fio, 0);

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}
static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
{
	return fsverity_active(inode) &&
	       idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
}
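
/*
 * Note (added): only pages within i_size are covered by the fs-verity Merkle
 * tree, so reads of pages past that limit (such as the tree pages themselves,
 * bounded by f2fs_readpage_limit() below) must skip the STEP_VERITY step.
 */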
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
				      unsigned nr_pages, unsigned op_flag,
				      pgoff_t first_idx, bool for_write,
				      bool for_verity)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES),
			     for_write);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);

	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (fscrypt_inode_uses_fs_layer_crypto(inode))
		post_read_steps |= 1 << STEP_DECRYPT;
	if (f2fs_compressed_file(inode))
		post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
	if (for_verity && f2fs_need_verity(inode, first_idx))
		post_read_steps |= 1 << STEP_VERITY;

	if (post_read_steps) {
		/* Due to the mempool, this never fails. */
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		ctx->bio = bio;
		ctx->sbi = sbi;
		ctx->enabled_steps = post_read_steps;
		bio->bi_private = ctx;
	}

	return bio;
}
static void f2fs_release_read_bio(struct bio *bio)
{
	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}
/* This can handle encryption stuff */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
				 block_t blkaddr, int op_flags, bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;

	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
					page->index, for_write, true);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	ClearPageError(page);
	inc_page_count(sbi, F2FS_RD_DATA);
	f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
	__submit_bio(sbi, bio, DATA);
	return 0;
}
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}
/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}
/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = f2fs_data_blkaddr(dn);

		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}
/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
	struct extent_info ei = {0, 0, 0};
	struct inode *inode = dn->inode;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
	}

	return f2fs_reserve_block(dn, index);
}
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
						int op_flags, bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0, 0, 0};
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto put_err;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_err;
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		goto put_err;
	}
	if (dn.data_blkaddr != NEW_ADDR &&
			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						dn.data_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr can remain NEW_ADDR.
	 * see, f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
						op_flags, for_write);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}
struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}
/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = f2fs_get_read_data_page(inode, index, 0, for_write);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}
/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that, ipage is set only by make_empty_dir, and if any error occur,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occur.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	dn->data_blkaddr = f2fs_data_blkaddr(dn);
	if (dn->data_blkaddr != NULL_ADDR)
		goto alloc;

	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

alloc:
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
					&sum, seg_type, NULL);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);

	/*
	 * i_size will be updated by direct_IO. Otherwise, we'll get stale
	 * data from unwritten block via dio_read.
	 */
	return 0;
}
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_map_blocks map;
	int flag;
	int err = 0;
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;

	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = true;

	if (direct_io) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
		flag = f2fs_force_buffered_io(inode, iocb, from) ?
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}
	if (f2fs_has_inline_data(inode))
		return err;

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
	}
	return err;
}
void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}
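
/*
 * Note (added): preallocation (F2FS_GET_BLOCK_PRE_AIO) only needs to keep
 * the checkpoint from racing with in-flight node changes, so the lighter
 * node_change rwsem suffices there; the other create paths take the full
 * f2fs_lock_op() checkpoint lock.
 */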
/*
 * f2fs_map_blocks() tries to find or build mapping relationship which
 * maps continuous logical blocks to physical blocks, and return such
 * info via f2fs_map_blocks structure.
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	struct extent_info ei = {0, 0, 0};
	block_t blkaddr;
	unsigned int start_pgofs;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create)
			goto next_dnode;

		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + map->m_len;

		/* for hardware encryption, but to avoid potential issue in future */
		if (flag == F2FS_GET_BLOCK_DIO)
			f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);
		goto out;
	}

next_dnode:
	if (map->m_may_create)
		f2fs_do_map_lock(sbi, flag, true);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT) {
			err = 0;
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					f2fs_get_next_page_offset(&dn, pgofs);
			if (map->m_next_extent)
				*map->m_next_extent =
					f2fs_get_next_page_offset(&dn, pgofs);
		}
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = f2fs_data_blkaddr(&dn);

	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto sync_out;
	}

	if (__is_valid_data_blkaddr(blkaddr)) {
		/* use out-place-update for direct IO under LFS mode */
		if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create) {
			err = __allocate_data_block(&dn, map->m_seg_type);
			if (err)
				goto sync_out;
			blkaddr = dn.data_blkaddr;
			set_inode_flag(inode, FI_APPEND_WRITE);
		}
	} else {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR) {
					prealloc++;
					last_ofs_in_node = dn.ofs_in_node;
				}
			} else {
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
					flag != F2FS_GET_BLOCK_DIO);
				err = __allocate_data_block(&dn,
							map->m_seg_type);
				if (!err)
					set_inode_flag(inode, FI_APPEND_WRITE);
			}
			if (err)
				goto sync_out;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		} else {
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRECACHE)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			if (flag != F2FS_GET_BLOCK_FIEMAP) {
				/* for defragment case */
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (map->m_may_create) {
		f2fs_do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:

	/* for hardware encryption, but to avoid potential issue in future */
	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
		f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (map->m_may_create) {
		f2fs_do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}
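
/*
 * Illustrative usage sketch (added, not from the original source), mirroring
 * how f2fs_overwrite_io() below consumes this API to look up one block:
 *
 *	struct f2fs_map_blocks map;
 *
 *	memset(&map, 0, sizeof(map));
 *	map.m_lblk = idx;
 *	map.m_len = 1;
 *	map.m_seg_type = NO_CHECK_TYPE;
 *	map.m_may_create = false;
 *	if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT) &&
 *	    (map.m_flags & F2FS_MAP_MAPPED))
 *		blkaddr = map.m_pblk;	// physical block for logical idx
 */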
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}
static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
{
	return (bytes >> inode->i_blkbits);
}

static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
{
	return (blks << inode->i_blkbits);
}
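
/*
 * Example (added): with the default 4KiB block size, i_blkbits is 12, so
 * bytes_to_blks(inode, 8192) == 2 and blks_to_bytes(inode, 2) == 8192.
 */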
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs, int seg_type, bool may_write)
{
	struct f2fs_map_blocks map;
	int err;

	map.m_lblk = iblock;
	map.m_len = bytes_to_blks(inode, bh->b_size);
	map.m_next_pgofs = next_pgofs;
	map.m_next_extent = NULL;
	map.m_seg_type = seg_type;
	map.m_may_create = may_write;

	err = f2fs_map_blocks(inode, &map, create, flag);
	if (!err) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = blks_to_bytes(inode, map.m_len);
	}
	return err;
}
static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				IS_SWAPFILE(inode) ? false : true);
}

static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				false);
}
static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = blks_to_bytes(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
		if (err || err == 1)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = blks_to_bytes(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys) {
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
	}

	return (err < 0 ? err : 0);
}
static loff_t max_inode_blocks(struct inode *inode)
{
	loff_t result = ADDRS_PER_INODE(inode);
	loff_t leaf_count = ADDRS_PER_BLOCK(inode);

	/* two direct node blocks */
	result += (leaf_count * 2);

	/* two indirect node blocks */
	leaf_count *= NIDS_PER_BLOCK;
	result += (leaf_count * 2);

	/* one double indirect node block */
	leaf_count *= NIDS_PER_BLOCK;
	result += leaf_count;

	return result;
}
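
/*
 * Worked example (added, assuming the common 4KiB geometry where
 * ADDRS_PER_INODE(inode) is roughly 923 and ADDRS_PER_BLOCK and
 * NIDS_PER_BLOCK are both 1018): 923 + 2*1018 + 2*1018^2 + 1018^3 is about
 * 1.06 billion blocks, i.e. roughly 4TiB of per-file address space.
 */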
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct f2fs_map_blocks map;
	sector_t start_blk, last_blk;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;
	bool compr_cluster = false;
	unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
	if (ret)
		return ret;

	inode_lock(inode);

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			goto out;
	}

	if (bytes_to_blks(inode, len) == 0)
		len = blks_to_bytes(inode, 1);

	start_blk = bytes_to_blks(inode, start);
	last_blk = bytes_to_blks(inode, start + len - 1);

next:
	memset(&map, 0, sizeof(map));
	map.m_lblk = start_blk;
	map.m_len = bytes_to_blks(inode, len);
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = NO_CHECK_TYPE;

	if (compr_cluster)
		map.m_len = cluster_size - 1;

	ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
	if (ret)
		goto out;

	/* HOLE */
	if (!(map.m_flags & F2FS_MAP_FLAGS)) {
		start_blk = next_pgofs;

		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
						max_inode_blocks(inode)))
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	if (size) {
		if (IS_ENCRYPTED(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
		trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
		if (ret)
			goto out;
		size = 0;
	}

	if (start_blk > last_blk)
		goto out;

	if (compr_cluster) {
		compr_cluster = false;

		logical = blks_to_bytes(inode, start_blk - 1);
		phys = blks_to_bytes(inode, map.m_pblk);
		size = blks_to_bytes(inode, cluster_size);

		flags |= FIEMAP_EXTENT_ENCODED;

		start_blk += cluster_size - 1;

		if (start_blk > last_blk)
			goto out;

		goto prep_next;
	}

	if (map.m_pblk == COMPRESS_ADDR) {
		compr_cluster = true;
		start_blk++;
		goto prep_next;
	}

	logical = blks_to_bytes(inode, start_blk);
	phys = blks_to_bytes(inode, map.m_pblk);
	size = blks_to_bytes(inode, map.m_len);
	flags = 0;
	if (map.m_flags & F2FS_MAP_UNWRITTEN)
		flags = FIEMAP_EXTENT_UNWRITTEN;

	start_blk += bytes_to_blks(inode, size);

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}
static inline loff_t f2fs_readpage_limit(struct inode *inode)
{
	if (IS_ENABLED(CONFIG_FS_VERITY) &&
	    (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
		return inode->i_sb->s_maxbytes;

	return i_size_read(inode);
}
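
/*
 * Note (added): f2fs stores the fs-verity Merkle tree past i_size within the
 * file itself, so reads on verity files are bounded by s_maxbytes rather
 * than i_size.
 */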
static int f2fs_read_single_page(struct inode *inode, struct page *page,
					unsigned nr_pages,
					struct f2fs_map_blocks *map,
					struct bio **bio_ret,
					sector_t *last_block_in_bio,
					bool is_readahead)
{
	struct bio *bio = *bio_ret;
	const unsigned blocksize = blks_to_bytes(inode, 1);
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	int ret = 0;

	block_in_file = (sector_t)page_index(page);
	last_block = block_in_file + nr_pages;
	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);
	if (last_block > last_block_in_file)
		last_block = last_block_in_file;

	/* just zeroing out page which is beyond EOF */
	if (block_in_file >= last_block)
		goto zero_out;
	/*
	 * Map blocks using the previous result first.
	 */
	if ((map->m_flags & F2FS_MAP_MAPPED) &&
			block_in_file > map->m_lblk &&
			block_in_file < (map->m_lblk + map->m_len))
		goto got_it;

	/*
	 * Then do more f2fs_map_blocks() calls until we are
	 * done with this page.
	 */
	map->m_lblk = block_in_file;
	map->m_len = last_block - block_in_file;

	ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
	if (ret)
		goto out;
got_it:
	if ((map->m_flags & F2FS_MAP_MAPPED)) {
		block_nr = map->m_pblk + block_in_file - map->m_lblk;
		SetPageMappedToDisk(page);

		if (!PageUptodate(page) && (!PageSwapCache(page) &&
					!cleancache_get_page(page))) {
			SetPageUptodate(page);
			goto confused;
		}

		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
						DATA_GENERIC_ENHANCE_READ)) {
			ret = -EFSCORRUPTED;
			goto out;
		}
	} else {
zero_out:
		zero_user_segment(page, 0, PAGE_SIZE);
		if (f2fs_need_verity(inode, page->index) &&
		    !fsverity_verify_page(page)) {
			ret = -EIO;
			goto out;
		}
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		goto out;
	}

	/*
	 * This page will go to BIO.  Do we need to send this
	 * BIO off first?
	 */
	if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
				       *last_block_in_bio, block_nr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	if (bio == NULL) {
		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
				is_readahead ? REQ_RAHEAD : 0, page->index,
				false, true);
		if (IS_ERR(bio)) {
			ret = PTR_ERR(bio);
			bio = NULL;
			goto out;
		}
	}

	/*
	 * If the page is under writeback, we need to wait for
	 * its completion to see the correct decrypted data.
	 */
	f2fs_wait_on_block_writeback(inode, block_nr);

	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
		goto submit_and_realloc;

	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
	f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
	ClearPageError(page);
	*last_block_in_bio = block_nr;
	goto out;
confused:
	if (bio) {
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	unlock_page(page);
out:
	*bio_ret = bio;
	return ret;
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
				unsigned nr_pages, sector_t *last_block_in_bio,
				bool is_readahead, bool for_write)
{
	struct dnode_of_data dn;
	struct inode *inode = cc->inode;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio = *bio_ret;
	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
	sector_t last_block_in_file;
	const unsigned blocksize = blks_to_bytes(inode, 1);
	struct decompress_io_ctx *dic = NULL;
	struct bio_post_read_ctx *ctx;
	bool for_verity = false;
	int i;
	int ret = 0;

	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));

	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);

	/* get rid of pages beyond EOF */
	for (i = 0; i < cc->cluster_size; i++) {
		struct page *page = cc->rpages[i];

		if (!page)
			continue;
		if ((sector_t)page->index >= last_block_in_file) {
			zero_user_segment(page, 0, PAGE_SIZE);
			if (!PageUptodate(page))
				SetPageUptodate(page);
		} else if (!PageUptodate(page)) {
			continue;
		}
		unlock_page(page);
		cc->rpages[i] = NULL;
		cc->nr_rpages--;
	}

	/* we are done since all pages are beyond EOF */
	if (f2fs_cluster_is_empty(cc))
		goto out;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		goto out;

	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);

	for (i = 1; i < cc->cluster_size; i++) {
		block_t blkaddr;

		blkaddr = data_blkaddr(dn.inode, dn.node_page,
						dn.ofs_in_node + i);

		if (!__is_valid_data_blkaddr(blkaddr))
			break;

		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
			ret = -EFAULT;
			goto out_put_dnode;
		}
		cc->nr_cpages++;
	}

	/* nothing to decompress */
	if (cc->nr_cpages == 0) {
		ret = 0;
		goto out_put_dnode;
	}

	dic = f2fs_alloc_dic(cc);
	if (IS_ERR(dic)) {
		ret = PTR_ERR(dic);
		goto out_put_dnode;
	}

	/*
	 * It's possible to enable fsverity on the fly when handling a cluster,
	 * which requires complicated error handling. Instead of adding more
	 * complexity, let's give a rule where end_io post-processes fsverity
	 * per cluster. In order to do that, we need to submit bio, if previous
	 * bio sets a different post-process policy.
	 */
	if (fsverity_active(cc->inode)) {
		atomic_set(&dic->verity_pages, cc->nr_cpages);
		for_verity = true;

		if (bio) {
			ctx = bio->bi_private;
			if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
				__submit_bio(sbi, bio, DATA);
				bio = NULL;
			}
		}
	}

	for (i = 0; i < dic->nr_cpages; i++) {
		struct page *page = dic->cpages[i];
		block_t blkaddr;

		blkaddr = data_blkaddr(dn.inode, dn.node_page,
						dn.ofs_in_node + i + 1);

		if (bio && (!page_is_mergeable(sbi, bio,
					*last_block_in_bio, blkaddr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
			__submit_bio(sbi, bio, DATA);
			bio = NULL;
		}

		if (!bio) {
			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0,
					page->index, for_write, for_verity);
			if (IS_ERR(bio)) {
				unsigned int remained = dic->nr_cpages - i;
				bool release = false;

				ret = PTR_ERR(bio);
				dic->failed = true;

				if (for_verity) {
					if (!atomic_sub_return(remained,
							&dic->verity_pages))
						release = true;
				} else {
					if (!atomic_sub_return(remained,
							&dic->pending_pages))
						release = true;
				}

				if (release) {
					f2fs_decompress_end_io(dic->rpages,
							cc->cluster_size, true,
							false);
					f2fs_free_dic(dic);
				}

				f2fs_put_dnode(&dn);
				*bio_ret = NULL;
				return ret;
			}
		}

		f2fs_wait_on_block_writeback(inode, blkaddr);

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		/* tag STEP_DECOMPRESS to handle IO in wq */
		ctx = bio->bi_private;
		if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
			ctx->enabled_steps |= 1 << STEP_DECOMPRESS;

		inc_page_count(sbi, F2FS_RD_DATA);
		f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
		f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
		ClearPageError(page);
		*last_block_in_bio = blkaddr;
	}

	f2fs_put_dnode(&dn);

	*bio_ret = bio;
	return 0;

out_put_dnode:
	f2fs_put_dnode(&dn);
out:
	f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
	*bio_ret = bio;
	return ret;
}
#endif
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
 */
static int f2fs_mpage_readpages(struct inode *inode,
		struct readahead_control *rac, struct page *page)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct f2fs_map_blocks map;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.cluster_idx = NULL_CLUSTER,
		.rpages = NULL,
		.cpages = NULL,
		.nr_rpages = 0,
		.nr_cpages = 0,
	};
#endif
	unsigned nr_pages = rac ? readahead_count(rac) : 1;
	unsigned max_nr_pages = nr_pages;
	int ret = 0;
	bool drop_ra = false;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;

	/*
	 * Two readahead threads for same address range can cause race condition
	 * which fragments sequential read IOs. So let's avoid each other.
	 */
	if (rac && readahead_count(rac)) {
		if (READ_ONCE(F2FS_I(inode)->ra_offset) == readahead_index(rac))
			drop_ra = true;
		else
			WRITE_ONCE(F2FS_I(inode)->ra_offset,
						readahead_index(rac));
	}

	for (; nr_pages; nr_pages--) {
		if (rac) {
			page = readahead_page(rac);
			prefetchw(&page->flags);
			if (drop_ra) {
				f2fs_put_page(page, 1);
				continue;
			}
		}

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_compressed_file(inode)) {
			/* there are remained compressed pages, submit them */
			if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							rac != NULL, false);
				f2fs_destroy_compress_ctx(&cc);
				if (ret)
					goto set_error_page;
			}
			ret = f2fs_is_compressed_cluster(inode, page->index);
			if (ret < 0)
				goto set_error_page;
			else if (!ret)
				goto read_single_page;

			ret = f2fs_init_compress_ctx(&cc);
			if (ret)
				goto set_error_page;

			f2fs_compress_ctx_add_page(&cc, page);

			goto next_page;
		}
read_single_page:
#endif

		ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
					&bio, &last_block_in_bio, rac);
		if (ret) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
set_error_page:
#endif
			SetPageError(page);
			zero_user_segment(page, 0, PAGE_SIZE);
			unlock_page(page);
		}
#ifdef CONFIG_F2FS_FS_COMPRESSION
next_page:
#endif
		if (rac)
			put_page(page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_compressed_file(inode)) {
			/* last page */
			if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							rac != NULL, false);
				f2fs_destroy_compress_ctx(&cc);
			}
		}
#endif
	}
	if (bio)
		__submit_bio(F2FS_I_SB(inode), bio, DATA);

	if (rac && readahead_count(rac) && !drop_ra)
		WRITE_ONCE(F2FS_I(inode)->ra_offset, -1);
	return ret;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(page, DATA);

	if (!f2fs_is_compress_backend_ready(inode)) {
		unlock_page(page);
		return -EOPNOTSUPP;
	}

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(inode, NULL, page);
	return ret;
}
static void f2fs_readahead(struct readahead_control *rac)
{
	struct inode *inode = rac->mapping->host;

	trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));

	if (!f2fs_is_compress_backend_ready(inode))
		return;

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return;

	f2fs_mpage_readpages(inode, rac, NULL);
}
int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage, *page;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
		return 0;

	page = fio->compressed_page ? fio->compressed_page : fio->page;

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);

	if (fscrypt_inode_uses_inline_crypto(inode))
		return 0;

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
					PAGE_SIZE, 0, gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}
static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int policy = SM_I(sbi)->ipu_policy;

	if (policy & (0x1 << F2FS_IPU_FORCE))
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
		return true;
	if (policy & (0x1 << F2FS_IPU_UTIL) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewriting async pages
	 */
	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
			fio && fio->op == REQ_OP_WRITE &&
			!(fio->op_flags & REQ_SYNC) &&
			!IS_ENCRYPTED(inode))
		return true;

	/* this is only set during fdatasync */
	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
			is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
		return true;

	return false;
}

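/*
 * The in-place-update policy above is a bitmask, so several triggers
 * can be enabled at once. As an illustrative sketch (not a suggested
 * default), enabling both the SSR and utilization triggers would be:
 *
 *	SM_I(sbi)->ipu_policy = (0x1 << F2FS_IPU_SSR) |
 *				(0x1 << F2FS_IPU_UTIL);
 *
 * which allows IPU either when SSR allocation is needed or when disk
 * utilization exceeds min_ipu_util.
 */
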
bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
	if (f2fs_is_pinned_file(inode))
		return true;

	/* if this is a cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode))
		return true;

	return check_inplace_update_policy(inode, fio);
}

bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (f2fs_lfs_mode(sbi))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (IS_NOQUOTA(inode))
		return true;
	if (f2fs_is_atomic_file(inode))
		return true;
	if (fio) {
		if (is_cold_data(fio->page))
			return true;
		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
			return true;
		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
			return true;
	}
	return false;
}

static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

	if (f2fs_should_update_outplace(inode, fio))
		return false;

	return f2fs_should_update_inplace(inode, fio);
}

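/*
 * f2fs_do_write_data_page() below writes one page either in place
 * (IPU) or to a newly allocated block (OPU). Note the priority:
 * need_inplace_update() lets the out-of-place veto list win, so only
 * pages that no OPU rule claims are considered for in-place update.
 */
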
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0, 0, 0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE))
			return -EFSCORRUPTED;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* avoid deadlock between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		ClearPageUptodate(page);
		clear_cold_data(page);
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto out_writepage;
	}
	/*
	 * If the current allocation needs SSR, it is better to do in-place
	 * writes for the updated data.
	 */
	if (ipu_force ||
		(__is_valid_data_blkaddr(fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = f2fs_encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
		ClearPageError(page);
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		if (err) {
			if (fscrypt_inode_uses_fs_layer_crypto(inode))
				fscrypt_finalize_bounce_page(
						&fio->encrypted_page);
			if (PageWriteback(page))
				end_page_writeback(page);
		} else {
			set_inode_flag(inode, FI_UPDATE_WRITE);
		}
		trace_f2fs_do_write_data_page(fio->page, IPU);
		return err;
	}

	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = f2fs_encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
	ClearPageError(page);

	if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
		f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}

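/*
 * A note on the lock ladder used above: callers normally start with
 * LOCK_RETRY, which upgrades to LOCK_REQ (a blocking f2fs_lock_op())
 * only after a trylock failure, while checkpoint-controlled writers
 * pass LOCK_DONE. The -EAGAIN returns are how lock contention is
 * reported to the caller for a retry.
 */
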
int f2fs_write_single_data_page(struct page *page, int *submitted,
				struct bio **bio,
				sector_t *last_block,
				struct writeback_control *wbc,
				enum iostat_type io_type,
				int compr_blocks,
				bool allow_balance)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long)i_size)
							>> PAGE_SHIFT;
	loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.compr_blocks = compr_blocks,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
		.bio = bio,
		.last_block = last_block,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to proceed the kworker jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping the latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (page->index < end_index ||
			f2fs_verity_in_progress(inode) ||
			compr_blocks)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write the 0'th page having a journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry/quota blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
		/*
		 * We need to wait for node_write to avoid block allocation
		 * during checkpoint. This can only happen to quota writes
		 * which can cause the below discard race condition.
		 */
		if (IS_NOQUOTA(inode))
			down_read(&sbi->node_write);

		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);

		if (IS_NOQUOTA(inode))
			up_read(&sbi->node_write);

		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		spin_lock(&F2FS_I(inode)->i_size_lock);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		spin_unlock(&F2FS_I(inode)->i_size_lock);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err) {
		ClearPageUptodate(page);
		clear_cold_data(page);
	}

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}
	unlock_page(page);
	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
			!F2FS_I(inode)->cp_task && allow_balance)
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		f2fs_submit_merged_ipu_write(sbi, bio, NULL);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted ? 1 : 0;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see the EIO error, which is
	 * critical to the return value of fsync() followed by an
	 * atomic_write failure to the user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct inode *inode = page->mapping->host;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		goto out;

	if (f2fs_compressed_file(inode)) {
		if (f2fs_is_compressed_cluster(inode, page->index)) {
			redirty_page_for_writepage(wbc, page);
			return AOP_WRITEPAGE_ACTIVATE;
		}
	}
out:
#endif

	return f2fs_write_single_data_page(page, NULL, NULL, NULL,
						wbc, FS_DATA_IO, 0, true);
}

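/*
 * For compressed inodes, the writeback loop below does not write pages
 * one at a time: it gathers a whole cluster in a compress_ctx and
 * flushes it through f2fs_write_multi_pages(), and only falls back to
 * f2fs_write_single_data_page() for normal, non-compressed data.
 */
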
/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is making the write step of cold data pages separate from
 * warm/hot data pages.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0, retry = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct bio *bio = NULL;
	sector_t last_block;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct inode *inode = mapping->host;
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.cluster_idx = NULL_CLUSTER,
		.rpages = NULL,
		.nr_rpages = 0,
		.cpages = NULL,
		.rbuf = NULL,
		.cbuf = NULL,
		.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
		.private = NULL,
	};
#endif
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int range_whole = 0;
	xa_mark_t tag;
	int nwritten = 0;
	int submitted = 0;
	int i;

	pagevec_init(&pvec);

	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	retry = 0;
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && !retry && (index <= end)) {
		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool need_readd;
readd:
			need_readd = false;
#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				ret = f2fs_init_compress_ctx(&cc);
				if (ret) {
					done = 1;
					break;
				}

				if (!f2fs_cluster_can_merge_page(&cc,
								page->index)) {
					ret = f2fs_write_multi_pages(&cc,
						&submitted, wbc, io_type);
					if (!ret)
						need_readd = true;
					goto result;
				}

				if (unlikely(f2fs_cp_error(sbi)))
					goto lock_page;

				if (f2fs_cluster_is_empty(&cc)) {
					void *fsdata = NULL;
					struct page *pagep;
					int ret2;

					ret2 = f2fs_prepare_compress_overwrite(
							inode, &pagep,
							page->index, &fsdata);
					if (ret2 < 0) {
						ret = ret2;
						done = 1;
						break;
					} else if (ret2 &&
						!f2fs_compress_write_end(inode,
							fsdata, page->index,
							1)) {
						retry = 1;
						break;
					}
				} else {
					goto lock_page;
				}
			}
#endif
			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
#ifdef CONFIG_F2FS_FS_COMPRESSION
lock_page:
#endif
			done_index = page->index;
retry_write:
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page,
							DATA, true, true);
				else
					goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				get_page(page);
				f2fs_compress_ctx_add_page(&cc, page);
				continue;
			}
#endif
			ret = f2fs_write_single_data_page(page, &submitted,
					&bio, &last_block, wbc, io_type,
					0, true);
			if (ret == AOP_WRITEPAGE_ACTIVATE)
				unlock_page(page);
#ifdef CONFIG_F2FS_FS_COMPRESSION
result:
#endif
			nwritten += submitted;
			wbc->nr_to_write -= submitted;

			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					ret = 0;
					goto next;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
							DEFAULT_IO_TIMEOUT);
						goto retry_write;
					}
					goto next;
				}
				done_index = page->index + 1;
				done = 1;
				break;
			}

			if (wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
next:
			if (need_readd)
				goto readd;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* flush remaining pages in the compress cluster */
	if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
		ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
		nwritten += submitted;
		wbc->nr_to_write -= submitted;
		if (ret) {
			done = 1;
			retry = 0;
		}
	}
	if (f2fs_compressed_file(inode))
		f2fs_destroy_compress_ctx(&cc);
#endif
	if (retry) {
		index = 0;
		end = -1;
		goto retry;
	}
	if (wbc->range_cyclic && !done)
		done_index = 0;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (nwritten)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);
	/* submit cached bio of IPU write */
	if (bio)
		f2fs_submit_merged_ipu_write(sbi, &bio, NULL);

	return ret;
}

static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	/* to avoid deadlock in the path of data flush */
	if (F2FS_I(inode)->cp_task)
		return false;

	if (!S_ISREG(inode->i_mode))
		return false;
	if (IS_NOQUOTA(inode))
		return false;

	if (f2fs_need_compress_data(inode))
		return true;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
			wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * If some pages were truncated, we cannot rely on mapping->host
	 * to detect pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (IS_NOQUOTA(inode))
		return;

	/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
	if (to > i_size && !f2fs_verity_in_progress(inode)) {
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

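/*
 * prepare_write_begin() resolves the block address that a
 * ->write_begin() request will touch, converting inline data or
 * allocating a block when needed, so that f2fs_write_begin() knows
 * whether it must read the old contents for a partial-page update.
 */
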
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0, 0, 0};
	int err = 0;
	int flag;

	/*
	 * We already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
	    !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
	    !f2fs_verity_in_progress(inode))
		return 0;

	/* f2fs_lock_op avoids a race between write CP and convert_inline_page */
	if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
		flag = F2FS_GET_BLOCK_DEFAULT;
	else
		flag = F2FS_GET_BLOCK_PRE_AIO;

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		f2fs_do_map_lock(sbi, flag, true);
		locked = true;
	}

restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		f2fs_do_map_lock(sbi, flag, false);
	return err;
}

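/*
 * Read-modify-write shortcuts in f2fs_write_begin() below: a full-page
 * write (len == PAGE_SIZE), an already-uptodate page, or an aligned
 * write extending past i_size can all skip reading the old block; only
 * a true partial overwrite of existing data needs
 * f2fs_submit_page_read().
 */
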
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto fail;
	}

	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on the inode
	 * page and page #0. The locking rule for inline_data conversion
	 * should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode)) {
		int ret;

		*fsdata = NULL;

		ret = f2fs_prepare_compress_overwrite(inode, pagep,
							index, fsdata);
		if (ret < 0) {
			err = ret;
			goto fail;
		} else if (ret) {
			return 0;
		}
	}
#endif

repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. We will wait for that below with our own IO
	 * control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	/* TODO: cluster can be compressed due to race with .writepage */

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false, true);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto fail;
		}
		err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, and we expect copied to be
	 * PAGE_SIZE. Otherwise, we treat it as zero copied and let
	 * generic_perform_write() try to copy the data again through copied=0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* overwrite compressed file */
	if (f2fs_compressed_file(inode) && fsdata) {
		f2fs_compress_write_end(inode, fsdata, page->index, copied);
		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);

		if (pos + copied > i_size_read(inode) &&
				!f2fs_verity_in_progress(inode))
			f2fs_i_size_write(inode, pos + copied);
		return copied;
	}
#endif

	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode))
		f2fs_i_size_write(inode, pos + copied);
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
		return 1;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;
		return 1;
	}
	return 0;
}

static void f2fs_dio_end_io(struct bio *bio)
{
	struct f2fs_private_dio *dio = bio->bi_private;

	dec_page_count(F2FS_I_SB(dio->inode),
			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	bio->bi_private = dio->orig_private;
	bio->bi_end_io = dio->orig_end_io;

	kfree(dio);

	bio_endio(bio);
}

static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
							loff_t file_offset)
{
	struct f2fs_private_dio *dio;
	bool write = (bio_op(bio) == REQ_OP_WRITE);

	dio = f2fs_kzalloc(F2FS_I_SB(inode),
			sizeof(struct f2fs_private_dio), GFP_NOFS);
	if (!dio)
		goto out;

	dio->inode = inode;
	dio->orig_end_io = bio->bi_end_io;
	dio->orig_private = bio->bi_private;
	dio->write = write;

	bio->bi_end_io = f2fs_dio_end_io;
	bio->bi_private = dio;

	inc_page_count(F2FS_I_SB(inode),
			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	submit_bio(bio);
	return;
out:
	bio->bi_status = BLK_STS_IOERR;
	bio_endio(bio);
}

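/*
 * Direct I/O holds i_gc_rwsem to keep GC from moving the blocks under
 * the transfer; out-of-place DIO writes additionally take the READ
 * side. Under IOCB_NOWAIT only trylocks are used and -EAGAIN is
 * returned on contention.
 */
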
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	int rw = iov_iter_rw(iter);
	int err;
	enum rw_hint hint = iocb->ki_hint;
	int whint_mode = F2FS_OPTION(sbi).whint_mode;
	bool do_opu;

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, iocb, iter))
		return 0;

	do_opu = allow_outplace_dio(inode, iocb, iter);

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
		iocb->ki_hint = WRITE_LIFE_NOT_SET;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
			up_read(&fi->i_gc_rwsem[rw]);
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
	} else {
		down_read(&fi->i_gc_rwsem[rw]);
		if (do_opu)
			down_read(&fi->i_gc_rwsem[READ]);
	}

	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
			iter, rw == WRITE ? get_data_block_dio_write :
			get_data_block_dio, NULL, f2fs_dio_submit_bio,
			rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
			DIO_SKIP_HOLES);

	if (do_opu)
		up_read(&fi->i_gc_rwsem[READ]);

	up_read(&fi->i_gc_rwsem[rw]);

	if (rw == WRITE) {
		if (whint_mode == WHINT_MODE_OFF)
			iocb->ki_hint = hint;
		if (err > 0) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
									err);
			set_inode_flag(inode, FI_UPDATE_WRITE);
		} else if (err == -EIOCBQUEUED) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
						count - iov_iter_count(iter));
		} else if (err < 0) {
			f2fs_write_failed(mapping, offset + count);
		}
	} else {
		if (err > 0)
			f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
		else if (err == -EIOCBQUEUED)
			f2fs_update_iostat(F2FS_I_SB(inode),
						APP_DIRECT_READ_IO,
						count - iov_iter_count(iter));
	}

out:
	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_SIZE || length != PAGE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	clear_cold_data(page);

	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return f2fs_drop_inmem_page(inode, page);

	f2fs_clear_page_private(page);
}

int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is a dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

	/* This is an atomic written page, keep Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	clear_cold_data(page);
	f2fs_clear_page_private(page);
	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (PageSwapCache(page))
		return __set_page_dirty_nobuffers(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * Previously, this page has been registered, so we just
		 * return here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct dnode_of_data dn;
	sector_t start_idx, blknr = 0;
	int ret;

	start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		return 0;

	if (dn.data_blkaddr != COMPRESS_ADDR) {
		dn.ofs_in_node += block - start_idx;
		blknr = f2fs_data_blkaddr(&dn);
		if (!__is_valid_data_blkaddr(blknr))
			blknr = 0;
	}

	f2fs_put_dnode(&dn);
	return blknr;
#else
	return 0;
#endif
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	sector_t blknr = 0;

	if (f2fs_has_inline_data(inode))
		goto out;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(block >= F2FS_I_SB(inode)->max_file_blocks))
		goto out;

	if (f2fs_compressed_file(inode)) {
		blknr = f2fs_bmap_compress(inode, block);
	} else {
		struct f2fs_map_blocks map;

		memset(&map, 0, sizeof(map));
		map.m_lblk = block;
		map.m_len = 1;
		map.m_next_pgofs = NULL;
		map.m_seg_type = NO_CHECK_TYPE;

		if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
			blknr = map.m_pblk;
	}
out:
	trace_f2fs_bmap(inode, block, blknr);
	return blknr;
}

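/*
 * Page migration support: an atomic-written page carries one extra
 * reference held by the inmem list, so f2fs_migrate_page() passes that
 * extra count to migrate_page_move_mapping() and repoints the
 * inmem_pages entry at the new page under fi->inmem_lock.
 */
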
#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock held */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/* one extra reference was held for the atomic_write page */
	extra_count = atomic_written ? 1 : 0;
	rc = migrate_page_move_mapping(mapping, newpage,
				page, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;

		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page)) {
		f2fs_set_page_private(newpage, page_private(page));
		f2fs_clear_page_private(page);
	}

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

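/*
 * swapon() support: swap does raw block I/O, so the file must be fully
 * allocated with no holes. The helpers below walk the file, register
 * its extents via add_swap_extent(), and reject any hole with
 * "swapfile has holes". The fast path applies when one memory page
 * maps to exactly one f2fs block (PAGE_SIZE == F2FS_BLKSIZE).
 */
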
#ifdef CONFIG_SWAP
static int check_swap_activate_fast(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	sector_t cur_lblock;
	sector_t last_lblock;
	sector_t pblock;
	sector_t lowest_pblock = -1;
	sector_t highest_pblock = 0;
	int nr_extents = 0;
	unsigned long nr_pblocks;
	u64 len;
	int ret;

	/*
	 * Map all the blocks into the extent list. This code doesn't try
	 * to be very smart.
	 */
	cur_lblock = 0;
	last_lblock = bytes_to_blks(inode, i_size_read(inode));
	len = i_size_read(inode);

	while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
		struct f2fs_map_blocks map;
		pgoff_t next_pgofs;

		cond_resched();

		memset(&map, 0, sizeof(map));
		map.m_lblk = cur_lblock;
		map.m_len = bytes_to_blks(inode, len) - cur_lblock;
		map.m_next_pgofs = &next_pgofs;
		map.m_seg_type = NO_CHECK_TYPE;

		ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
		if (ret)
			goto out;

		/* hole */
		if (!(map.m_flags & F2FS_MAP_FLAGS))
			goto err_out;

		pblock = map.m_pblk;
		nr_pblocks = map.m_len;

		if (cur_lblock + nr_pblocks >= sis->max)
			nr_pblocks = sis->max - cur_lblock;

		if (cur_lblock) {	/* exclude the header page */
			if (pblock < lowest_pblock)
				lowest_pblock = pblock;
			if (pblock + nr_pblocks - 1 > highest_pblock)
				highest_pblock = pblock + nr_pblocks - 1;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		cur_lblock += nr_pblocks;
	}
	ret = nr_extents;
	*span = 1 + highest_pblock - lowest_pblock;
	if (cur_lblock == 0)
		cur_lblock = 1;	/* force Empty message */
	sis->max = cur_lblock;
	sis->pages = cur_lblock - 1;
	sis->highest_bit = cur_lblock - 1;
out:
	return ret;
err_out:
	pr_err("swapon: swapfile has holes\n");
	return -EINVAL;
}

/* Copied from generic_swapfile_activate() to check any holes */
static int check_swap_activate(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	if (PAGE_SIZE == F2FS_BLKSIZE)
		return check_swap_activate_fast(sis, swap_file, span);

	blocks_per_page = bytes_to_blks(inode, PAGE_SIZE);

	/*
	 * Map all the blocks into the extent list. This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = bytes_to_blks(inode, i_size_read(inode));
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;
		sector_t block = 0;
		int err = 0;

		cond_resched();

		block = probe_block;
		err = bmap(inode, &block);
		if (err || !block)
			goto bad_bmap;
		first_block = block;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {

			block = probe_block + block_in_page;
			err = bmap(inode, &block);

			if (err || !block)
				goto bad_bmap;

			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - inode->i_blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* force Empty message */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
out:
	return ret;
bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	return -EINVAL;
}

static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
		return -EROFS;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	if (!f2fs_disable_compressed_file(inode))
		return -EINVAL;

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;

	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_precache_extents(inode);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return ret;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readahead	= f2fs_readahead,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
	.swap_activate	= f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
	.migratepage	= f2fs_migrate_page,
#endif
};

void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache =
		kmem_cache_create("f2fs_bio_post_read_ctx",
				 sizeof(struct bio_post_read_ctx), 0, 0, NULL);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}

int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (!f2fs_sb_has_encrypt(sbi) &&
		!f2fs_sb_has_verity(sbi) &&
		!f2fs_sb_has_compression(sbi))
		return 0;

	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
						 WQ_UNBOUND | WQ_HIGHPRI,
						 num_online_cpus());
	if (!sbi->post_read_wq)
		return -ENOMEM;
	return 0;
}

void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (sbi->post_read_wq)
		destroy_workqueue(sbi->post_read_wq);
}

int __init f2fs_init_bio_entry_cache(void)
{
	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
					sizeof(struct bio_entry));
	if (!bio_entry_slab)
		return -ENOMEM;
	return 0;
}

void f2fs_destroy_bio_entry_cache(void)
{
	kmem_cache_destroy(bio_entry_slab);
}