// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blk-crypto.h>
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>
#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static struct kmem_cache *bio_entry_slab;
static mempool_t *bio_post_read_ctx_pool;
static struct bio_set f2fs_bioset;
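/*
 * bio_post_read_ctx_pool is a mempool carved out of bio_post_read_ctx_cache
 * (NUM_PREALLOC_POST_READ_CTXS entries are preallocated), so read completion
 * handling can always make forward progress under memory pressure; f2fs_bioset
 * likewise gives f2fs its own bio pool, independent of the global fs_bio_set.
 */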
#define	F2FS_BIO_POOL_SIZE	NR_CURSEG_TYPE

int __init f2fs_init_bioset(void)
{
	if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
					0, BIOSET_NEED_BVECS))
		return -ENOMEM;
	return 0;
}

void f2fs_destroy_bioset(void)
{
	bioset_exit(&f2fs_bioset);
}
static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask,
						unsigned int nr_iovecs)
{
	return bio_alloc_bioset(gfp_mask, nr_iovecs, &f2fs_bioset);
}
struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio)
{
	if (noio) {
		/* No failure on bio allocation */
		return __f2fs_bio_alloc(GFP_NOIO, npages);
	}

	if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
		f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO);
		return NULL;
	}

	return __f2fs_bio_alloc(GFP_KERNEL, npages);
}
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	if (f2fs_is_compressed_page(page))
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino == F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
			is_cold_data(page))
		return true;
	return false;
}
static enum count_type __read_io_type(struct page *page)
{
	struct address_space *mapping = page_file_mapping(page);

	if (mapping) {
		struct inode *inode = mapping->host;
		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

		if (inode->i_ino == F2FS_META_INO(sbi))
			return F2FS_RD_META;

		if (inode->i_ino == F2FS_NODE_INO(sbi))
			return F2FS_RD_NODE;
	}
	return F2FS_RD_DATA;
}
/* postprocessing steps for read bios */
enum bio_post_read_step {
#ifdef CONFIG_FS_ENCRYPTION
	STEP_DECRYPT	= 1 << 0,
#else
	STEP_DECRYPT	= 0,	/* compile out the decryption-related code */
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
	STEP_DECOMPRESS	= 1 << 1,
#else
	STEP_DECOMPRESS	= 0,	/* compile out the decompression-related code */
#endif
#ifdef CONFIG_FS_VERITY
	STEP_VERITY	= 1 << 2,
#else
	STEP_VERITY	= 0,	/* compile out the verity-related code */
#endif
};
struct bio_post_read_ctx {
	struct bio *bio;
	struct f2fs_sb_info *sbi;
	struct work_struct work;
	unsigned int enabled_steps;
};
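/*
 * A bio_post_read_ctx is attached to bio->bi_private by f2fs_grab_read_bio()
 * whenever at least one post-read step is needed (or the file is compressed),
 * and is returned to bio_post_read_ctx_pool once the bio is finished.
 */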
static void f2fs_finish_read_bio(struct bio *bio)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;

	/*
	 * Update and unlock the bio's pagecache pages, and put the
	 * decompression context for any compressed pages.
	 */
	bio_for_each_segment_all(bv, bio, iter_all) {
		struct page *page = bv->bv_page;

		if (f2fs_is_compressed_page(page)) {
			if (bio->bi_status)
				f2fs_end_read_compressed_page(page, true);
			f2fs_put_page_dic(page);
			continue;
		}

		/* PG_error was set if decryption or verity failed. */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
			/* will re-read again later */
			ClearPageError(page);
		} else {
			SetPageUptodate(page);
		}
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
		unlock_page(page);
	}

	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}
static void f2fs_verify_bio(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);
	struct bio *bio = ctx->bio;
	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);

	/*
	 * fsverity_verify_bio() may call readpages() again, and while verity
	 * will be disabled for this, decryption and/or decompression may still
	 * be needed, resulting in another bio_post_read_ctx being allocated.
	 * So to prevent deadlocks we need to release the current ctx to the
	 * mempool first.  This assumes that verity is the last post-read step.
	 */
	mempool_free(ctx, bio_post_read_ctx_pool);
	bio->bi_private = NULL;

	/*
	 * Verify the bio's pages with fs-verity.  Exclude compressed pages,
	 * as those were handled separately by f2fs_end_read_compressed_page().
	 */
	if (may_have_compressed_pages) {
		struct bio_vec *bv;
		struct bvec_iter_all iter_all;

		bio_for_each_segment_all(bv, bio, iter_all) {
			struct page *page = bv->bv_page;

			if (!f2fs_is_compressed_page(page) &&
			    !PageError(page) && !fsverity_verify_page(page))
				SetPageError(page);
		}
	} else {
		fsverity_verify_bio(bio);
	}

	f2fs_finish_read_bio(bio);
}
/*
 * If the bio's data needs to be verified with fs-verity, then enqueue the
 * verity work for the bio.  Otherwise finish the bio now.
 *
 * Note that to avoid deadlocks, the verity work can't be done on the
 * decryption/decompression workqueue.  This is because verifying the data pages
 * can involve reading verity metadata pages from the file, and these verity
 * metadata pages may be encrypted and/or compressed.
 */
static void f2fs_verify_and_finish_bio(struct bio *bio)
{
	struct bio_post_read_ctx *ctx = bio->bi_private;

	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
		INIT_WORK(&ctx->work, f2fs_verify_bio);
		fsverity_enqueue_verify_work(&ctx->work);
	} else {
		f2fs_finish_read_bio(bio);
	}
}
/*
 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
 * remaining page was read by @ctx->bio.
 *
 * Note that a bio may span clusters (even a mix of compressed and uncompressed
 * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
 * that the bio includes at least one compressed page.  The actual decompression
 * is done on a per-cluster basis, not a per-bio basis.
 */
static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;
	bool all_compressed = true;

	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
		struct page *page = bv->bv_page;

		/* PG_error was set if decryption failed. */
		if (f2fs_is_compressed_page(page))
			f2fs_end_read_compressed_page(page, PageError(page));
		else
			all_compressed = false;
	}

	/*
	 * Optimization: if all the bio's pages are compressed, then scheduling
	 * the per-bio verity work is unnecessary, as verity will be fully
	 * handled at the compression cluster level.
	 */
	if (all_compressed)
		ctx->enabled_steps &= ~STEP_VERITY;
}
static void f2fs_post_read_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	if (ctx->enabled_steps & STEP_DECRYPT)
		fscrypt_decrypt_bio(ctx->bio);

	if (ctx->enabled_steps & STEP_DECOMPRESS)
		f2fs_handle_step_decompress(ctx);

	f2fs_verify_and_finish_bio(ctx->bio);
}
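/*
 * Read completion: fault injection may force an error; failed bios are
 * finished immediately, while decryption/decompression work is punted to
 * sbi->post_read_wq, and verity work (if any) is queued from there afterwards.
 */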
static void f2fs_read_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
	struct bio_post_read_ctx *ctx = bio->bi_private;

	if (time_to_inject(sbi, FAULT_READ_IO)) {
		f2fs_show_injection_info(sbi, FAULT_READ_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	if (bio->bi_status) {
		f2fs_finish_read_bio(bio);
		return;
	}

	if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
		INIT_WORK(&ctx->work, f2fs_post_read_work);
		queue_work(ctx->sbi->post_read_wq, &ctx->work);
	} else {
		f2fs_verify_and_finish_bio(bio);
	}
}
static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
		f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_finalize_bounce_page(&page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_is_compressed_page(page)) {
			f2fs_compress_write_end_io(bio, page);
			continue;
		}
#endif

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}
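/*
 * On a multi-device f2fs, a global block address is translated below into a
 * (device, offset) pair: the matching device's start_blk is subtracted before
 * the sector is programmed into the bio.
 */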
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	if (f2fs_is_multi_device(sbi)) {
		for (i = 0; i < sbi->s_ndevs; i++) {
			if (FDEV(i).start_blk <= blk_addr &&
			    FDEV(i).end_blk >= blk_addr) {
				blk_addr -= FDEV(i).start_blk;
				bdev = FDEV(i).bdev;
				break;
			}
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}
int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}
/*
 * Return true, if pre_bio's bdev is same as its target device.
 */
static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct bio *bio;

	bio = f2fs_bio_alloc(sbi, npages, true);

	f2fs_target_device(sbi, fio->new_blkaddr, bio);
	if (is_read_io(fio->op)) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
						fio->type, fio->temp);
	}
	if (fio->io_wbc)
		wbc_init_bio(fio->io_wbc, bio);

	return bio;
}
static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
				  pgoff_t first_idx,
				  const struct f2fs_io_info *fio,
				  gfp_t gfp_mask)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (!fio || !fio->encrypted_page)
		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
}
static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
				     pgoff_t next_idx,
				     const struct f2fs_io_info *fio)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (fio && fio->encrypted_page)
		return !bio_has_crypt_ctx(bio);

	return fscrypt_mergeable_bio(bio, inode, next_idx);
}
static inline void __submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	if (!is_read_io(bio_op(bio))) {
		unsigned int start;

		if (type != DATA && type != NODE)
			goto submit_io;

		if (f2fs_lfs_mode(sbi) && current->plug)
			blk_finish_plug(current->plug);

		if (!F2FS_IO_ALIGNED(sbi))
			goto submit_io;

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
		start %= F2FS_IO_SIZE(sbi);

		if (start == 0)
			goto submit_io;

		/* fill dummy pages */
		for (; start < F2FS_IO_SIZE(sbi); start++) {
			struct page *page =
				mempool_alloc(sbi->write_io_dummy,
					      GFP_NOIO | __GFP_NOFAIL);
			f2fs_bug_on(sbi, !page);

			zero_user_segment(page, 0, PAGE_SIZE);
			SetPagePrivate(page);
			set_page_private(page, DUMMY_WRITTEN_PAGE);
			lock_page(page);
			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				f2fs_bug_on(sbi, 1);
		}
		/*
		 * In the NODE case, we lose next block address chain. So, we
		 * need to do checkpoint in f2fs_sync_file.
		 */
		if (type == NODE)
			set_sbi_flag(sbi, SBI_NEED_CP);
	}
submit_io:
	if (is_read_io(bio_op(bio)))
		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
	else
		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	submit_bio(bio);
}
void f2fs_submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	__submit_bio(sbi, bio, type);
}
static void __attach_io_flag(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
	unsigned int io_flag, fua_flag, meta_flag;

	if (fio->type == DATA)
		io_flag = sbi->data_io_flag;
	else if (fio->type == NODE)
		io_flag = sbi->node_io_flag;
	else
		return;

	fua_flag = io_flag & temp_mask;
	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;

	/*
	 * data/node io flag bits per temp:
	 *      REQ_META     |      REQ_FUA      |
	 *    5 |    4 |   3 |    2 |    1 |   0 |
	 * Cold | Warm | Hot | Cold | Warm | Hot |
	 */
	if ((1 << fio->temp) & meta_flag)
		fio->op_flags |= REQ_META;
	if ((1 << fio->temp) & fua_flag)
		fio->op_flags |= REQ_FUA;
}
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	__attach_io_flag(fio);
	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);

	if (is_read_io(fio->op))
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
	else
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

	__submit_bio(io->sbi, io->bio, fio->type);
	io->bio = NULL;
}
static bool __has_merged_page(struct bio *bio, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (!bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *target = bvec->bv_page;

		if (fscrypt_is_bounce_page(target)) {
			target = fscrypt_pagecache_page(target);
			if (IS_ERR(target))
				continue;
		}
		if (f2fs_is_compressed_page(target)) {
			target = f2fs_compress_control_page(target);
			if (IS_ERR(target))
				continue;
		}

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}
static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.op = REQ_OP_WRITE;
		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}
static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;
	bool ret = true;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		if (!force)	{
			enum page_type btype = PAGE_TYPE_OF_BIO(type);
			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

			down_read(&io->io_rwsem);
			ret = __has_merged_page(io->bio, inode, page, ino);
			up_read(&io->io_rwsem);
		}
		if (ret)
			__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}
/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			fio->is_por ? META_POR : (__is_meta_io(fio) ?
			META_GENERIC : DATA_GENERIC_ENHANCE)))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio, 1);

	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
			       fio->page->index, fio, GFP_NOIO);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}

	if (fio->io_wbc && !is_read_io(fio->op))
		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);

	__attach_io_flag(fio);
	bio_set_op_attrs(bio, fio->op, fio->op_flags);

	inc_page_count(fio->sbi, is_read_io(fio->op) ?
			__read_io_type(page): WB_DATA_TYPE(fio->page));

	__submit_bio(fio->sbi, bio, fio->type);
	return 0;
}
static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
				block_t last_blkaddr, block_t cur_blkaddr)
{
	if (unlikely(sbi->max_io_bytes &&
			bio->bi_iter.bi_size >= sbi->max_io_bytes))
		return false;
	if (last_blkaddr + 1 != cur_blkaddr)
		return false;
	return __same_bdev(sbi, cur_blkaddr, bio);
}
static bool io_type_is_mergeable(struct f2fs_bio_info *io,
						struct f2fs_io_info *fio)
{
	if (io->fio.op != fio->op)
		return false;
	return io->fio.op_flags == fio->op_flags;
}
static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
					struct f2fs_bio_info *io,
					struct f2fs_io_info *fio,
					block_t last_blkaddr,
					block_t cur_blkaddr)
{
	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
		unsigned int filled_blocks =
				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
		unsigned int io_size = F2FS_IO_SIZE(sbi);
		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;

		/* IOs in bio is aligned and left space of vectors is not enough */
		if (!(filled_blocks % io_size) && left_vecs < io_size)
			return false;
	}
	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
		return false;
	return io_type_is_mergeable(io, fio);
}
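/*
 * In-place-update (IPU) bios are kept open in per-temperature lists
 * (sbi->write_io[DATA][temp].bio_list) via bio_entry objects, so that later
 * IPU writes can still merge into them before they are finally submitted.
 */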
static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
				struct page *page, enum temp_type temp)
{
	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
	struct bio_entry *be;

	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
	be->bio = bio;
	bio_get(bio);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
		f2fs_bug_on(sbi, 1);

	down_write(&io->bio_list_lock);
	list_add_tail(&be->list, &io->bio_list);
	up_write(&io->bio_list_lock);
}
static void del_bio_entry(struct bio_entry *be)
{
	list_del(&be->list);
	kmem_cache_free(bio_entry_slab, be);
}
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
							struct page *page)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum temp_type temp;
	bool found = false;
	int ret = -EAGAIN;

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (be->bio != *bio)
				continue;

			found = true;

			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
							    *fio->last_block,
							    fio->new_blkaddr));
			if (f2fs_crypt_mergeable_bio(*bio,
					fio->page->mapping->host,
					fio->page->index, fio) &&
			    bio_add_page(*bio, page, PAGE_SIZE, 0) ==
					PAGE_SIZE) {
				ret = 0;
				break;
			}

			/* page can't be merged into bio; submit the bio */
			del_bio_entry(be);
			__submit_bio(sbi, *bio, DATA);
			break;
		}
		up_write(&io->bio_list_lock);
	}

	if (ret) {
		bio_put(*bio);
		*bio = NULL;
	}

	return ret;
}
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
					struct bio **bio, struct page *page)
{
	enum temp_type temp;
	bool found = false;
	struct bio *target = bio ? *bio : NULL;

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		if (list_empty(head))
			continue;

		down_read(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found)
				break;
		}
		up_read(&io->bio_list_lock);

		if (!found)
			continue;

		found = false;

		down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found) {
				target = be->bio;
				del_bio_entry(be);
				break;
			}
		}
		up_write(&io->bio_list_lock);
	}

	if (found)
		__submit_bio(sbi, target, DATA);
	if (bio && *bio) {
		bio_put(*bio);
		*bio = NULL;
	}
}
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio = *fio->bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
						fio->new_blkaddr))
		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
	if (!bio) {
		bio = __bio_alloc(fio, BIO_MAX_PAGES);
		__attach_io_flag(fio);
		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
				       fio->page->index, fio, GFP_NOIO);
		bio_set_op_attrs(bio, fio->op, fio->op_flags);

		add_bio_entry(fio->sbi, bio, page, fio->temp);
	} else {
		if (add_ipu_page(fio, &bio, page))
			goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);

	inc_page_count(fio->sbi, WB_DATA_TYPE(page));

	*fio->last_block = fio->new_blkaddr;
	*fio->bio = bio;

	return 0;
}
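/*
 * Merged write path: a caller may chain fios through io->io_list
 * (fio->in_list), in which case every queued fio is drained below under
 * io->io_rwsem and merged into io->bio whenever the block address, op flags
 * and crypto context allow.
 */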
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	if (fio->encrypted_page)
		bio_page = fio->encrypted_page;
	else if (fio->compressed_page)
		bio_page = fio->compressed_page;
	else
		bio_page = fio->page;

	/* set submitted = true as a return value */
	fio->submitted = true;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio &&
	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
			      fio->new_blkaddr) ||
	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
				       bio_page->index, fio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if (F2FS_IO_ALIGNED(sbi) &&
				(fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = true;
			goto skip;
		}
		io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
				       bio_page->index, fio, GFP_NOIO);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;
	f2fs_trace_ios(fio, 0);

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
				      unsigned nr_pages, unsigned op_flag,
				      pgoff_t first_idx, bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES),
			     for_write);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);

	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (fscrypt_inode_uses_fs_layer_crypto(inode))
		post_read_steps |= STEP_DECRYPT;

	if (f2fs_need_verity(inode, first_idx))
		post_read_steps |= STEP_VERITY;

	/*
	 * STEP_DECOMPRESS is handled specially, since a compressed file might
	 * contain both compressed and uncompressed clusters.  We'll allocate a
	 * bio_post_read_ctx if the file is compressed, but the caller is
	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
	 */
	if (post_read_steps || f2fs_compressed_file(inode)) {
		/* Due to the mempool, this never fails. */
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		ctx->bio = bio;
		ctx->sbi = sbi;
		ctx->enabled_steps = post_read_steps;
		bio->bi_private = ctx;
	}

	return bio;
}
/* This can handle encryption stuffs */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
				 block_t blkaddr, int op_flags, bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;

	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
					page->index, for_write);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	ClearPageError(page);
	inc_page_count(sbi, F2FS_RD_DATA);
	f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
	__submit_bio(sbi, bio, DATA);
	return 0;
}
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}
/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}
/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = f2fs_data_blkaddr(dn);
		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}
/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
	struct extent_info ei = {0, 0, 0};
	struct inode *inode = dn->inode;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
	}

	return f2fs_reserve_block(dn, index);
}
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
						int op_flags, bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0, 0, 0};
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto put_err;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_err;
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		goto put_err;
	}
	if (dn.data_blkaddr != NEW_ADDR &&
			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						dn.data_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such the case, its blkaddr can be remained as NEW_ADDR.
	 * see, f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
						op_flags, for_write);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}
struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}
/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = f2fs_get_read_data_page(inode, index, 0, for_write);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}
/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that, ipage is set only by make_empty_dir, and if any error occur,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occur.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	dn->data_blkaddr = f2fs_data_blkaddr(dn);
	if (dn->data_blkaddr != NULL_ADDR)
		goto alloc;

	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

alloc:
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
					&sum, seg_type, NULL);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);

	/*
	 * i_size will be updated by direct_IO. Otherwise, we'll get stale
	 * data from unwritten block via dio_read.
	 */
	return 0;
}
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_map_blocks map;
	int flag;
	int err = 0;
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;

	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = true;

	if (direct_io) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
		flag = f2fs_force_buffered_io(inode, iocb, from) ?
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}
	if (f2fs_has_inline_data(inode))
		return err;

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
	}
	return err;
}
void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}
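/*
 * Note (added commentary, inferred from the code above): PRE_AIO reservation
 * only writes NEW_ADDR into the dnode, so it appears that excluding concurrent
 * node-page changes (sbi->node_change, taken for read) is sufficient, while
 * the other flags may allocate real blocks and therefore need the full
 * f2fs_lock_op() checkpoint protection.
 */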
/*
 * f2fs_map_blocks() tries to find or build mapping relationship which
 * maps continuous logical blocks to physical blocks, and return such
 * info via f2fs_map_blocks structure.
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	struct extent_info ei = {0, 0, 0};
	block_t blkaddr;
	unsigned int start_pgofs;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create)
			goto next_dnode;

		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + map->m_len;

		/* for hardware encryption, but to avoid potential issue in future */
		if (flag == F2FS_GET_BLOCK_DIO)
			f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);
		goto out;
	}

next_dnode:
	if (map->m_may_create)
		f2fs_do_map_lock(sbi, flag, true);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT) {
			err = 0;
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					f2fs_get_next_page_offset(&dn, pgofs);
			if (map->m_next_extent)
				*map->m_next_extent =
					f2fs_get_next_page_offset(&dn, pgofs);
		}
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = f2fs_data_blkaddr(&dn);

	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto sync_out;
	}

	if (__is_valid_data_blkaddr(blkaddr)) {
		/* use out-place-update for driect IO under LFS mode */
		if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create) {
			err = __allocate_data_block(&dn, map->m_seg_type);
			if (err)
				goto sync_out;
			blkaddr = dn.data_blkaddr;
			set_inode_flag(inode, FI_APPEND_WRITE);
		}
	} else {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR) {
					prealloc++;
					last_ofs_in_node = dn.ofs_in_node;
				}
			} else {
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
					flag != F2FS_GET_BLOCK_DIO);
				err = __allocate_data_block(&dn,
							map->m_seg_type);
				if (!err)
					set_inode_flag(inode, FI_APPEND_WRITE);
			}
			if (err)
				goto sync_out;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		} else {
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRECACHE)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			if (flag != F2FS_GET_BLOCK_FIEMAP) {
				/* for defragment case */
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (map->m_may_create) {
		f2fs_do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:

	/* for hardware encryption, but to avoid potential issue in future */
	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
		f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (map->m_may_create) {
		f2fs_do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}
static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
{
	return (bytes >> inode->i_blkbits);
}

static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
{
	return (blks << inode->i_blkbits);
}
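/*
 * Both helpers rely on inode->i_blkbits; since the f2fs block size always
 * equals the page size (see the "block size == page size" note in
 * f2fs_map_blocks()), these are effectively PAGE_SHIFT conversions.
 */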
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs, int seg_type, bool may_write)
{
	struct f2fs_map_blocks map;
	int err;

	map.m_lblk = iblock;
	map.m_len = bytes_to_blks(inode, bh->b_size);
	map.m_next_pgofs = next_pgofs;
	map.m_next_extent = NULL;
	map.m_seg_type = seg_type;
	map.m_may_create = may_write;

	err = f2fs_map_blocks(inode, &map, create, flag);
	if (!err) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = blks_to_bytes(inode, map.m_len);
	}
	return err;
}
*inode
, sector_t iblock
,
1755 struct buffer_head
*bh_result
, int create
)
1757 return __get_data_block(inode
, iblock
, bh_result
, create
,
1758 F2FS_GET_BLOCK_DIO
, NULL
,
1759 f2fs_rw_hint_to_seg_type(inode
->i_write_hint
),
1760 IS_SWAPFILE(inode
) ? false : true);
static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				false);
}
static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = blks_to_bytes(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
		if (err || err == 1)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = blks_to_bytes(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys) {
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
	}

	return (err < 0 ? err : 0);
}
static loff_t max_inode_blocks(struct inode *inode)
{
	loff_t result = ADDRS_PER_INODE(inode);
	loff_t leaf_count = ADDRS_PER_BLOCK(inode);

	/* two direct node blocks */
	result += (leaf_count * 2);

	/* two indirect node blocks */
	leaf_count *= NIDS_PER_BLOCK;
	result += (leaf_count * 2);

	/* one double indirect node block */
	leaf_count *= NIDS_PER_BLOCK;
	result += leaf_count;

	return result;
}
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct f2fs_map_blocks map;
	sector_t start_blk, last_blk;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;
	bool compr_cluster = false;
	unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
	if (ret)
		return ret;

	inode_lock(inode);

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			goto out;
	}

	if (bytes_to_blks(inode, len) == 0)
		len = blks_to_bytes(inode, 1);

	start_blk = bytes_to_blks(inode, start);
	last_blk = bytes_to_blks(inode, start + len - 1);

next:
	memset(&map, 0, sizeof(map));
	map.m_lblk = start_blk;
	map.m_len = bytes_to_blks(inode, len);
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = NO_CHECK_TYPE;

	if (compr_cluster)
		map.m_len = cluster_size - 1;

	ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
	if (ret)
		goto out;

	/* HOLE */
	if (!(map.m_flags & F2FS_MAP_FLAGS)) {
		start_blk = next_pgofs;

		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
						max_inode_blocks(inode)))
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	if (size) {
		flags |= FIEMAP_EXTENT_MERGED;
		if (IS_ENCRYPTED(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
		trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
		if (ret)
			goto out;
		size = 0;
	}

	if (start_blk > last_blk)
		goto out;

	if (compr_cluster) {
		compr_cluster = false;

		logical = blks_to_bytes(inode, start_blk - 1);
		phys = blks_to_bytes(inode, map.m_pblk);
		size = blks_to_bytes(inode, cluster_size);

		flags |= FIEMAP_EXTENT_ENCODED;

		start_blk += cluster_size - 1;

		if (start_blk > last_blk)
			goto out;

		goto prep_next;
	}

	if (map.m_pblk == COMPRESS_ADDR) {
		compr_cluster = true;
		start_blk++;
		goto prep_next;
	}

	logical = blks_to_bytes(inode, start_blk);
	phys = blks_to_bytes(inode, map.m_pblk);
	size = blks_to_bytes(inode, map.m_len);
	flags = 0;
	if (map.m_flags & F2FS_MAP_UNWRITTEN)
		flags = FIEMAP_EXTENT_UNWRITTEN;

	start_blk += bytes_to_blks(inode, size);

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}
static inline loff_t f2fs_readpage_limit(struct inode *inode)
{
	if (IS_ENABLED(CONFIG_FS_VERITY) &&
	    (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
		return inode->i_sb->s_maxbytes;

	return i_size_read(inode);
}
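/*
 * fs-verity stores its Merkle tree past i_size, so while verity is enabled or
 * being built, the read path must be allowed to fetch pages beyond EOF; hence
 * the limit is lifted to s_maxbytes above.
 */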
static int f2fs_read_single_page(struct inode *inode, struct page *page,
					unsigned nr_pages,
					struct f2fs_map_blocks *map,
					struct bio **bio_ret,
					sector_t *last_block_in_bio,
					bool is_readahead)
{
	struct bio *bio = *bio_ret;
	const unsigned blocksize = blks_to_bytes(inode, 1);
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	int ret = 0;

	block_in_file = (sector_t)page_index(page);
	last_block = block_in_file + nr_pages;
	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);
	if (last_block > last_block_in_file)
		last_block = last_block_in_file;

	/* just zeroing out page which is beyond EOF */
	if (block_in_file >= last_block)
		goto zero_out;
	/*
	 * Map blocks using the previous result first.
	 */
	if ((map->m_flags & F2FS_MAP_MAPPED) &&
			block_in_file > map->m_lblk &&
			block_in_file < (map->m_lblk + map->m_len))
		goto got_it;

	/*
	 * Then do more f2fs_map_blocks() calls until we are
	 * done with this page.
	 */
	map->m_lblk = block_in_file;
	map->m_len = last_block - block_in_file;

	ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
	if (ret)
		goto out;
got_it:
	if ((map->m_flags & F2FS_MAP_MAPPED)) {
		block_nr = map->m_pblk + block_in_file - map->m_lblk;
		SetPageMappedToDisk(page);

		if (!PageUptodate(page) && (!PageSwapCache(page) &&
					!cleancache_get_page(page))) {
			SetPageUptodate(page);
			goto confused;
		}

		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
						DATA_GENERIC_ENHANCE_READ)) {
			ret = -EFSCORRUPTED;
			goto out;
		}
	} else {
zero_out:
		zero_user_segment(page, 0, PAGE_SIZE);
		if (f2fs_need_verity(inode, page->index) &&
		    !fsverity_verify_page(page)) {
			ret = -EIO;
			goto out;
		}
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		goto out;
	}

	/*
	 * This page will go to BIO.  Do we need to send this
	 * BIO off first?
	 */
	if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
				       *last_block_in_bio, block_nr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	if (bio == NULL) {
		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
				is_readahead ? REQ_RAHEAD : 0, page->index,
				false);
		if (IS_ERR(bio)) {
			ret = PTR_ERR(bio);
			bio = NULL;
			goto out;
		}
	}

	/*
	 * If the page is under writeback, we need to wait for
	 * its completion to see the correct decrypted data.
	 */
	f2fs_wait_on_block_writeback(inode, block_nr);

	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
		goto submit_and_realloc;

	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
	f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
	ClearPageError(page);
	*last_block_in_bio = block_nr;
	goto out;
confused:
	if (bio) {
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	unlock_page(page);
out:
	*bio_ret = bio;
	return ret;
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
				unsigned nr_pages, sector_t *last_block_in_bio,
				bool is_readahead, bool for_write)
{
	struct dnode_of_data dn;
	struct inode *inode = cc->inode;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio = *bio_ret;
	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
	sector_t last_block_in_file;
	const unsigned blocksize = blks_to_bytes(inode, 1);
	struct decompress_io_ctx *dic = NULL;
	int i;
	int ret = 0;

	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));

	last_block_in_file = bytes_to_blks(inode,
			f2fs_readpage_limit(inode) + blocksize - 1);

	/* get rid of pages beyond EOF */
	for (i = 0; i < cc->cluster_size; i++) {
		struct page *page = cc->rpages[i];

		if (!page)
			continue;
		if ((sector_t)page->index >= last_block_in_file) {
			zero_user_segment(page, 0, PAGE_SIZE);
			if (!PageUptodate(page))
				SetPageUptodate(page);
		} else if (!PageUptodate(page)) {
			continue;
		}
		unlock_page(page);
		cc->rpages[i] = NULL;
		cc->nr_rpages--;
	}

	/* we are done since all pages are beyond EOF */
	if (f2fs_cluster_is_empty(cc))
		goto out;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		goto out;

	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);

	for (i = 1; i < cc->cluster_size; i++) {
		block_t blkaddr;

		blkaddr = data_blkaddr(dn.inode, dn.node_page,
						dn.ofs_in_node + i);

		if (!__is_valid_data_blkaddr(blkaddr))
			break;

		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
			ret = -EFAULT;
			goto out_put_dnode;
		}
		cc->nr_cpages++;
	}

	/* nothing to decompress */
	if (cc->nr_cpages == 0) {
		ret = 0;
		goto out_put_dnode;
	}

	dic = f2fs_alloc_dic(cc);
	if (IS_ERR(dic)) {
		ret = PTR_ERR(dic);
		goto out_put_dnode;
	}

	for (i = 0; i < dic->nr_cpages; i++) {
		struct page *page = dic->cpages[i];
		block_t blkaddr;
		struct bio_post_read_ctx *ctx;

		blkaddr = data_blkaddr(dn.inode, dn.node_page,
						dn.ofs_in_node + i + 1);

		if (bio && (!page_is_mergeable(sbi, bio,
					*last_block_in_bio, blkaddr) ||
		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
			__submit_bio(sbi, bio, DATA);
			bio = NULL;
		}

		if (!bio) {
			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0,
					page->index, for_write);
			if (IS_ERR(bio)) {
				ret = PTR_ERR(bio);
				f2fs_decompress_end_io(dic, ret);
				f2fs_put_dnode(&dn);
				*bio_ret = NULL;
				return ret;
			}
		}

		f2fs_wait_on_block_writeback(inode, blkaddr);

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		ctx = bio->bi_private;
		ctx->enabled_steps |= STEP_DECOMPRESS;
		refcount_inc(&dic->refcnt);

		inc_page_count(sbi, F2FS_RD_DATA);
		f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
		f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
		ClearPageError(page);
		*last_block_in_bio = blkaddr;
	}

	f2fs_put_dnode(&dn);

	*bio_ret = bio;
	return 0;

out_put_dnode:
	f2fs_put_dnode(&dn);
out:
	for (i = 0; i < cc->cluster_size; i++) {
		if (cc->rpages[i]) {
			ClearPageUptodate(cc->rpages[i]);
			ClearPageError(cc->rpages[i]);
			unlock_page(cc->rpages[i]);
		}
	}
	*bio_ret = bio;
	return ret;
}
#endif
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
 */
static int f2fs_mpage_readpages(struct inode *inode,
		struct readahead_control *rac, struct page *page)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct f2fs_map_blocks map;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.cluster_idx = NULL_CLUSTER,
		.rpages = NULL,
		.cpages = NULL,
		.nr_rpages = 0,
		.nr_cpages = 0,
	};
#endif
	unsigned nr_pages = rac ? readahead_count(rac) : 1;
	unsigned max_nr_pages = nr_pages;
	int ret = 0;
	bool drop_ra = false;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;

	/*
	 * Two readahead threads for same address range can cause race condition
	 * which fragments sequential read IOs. So let's avoid each other.
	 */
	if (rac && readahead_count(rac)) {
		if (READ_ONCE(F2FS_I(inode)->ra_offset) == readahead_index(rac))
			drop_ra = true;
		else
			WRITE_ONCE(F2FS_I(inode)->ra_offset,
						readahead_index(rac));
	}

	for (; nr_pages; nr_pages--) {
		if (rac) {
			page = readahead_page(rac);
			prefetchw(&page->flags);
			if (drop_ra) {
				f2fs_put_page(page, 1);
				continue;
			}
		}

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_compressed_file(inode)) {
			/* there are remained comressed pages, submit them */
			if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							rac != NULL, false);
				f2fs_destroy_compress_ctx(&cc);
				if (ret)
					goto set_error_page;
			}
			ret = f2fs_is_compressed_cluster(inode, page->index);
			if (ret < 0)
				goto set_error_page;
			else if (!ret)
				goto read_single_page;

			ret = f2fs_init_compress_ctx(&cc);
			if (ret)
				goto set_error_page;

			f2fs_compress_ctx_add_page(&cc, page);

			goto next_page;
		}
read_single_page:
#endif

		ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
					&bio, &last_block_in_bio, rac);
		if (ret) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
set_error_page:
#endif
			SetPageError(page);
			zero_user_segment(page, 0, PAGE_SIZE);
			unlock_page(page);
		}
#ifdef CONFIG_F2FS_FS_COMPRESSION
next_page:
#endif
		if (rac)
			put_page(page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_compressed_file(inode)) {
			/* last page */
			if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
				ret = f2fs_read_multi_pages(&cc, &bio,
							max_nr_pages,
							&last_block_in_bio,
							rac != NULL, false);
				f2fs_destroy_compress_ctx(&cc);
			}
		}
#endif
	}
	if (bio)
		__submit_bio(F2FS_I_SB(inode), bio, DATA);

	if (rac && readahead_count(rac) && !drop_ra)
		WRITE_ONCE(F2FS_I(inode)->ra_offset, -1);
	return ret;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(page, DATA);

	if (!f2fs_is_compress_backend_ready(inode)) {
		unlock_page(page);
		return -EOPNOTSUPP;
	}

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(inode, NULL, page);
	return ret;
}
static void f2fs_readahead(struct readahead_control *rac)
{
	struct inode *inode = rac->mapping->host;

	trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));

	if (!f2fs_is_compress_backend_ready(inode))
		return;

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return;

	f2fs_mpage_readpages(inode, rac, NULL);
}
int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage, *page;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
		return 0;

	page = fio->compressed_page ? fio->compressed_page : fio->page;

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);

	if (fscrypt_inode_uses_inline_crypto(inode))
		return 0;

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
					PAGE_SIZE, 0, gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}
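/*
 * The memcpy into the META_MAPPING page above keeps any cached copy of this
 * block (used while GC migrates encrypted blocks) coherent with the freshly
 * encrypted data.
 */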

static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int policy = SM_I(sbi)->ipu_policy;

	if (policy & (0x1 << F2FS_IPU_FORCE))
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
		return true;
	if (policy & (0x1 << F2FS_IPU_UTIL) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewrite async pages
	 */
	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
			fio && fio->op == REQ_OP_WRITE &&
			!(fio->op_flags & REQ_SYNC) &&
			!IS_ENCRYPTED(inode))
		return true;

	/* this is only set during fdatasync */
	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
			is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
		return true;

	return false;
}
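
/*
 * ipu_policy is a bit mask, so several triggers can be armed at once.
 * For example (assuming the F2FS_IPU_* enum order in f2fs.h, where
 * F2FS_IPU_FORCE is bit 0 and F2FS_IPU_SSR is bit 1, and assuming the
 * sysfs knob takes the raw mask):
 *
 *	echo 3 > /sys/fs/f2fs/<dev>/ipu_policy
 *
 * would set both F2FS_IPU_FORCE and F2FS_IPU_SSR, i.e. always prefer
 * in-place updates and additionally allow them whenever SSR is needed.
 */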

bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
	if (f2fs_is_pinned_file(inode))
		return true;

	/* if this is cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode))
		return true;

	return check_inplace_update_policy(inode, fio);
}

bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (f2fs_lfs_mode(sbi))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (IS_NOQUOTA(inode))
		return true;
	if (f2fs_is_atomic_file(inode))
		return true;
	if (fio) {
		if (is_cold_data(fio->page))
			return true;
		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
			return true;
		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
			return true;
	}
	return false;
}

static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

	if (f2fs_should_update_outplace(inode, fio))
		return false;

	return f2fs_should_update_inplace(inode, fio);
}

int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0, 0, 0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE))
			return -EFSCORRUPTED;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* Deadlock due to between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		ClearPageUptodate(page);
		clear_cold_data(page);
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto out_writepage;
	}
	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
	if (ipu_force ||
		(__is_valid_data_blkaddr(fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = f2fs_encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
		ClearPageError(page);
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		if (err) {
			if (fscrypt_inode_uses_fs_layer_crypto(inode))
				fscrypt_finalize_bounce_page(
						&fio->encrypted_page);
			if (PageWriteback(page))
				end_page_writeback(page);
		} else {
			set_inode_flag(inode, FI_UPDATE_WRITE);
		}
		trace_f2fs_do_write_data_page(fio->page, IPU);
		return err;
	}

	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = f2fs_encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
	ClearPageError(page);

	if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
		f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}
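
/*
 * In short, f2fs_do_write_data_page() picks one of two paths:
 *	- IPU: rewrite the block at fio->old_blkaddr in place via
 *	  f2fs_inplace_write_data(), used when need_inplace_update()
 *	  holds; it avoids new block allocation and fragmentation.
 *	- OPU: allocate a new block and write out of place via
 *	  f2fs_outplace_write_data(), the normal log-structured path,
 *	  which also sets FI_APPEND_WRITE for fsync bookkeeping.
 */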

int f2fs_write_single_data_page(struct page *page, int *submitted,
				struct bio **bio,
				sector_t *last_block,
				struct writeback_control *wbc,
				enum iostat_type io_type,
				int compr_blocks)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long)i_size)
							>> PAGE_SHIFT;
	loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.compr_blocks = compr_blocks,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
		.bio = bio,
		.last_block = last_block,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to proceed the kworker jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (page->index < end_index ||
			f2fs_verity_in_progress(inode) ||
			compr_blocks)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write 0'th page having journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry/quota blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
		/*
		 * We need to wait for node_write to avoid block allocation
		 * during checkpoint. This can only happen to quota writes
		 * which can cause the below discard race condition.
		 */
		if (IS_NOQUOTA(inode))
			down_read(&sbi->node_write);

		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);

		if (IS_NOQUOTA(inode))
			up_read(&sbi->node_write);

		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		spin_lock(&F2FS_I(inode)->i_size_lock);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		spin_unlock(&F2FS_I(inode)->i_size_lock);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err) {
		ClearPageUptodate(page);
		clear_cold_data(page);
	}

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}
	unlock_page(page);
	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
			!F2FS_I(inode)->cp_task)
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		f2fs_submit_merged_ipu_write(sbi, bio, NULL);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted ? 1 : 0;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see EIO error, which is critical
	 * to return value of fsync() followed by atomic_write failure to user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}
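
/*
 * Returning AOP_WRITEPAGE_ACTIVATE from the redirty path tells the
 * caller the page was not written and should go back to the active
 * list; the page is returned still locked in that case, so
 * f2fs_write_cache_pages() below unlocks it explicitly.
 */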

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct inode *inode = page->mapping->host;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		goto out;

	if (f2fs_compressed_file(inode)) {
		if (f2fs_is_compressed_cluster(inode, page->index)) {
			redirty_page_for_writepage(wbc, page);
			return AOP_WRITEPAGE_ACTIVATE;
		}
	}
out:
#endif

	return f2fs_write_single_data_page(page, NULL, NULL, NULL,
						wbc, FS_DATA_IO, 0);
}

/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is making write step of cold data page separately from
 * warm/hot data page.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0, retry = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct bio *bio = NULL;
	sector_t last_block;
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct inode *inode = mapping->host;
	struct compress_ctx cc = {
		.inode = inode,
		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
		.cluster_size = F2FS_I(inode)->i_cluster_size,
		.cluster_idx = NULL_CLUSTER,
		.rpages = NULL,
		.nr_rpages = 0,
		.cpages = NULL,
		.rbuf = NULL,
		.cbuf = NULL,
		.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
		.private = NULL,
	};
#endif
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int range_whole = 0;
	xa_mark_t tag;
	int nwritten = 0;
	int submitted = 0;
	int i;

	pagevec_init(&pvec);

	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	retry = 0;
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && !retry && (index <= end)) {
		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool need_readd;
readd:
			need_readd = false;
#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				ret = f2fs_init_compress_ctx(&cc);
				if (ret) {
					done = 1;
					break;
				}

				if (!f2fs_cluster_can_merge_page(&cc,
								page->index)) {
					ret = f2fs_write_multi_pages(&cc,
						&submitted, wbc, io_type);
					if (!ret)
						need_readd = true;
					goto result;
				}

				if (unlikely(f2fs_cp_error(sbi)))
					goto lock_page;

				if (f2fs_cluster_is_empty(&cc)) {
					void *fsdata = NULL;
					struct page *pagep;
					int ret2;

					ret2 = f2fs_prepare_compress_overwrite(
							inode, &pagep,
							page->index, &fsdata);
					if (ret2 < 0) {
						ret = ret2;
						done = 1;
						break;
					} else if (ret2 &&
						!f2fs_compress_write_end(inode,
								fsdata, page->index,
								1)) {
						retry = 1;
						break;
					}
				} else {
					goto lock_page;
				}
			}
#endif
			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
#ifdef CONFIG_F2FS_FS_COMPRESSION
lock_page:
#endif
			done_index = page->index;
retry_write:
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page,
							DATA, true, true);
				else
					goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				get_page(page);
				f2fs_compress_ctx_add_page(&cc, page);
				continue;
			}
#endif
			ret = f2fs_write_single_data_page(page, &submitted,
					&bio, &last_block, wbc, io_type, 0);
			if (ret == AOP_WRITEPAGE_ACTIVATE)
				unlock_page(page);
#ifdef CONFIG_F2FS_FS_COMPRESSION
result:
#endif
			nwritten += submitted;
			wbc->nr_to_write -= submitted;

			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					ret = 0;
					goto next;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
							DEFAULT_IO_TIMEOUT);
						goto retry_write;
					}
					goto next;
				}
				done_index = page->index + 1;
				done = 1;
				break;
			}

			if (wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
next:
			if (need_readd)
				goto readd;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* flush remaining pages in compress cluster */
	if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
		ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
		nwritten += submitted;
		wbc->nr_to_write -= submitted;
		if (ret) {
			done = 1;
			retry = 0;
		}
	}
	if (f2fs_compressed_file(inode))
		f2fs_destroy_compress_ctx(&cc);
#endif
	if (retry) {
		index = 0;
		end = -1;
		goto retry;
	}
	if (wbc->range_cyclic && !done)
		done_index = 0;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (nwritten)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);
	/* submit cached bio of IPU write */
	if (bio)
		f2fs_submit_merged_ipu_write(sbi, &bio, NULL);

	return ret;
}
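
/*
 * Like write_cache_pages(), the retry: path above restarts the scan
 * from index 0 when a pass must be repeated (e.g. a compressed cluster
 * raced with an overwrite), and writeback_index is saved so the next
 * WB_SYNC_NONE cyclic pass resumes where this one stopped.
 */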

static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	/* to avoid deadlock in path of data flush */
	if (F2FS_I(inode)->cp_task)
		return false;

	if (!S_ISREG(inode->i_mode))
		return false;
	if (IS_NOQUOTA(inode))
		return false;

	if (f2fs_need_compress_data(inode))
		return true;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
			wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot guarantee its mapping->host
	 * to detect pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (IS_NOQUOTA(inode))
		return;

	/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
	if (to > i_size && !f2fs_verity_in_progress(inode)) {
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0, 0, 0};
	int err = 0;
	int flag;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
	    !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
	    !f2fs_verity_in_progress(inode))
		return 0;

	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
	if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
		flag = F2FS_GET_BLOCK_DEFAULT;
	else
		flag = F2FS_GET_BLOCK_PRE_AIO;

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		f2fs_do_map_lock(sbi, flag, true);
		locked = true;
	}

restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		f2fs_do_map_lock(sbi, flag, false);
	return err;
}
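
/*
 * The restart: logic above is a lock-escalation dance: the fast path
 * probes the dnode without f2fs_do_map_lock(), and only when it finds
 * a hole (NULL_ADDR) does it drop the dnode, take the PRE_AIO map lock
 * and retry, so the common overwrite case never pays for the lock.
 */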

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto fail;
	}

	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
#ifdef CONFIG_F2FS_FS_COMPRESSION
	if (f2fs_compressed_file(inode)) {
		int ret;

		*fsdata = NULL;

		ret = f2fs_prepare_compress_overwrite(inode, pagep,
							index, fsdata);
		if (ret < 0) {
			err = ret;
			goto fail;
		} else if (ret) {
			return 0;
		}
	}
#endif

repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. Will wait that below with our IO control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	/* TODO: cluster can be compressed due to race with .writepage */

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false, true);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto fail;
		}
		err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, and we expect copied
	 * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
	 * let generic_perform_write() try to copy data again through copied=0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}

#ifdef CONFIG_F2FS_FS_COMPRESSION
	/* overwrite compressed file */
	if (f2fs_compressed_file(inode) && fsdata) {
		f2fs_compress_write_end(inode, fsdata, page->index, copied);
		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);

		if (pos + copied > i_size_read(inode) &&
				!f2fs_verity_in_progress(inode))
			f2fs_i_size_write(inode, pos + copied);
		return copied;
	}
#endif

	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode) &&
	    !f2fs_verity_in_progress(inode))
		f2fs_i_size_write(inode, pos + copied);
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
		return 1;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;
		return 1;
	}
	return 0;
}
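
/*
 * Worked example: with a 4KiB filesystem block and a 512B logical
 * device block, a 1024-byte-aligned iovec fails the first mask check
 * but passes the second, so check_direct_IO() returns 1 and
 * f2fs_direct_IO() quietly falls back to buffered I/O; only an
 * alignment below the device's logical block size yields -EINVAL.
 */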

static void f2fs_dio_end_io(struct bio *bio)
{
	struct f2fs_private_dio *dio = bio->bi_private;

	dec_page_count(F2FS_I_SB(dio->inode),
			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	bio->bi_private = dio->orig_private;
	bio->bi_end_io = dio->orig_end_io;

	kfree(dio);

	bio_endio(bio);
}

static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
							loff_t file_offset)
{
	struct f2fs_private_dio *dio;
	bool write = (bio_op(bio) == REQ_OP_WRITE);

	dio = f2fs_kzalloc(F2FS_I_SB(inode),
			sizeof(struct f2fs_private_dio), GFP_NOFS);
	if (!dio)
		goto out;

	dio->inode = inode;
	dio->orig_end_io = bio->bi_end_io;
	dio->orig_private = bio->bi_private;
	dio->write = write;

	bio->bi_end_io = f2fs_dio_end_io;
	bio->bi_private = dio;

	inc_page_count(F2FS_I_SB(inode),
			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	submit_bio(bio);
	return;
out:
	bio->bi_status = BLK_STS_IOERR;
	bio_endio(bio);
}

static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	int rw = iov_iter_rw(iter);
	int err;
	enum rw_hint hint = iocb->ki_hint;
	int whint_mode = F2FS_OPTION(sbi).whint_mode;
	bool do_opu;

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, iocb, iter))
		return 0;

	do_opu = allow_outplace_dio(inode, iocb, iter);

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
		iocb->ki_hint = WRITE_LIFE_NOT_SET;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
			up_read(&fi->i_gc_rwsem[rw]);
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
	} else {
		down_read(&fi->i_gc_rwsem[rw]);
		if (do_opu)
			down_read(&fi->i_gc_rwsem[READ]);
	}

	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
			iter, rw == WRITE ? get_data_block_dio_write :
			get_data_block_dio, NULL, f2fs_dio_submit_bio,
			rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
			DIO_SKIP_HOLES);

	if (do_opu)
		up_read(&fi->i_gc_rwsem[READ]);

	up_read(&fi->i_gc_rwsem[rw]);

	if (rw == WRITE) {
		if (whint_mode == WHINT_MODE_OFF)
			iocb->ki_hint = hint;
		if (err > 0) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
									err);
			set_inode_flag(inode, FI_UPDATE_WRITE);
		} else if (err == -EIOCBQUEUED) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
						count - iov_iter_count(iter));
		} else if (err < 0) {
			f2fs_write_failed(mapping, offset + count);
		}
	} else {
		if (err > 0)
			f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
		else if (err == -EIOCBQUEUED)
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
						count - iov_iter_count(iter));
	}

out:
	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}
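
/*
 * The i_gc_rwsem dance above keeps direct I/O coherent with GC: the
 * semaphore for the I/O direction is always taken, and an out-of-place
 * DIO write (do_opu) additionally takes the READ side so that garbage
 * collection cannot move the old blocks underneath the write.  With
 * IOCB_NOWAIT, both acquisitions degrade to trylocks and -EAGAIN.
 */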

void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_SIZE || length != PAGE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	clear_cold_data(page);

	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return f2fs_drop_inmem_page(inode, page);

	f2fs_clear_page_private(page);
}

int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

	/* This is atomic written page, keep Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	clear_cold_data(page);
	f2fs_clear_page_private(page);
	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (PageSwapCache(page))
		return __set_page_dirty_nobuffers(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * Previously, this page has been registered, we just
		 * return here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct dnode_of_data dn;
	sector_t start_idx, blknr = 0;
	int ret;

	start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
	if (ret)
		return 0;

	if (dn.data_blkaddr != COMPRESS_ADDR) {
		dn.ofs_in_node += block - start_idx;
		blknr = f2fs_data_blkaddr(&dn);
		if (!__is_valid_data_blkaddr(blknr))
			blknr = 0;
	}

	f2fs_put_dnode(&dn);
	return blknr;
#else
	return 0;
#endif
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	sector_t blknr = 0;

	if (f2fs_has_inline_data(inode))
		goto out;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(block >= F2FS_I_SB(inode)->max_file_blocks))
		goto out;

	if (f2fs_compressed_file(inode)) {
		blknr = f2fs_bmap_compress(inode, block);
	} else {
		struct f2fs_map_blocks map;

		memset(&map, 0, sizeof(map));
		map.m_lblk = block;
		map.m_len = 1;
		map.m_next_pgofs = NULL;
		map.m_seg_type = NO_CHECK_TYPE;

		if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
			blknr = map.m_pblk;
	}
out:
	trace_f2fs_bmap(inode, block, blknr);
	return blknr;
}

#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock hold */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/* one extra reference was held for atomic_write page */
	extra_count = atomic_written ? 1 : 0;
	rc = migrate_page_move_mapping(mapping, newpage,
				page, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;

		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page)) {
		f2fs_set_page_private(newpage, page_private(page));
		f2fs_clear_page_private(page);
	}

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

#ifdef CONFIG_SWAP
static int check_swap_activate_fast(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	sector_t cur_lblock;
	sector_t last_lblock;
	sector_t pblock;
	sector_t lowest_pblock = -1;
	sector_t highest_pblock = 0;
	int nr_extents = 0;
	unsigned long nr_pblocks;
	u64 len;
	int ret;

	/*
	 * Map all the blocks into the extent list. This code doesn't try
	 * to be very smart.
	 */
	cur_lblock = 0;
	last_lblock = bytes_to_blks(inode, i_size_read(inode));
	len = i_size_read(inode);

	while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
		struct f2fs_map_blocks map;
		pgoff_t next_pgofs;

		cond_resched();

		memset(&map, 0, sizeof(map));
		map.m_lblk = cur_lblock;
		map.m_len = bytes_to_blks(inode, len) - cur_lblock;
		map.m_next_pgofs = &next_pgofs;
		map.m_seg_type = NO_CHECK_TYPE;

		ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
		if (ret)
			goto err_out;

		/* hole */
		if (!(map.m_flags & F2FS_MAP_FLAGS))
			goto err_out;

		pblock = map.m_pblk;
		nr_pblocks = map.m_len;

		if (cur_lblock + nr_pblocks >= sis->max)
			nr_pblocks = sis->max - cur_lblock;

		if (cur_lblock) {	/* exclude the header page */
			if (pblock < lowest_pblock)
				lowest_pblock = pblock;
			if (pblock + nr_pblocks - 1 > highest_pblock)
				highest_pblock = pblock + nr_pblocks - 1;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		cur_lblock += nr_pblocks;
	}
	ret = nr_extents;
	*span = 1 + highest_pblock - lowest_pblock;
	if (cur_lblock == 0)
		cur_lblock = 1;	/* force Empty message */
	sis->max = cur_lblock;
	sis->pages = cur_lblock - 1;
	sis->highest_bit = cur_lblock - 1;
out:
	return ret;
err_out:
	pr_err("swapon: swapfile has holes\n");
	return -EINVAL;
}
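
/*
 * The fast variant above is only usable when PAGE_SIZE == F2FS_BLKSIZE,
 * since it builds multi-block extents straight from f2fs_map_blocks();
 * the generic variant below probes one page worth of blocks at a time
 * via bmap() and therefore also works when several fs blocks back one
 * page.
 */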

/* Copied from generic_swapfile_activate() to check any holes */
static int check_swap_activate(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	if (PAGE_SIZE == F2FS_BLKSIZE)
		return check_swap_activate_fast(sis, swap_file, span);

	blocks_per_page = bytes_to_blks(inode, PAGE_SIZE);

	/*
	 * Map all the blocks into the extent list. This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = bytes_to_blks(inode, i_size_read(inode));
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;
		sector_t block = 0;
		int err = 0;

		cond_resched();

		block = probe_block;
		err = bmap(inode, &block);
		if (err || !block)
			goto bad_bmap;
		first_block = block;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {

			block = probe_block + block_in_page;
			err = bmap(inode, &block);

			if (err || !block)
				goto bad_bmap;

			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - inode->i_blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* force Empty message */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
out:
	return ret;
bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	return -EINVAL;
}
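
/*
 * Following generic_swapfile_activate(), both variants skip the very
 * first page when tracking the lowest/highest physical block: page 0
 * of a swapfile holds the swap header and is not part of the swap
 * area proper, so it must not widen *span.
 */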

static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
		return -EROFS;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	if (!f2fs_disable_compressed_file(inode))
		return -EINVAL;

	f2fs_precache_extents(inode);

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;

	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return ret;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readahead	= f2fs_readahead,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
	.swap_activate  = f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache =
		kmem_cache_create("f2fs_bio_post_read_ctx",
				 sizeof(struct bio_post_read_ctx), 0, 0, NULL);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}

int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (!f2fs_sb_has_encrypt(sbi) &&
		!f2fs_sb_has_verity(sbi) &&
		!f2fs_sb_has_compression(sbi))
		return 0;

	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
						 WQ_UNBOUND | WQ_HIGHPRI,
						 num_online_cpus());
	if (!sbi->post_read_wq)
		return -ENOMEM;
	return 0;
}

void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (sbi->post_read_wq)
		destroy_workqueue(sbi->post_read_wq);
}

int __init f2fs_init_bio_entry_cache(void)
{
	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
					sizeof(struct bio_entry));
	if (!bio_entry_slab)
		return -ENOMEM;
	return 0;
}

void f2fs_destroy_bio_entry_cache(void)
{
	kmem_cache_destroy(bio_entry_slab);
}