4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
11 #include <linux/module.h>
12 #include <linux/init.h>
14 #include <linux/statfs.h>
15 #include <linux/buffer_head.h>
16 #include <linux/backing-dev.h>
17 #include <linux/kthread.h>
18 #include <linux/parser.h>
19 #include <linux/mount.h>
20 #include <linux/seq_file.h>
21 #include <linux/proc_fs.h>
22 #include <linux/random.h>
23 #include <linux/exportfs.h>
24 #include <linux/blkdev.h>
25 #include <linux/f2fs_fs.h>
26 #include <linux/sysfs.h>
35 #define CREATE_TRACE_POINTS
36 #include <trace/events/f2fs.h>
/* /proc directory entry under which per-superblock entries are created;
 * removed via remove_proc_entry() in f2fs_put_super(). */
38 static struct proc_dir_entry
*f2fs_proc_root
;
/* slab cache for struct f2fs_inode_info; allocated from in f2fs_alloc_inode()
 * and freed to in f2fs_i_callback(). */
39 static struct kmem_cache
*f2fs_inode_cachep
;
/* sysfs kset for f2fs — not referenced elsewhere in this chunk; presumably
 * the parent of each sbi->s_kobj (TODO confirm against kobject init code). */
40 static struct kset
*f2fs_kset
;
42 #ifdef CONFIG_F2FS_FAULT_INJECTION
/* Human-readable label for each fault-injection type, indexed by the
 * FAULT_* enum values (array is sized FAULT_MAX).
 * NOTE(review): the closing "};" / "#endif" lines are not visible in this
 * extraction. */
44 char *fault_name
[FAULT_MAX
] = {
45 [FAULT_KMALLOC
] = "kmalloc",
46 [FAULT_PAGE_ALLOC
] = "page alloc",
47 [FAULT_ALLOC_NID
] = "alloc nid",
48 [FAULT_ORPHAN
] = "orphan",
49 [FAULT_BLOCK
] = "no more block",
50 [FAULT_DIR_DEPTH
] = "too big dir depth",
51 [FAULT_EVICT_INODE
] = "evict_inode fail",
52 [FAULT_IO
] = "IO error",
53 [FAULT_CHECKPOINT
] = "checkpoint error",
/*
 * Configure the per-superblock fault-injection state in sbi->fault_info.
 * The body references a `rate` parameter whose declaration line is not
 * visible here.  Setting a rate resets the op counter and enables every
 * fault type ((1 << FAULT_MAX) - 1); the memset clears the whole struct.
 * NOTE(review): the if/else lines selecting between the two paths were
 * dropped by the extraction — presumably rate != 0 takes the first branch
 * and rate == 0 the memset; confirm against the original file.
 */
56 static void f2fs_build_fault_attr(struct f2fs_sb_info
*sbi
,
59 struct f2fs_fault_info
*ffi
= &sbi
->fault_info
;
62 atomic_set(&ffi
->inject_ops
, 0);
63 ffi
->inject_rate
= rate
;
64 ffi
->inject_type
= (1 << FAULT_MAX
) - 1;
66 memset(ffi
, 0, sizeof(struct f2fs_fault_info
));
71 /* f2fs-wide shrinker description */
/* Registered memory shrinker: count_objects/scan_objects report and reclaim
 * f2fs-internal cached objects under memory pressure. */
72 static struct shrinker f2fs_shrinker_info
= {
73 .scan_objects
= f2fs_shrink_scan
,
74 .count_objects
= f2fs_shrink_count
,
75 .seeks
= DEFAULT_SEEKS
,
80 Opt_disable_roll_forward
,
90 Opt_disable_ext_identify
,
/* Mount-option token table consumed by match_token() in parse_options().
 * Entries with %s/%u take an argument; the rest are bare flags.
 * NOTE(review): the "acl" entry and the Opt_err sentinel/closing brace are
 * not visible in this extraction. */
111 static match_table_t f2fs_tokens
= {
112 {Opt_gc_background
, "background_gc=%s"},
113 {Opt_disable_roll_forward
, "disable_roll_forward"},
114 {Opt_norecovery
, "norecovery"},
115 {Opt_discard
, "discard"},
116 {Opt_nodiscard
, "nodiscard"},
117 {Opt_noheap
, "no_heap"},
118 {Opt_user_xattr
, "user_xattr"},
119 {Opt_nouser_xattr
, "nouser_xattr"},
121 {Opt_noacl
, "noacl"},
122 {Opt_active_logs
, "active_logs=%u"},
123 {Opt_disable_ext_identify
, "disable_ext_identify"},
124 {Opt_inline_xattr
, "inline_xattr"},
125 {Opt_inline_data
, "inline_data"},
126 {Opt_inline_dentry
, "inline_dentry"},
127 {Opt_noinline_dentry
, "noinline_dentry"},
128 {Opt_flush_merge
, "flush_merge"},
129 {Opt_noflush_merge
, "noflush_merge"},
130 {Opt_nobarrier
, "nobarrier"},
131 {Opt_fastboot
, "fastboot"},
132 {Opt_extent_cache
, "extent_cache"},
133 {Opt_noextent_cache
, "noextent_cache"},
134 {Opt_noinline_data
, "noinline_data"},
135 {Opt_data_flush
, "data_flush"},
136 {Opt_mode
, "mode=%s"},
137 {Opt_io_size_bits
, "io_bits=%u"},
138 {Opt_fault_injection
, "fault_injection=%u"},
139 {Opt_lazytime
, "lazytime"},
140 {Opt_nolazytime
, "nolazytime"},
144 /* Sysfs support for f2fs */
146 GC_THREAD
, /* struct f2fs_gc_thread */
147 SM_INFO
, /* struct f2fs_sm_info */
148 NM_INFO
, /* struct f2fs_nm_info */
149 F2FS_SBI
, /* struct f2fs_sb_info */
150 #ifdef CONFIG_F2FS_FAULT_INJECTION
151 FAULT_INFO_RATE
, /* struct f2fs_fault_info */
152 FAULT_INFO_TYPE
, /* struct f2fs_fault_info */
157 struct attribute attr
;
158 ssize_t (*show
)(struct f2fs_attr
*, struct f2fs_sb_info
*, char *);
159 ssize_t (*store
)(struct f2fs_attr
*, struct f2fs_sb_info
*,
160 const char *, size_t);
/*
 * Map a struct_type selector (GC_THREAD/SM_INFO/NM_INFO/F2FS_SBI, and the
 * fault-injection selectors when enabled) to the base address of the
 * corresponding sub-structure of @sbi.  f2fs_sbi_show()/f2fs_sbi_store()
 * add f2fs_attr.offset to this base to reach the attribute field.
 */
165 static unsigned char *__struct_ptr(struct f2fs_sb_info
*sbi
, int struct_type
)
167 if (struct_type
== GC_THREAD
)
168 return (unsigned char *)sbi
->gc_thread
;
169 else if (struct_type
== SM_INFO
)
170 return (unsigned char *)SM_I(sbi
);
171 else if (struct_type
== NM_INFO
)
172 return (unsigned char *)NM_I(sbi
);
173 else if (struct_type
== F2FS_SBI
)
174 return (unsigned char *)sbi
;
175 #ifdef CONFIG_F2FS_FAULT_INJECTION
176 else if (struct_type
== FAULT_INFO_RATE
||
177 struct_type
== FAULT_INFO_TYPE
)
178 return (unsigned char *)&sbi
->fault_info
;
/*
 * sysfs read handler: total kilobytes written to the device over the
 * filesystem's lifetime — the persisted sbi->kbytes_written plus the
 * live counter from BD_PART_WRITTEN().  Prints "0" when the block device
 * has no partition info to read from.
 */
183 static ssize_t
lifetime_write_kbytes_show(struct f2fs_attr
*a
,
184 struct f2fs_sb_info
*sbi
, char *buf
)
186 struct super_block
*sb
= sbi
->sb
;
188 if (!sb
->s_bdev
->bd_part
)
189 return snprintf(buf
, PAGE_SIZE
, "0\n");
191 return snprintf(buf
, PAGE_SIZE
, "%llu\n",
192 (unsigned long long)(sbi
->kbytes_written
+
193 BD_PART_WRITTEN(sbi
)));
/*
 * Generic sysfs show: locate the sub-struct via __struct_ptr(), then print
 * the unsigned int at f2fs_attr.offset within it.
 * NOTE(review): the NULL-check on ptr and the declaration of `ui` were
 * dropped by the extraction.
 */
196 static ssize_t
f2fs_sbi_show(struct f2fs_attr
*a
,
197 struct f2fs_sb_info
*sbi
, char *buf
)
199 unsigned char *ptr
= NULL
;
202 ptr
= __struct_ptr(sbi
, a
->struct_type
);
206 ui
= (unsigned int *)(ptr
+ a
->offset
);
208 return snprintf(buf
, PAGE_SIZE
, "%u\n", *ui
);
/*
 * Generic sysfs store: parse an unsigned long from @buf with kstrtoul()
 * and write it into the unsigned int at f2fs_attr.offset of the selected
 * sub-struct.  When fault injection is enabled, FAULT_INFO_TYPE values
 * must fit in the FAULT_MAX bit mask.
 * NOTE(review): the declarations of ptr/ret/t, the error returns, and the
 * final assignment/return lines were dropped by the extraction.
 */
211 static ssize_t
f2fs_sbi_store(struct f2fs_attr
*a
,
212 struct f2fs_sb_info
*sbi
,
213 const char *buf
, size_t count
)
220 ptr
= __struct_ptr(sbi
, a
->struct_type
);
224 ui
= (unsigned int *)(ptr
+ a
->offset
);
226 ret
= kstrtoul(skip_spaces(buf
), 0, &t
);
229 #ifdef CONFIG_F2FS_FAULT_INJECTION
/* reject a type mask with bits beyond the defined fault types */
230 if (a
->struct_type
== FAULT_INFO_TYPE
&& t
>= (1 << FAULT_MAX
))
/*
 * sysfs_ops.show trampoline: recover the f2fs_sb_info from the embedded
 * kobject and the f2fs_attr from the generic attribute, then dispatch to
 * the attribute's own show handler (0 if it has none).
 */
237 static ssize_t
f2fs_attr_show(struct kobject
*kobj
,
238 struct attribute
*attr
, char *buf
)
240 struct f2fs_sb_info
*sbi
= container_of(kobj
, struct f2fs_sb_info
,
242 struct f2fs_attr
*a
= container_of(attr
, struct f2fs_attr
, attr
);
244 return a
->show
? a
->show(a
, sbi
, buf
) : 0;
/*
 * sysfs_ops.store trampoline: mirror of f2fs_attr_show() for writes —
 * dispatch to the attribute's store handler, 0 if it has none.
 */
247 static ssize_t
f2fs_attr_store(struct kobject
*kobj
, struct attribute
*attr
,
248 const char *buf
, size_t len
)
250 struct f2fs_sb_info
*sbi
= container_of(kobj
, struct f2fs_sb_info
,
252 struct f2fs_attr
*a
= container_of(attr
, struct f2fs_attr
, attr
);
254 return a
->store
? a
->store(a
, sbi
, buf
, len
) : 0;
/*
 * kobject release callback: signal s_kobj_unregister so the
 * wait_for_completion() in f2fs_put_super() can proceed with teardown.
 */
257 static void f2fs_sb_release(struct kobject
*kobj
)
259 struct f2fs_sb_info
*sbi
= container_of(kobj
, struct f2fs_sb_info
,
261 complete(&sbi
->s_kobj_unregister
);
/* Define one named struct f2fs_attr with mode/struct_type/offset filled in.
 * NOTE(review): the .show/.store/.offset initializer lines were dropped by
 * the extraction. */
264 #define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
265 static struct f2fs_attr f2fs_attr_##_name = { \
266 .attr = {.name = __stringify(_name), .mode = _mode }, \
269 .struct_type = _struct_type, \
/* Read-write (0644) attribute backed by a field of the given struct,
 * served by the generic f2fs_sbi_show/f2fs_sbi_store handlers. */
273 #define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
274 F2FS_ATTR_OFFSET(struct_type, name, 0644, \
275 f2fs_sbi_show, f2fs_sbi_store, \
276 offsetof(struct struct_name, elname))
/* Read-only (0444) attribute with a dedicated name##_show handler. */
278 #define F2FS_GENERAL_RO_ATTR(name) \
279 static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
281 F2FS_RW_ATTR(GC_THREAD
, f2fs_gc_kthread
, gc_min_sleep_time
, min_sleep_time
);
282 F2FS_RW_ATTR(GC_THREAD
, f2fs_gc_kthread
, gc_max_sleep_time
, max_sleep_time
);
283 F2FS_RW_ATTR(GC_THREAD
, f2fs_gc_kthread
, gc_no_gc_sleep_time
, no_gc_sleep_time
);
284 F2FS_RW_ATTR(GC_THREAD
, f2fs_gc_kthread
, gc_idle
, gc_idle
);
285 F2FS_RW_ATTR(SM_INFO
, f2fs_sm_info
, reclaim_segments
, rec_prefree_segments
);
286 F2FS_RW_ATTR(SM_INFO
, f2fs_sm_info
, max_small_discards
, max_discards
);
287 F2FS_RW_ATTR(SM_INFO
, f2fs_sm_info
, batched_trim_sections
, trim_sections
);
288 F2FS_RW_ATTR(SM_INFO
, f2fs_sm_info
, ipu_policy
, ipu_policy
);
289 F2FS_RW_ATTR(SM_INFO
, f2fs_sm_info
, min_ipu_util
, min_ipu_util
);
290 F2FS_RW_ATTR(SM_INFO
, f2fs_sm_info
, min_fsync_blocks
, min_fsync_blocks
);
291 F2FS_RW_ATTR(NM_INFO
, f2fs_nm_info
, ram_thresh
, ram_thresh
);
292 F2FS_RW_ATTR(NM_INFO
, f2fs_nm_info
, ra_nid_pages
, ra_nid_pages
);
293 F2FS_RW_ATTR(NM_INFO
, f2fs_nm_info
, dirty_nats_ratio
, dirty_nats_ratio
);
294 F2FS_RW_ATTR(F2FS_SBI
, f2fs_sb_info
, max_victim_search
, max_victim_search
);
295 F2FS_RW_ATTR(F2FS_SBI
, f2fs_sb_info
, dir_level
, dir_level
);
296 F2FS_RW_ATTR(F2FS_SBI
, f2fs_sb_info
, cp_interval
, interval_time
[CP_TIME
]);
297 F2FS_RW_ATTR(F2FS_SBI
, f2fs_sb_info
, idle_interval
, interval_time
[REQ_TIME
]);
298 #ifdef CONFIG_F2FS_FAULT_INJECTION
299 F2FS_RW_ATTR(FAULT_INFO_RATE
, f2fs_fault_info
, inject_rate
, inject_rate
);
300 F2FS_RW_ATTR(FAULT_INFO_TYPE
, f2fs_fault_info
, inject_type
, inject_type
);
302 F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes
);
304 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
305 static struct attribute
*f2fs_attrs
[] = {
306 ATTR_LIST(gc_min_sleep_time
),
307 ATTR_LIST(gc_max_sleep_time
),
308 ATTR_LIST(gc_no_gc_sleep_time
),
310 ATTR_LIST(reclaim_segments
),
311 ATTR_LIST(max_small_discards
),
312 ATTR_LIST(batched_trim_sections
),
313 ATTR_LIST(ipu_policy
),
314 ATTR_LIST(min_ipu_util
),
315 ATTR_LIST(min_fsync_blocks
),
316 ATTR_LIST(max_victim_search
),
317 ATTR_LIST(dir_level
),
318 ATTR_LIST(ram_thresh
),
319 ATTR_LIST(ra_nid_pages
),
320 ATTR_LIST(dirty_nats_ratio
),
321 ATTR_LIST(cp_interval
),
322 ATTR_LIST(idle_interval
),
323 #ifdef CONFIG_F2FS_FAULT_INJECTION
324 ATTR_LIST(inject_rate
),
325 ATTR_LIST(inject_type
),
327 ATTR_LIST(lifetime_write_kbytes
),
/* sysfs ops shared by every f2fs attribute: both funnel through the
 * container_of trampolines above. */
331 static const struct sysfs_ops f2fs_attr_ops
= {
332 .show
= f2fs_attr_show
,
333 .store
= f2fs_attr_store
,
/* kobject type for sbi->s_kobj: exposes f2fs_attrs and completes
 * s_kobj_unregister on release. */
336 static struct kobj_type f2fs_ktype
= {
337 .default_attrs
= f2fs_attrs
,
338 .sysfs_ops
= &f2fs_attr_ops
,
339 .release
= f2fs_sb_release
,
/*
 * printk helper: prefix every f2fs message with the given log @level and
 * the superblock's device id (sb->s_id), formatting the varargs through a
 * struct va_format ("%pV").
 * NOTE(review): the va_start/va_end lines were dropped by the extraction.
 */
342 void f2fs_msg(struct super_block
*sb
, const char *level
, const char *fmt
, ...)
344 struct va_format vaf
;
350 printk("%sF2FS-fs (%s): %pV\n", level
, sb
->s_id
, &vaf
);
/*
 * Slab constructor for f2fs_inode_cachep: initialize the embedded VFS
 * inode exactly once per slab object.
 */
354 static void init_once(void *foo
)
356 struct f2fs_inode_info
*fi
= (struct f2fs_inode_info
*) foo
;
358 inode_init_once(&fi
->vfs_inode
);
361 static int parse_options(struct super_block
*sb
, char *options
)
363 struct f2fs_sb_info
*sbi
= F2FS_SB(sb
);
364 struct request_queue
*q
;
365 substring_t args
[MAX_OPT_ARGS
];
372 while ((p
= strsep(&options
, ",")) != NULL
) {
377 * Initialize args struct so we know whether arg was
378 * found; some options take optional arguments.
380 args
[0].to
= args
[0].from
= NULL
;
381 token
= match_token(p
, f2fs_tokens
, args
);
384 case Opt_gc_background
:
385 name
= match_strdup(&args
[0]);
389 if (strlen(name
) == 2 && !strncmp(name
, "on", 2)) {
391 clear_opt(sbi
, FORCE_FG_GC
);
392 } else if (strlen(name
) == 3 && !strncmp(name
, "off", 3)) {
393 clear_opt(sbi
, BG_GC
);
394 clear_opt(sbi
, FORCE_FG_GC
);
395 } else if (strlen(name
) == 4 && !strncmp(name
, "sync", 4)) {
397 set_opt(sbi
, FORCE_FG_GC
);
404 case Opt_disable_roll_forward
:
405 set_opt(sbi
, DISABLE_ROLL_FORWARD
);
408 /* this option mounts f2fs with ro */
409 set_opt(sbi
, DISABLE_ROLL_FORWARD
);
410 if (!f2fs_readonly(sb
))
414 q
= bdev_get_queue(sb
->s_bdev
);
415 if (blk_queue_discard(q
)) {
416 set_opt(sbi
, DISCARD
);
417 } else if (!f2fs_sb_mounted_blkzoned(sb
)) {
418 f2fs_msg(sb
, KERN_WARNING
,
419 "mounting with \"discard\" option, but "
420 "the device does not support discard");
424 if (f2fs_sb_mounted_blkzoned(sb
)) {
425 f2fs_msg(sb
, KERN_WARNING
,
426 "discard is required for zoned block devices");
429 clear_opt(sbi
, DISCARD
);
432 set_opt(sbi
, NOHEAP
);
434 #ifdef CONFIG_F2FS_FS_XATTR
436 set_opt(sbi
, XATTR_USER
);
438 case Opt_nouser_xattr
:
439 clear_opt(sbi
, XATTR_USER
);
441 case Opt_inline_xattr
:
442 set_opt(sbi
, INLINE_XATTR
);
446 f2fs_msg(sb
, KERN_INFO
,
447 "user_xattr options not supported");
449 case Opt_nouser_xattr
:
450 f2fs_msg(sb
, KERN_INFO
,
451 "nouser_xattr options not supported");
453 case Opt_inline_xattr
:
454 f2fs_msg(sb
, KERN_INFO
,
455 "inline_xattr options not supported");
458 #ifdef CONFIG_F2FS_FS_POSIX_ACL
460 set_opt(sbi
, POSIX_ACL
);
463 clear_opt(sbi
, POSIX_ACL
);
467 f2fs_msg(sb
, KERN_INFO
, "acl options not supported");
470 f2fs_msg(sb
, KERN_INFO
, "noacl options not supported");
473 case Opt_active_logs
:
474 if (args
->from
&& match_int(args
, &arg
))
476 if (arg
!= 2 && arg
!= 4 && arg
!= NR_CURSEG_TYPE
)
478 sbi
->active_logs
= arg
;
480 case Opt_disable_ext_identify
:
481 set_opt(sbi
, DISABLE_EXT_IDENTIFY
);
483 case Opt_inline_data
:
484 set_opt(sbi
, INLINE_DATA
);
486 case Opt_inline_dentry
:
487 set_opt(sbi
, INLINE_DENTRY
);
489 case Opt_noinline_dentry
:
490 clear_opt(sbi
, INLINE_DENTRY
);
492 case Opt_flush_merge
:
493 set_opt(sbi
, FLUSH_MERGE
);
495 case Opt_noflush_merge
:
496 clear_opt(sbi
, FLUSH_MERGE
);
499 set_opt(sbi
, NOBARRIER
);
502 set_opt(sbi
, FASTBOOT
);
504 case Opt_extent_cache
:
505 set_opt(sbi
, EXTENT_CACHE
);
507 case Opt_noextent_cache
:
508 clear_opt(sbi
, EXTENT_CACHE
);
510 case Opt_noinline_data
:
511 clear_opt(sbi
, INLINE_DATA
);
514 set_opt(sbi
, DATA_FLUSH
);
517 name
= match_strdup(&args
[0]);
521 if (strlen(name
) == 8 &&
522 !strncmp(name
, "adaptive", 8)) {
523 if (f2fs_sb_mounted_blkzoned(sb
)) {
524 f2fs_msg(sb
, KERN_WARNING
,
525 "adaptive mode is not allowed with "
526 "zoned block device feature");
530 set_opt_mode(sbi
, F2FS_MOUNT_ADAPTIVE
);
531 } else if (strlen(name
) == 3 &&
532 !strncmp(name
, "lfs", 3)) {
533 set_opt_mode(sbi
, F2FS_MOUNT_LFS
);
540 case Opt_io_size_bits
:
541 if (args
->from
&& match_int(args
, &arg
))
543 if (arg
> __ilog2_u32(BIO_MAX_PAGES
)) {
544 f2fs_msg(sb
, KERN_WARNING
,
545 "Not support %d, larger than %d",
546 1 << arg
, BIO_MAX_PAGES
);
549 sbi
->write_io_size_bits
= arg
;
551 case Opt_fault_injection
:
552 if (args
->from
&& match_int(args
, &arg
))
554 #ifdef CONFIG_F2FS_FAULT_INJECTION
555 f2fs_build_fault_attr(sbi
, arg
);
557 f2fs_msg(sb
, KERN_INFO
,
558 "FAULT_INJECTION was not selected");
562 sb
->s_flags
|= MS_LAZYTIME
;
565 sb
->s_flags
&= ~MS_LAZYTIME
;
568 f2fs_msg(sb
, KERN_ERR
,
569 "Unrecognized mount option \"%s\" or missing value",
575 if (F2FS_IO_SIZE_BITS(sbi
) && !test_opt(sbi
, LFS
)) {
576 f2fs_msg(sb
, KERN_ERR
,
577 "Should set mode=lfs with %uKB-sized IO",
578 F2FS_IO_SIZE_KB(sbi
));
/*
 * super_operations.alloc_inode: allocate a zeroed f2fs_inode_info from the
 * slab cache, run the constructor, initialize the f2fs-specific fields
 * (locks, dirty/atomic-page lists, per-direction dio rwsems, default dir
 * depth/level), and hand the embedded VFS inode back to the core.
 * NOTE(review): the allocation-failure check (return NULL) was dropped by
 * the extraction.
 */
584 static struct inode
*f2fs_alloc_inode(struct super_block
*sb
)
586 struct f2fs_inode_info
*fi
;
588 fi
= kmem_cache_alloc(f2fs_inode_cachep
, GFP_F2FS_ZERO
);
592 init_once((void *) fi
);
594 /* Initialize f2fs-specific inode info */
595 fi
->vfs_inode
.i_version
= 1;
596 atomic_set(&fi
->dirty_pages
, 0);
597 fi
->i_current_depth
= 1;
599 init_rwsem(&fi
->i_sem
);
600 INIT_LIST_HEAD(&fi
->dirty_list
);
601 INIT_LIST_HEAD(&fi
->gdirty_list
);
602 INIT_LIST_HEAD(&fi
->inmem_pages
);
603 mutex_init(&fi
->inmem_lock
);
604 init_rwsem(&fi
->dio_rwsem
[READ
]);
605 init_rwsem(&fi
->dio_rwsem
[WRITE
]);
607 /* Will be used by directory only */
608 fi
->i_dir_level
= F2FS_SB(sb
)->dir_level
;
609 return &fi
->vfs_inode
;
/*
 * super_operations.drop_inode: decide whether the last iput should evict.
 * If the inode is still hashed but under writeback (I_SYNC), evicting now
 * could deadlock (see the call chain in the comment below), so for a dead
 * inode we instead do the teardown work here — pin with an extra i_count,
 * drop i_lock, discard in-memory atomic pages, destroy the extent node,
 * truncate to zero inside an intwrite section, release crypto info —
 * then re-take i_lock and unpin.  Otherwise defer to generic_drop_inode().
 */
612 static int f2fs_drop_inode(struct inode
*inode
)
615 * This is to avoid a deadlock condition like below.
616 * writeback_single_inode(inode)
617 * - f2fs_write_data_page
618 * - f2fs_gc -> iput -> evict
619 * - inode_wait_for_writeback(inode)
621 if ((!inode_unhashed(inode
) && inode
->i_state
& I_SYNC
)) {
622 if (!inode
->i_nlink
&& !is_bad_inode(inode
)) {
623 /* to avoid evict_inode call simultaneously */
624 atomic_inc(&inode
->i_count
);
625 spin_unlock(&inode
->i_lock
);
627 /* some remained atomic pages should discarded */
628 if (f2fs_is_atomic_file(inode
))
629 drop_inmem_pages(inode
);
631 /* should remain fi->extent_tree for writepage */
632 f2fs_destroy_extent_node(inode
);
634 sb_start_intwrite(inode
->i_sb
);
635 f2fs_i_size_write(inode
, 0);
637 if (F2FS_HAS_BLOCKS(inode
))
638 f2fs_truncate(inode
);
640 sb_end_intwrite(inode
->i_sb
);
642 fscrypt_put_encryption_info(inode
, NULL
);
643 spin_lock(&inode
->i_lock
);
644 atomic_dec(&inode
->i_count
);
649 return generic_drop_inode(inode
);
/*
 * Mark @inode's metadata dirty under inode_lock[DIRTY_META]: set
 * FI_DIRTY_INODE and bump the dirty-inode stat on the first transition,
 * and when @sync is requested, queue the inode on the global
 * inode_list[DIRTY_META] and account an F2FS_DIRTY_IMETA page.
 * f2fs_inode_synced() below undoes both.
 */
652 int f2fs_inode_dirtied(struct inode
*inode
, bool sync
)
654 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
657 spin_lock(&sbi
->inode_lock
[DIRTY_META
]);
658 if (is_inode_flag_set(inode
, FI_DIRTY_INODE
)) {
661 set_inode_flag(inode
, FI_DIRTY_INODE
);
662 stat_inc_dirty_inode(sbi
, DIRTY_META
);
664 if (sync
&& list_empty(&F2FS_I(inode
)->gdirty_list
)) {
665 list_add_tail(&F2FS_I(inode
)->gdirty_list
,
666 &sbi
->inode_list
[DIRTY_META
]);
667 inc_page_count(sbi
, F2FS_DIRTY_IMETA
);
669 spin_unlock(&sbi
->inode_lock
[DIRTY_META
]);
/*
 * Counterpart of f2fs_inode_dirtied(): under inode_lock[DIRTY_META],
 * bail out early if the inode was never marked dirty; otherwise unlink it
 * from the global dirty list (adjusting the F2FS_DIRTY_IMETA count),
 * clear FI_DIRTY_INODE and FI_AUTO_RECOVER, and drop the dirty-inode stat.
 */
673 void f2fs_inode_synced(struct inode
*inode
)
675 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
677 spin_lock(&sbi
->inode_lock
[DIRTY_META
]);
678 if (!is_inode_flag_set(inode
, FI_DIRTY_INODE
)) {
679 spin_unlock(&sbi
->inode_lock
[DIRTY_META
]);
682 if (!list_empty(&F2FS_I(inode
)->gdirty_list
)) {
683 list_del_init(&F2FS_I(inode
)->gdirty_list
);
684 dec_page_count(sbi
, F2FS_DIRTY_IMETA
);
686 clear_inode_flag(inode
, FI_DIRTY_INODE
);
687 clear_inode_flag(inode
, FI_AUTO_RECOVER
);
688 stat_dec_dirty_inode(F2FS_I_SB(inode
), DIRTY_META
);
689 spin_unlock(&sbi
->inode_lock
[DIRTY_META
]);
693 * f2fs_dirty_inode() is called from __mark_inode_dirty()
695 * We should call set_dirty_inode to write the dirty inode through write_inode.
/* super_operations.dirty_inode: ignore the internal node/meta inodes and
 * timestamp-only (I_DIRTY_TIME) updates; otherwise clear FI_AUTO_RECOVER
 * and record the inode as dirty (non-sync). */
697 static void f2fs_dirty_inode(struct inode
*inode
, int flags
)
699 struct f2fs_sb_info
*sbi
= F2FS_I_SB(inode
);
701 if (inode
->i_ino
== F2FS_NODE_INO(sbi
) ||
702 inode
->i_ino
== F2FS_META_INO(sbi
))
705 if (flags
== I_DIRTY_TIME
)
708 if (is_inode_flag_set(inode
, FI_AUTO_RECOVER
))
709 clear_inode_flag(inode
, FI_AUTO_RECOVER
);
711 f2fs_inode_dirtied(inode
, false);
/*
 * RCU callback scheduled by f2fs_destroy_inode(): return the containing
 * f2fs_inode_info to the slab cache once no readers remain.
 */
714 static void f2fs_i_callback(struct rcu_head
*head
)
716 struct inode
*inode
= container_of(head
, struct inode
, i_rcu
);
717 kmem_cache_free(f2fs_inode_cachep
, F2FS_I(inode
));
/* super_operations.destroy_inode: defer the actual free to RCU so lockless
 * lookups that may still hold the inode stay safe. */
720 static void f2fs_destroy_inode(struct inode
*inode
)
722 call_rcu(&inode
->i_rcu
, f2fs_i_callback
);
/* Tear down the per-cpu counters owned by the superblock info. */
725 static void destroy_percpu_info(struct f2fs_sb_info
*sbi
)
727 percpu_counter_destroy(&sbi
->alloc_valid_block_count
);
728 percpu_counter_destroy(&sbi
->total_valid_inode_count
);
/*
 * Release every device in the multi-device array: drop the exclusive
 * block-device reference, and on zoned-capable kernels free the per-device
 * zone-type map.  NOTE(review): the loop-index declaration and the
 * trailing kfree of the device array are not visible in this extraction.
 */
731 static void destroy_device_list(struct f2fs_sb_info
*sbi
)
735 for (i
= 0; i
< sbi
->s_ndevs
; i
++) {
736 blkdev_put(FDEV(i
).bdev
, FMODE_EXCL
);
737 #ifdef CONFIG_BLK_DEV_ZONED
738 kfree(FDEV(i
).blkz_type
);
/*
 * super_operations.put_super: unmount-time teardown, in order —
 * remove proc entries, delete the sysfs kobject, quiesce the shrinker
 * under umount_mutex, write a final clean checkpoint if needed, destroy
 * stats, release pending ino entries, flush merged bios, drop the internal
 * node/meta inodes, destroy the node and segment managers, wait for the
 * kobject release, and finally free checksum driver, raw super, device
 * list and per-cpu counters.
 */
744 static void f2fs_put_super(struct super_block
*sb
)
746 struct f2fs_sb_info
*sbi
= F2FS_SB(sb
)
749 remove_proc_entry("segment_info", sbi
->s_proc
);
750 remove_proc_entry("segment_bits", sbi
->s_proc
);
751 remove_proc_entry(sb
->s_id
, f2fs_proc_root
);
753 kobject_del(&sbi
->s_kobj
);
757 /* prevent remaining shrinker jobs */
758 mutex_lock(&sbi
->umount_mutex
);
761 * We don't need to do checkpoint when superblock is clean.
762 * But, the previous checkpoint was not done by umount, it needs to do
763 * clean checkpoint again.
765 if (is_sbi_flag_set(sbi
, SBI_IS_DIRTY
) ||
766 !is_set_ckpt_flags(sbi
, CP_UMOUNT_FLAG
)) {
767 struct cp_control cpc
= {
770 write_checkpoint(sbi
, &cpc
);
773 /* write_checkpoint can update stat information */
774 f2fs_destroy_stats(sbi
);
777 * normally superblock is clean, so we need to release this.
778 * In addition, EIO will skip do checkpoint, we need this as well.
780 release_ino_entry(sbi
, true);
782 f2fs_leave_shrinker(sbi
);
783 mutex_unlock(&sbi
->umount_mutex
);
785 /* our cp_error case, we can wait for any writeback page */
786 f2fs_flush_merged_bios(sbi
);
788 iput(sbi
->node_inode
);
789 iput(sbi
->meta_inode
);
791 /* destroy f2fs internal modules */
792 destroy_node_manager(sbi
);
793 destroy_segment_manager(sbi
);
796 kobject_put(&sbi
->s_kobj
);
/* blocks until f2fs_sb_release() runs */
797 wait_for_completion(&sbi
->s_kobj_unregister
);
799 sb
->s_fs_info
= NULL
;
800 if (sbi
->s_chksum_driver
)
801 crypto_free_shash(sbi
->s_chksum_driver
);
802 kfree(sbi
->raw_super
);
804 destroy_device_list(sbi
);
806 destroy_percpu_info(sbi
);
/*
 * super_operations.sync_fs: trace the request and, on the (not-visible)
 * sync path, build a cp_control with the current checkpoint reason and
 * write a checkpoint while holding gc_mutex to exclude garbage collection.
 * NOTE(review): the surrounding conditional, err declaration, and return
 * lines were dropped by the extraction.
 */
810 int f2fs_sync_fs(struct super_block
*sb
, int sync
)
812 struct f2fs_sb_info
*sbi
= F2FS_SB(sb
);
815 trace_f2fs_sync_fs(sb
, sync
);
818 struct cp_control cpc
;
820 cpc
.reason
= __get_cp_reason(sbi
);
822 mutex_lock(&sbi
->gc_mutex
);
823 err
= write_checkpoint(sbi
, &cpc
);
824 mutex_unlock(&sbi
->gc_mutex
);
826 f2fs_trace_ios(NULL
, 1);
/*
 * super_operations.freeze_fs: read-only filesystems have nothing to
 * freeze; refuse when a prior checkpoint error occurred or the sb is
 * still dirty after sync_filesystem().  The return-value lines for each
 * branch were dropped by the extraction.
 */
831 static int f2fs_freeze(struct super_block
*sb
)
833 if (f2fs_readonly(sb
))
836 /* IO error happened before */
837 if (unlikely(f2fs_cp_error(F2FS_SB(sb
))))
840 /* must be clean, since sync_filesystem() was already called */
841 if (is_sbi_flag_set(F2FS_SB(sb
), SBI_IS_DIRTY
))
/* super_operations.unfreeze_fs — body lines are not visible in this
 * extraction; presumably a trivial success return (TODO confirm). */
846 static int f2fs_unfreeze(struct super_block
*sb
)
/*
 * super_operations.statfs: fill struct kstatfs from the raw superblock
 * and live counters.  f_blocks excludes blocks before segment0; f_bfree
 * credits the overprovision area back while f_bavail (user-visible) does
 * not; f_files excludes reserved node ids; fsid is the encoded bdev id.
 * NOTE(review): the second argument line of the min() for f_ffree was
 * dropped by the extraction.
 */
851 static int f2fs_statfs(struct dentry
*dentry
, struct kstatfs
*buf
)
853 struct super_block
*sb
= dentry
->d_sb
;
854 struct f2fs_sb_info
*sbi
= F2FS_SB(sb
);
855 u64 id
= huge_encode_dev(sb
->s_bdev
->bd_dev
);
856 block_t total_count
, user_block_count
, start_count
, ovp_count
;
858 total_count
= le64_to_cpu(sbi
->raw_super
->block_count
);
859 user_block_count
= sbi
->user_block_count
;
860 start_count
= le32_to_cpu(sbi
->raw_super
->segment0_blkaddr
);
/* overprovision segments converted to blocks */
861 ovp_count
= SM_I(sbi
)->ovp_segments
<< sbi
->log_blocks_per_seg
;
862 buf
->f_type
= F2FS_SUPER_MAGIC
;
863 buf
->f_bsize
= sbi
->blocksize
;
865 buf
->f_blocks
= total_count
- start_count
;
866 buf
->f_bfree
= user_block_count
- valid_user_blocks(sbi
) + ovp_count
;
867 buf
->f_bavail
= user_block_count
- valid_user_blocks(sbi
);
869 buf
->f_files
= sbi
->total_node_count
- F2FS_RESERVED_NODE_NUM
;
870 buf
->f_ffree
= min(buf
->f_files
- valid_node_count(sbi
),
873 buf
->f_namelen
= F2FS_NAME_LEN
;
874 buf
->f_fsid
.val
[0] = (u32
)id
;
875 buf
->f_fsid
.val
[1] = (u32
)(id
>> 32);
880 static int f2fs_show_options(struct seq_file
*seq
, struct dentry
*root
)
882 struct f2fs_sb_info
*sbi
= F2FS_SB(root
->d_sb
);
884 if (!f2fs_readonly(sbi
->sb
) && test_opt(sbi
, BG_GC
)) {
885 if (test_opt(sbi
, FORCE_FG_GC
))
886 seq_printf(seq
, ",background_gc=%s", "sync");
888 seq_printf(seq
, ",background_gc=%s", "on");
890 seq_printf(seq
, ",background_gc=%s", "off");
892 if (test_opt(sbi
, DISABLE_ROLL_FORWARD
))
893 seq_puts(seq
, ",disable_roll_forward");
894 if (test_opt(sbi
, DISCARD
))
895 seq_puts(seq
, ",discard");
896 if (test_opt(sbi
, NOHEAP
))
897 seq_puts(seq
, ",no_heap_alloc");
898 #ifdef CONFIG_F2FS_FS_XATTR
899 if (test_opt(sbi
, XATTR_USER
))
900 seq_puts(seq
, ",user_xattr");
902 seq_puts(seq
, ",nouser_xattr");
903 if (test_opt(sbi
, INLINE_XATTR
))
904 seq_puts(seq
, ",inline_xattr");
906 #ifdef CONFIG_F2FS_FS_POSIX_ACL
907 if (test_opt(sbi
, POSIX_ACL
))
908 seq_puts(seq
, ",acl");
910 seq_puts(seq
, ",noacl");
912 if (test_opt(sbi
, DISABLE_EXT_IDENTIFY
))
913 seq_puts(seq
, ",disable_ext_identify");
914 if (test_opt(sbi
, INLINE_DATA
))
915 seq_puts(seq
, ",inline_data");
917 seq_puts(seq
, ",noinline_data");
918 if (test_opt(sbi
, INLINE_DENTRY
))
919 seq_puts(seq
, ",inline_dentry");
921 seq_puts(seq
, ",noinline_dentry");
922 if (!f2fs_readonly(sbi
->sb
) && test_opt(sbi
, FLUSH_MERGE
))
923 seq_puts(seq
, ",flush_merge");
924 if (test_opt(sbi
, NOBARRIER
))
925 seq_puts(seq
, ",nobarrier");
926 if (test_opt(sbi
, FASTBOOT
))
927 seq_puts(seq
, ",fastboot");
928 if (test_opt(sbi
, EXTENT_CACHE
))
929 seq_puts(seq
, ",extent_cache");
931 seq_puts(seq
, ",noextent_cache");
932 if (test_opt(sbi
, DATA_FLUSH
))
933 seq_puts(seq
, ",data_flush");
935 seq_puts(seq
, ",mode=");
936 if (test_opt(sbi
, ADAPTIVE
))
937 seq_puts(seq
, "adaptive");
938 else if (test_opt(sbi
, LFS
))
939 seq_puts(seq
, "lfs");
940 seq_printf(seq
, ",active_logs=%u", sbi
->active_logs
);
941 if (F2FS_IO_SIZE_BITS(sbi
))
942 seq_printf(seq
, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi
));
947 static int segment_info_seq_show(struct seq_file
*seq
, void *offset
)
949 struct super_block
*sb
= seq
->private;
950 struct f2fs_sb_info
*sbi
= F2FS_SB(sb
);
951 unsigned int total_segs
=
952 le32_to_cpu(sbi
->raw_super
->segment_count_main
);
955 seq_puts(seq
, "format: segment_type|valid_blocks\n"
956 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
958 for (i
= 0; i
< total_segs
; i
++) {
959 struct seg_entry
*se
= get_seg_entry(sbi
, i
);
962 seq_printf(seq
, "%-10d", i
);
963 seq_printf(seq
, "%d|%-3u", se
->type
,
964 get_valid_blocks(sbi
, i
, 1));
965 if ((i
% 10) == 9 || i
== (total_segs
- 1))
974 static int segment_bits_seq_show(struct seq_file
*seq
, void *offset
)
976 struct super_block
*sb
= seq
->private;
977 struct f2fs_sb_info
*sbi
= F2FS_SB(sb
);
978 unsigned int total_segs
=
979 le32_to_cpu(sbi
->raw_super
->segment_count_main
);
982 seq_puts(seq
, "format: segment_type|valid_blocks|bitmaps\n"
983 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
985 for (i
= 0; i
< total_segs
; i
++) {
986 struct seg_entry
*se
= get_seg_entry(sbi
, i
);
988 seq_printf(seq
, "%-10d", i
);
989 seq_printf(seq
, "%d|%-3u|", se
->type
,
990 get_valid_blocks(sbi
, i
, 1));
991 for (j
= 0; j
< SIT_VBLOCK_MAP_SIZE
; j
++)
992 seq_printf(seq
, " %.2x", se
->cur_valid_map
[j
]);
/* Generate the single_open() boilerplate (open fn + file_operations) for a
 * seq_file-backed /proc entry named after _name##_seq_show.
 * NOTE(review): the .read = seq_read initializer line is not visible in
 * this extraction. */
998 #define F2FS_PROC_FILE_DEF(_name) \
999 static int _name##_open_fs(struct inode *inode, struct file *file) \
1001 return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
1004 static const struct file_operations f2fs_seq_##_name##_fops = { \
1005 .open = _name##_open_fs, \
1007 .llseek = seq_lseek, \
1008 .release = single_release, \
/* instantiate fops for the two /proc files created per superblock */
1011 F2FS_PROC_FILE_DEF(segment_info
);
1012 F2FS_PROC_FILE_DEF(segment_bits
);
/*
 * Reset mount options to their defaults before parsing the option string
 * (also used by remount): all curseg logs active, background GC, inline
 * data/dentry, extent cache, lazytime, flush merge; LFS mode + discard on
 * zoned devices, adaptive mode otherwise; user xattr / POSIX ACL when
 * configured; fault injection disabled (rate 0).
 */
1014 static void default_options(struct f2fs_sb_info
*sbi
)
1016 /* init some FS parameters */
1017 sbi
->active_logs
= NR_CURSEG_TYPE
;
1019 set_opt(sbi
, BG_GC
);
1020 set_opt(sbi
, INLINE_DATA
);
1021 set_opt(sbi
, INLINE_DENTRY
);
1022 set_opt(sbi
, EXTENT_CACHE
);
1023 sbi
->sb
->s_flags
|= MS_LAZYTIME
;
1024 set_opt(sbi
, FLUSH_MERGE
);
1025 if (f2fs_sb_mounted_blkzoned(sbi
->sb
)) {
/* zoned block devices require sequential (LFS) writes + discard */
1026 set_opt_mode(sbi
, F2FS_MOUNT_LFS
);
1027 set_opt(sbi
, DISCARD
);
1029 set_opt_mode(sbi
, F2FS_MOUNT_ADAPTIVE
);
1032 #ifdef CONFIG_F2FS_FS_XATTR
1033 set_opt(sbi
, XATTR_USER
);
1035 #ifdef CONFIG_F2FS_FS_POSIX_ACL
1036 set_opt(sbi
, POSIX_ACL
);
1039 #ifdef CONFIG_F2FS_FAULT_INJECTION
1040 f2fs_build_fault_attr(sbi
, 0);
1044 static int f2fs_remount(struct super_block
*sb
, int *flags
, char *data
)
1046 struct f2fs_sb_info
*sbi
= F2FS_SB(sb
);
1047 struct f2fs_mount_info org_mount_opt
;
1048 int err
, active_logs
;
1049 bool need_restart_gc
= false;
1050 bool need_stop_gc
= false;
1051 bool no_extent_cache
= !test_opt(sbi
, EXTENT_CACHE
);
1052 #ifdef CONFIG_F2FS_FAULT_INJECTION
1053 struct f2fs_fault_info ffi
= sbi
->fault_info
;
1057 * Save the old mount options in case we
1058 * need to restore them.
1060 org_mount_opt
= sbi
->mount_opt
;
1061 active_logs
= sbi
->active_logs
;
1063 /* recover superblocks we couldn't write due to previous RO mount */
1064 if (!(*flags
& MS_RDONLY
) && is_sbi_flag_set(sbi
, SBI_NEED_SB_WRITE
)) {
1065 err
= f2fs_commit_super(sbi
, false);
1066 f2fs_msg(sb
, KERN_INFO
,
1067 "Try to recover all the superblocks, ret: %d", err
);
1069 clear_sbi_flag(sbi
, SBI_NEED_SB_WRITE
);
1072 sbi
->mount_opt
.opt
= 0;
1073 default_options(sbi
);
1075 /* parse mount options */
1076 err
= parse_options(sb
, data
);
1081 * Previous and new state of filesystem is RO,
1082 * so skip checking GC and FLUSH_MERGE conditions.
1084 if (f2fs_readonly(sb
) && (*flags
& MS_RDONLY
))
1087 /* disallow enable/disable extent_cache dynamically */
1088 if (no_extent_cache
== !!test_opt(sbi
, EXTENT_CACHE
)) {
1090 f2fs_msg(sbi
->sb
, KERN_WARNING
,
1091 "switch extent_cache option is not allowed");
1096 * We stop the GC thread if FS is mounted as RO
1097 * or if background_gc = off is passed in mount
1098 * option. Also sync the filesystem.
1100 if ((*flags
& MS_RDONLY
) || !test_opt(sbi
, BG_GC
)) {
1101 if (sbi
->gc_thread
) {
1102 stop_gc_thread(sbi
);
1103 need_restart_gc
= true;
1105 } else if (!sbi
->gc_thread
) {
1106 err
= start_gc_thread(sbi
);
1109 need_stop_gc
= true;
1112 if (*flags
& MS_RDONLY
) {
1113 writeback_inodes_sb(sb
, WB_REASON_SYNC
);
1116 set_sbi_flag(sbi
, SBI_IS_DIRTY
);
1117 set_sbi_flag(sbi
, SBI_IS_CLOSE
);
1118 f2fs_sync_fs(sb
, 1);
1119 clear_sbi_flag(sbi
, SBI_IS_CLOSE
);
1123 * We stop issue flush thread if FS is mounted as RO
1124 * or if flush_merge is not passed in mount option.
1126 if ((*flags
& MS_RDONLY
) || !test_opt(sbi
, FLUSH_MERGE
)) {
1127 clear_opt(sbi
, FLUSH_MERGE
);
1128 destroy_flush_cmd_control(sbi
, false);
1130 err
= create_flush_cmd_control(sbi
);
1135 /* Update the POSIXACL Flag */
1136 sb
->s_flags
= (sb
->s_flags
& ~MS_POSIXACL
) |
1137 (test_opt(sbi
, POSIX_ACL
) ? MS_POSIXACL
: 0);
1141 if (need_restart_gc
) {
1142 if (start_gc_thread(sbi
))
1143 f2fs_msg(sbi
->sb
, KERN_WARNING
,
1144 "background gc thread has stopped");
1145 } else if (need_stop_gc
) {
1146 stop_gc_thread(sbi
);
1149 sbi
->mount_opt
= org_mount_opt
;
1150 sbi
->active_logs
= active_logs
;
1151 #ifdef CONFIG_F2FS_FAULT_INJECTION
1152 sbi
->fault_info
= ffi
;
/* VFS super_operations vtable wiring the handlers defined in this file
 * (plus write_inode/evict_inode/show_options implemented elsewhere). */
1157 static struct super_operations f2fs_sops
= {
1158 .alloc_inode
= f2fs_alloc_inode
,
1159 .drop_inode
= f2fs_drop_inode
,
1160 .destroy_inode
= f2fs_destroy_inode
,
1161 .write_inode
= f2fs_write_inode
,
1162 .dirty_inode
= f2fs_dirty_inode
,
1163 .show_options
= f2fs_show_options
,
1164 .evict_inode
= f2fs_evict_inode
,
1165 .put_super
= f2fs_put_super
,
1166 .sync_fs
= f2fs_sync_fs
,
1167 .freeze_fs
= f2fs_freeze
,
1168 .unfreeze_fs
= f2fs_unfreeze
,
1169 .statfs
= f2fs_statfs
,
1170 .remount_fs
= f2fs_remount
,
1173 #ifdef CONFIG_F2FS_FS_ENCRYPTION
1174 static int f2fs_get_context(struct inode
*inode
, void *ctx
, size_t len
)
1176 return f2fs_getxattr(inode
, F2FS_XATTR_INDEX_ENCRYPTION
,
1177 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT
,
1181 static int f2fs_key_prefix(struct inode
*inode
, u8
**key
)
1183 *key
= F2FS_I_SB(inode
)->key_prefix
;
1184 return F2FS_I_SB(inode
)->key_prefix_size
;
1187 static int f2fs_set_context(struct inode
*inode
, const void *ctx
, size_t len
,
1190 return f2fs_setxattr(inode
, F2FS_XATTR_INDEX_ENCRYPTION
,
1191 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT
,
1192 ctx
, len
, fs_data
, XATTR_CREATE
);
1195 static unsigned f2fs_max_namelen(struct inode
*inode
)
1197 return S_ISLNK(inode
->i_mode
) ?
1198 inode
->i_sb
->s_blocksize
: F2FS_NAME_LEN
;
1201 static struct fscrypt_operations f2fs_cryptops
= {
1202 .get_context
= f2fs_get_context
,
1203 .key_prefix
= f2fs_key_prefix
,
1204 .set_context
= f2fs_set_context
,
1205 .is_encrypted
= f2fs_encrypted_inode
,
1206 .empty_dir
= f2fs_empty_dir
,
1207 .max_namelen
= f2fs_max_namelen
,
1210 static struct fscrypt_operations f2fs_cryptops
= {
1211 .is_encrypted
= f2fs_encrypted_inode
,
/*
 * NFS export helper: resolve an (ino, generation) pair from a file handle
 * to an inode.  Out-of-range nids and generation mismatches yield -ESTALE
 * so the client re-looks-up the path.
 * NOTE(review): the IS_ERR(inode) check before ERR_CAST and the iput on
 * the mismatch path are not visible in this extraction.
 */
1215 static struct inode
*f2fs_nfs_get_inode(struct super_block
*sb
,
1216 u64 ino
, u32 generation
)
1218 struct f2fs_sb_info
*sbi
= F2FS_SB(sb
);
1219 struct inode
*inode
;
1221 if (check_nid_range(sbi
, ino
))
1222 return ERR_PTR(-ESTALE
);
1225 * f2fs_iget isn't quite right if the inode is currently unallocated!
1226 * However f2fs_iget currently does appropriate checks to handle stale
1227 * inodes so everything is OK.
1229 inode
= f2fs_iget(sb
, ino
);
1231 return ERR_CAST(inode
);
1232 if (unlikely(generation
&& inode
->i_generation
!= generation
)) {
1233 /* we didn't find the right inode.. */
1235 return ERR_PTR(-ESTALE
);
/* export_operations.fh_to_dentry: decode via the generic helper with
 * f2fs_nfs_get_inode as the inode resolver. */
1240 static struct dentry
*f2fs_fh_to_dentry(struct super_block
*sb
, struct fid
*fid
,
1241 int fh_len
, int fh_type
)
1243 return generic_fh_to_dentry(sb
, fid
, fh_len
, fh_type
,
1244 f2fs_nfs_get_inode
);
/* export_operations.fh_to_parent: parent-directory variant of the above,
 * same generic decode + f2fs_nfs_get_inode resolver. */
1247 static struct dentry
*f2fs_fh_to_parent(struct super_block
*sb
, struct fid
*fid
,
1248 int fh_len
, int fh_type
)
1250 return generic_fh_to_parent(sb
, fid
, fh_len
, fh_type
,
1251 f2fs_nfs_get_inode
);
/* NFS export operations (get_parent implemented elsewhere). */
1254 static const struct export_operations f2fs_export_ops
= {
1255 .fh_to_dentry
= f2fs_fh_to_dentry
,
1256 .fh_to_parent
= f2fs_fh_to_parent
,
1257 .get_parent
= f2fs_get_parent
,
/*
 * Compute the maximum number of blocks a single file can address through
 * the inode's block-pointer tree: the inode's direct slots (minus inline
 * xattr space), two direct node blocks, two indirect node blocks, and one
 * double-indirect node block.  The final return line is not visible here.
 */
1260 static loff_t
max_file_blocks(void)
1262 loff_t result
= (DEF_ADDRS_PER_INODE
- F2FS_INLINE_XATTR_ADDRS
);
1263 loff_t leaf_count
= ADDRS_PER_BLOCK
;
1265 /* two direct node blocks */
1266 result
+= (leaf_count
* 2);
1268 /* two indirect node blocks */
1269 leaf_count
*= NIDS_PER_BLOCK
;
1270 result
+= (leaf_count
* 2);
1272 /* one double indirect node block */
1273 leaf_count
*= NIDS_PER_BLOCK
;
1274 result
+= leaf_count
;
/*
 * Write @super into the superblock area of buffer @bh (at
 * F2FS_SUPER_OFFSET), mark the buffer up-to-date and dirty, and issue a
 * synchronous write with cache flush + FUA so the superblock is durable
 * before returning.  Returns the __sync_dirty_buffer() result.
 * NOTE(review): the lock/unlock_buffer lines are not visible here.
 */
1279 static int __f2fs_commit_super(struct buffer_head
*bh
,
1280 struct f2fs_super_block
*super
)
1284 memcpy(bh
->b_data
+ F2FS_SUPER_OFFSET
, super
, sizeof(*super
));
1285 set_buffer_uptodate(bh
);
1286 set_buffer_dirty(bh
);
1289 /* it's rare case, we can do fua all the time */
1290 return __sync_dirty_buffer(bh
, REQ_PREFLUSH
| REQ_FUA
);
1293 static inline bool sanity_check_area_boundary(struct f2fs_sb_info
*sbi
,
1294 struct buffer_head
*bh
)
1296 struct f2fs_super_block
*raw_super
= (struct f2fs_super_block
*)
1297 (bh
->b_data
+ F2FS_SUPER_OFFSET
);
1298 struct super_block
*sb
= sbi
->sb
;
1299 u32 segment0_blkaddr
= le32_to_cpu(raw_super
->segment0_blkaddr
);
1300 u32 cp_blkaddr
= le32_to_cpu(raw_super
->cp_blkaddr
);
1301 u32 sit_blkaddr
= le32_to_cpu(raw_super
->sit_blkaddr
);
1302 u32 nat_blkaddr
= le32_to_cpu(raw_super
->nat_blkaddr
);
1303 u32 ssa_blkaddr
= le32_to_cpu(raw_super
->ssa_blkaddr
);
1304 u32 main_blkaddr
= le32_to_cpu(raw_super
->main_blkaddr
);
1305 u32 segment_count_ckpt
= le32_to_cpu(raw_super
->segment_count_ckpt
);
1306 u32 segment_count_sit
= le32_to_cpu(raw_super
->segment_count_sit
);
1307 u32 segment_count_nat
= le32_to_cpu(raw_super
->segment_count_nat
);
1308 u32 segment_count_ssa
= le32_to_cpu(raw_super
->segment_count_ssa
);
1309 u32 segment_count_main
= le32_to_cpu(raw_super
->segment_count_main
);
1310 u32 segment_count
= le32_to_cpu(raw_super
->segment_count
);
1311 u32 log_blocks_per_seg
= le32_to_cpu(raw_super
->log_blocks_per_seg
);
1312 u64 main_end_blkaddr
= main_blkaddr
+
1313 (segment_count_main
<< log_blocks_per_seg
);
1314 u64 seg_end_blkaddr
= segment0_blkaddr
+
1315 (segment_count
<< log_blocks_per_seg
);
1317 if (segment0_blkaddr
!= cp_blkaddr
) {
1318 f2fs_msg(sb
, KERN_INFO
,
1319 "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
1320 segment0_blkaddr
, cp_blkaddr
);
1324 if (cp_blkaddr
+ (segment_count_ckpt
<< log_blocks_per_seg
) !=
1326 f2fs_msg(sb
, KERN_INFO
,
1327 "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
1328 cp_blkaddr
, sit_blkaddr
,
1329 segment_count_ckpt
<< log_blocks_per_seg
);
1333 if (sit_blkaddr
+ (segment_count_sit
<< log_blocks_per_seg
) !=
1335 f2fs_msg(sb
, KERN_INFO
,
1336 "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
1337 sit_blkaddr
, nat_blkaddr
,
1338 segment_count_sit
<< log_blocks_per_seg
);
1342 if (nat_blkaddr
+ (segment_count_nat
<< log_blocks_per_seg
) !=
1344 f2fs_msg(sb
, KERN_INFO
,
1345 "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
1346 nat_blkaddr
, ssa_blkaddr
,
1347 segment_count_nat
<< log_blocks_per_seg
);
1351 if (ssa_blkaddr
+ (segment_count_ssa
<< log_blocks_per_seg
) !=
1353 f2fs_msg(sb
, KERN_INFO
,
1354 "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
1355 ssa_blkaddr
, main_blkaddr
,
1356 segment_count_ssa
<< log_blocks_per_seg
);
1360 if (main_end_blkaddr
> seg_end_blkaddr
) {
1361 f2fs_msg(sb
, KERN_INFO
,
1362 "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
1365 (segment_count
<< log_blocks_per_seg
),
1366 segment_count_main
<< log_blocks_per_seg
);
1368 } else if (main_end_blkaddr
< seg_end_blkaddr
) {
1372 /* fix in-memory information all the time */
1373 raw_super
->segment_count
= cpu_to_le32((main_end_blkaddr
-
1374 segment0_blkaddr
) >> log_blocks_per_seg
);
1376 if (f2fs_readonly(sb
) || bdev_read_only(sb
->s_bdev
)) {
1377 set_sbi_flag(sbi
, SBI_NEED_SB_WRITE
);
1380 err
= __f2fs_commit_super(bh
, NULL
);
1381 res
= err
? "failed" : "done";
1383 f2fs_msg(sb
, KERN_INFO
,
1384 "Fix alignment : %s, start(%u) end(%u) block(%u)",
1387 (segment_count
<< log_blocks_per_seg
),
1388 segment_count_main
<< log_blocks_per_seg
);
1395 static int sanity_check_raw_super(struct f2fs_sb_info
*sbi
,
1396 struct buffer_head
*bh
)
1398 struct f2fs_super_block
*raw_super
= (struct f2fs_super_block
*)
1399 (bh
->b_data
+ F2FS_SUPER_OFFSET
);
1400 struct super_block
*sb
= sbi
->sb
;
1401 unsigned int blocksize
;
1403 if (F2FS_SUPER_MAGIC
!= le32_to_cpu(raw_super
->magic
)) {
1404 f2fs_msg(sb
, KERN_INFO
,
1405 "Magic Mismatch, valid(0x%x) - read(0x%x)",
1406 F2FS_SUPER_MAGIC
, le32_to_cpu(raw_super
->magic
));
1410 /* Currently, support only 4KB page cache size */
1411 if (F2FS_BLKSIZE
!= PAGE_SIZE
) {
1412 f2fs_msg(sb
, KERN_INFO
,
1413 "Invalid page_cache_size (%lu), supports only 4KB\n",
1418 /* Currently, support only 4KB block size */
1419 blocksize
= 1 << le32_to_cpu(raw_super
->log_blocksize
);
1420 if (blocksize
!= F2FS_BLKSIZE
) {
1421 f2fs_msg(sb
, KERN_INFO
,
1422 "Invalid blocksize (%u), supports only 4KB\n",
1427 /* check log blocks per segment */
1428 if (le32_to_cpu(raw_super
->log_blocks_per_seg
) != 9) {
1429 f2fs_msg(sb
, KERN_INFO
,
1430 "Invalid log blocks per segment (%u)\n",
1431 le32_to_cpu(raw_super
->log_blocks_per_seg
));
1435 /* Currently, support 512/1024/2048/4096 bytes sector size */
1436 if (le32_to_cpu(raw_super
->log_sectorsize
) >
1437 F2FS_MAX_LOG_SECTOR_SIZE
||
1438 le32_to_cpu(raw_super
->log_sectorsize
) <
1439 F2FS_MIN_LOG_SECTOR_SIZE
) {
1440 f2fs_msg(sb
, KERN_INFO
, "Invalid log sectorsize (%u)",
1441 le32_to_cpu(raw_super
->log_sectorsize
));
1444 if (le32_to_cpu(raw_super
->log_sectors_per_block
) +
1445 le32_to_cpu(raw_super
->log_sectorsize
) !=
1446 F2FS_MAX_LOG_SECTOR_SIZE
) {
1447 f2fs_msg(sb
, KERN_INFO
,
1448 "Invalid log sectors per block(%u) log sectorsize(%u)",
1449 le32_to_cpu(raw_super
->log_sectors_per_block
),
1450 le32_to_cpu(raw_super
->log_sectorsize
));
1454 /* check reserved ino info */
1455 if (le32_to_cpu(raw_super
->node_ino
) != 1 ||
1456 le32_to_cpu(raw_super
->meta_ino
) != 2 ||
1457 le32_to_cpu(raw_super
->root_ino
) != 3) {
1458 f2fs_msg(sb
, KERN_INFO
,
1459 "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
1460 le32_to_cpu(raw_super
->node_ino
),
1461 le32_to_cpu(raw_super
->meta_ino
),
1462 le32_to_cpu(raw_super
->root_ino
));
1466 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
1467 if (sanity_check_area_boundary(sbi
, bh
))
1473 int sanity_check_ckpt(struct f2fs_sb_info
*sbi
)
1475 unsigned int total
, fsmeta
;
1476 struct f2fs_super_block
*raw_super
= F2FS_RAW_SUPER(sbi
);
1477 struct f2fs_checkpoint
*ckpt
= F2FS_CKPT(sbi
);
1478 unsigned int ovp_segments
, reserved_segments
;
1480 total
= le32_to_cpu(raw_super
->segment_count
);
1481 fsmeta
= le32_to_cpu(raw_super
->segment_count_ckpt
);
1482 fsmeta
+= le32_to_cpu(raw_super
->segment_count_sit
);
1483 fsmeta
+= le32_to_cpu(raw_super
->segment_count_nat
);
1484 fsmeta
+= le32_to_cpu(ckpt
->rsvd_segment_count
);
1485 fsmeta
+= le32_to_cpu(raw_super
->segment_count_ssa
);
1487 if (unlikely(fsmeta
>= total
))
1490 ovp_segments
= le32_to_cpu(ckpt
->overprov_segment_count
);
1491 reserved_segments
= le32_to_cpu(ckpt
->rsvd_segment_count
);
1493 if (unlikely(fsmeta
< F2FS_MIN_SEGMENTS
||
1494 ovp_segments
== 0 || reserved_segments
== 0)) {
1495 f2fs_msg(sbi
->sb
, KERN_ERR
,
1496 "Wrong layout: check mkfs.f2fs version");
1500 if (unlikely(f2fs_cp_error(sbi
))) {
1501 f2fs_msg(sbi
->sb
, KERN_ERR
, "A bug case: need to run fsck");
1507 static void init_sb_info(struct f2fs_sb_info
*sbi
)
1509 struct f2fs_super_block
*raw_super
= sbi
->raw_super
;
1512 sbi
->log_sectors_per_block
=
1513 le32_to_cpu(raw_super
->log_sectors_per_block
);
1514 sbi
->log_blocksize
= le32_to_cpu(raw_super
->log_blocksize
);
1515 sbi
->blocksize
= 1 << sbi
->log_blocksize
;
1516 sbi
->log_blocks_per_seg
= le32_to_cpu(raw_super
->log_blocks_per_seg
);
1517 sbi
->blocks_per_seg
= 1 << sbi
->log_blocks_per_seg
;
1518 sbi
->segs_per_sec
= le32_to_cpu(raw_super
->segs_per_sec
);
1519 sbi
->secs_per_zone
= le32_to_cpu(raw_super
->secs_per_zone
);
1520 sbi
->total_sections
= le32_to_cpu(raw_super
->section_count
);
1521 sbi
->total_node_count
=
1522 (le32_to_cpu(raw_super
->segment_count_nat
) / 2)
1523 * sbi
->blocks_per_seg
* NAT_ENTRY_PER_BLOCK
;
1524 sbi
->root_ino_num
= le32_to_cpu(raw_super
->root_ino
);
1525 sbi
->node_ino_num
= le32_to_cpu(raw_super
->node_ino
);
1526 sbi
->meta_ino_num
= le32_to_cpu(raw_super
->meta_ino
);
1527 sbi
->cur_victim_sec
= NULL_SECNO
;
1528 sbi
->max_victim_search
= DEF_MAX_VICTIM_SEARCH
;
1530 sbi
->dir_level
= DEF_DIR_LEVEL
;
1531 sbi
->interval_time
[CP_TIME
] = DEF_CP_INTERVAL
;
1532 sbi
->interval_time
[REQ_TIME
] = DEF_IDLE_INTERVAL
;
1533 clear_sbi_flag(sbi
, SBI_NEED_FSCK
);
1535 for (i
= 0; i
< NR_COUNT_TYPE
; i
++)
1536 atomic_set(&sbi
->nr_pages
[i
], 0);
1538 INIT_LIST_HEAD(&sbi
->s_list
);
1539 mutex_init(&sbi
->umount_mutex
);
1540 mutex_init(&sbi
->wio_mutex
[NODE
]);
1541 mutex_init(&sbi
->wio_mutex
[DATA
]);
1542 spin_lock_init(&sbi
->cp_lock
);
1544 #ifdef CONFIG_F2FS_FS_ENCRYPTION
1545 memcpy(sbi
->key_prefix
, F2FS_KEY_DESC_PREFIX
,
1546 F2FS_KEY_DESC_PREFIX_SIZE
);
1547 sbi
->key_prefix_size
= F2FS_KEY_DESC_PREFIX_SIZE
;
1551 static int init_percpu_info(struct f2fs_sb_info
*sbi
)
1555 err
= percpu_counter_init(&sbi
->alloc_valid_block_count
, 0, GFP_KERNEL
);
1559 return percpu_counter_init(&sbi
->total_valid_inode_count
, 0,
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * For a zoned block device, record zone geometry in the sb_info and
 * cache the type of every zone of device @devi in FDEV(devi).blkz_type.
 * Returns 0 on success or a negative errno.
 */
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
	struct block_device *bdev = FDEV(devi).bdev;
	sector_t nr_sectors = bdev->bd_part->nr_sects;
	sector_t sector = 0;
	struct blk_zone *zones;
	unsigned int i, nr_zones;
	unsigned int n = 0;
	int err = -EIO;

	if (!f2fs_sb_mounted_blkzoned(sbi->sb))
		return 0;

	/* all zoned devices must share one zone size */
	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
				SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
		return -EINVAL;
	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
				__ilog2_u32(sbi->blocks_per_blkz))
		return -EINVAL;
	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
					sbi->log_blocks_per_blkz;
	if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
		FDEV(devi).nr_blkz++;

	FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
	if (!FDEV(devi).blkz_type)
		return -ENOMEM;

#define F2FS_REPORT_NR_ZONES   4096

	zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
			GFP_KERNEL);
	if (!zones)
		return -ENOMEM;

	/* Get block zones type */
	while (zones && sector < nr_sectors) {

		nr_zones = F2FS_REPORT_NR_ZONES;
		err = blkdev_report_zones(bdev, sector,
					  zones, &nr_zones,
					  GFP_KERNEL);
		if (err)
			break;
		if (!nr_zones) {
			err = -EIO;
			break;
		}

		for (i = 0; i < nr_zones; i++) {
			FDEV(devi).blkz_type[n] = zones[i].type;
			sector += zones[i].len;
			n++;
		}
	}

	kfree(zones);

	return err;
}
#endif
1629 * Read f2fs raw super block.
1630 * Because we have two copies of super block, so read both of them
1631 * to get the first valid one. If any one of them is broken, we pass
1632 * them recovery flag back to the caller.
1634 static int read_raw_super_block(struct f2fs_sb_info
*sbi
,
1635 struct f2fs_super_block
**raw_super
,
1636 int *valid_super_block
, int *recovery
)
1638 struct super_block
*sb
= sbi
->sb
;
1640 struct buffer_head
*bh
;
1641 struct f2fs_super_block
*super
;
1644 super
= kzalloc(sizeof(struct f2fs_super_block
), GFP_KERNEL
);
1648 for (block
= 0; block
< 2; block
++) {
1649 bh
= sb_bread(sb
, block
);
1651 f2fs_msg(sb
, KERN_ERR
, "Unable to read %dth superblock",
1657 /* sanity checking of raw super */
1658 if (sanity_check_raw_super(sbi
, bh
)) {
1659 f2fs_msg(sb
, KERN_ERR
,
1660 "Can't find valid F2FS filesystem in %dth superblock",
1668 memcpy(super
, bh
->b_data
+ F2FS_SUPER_OFFSET
,
1670 *valid_super_block
= block
;
1676 /* Fail to read any one of the superblocks*/
1680 /* No valid superblock */
1689 int f2fs_commit_super(struct f2fs_sb_info
*sbi
, bool recover
)
1691 struct buffer_head
*bh
;
1694 if ((recover
&& f2fs_readonly(sbi
->sb
)) ||
1695 bdev_read_only(sbi
->sb
->s_bdev
)) {
1696 set_sbi_flag(sbi
, SBI_NEED_SB_WRITE
);
1700 /* write back-up superblock first */
1701 bh
= sb_getblk(sbi
->sb
, sbi
->valid_super_block
? 0: 1);
1704 err
= __f2fs_commit_super(bh
, F2FS_RAW_SUPER(sbi
));
1707 /* if we are in recovery path, skip writing valid superblock */
1711 /* write current valid superblock */
1712 bh
= sb_getblk(sbi
->sb
, sbi
->valid_super_block
);
1715 err
= __f2fs_commit_super(bh
, F2FS_RAW_SUPER(sbi
));
1720 static int f2fs_scan_devices(struct f2fs_sb_info
*sbi
)
1722 struct f2fs_super_block
*raw_super
= F2FS_RAW_SUPER(sbi
);
1725 for (i
= 0; i
< MAX_DEVICES
; i
++) {
1726 if (!RDEV(i
).path
[0])
1730 sbi
->devs
= kzalloc(sizeof(struct f2fs_dev_info
) *
1731 MAX_DEVICES
, GFP_KERNEL
);
1736 memcpy(FDEV(i
).path
, RDEV(i
).path
, MAX_PATH_LEN
);
1737 FDEV(i
).total_segments
= le32_to_cpu(RDEV(i
).total_segments
);
1739 FDEV(i
).start_blk
= 0;
1740 FDEV(i
).end_blk
= FDEV(i
).start_blk
+
1741 (FDEV(i
).total_segments
<<
1742 sbi
->log_blocks_per_seg
) - 1 +
1743 le32_to_cpu(raw_super
->segment0_blkaddr
);
1745 FDEV(i
).start_blk
= FDEV(i
- 1).end_blk
+ 1;
1746 FDEV(i
).end_blk
= FDEV(i
).start_blk
+
1747 (FDEV(i
).total_segments
<<
1748 sbi
->log_blocks_per_seg
) - 1;
1751 FDEV(i
).bdev
= blkdev_get_by_path(FDEV(i
).path
,
1752 sbi
->sb
->s_mode
, sbi
->sb
->s_type
);
1753 if (IS_ERR(FDEV(i
).bdev
))
1754 return PTR_ERR(FDEV(i
).bdev
);
1756 /* to release errored devices */
1757 sbi
->s_ndevs
= i
+ 1;
1759 #ifdef CONFIG_BLK_DEV_ZONED
1760 if (bdev_zoned_model(FDEV(i
).bdev
) == BLK_ZONED_HM
&&
1761 !f2fs_sb_mounted_blkzoned(sbi
->sb
)) {
1762 f2fs_msg(sbi
->sb
, KERN_ERR
,
1763 "Zoned block device feature not enabled\n");
1766 if (bdev_zoned_model(FDEV(i
).bdev
) != BLK_ZONED_NONE
) {
1767 if (init_blkz_info(sbi
, i
)) {
1768 f2fs_msg(sbi
->sb
, KERN_ERR
,
1769 "Failed to initialize F2FS blkzone information");
1772 f2fs_msg(sbi
->sb
, KERN_INFO
,
1773 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
1775 FDEV(i
).total_segments
,
1776 FDEV(i
).start_blk
, FDEV(i
).end_blk
,
1777 bdev_zoned_model(FDEV(i
).bdev
) == BLK_ZONED_HA
?
1778 "Host-aware" : "Host-managed");
1782 f2fs_msg(sbi
->sb
, KERN_INFO
,
1783 "Mount Device [%2d]: %20s, %8u, %8x - %8x",
1785 FDEV(i
).total_segments
,
1786 FDEV(i
).start_blk
, FDEV(i
).end_blk
);
1788 f2fs_msg(sbi
->sb
, KERN_INFO
,
1789 "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi
));
1793 static int f2fs_fill_super(struct super_block
*sb
, void *data
, int silent
)
1795 struct f2fs_sb_info
*sbi
;
1796 struct f2fs_super_block
*raw_super
;
1799 bool retry
= true, need_fsck
= false;
1800 char *options
= NULL
;
1801 int recovery
, i
, valid_super_block
;
1802 struct curseg_info
*seg_i
;
1807 valid_super_block
= -1;
1810 /* allocate memory for f2fs-specific super block info */
1811 sbi
= kzalloc(sizeof(struct f2fs_sb_info
), GFP_KERNEL
);
1817 /* Load the checksum driver */
1818 sbi
->s_chksum_driver
= crypto_alloc_shash("crc32", 0, 0);
1819 if (IS_ERR(sbi
->s_chksum_driver
)) {
1820 f2fs_msg(sb
, KERN_ERR
, "Cannot load crc32 driver.");
1821 err
= PTR_ERR(sbi
->s_chksum_driver
);
1822 sbi
->s_chksum_driver
= NULL
;
1826 /* set a block size */
1827 if (unlikely(!sb_set_blocksize(sb
, F2FS_BLKSIZE
))) {
1828 f2fs_msg(sb
, KERN_ERR
, "unable to set blocksize");
1832 err
= read_raw_super_block(sbi
, &raw_super
, &valid_super_block
,
1837 sb
->s_fs_info
= sbi
;
1838 sbi
->raw_super
= raw_super
;
1841 * The BLKZONED feature indicates that the drive was formatted with
1842 * zone alignment optimization. This is optional for host-aware
1843 * devices, but mandatory for host-managed zoned block devices.
1845 #ifndef CONFIG_BLK_DEV_ZONED
1846 if (f2fs_sb_mounted_blkzoned(sb
)) {
1847 f2fs_msg(sb
, KERN_ERR
,
1848 "Zoned block device support is not enabled\n");
1852 default_options(sbi
);
1853 /* parse mount options */
1854 options
= kstrdup((const char *)data
, GFP_KERNEL
);
1855 if (data
&& !options
) {
1860 err
= parse_options(sb
, options
);
1864 sbi
->max_file_blocks
= max_file_blocks();
1865 sb
->s_maxbytes
= sbi
->max_file_blocks
<<
1866 le32_to_cpu(raw_super
->log_blocksize
);
1867 sb
->s_max_links
= F2FS_LINK_MAX
;
1868 get_random_bytes(&sbi
->s_next_generation
, sizeof(u32
));
1870 sb
->s_op
= &f2fs_sops
;
1871 sb
->s_cop
= &f2fs_cryptops
;
1872 sb
->s_xattr
= f2fs_xattr_handlers
;
1873 sb
->s_export_op
= &f2fs_export_ops
;
1874 sb
->s_magic
= F2FS_SUPER_MAGIC
;
1875 sb
->s_time_gran
= 1;
1876 sb
->s_flags
= (sb
->s_flags
& ~MS_POSIXACL
) |
1877 (test_opt(sbi
, POSIX_ACL
) ? MS_POSIXACL
: 0);
1878 memcpy(sb
->s_uuid
, raw_super
->uuid
, sizeof(raw_super
->uuid
));
1880 /* init f2fs-specific super block info */
1881 sbi
->valid_super_block
= valid_super_block
;
1882 mutex_init(&sbi
->gc_mutex
);
1883 mutex_init(&sbi
->cp_mutex
);
1884 init_rwsem(&sbi
->node_write
);
1886 /* disallow all the data/node/meta page writes */
1887 set_sbi_flag(sbi
, SBI_POR_DOING
);
1888 spin_lock_init(&sbi
->stat_lock
);
1890 init_rwsem(&sbi
->read_io
.io_rwsem
);
1891 sbi
->read_io
.sbi
= sbi
;
1892 sbi
->read_io
.bio
= NULL
;
1893 for (i
= 0; i
< NR_PAGE_TYPE
; i
++) {
1894 init_rwsem(&sbi
->write_io
[i
].io_rwsem
);
1895 sbi
->write_io
[i
].sbi
= sbi
;
1896 sbi
->write_io
[i
].bio
= NULL
;
1899 init_rwsem(&sbi
->cp_rwsem
);
1900 init_waitqueue_head(&sbi
->cp_wait
);
1903 err
= init_percpu_info(sbi
);
1907 if (F2FS_IO_SIZE(sbi
) > 1) {
1908 sbi
->write_io_dummy
=
1909 mempool_create_page_pool(F2FS_IO_SIZE(sbi
) - 1, 0);
1910 if (!sbi
->write_io_dummy
)
1914 /* get an inode for meta space */
1915 sbi
->meta_inode
= f2fs_iget(sb
, F2FS_META_INO(sbi
));
1916 if (IS_ERR(sbi
->meta_inode
)) {
1917 f2fs_msg(sb
, KERN_ERR
, "Failed to read F2FS meta data inode");
1918 err
= PTR_ERR(sbi
->meta_inode
);
1922 err
= get_valid_checkpoint(sbi
);
1924 f2fs_msg(sb
, KERN_ERR
, "Failed to get valid F2FS checkpoint");
1925 goto free_meta_inode
;
1928 /* Initialize device list */
1929 err
= f2fs_scan_devices(sbi
);
1931 f2fs_msg(sb
, KERN_ERR
, "Failed to find devices");
1935 sbi
->total_valid_node_count
=
1936 le32_to_cpu(sbi
->ckpt
->valid_node_count
);
1937 percpu_counter_set(&sbi
->total_valid_inode_count
,
1938 le32_to_cpu(sbi
->ckpt
->valid_inode_count
));
1939 sbi
->user_block_count
= le64_to_cpu(sbi
->ckpt
->user_block_count
);
1940 sbi
->total_valid_block_count
=
1941 le64_to_cpu(sbi
->ckpt
->valid_block_count
);
1942 sbi
->last_valid_block_count
= sbi
->total_valid_block_count
;
1944 for (i
= 0; i
< NR_INODE_TYPE
; i
++) {
1945 INIT_LIST_HEAD(&sbi
->inode_list
[i
]);
1946 spin_lock_init(&sbi
->inode_lock
[i
]);
1949 init_extent_cache_info(sbi
);
1951 init_ino_entry_info(sbi
);
1953 /* setup f2fs internal modules */
1954 err
= build_segment_manager(sbi
);
1956 f2fs_msg(sb
, KERN_ERR
,
1957 "Failed to initialize F2FS segment manager");
1960 err
= build_node_manager(sbi
);
1962 f2fs_msg(sb
, KERN_ERR
,
1963 "Failed to initialize F2FS node manager");
1967 /* For write statistics */
1968 if (sb
->s_bdev
->bd_part
)
1969 sbi
->sectors_written_start
=
1970 (u64
)part_stat_read(sb
->s_bdev
->bd_part
, sectors
[1]);
1972 /* Read accumulated write IO statistics if exists */
1973 seg_i
= CURSEG_I(sbi
, CURSEG_HOT_NODE
);
1974 if (__exist_node_summaries(sbi
))
1975 sbi
->kbytes_written
=
1976 le64_to_cpu(seg_i
->journal
->info
.kbytes_written
);
1978 build_gc_manager(sbi
);
1980 /* get an inode for node space */
1981 sbi
->node_inode
= f2fs_iget(sb
, F2FS_NODE_INO(sbi
));
1982 if (IS_ERR(sbi
->node_inode
)) {
1983 f2fs_msg(sb
, KERN_ERR
, "Failed to read node inode");
1984 err
= PTR_ERR(sbi
->node_inode
);
1988 f2fs_join_shrinker(sbi
);
1990 /* if there are nt orphan nodes free them */
1991 err
= recover_orphan_inodes(sbi
);
1993 goto free_node_inode
;
1995 /* read root inode and dentry */
1996 root
= f2fs_iget(sb
, F2FS_ROOT_INO(sbi
));
1998 f2fs_msg(sb
, KERN_ERR
, "Failed to read root inode");
1999 err
= PTR_ERR(root
);
2000 goto free_node_inode
;
2002 if (!S_ISDIR(root
->i_mode
) || !root
->i_blocks
|| !root
->i_size
) {
2005 goto free_node_inode
;
2008 sb
->s_root
= d_make_root(root
); /* allocate root dentry */
2011 goto free_root_inode
;
2014 err
= f2fs_build_stats(sbi
);
2016 goto free_root_inode
;
2019 sbi
->s_proc
= proc_mkdir(sb
->s_id
, f2fs_proc_root
);
2022 proc_create_data("segment_info", S_IRUGO
, sbi
->s_proc
,
2023 &f2fs_seq_segment_info_fops
, sb
);
2024 proc_create_data("segment_bits", S_IRUGO
, sbi
->s_proc
,
2025 &f2fs_seq_segment_bits_fops
, sb
);
2028 sbi
->s_kobj
.kset
= f2fs_kset
;
2029 init_completion(&sbi
->s_kobj_unregister
);
2030 err
= kobject_init_and_add(&sbi
->s_kobj
, &f2fs_ktype
, NULL
,
2035 /* recover fsynced data */
2036 if (!test_opt(sbi
, DISABLE_ROLL_FORWARD
)) {
2038 * mount should be failed, when device has readonly mode, and
2039 * previous checkpoint was not done by clean system shutdown.
2041 if (bdev_read_only(sb
->s_bdev
) &&
2042 !is_set_ckpt_flags(sbi
, CP_UMOUNT_FLAG
)) {
2048 set_sbi_flag(sbi
, SBI_NEED_FSCK
);
2053 err
= recover_fsync_data(sbi
, false);
2056 f2fs_msg(sb
, KERN_ERR
,
2057 "Cannot recover all fsync data errno=%d", err
);
2061 err
= recover_fsync_data(sbi
, true);
2063 if (!f2fs_readonly(sb
) && err
> 0) {
2065 f2fs_msg(sb
, KERN_ERR
,
2066 "Need to recover fsync data");
2071 /* recover_fsync_data() cleared this already */
2072 clear_sbi_flag(sbi
, SBI_POR_DOING
);
2075 * If filesystem is not mounted as read-only then
2076 * do start the gc_thread.
2078 if (test_opt(sbi
, BG_GC
) && !f2fs_readonly(sb
)) {
2079 /* After POR, we can run background GC thread.*/
2080 err
= start_gc_thread(sbi
);
2086 /* recover broken superblock */
2088 err
= f2fs_commit_super(sbi
, true);
2089 f2fs_msg(sb
, KERN_INFO
,
2090 "Try to recover %dth superblock, ret: %d",
2091 sbi
->valid_super_block
? 1 : 2, err
);
2094 f2fs_update_time(sbi
, CP_TIME
);
2095 f2fs_update_time(sbi
, REQ_TIME
);
2099 f2fs_sync_inode_meta(sbi
);
2100 kobject_del(&sbi
->s_kobj
);
2101 kobject_put(&sbi
->s_kobj
);
2102 wait_for_completion(&sbi
->s_kobj_unregister
);
2105 remove_proc_entry("segment_info", sbi
->s_proc
);
2106 remove_proc_entry("segment_bits", sbi
->s_proc
);
2107 remove_proc_entry(sb
->s_id
, f2fs_proc_root
);
2109 f2fs_destroy_stats(sbi
);
2114 truncate_inode_pages_final(NODE_MAPPING(sbi
));
2115 mutex_lock(&sbi
->umount_mutex
);
2116 release_ino_entry(sbi
, true);
2117 f2fs_leave_shrinker(sbi
);
2119 * Some dirty meta pages can be produced by recover_orphan_inodes()
2120 * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
2121 * followed by write_checkpoint() through f2fs_write_node_pages(), which
2122 * falls into an infinite loop in sync_meta_pages().
2124 truncate_inode_pages_final(META_MAPPING(sbi
));
2125 iput(sbi
->node_inode
);
2126 mutex_unlock(&sbi
->umount_mutex
);
2128 destroy_node_manager(sbi
);
2130 destroy_segment_manager(sbi
);
2132 destroy_device_list(sbi
);
2135 make_bad_inode(sbi
->meta_inode
);
2136 iput(sbi
->meta_inode
);
2138 mempool_destroy(sbi
->write_io_dummy
);
2140 destroy_percpu_info(sbi
);
2145 if (sbi
->s_chksum_driver
)
2146 crypto_free_shash(sbi
->s_chksum_driver
);
2149 /* give only one another chance */
2152 shrink_dcache_sb(sb
);
2158 static struct dentry
*f2fs_mount(struct file_system_type
*fs_type
, int flags
,
2159 const char *dev_name
, void *data
)
2161 return mount_bdev(fs_type
, flags
, dev_name
, data
, f2fs_fill_super
);
2164 static void kill_f2fs_super(struct super_block
*sb
)
2167 set_sbi_flag(F2FS_SB(sb
), SBI_IS_CLOSE
);
2168 kill_block_super(sb
);
2171 static struct file_system_type f2fs_fs_type
= {
2172 .owner
= THIS_MODULE
,
2174 .mount
= f2fs_mount
,
2175 .kill_sb
= kill_f2fs_super
,
2176 .fs_flags
= FS_REQUIRES_DEV
,
2178 MODULE_ALIAS_FS("f2fs");
2180 static int __init
init_inodecache(void)
2182 f2fs_inode_cachep
= kmem_cache_create("f2fs_inode_cache",
2183 sizeof(struct f2fs_inode_info
), 0,
2184 SLAB_RECLAIM_ACCOUNT
|SLAB_ACCOUNT
, NULL
);
2185 if (!f2fs_inode_cachep
)
2190 static void destroy_inodecache(void)
2193 * Make sure all delayed rcu free inodes are flushed before we
2197 kmem_cache_destroy(f2fs_inode_cachep
);
2200 static int __init
init_f2fs_fs(void)
2204 f2fs_build_trace_ios();
2206 err
= init_inodecache();
2209 err
= create_node_manager_caches();
2211 goto free_inodecache
;
2212 err
= create_segment_manager_caches();
2214 goto free_node_manager_caches
;
2215 err
= create_checkpoint_caches();
2217 goto free_segment_manager_caches
;
2218 err
= create_extent_cache();
2220 goto free_checkpoint_caches
;
2221 f2fs_kset
= kset_create_and_add("f2fs", NULL
, fs_kobj
);
2224 goto free_extent_cache
;
2226 err
= register_shrinker(&f2fs_shrinker_info
);
2230 err
= register_filesystem(&f2fs_fs_type
);
2233 err
= f2fs_create_root_stats();
2235 goto free_filesystem
;
2236 f2fs_proc_root
= proc_mkdir("fs/f2fs", NULL
);
2240 unregister_filesystem(&f2fs_fs_type
);
2242 unregister_shrinker(&f2fs_shrinker_info
);
2244 kset_unregister(f2fs_kset
);
2246 destroy_extent_cache();
2247 free_checkpoint_caches
:
2248 destroy_checkpoint_caches();
2249 free_segment_manager_caches
:
2250 destroy_segment_manager_caches();
2251 free_node_manager_caches
:
2252 destroy_node_manager_caches();
2254 destroy_inodecache();
2259 static void __exit
exit_f2fs_fs(void)
2261 remove_proc_entry("fs/f2fs", NULL
);
2262 f2fs_destroy_root_stats();
2263 unregister_filesystem(&f2fs_fs_type
);
2264 unregister_shrinker(&f2fs_shrinker_info
);
2265 kset_unregister(f2fs_kset
);
2266 destroy_extent_cache();
2267 destroy_checkpoint_caches();
2268 destroy_segment_manager_caches();
2269 destroy_node_manager_caches();
2270 destroy_inodecache();
2271 f2fs_destroy_trace_ios();
2274 module_init(init_f2fs_fs
)
2275 module_exit(exit_f2fs_fs
)
2277 MODULE_AUTHOR("Samsung Electronics's Praesto Team");
2278 MODULE_DESCRIPTION("Flash Friendly File System");
2279 MODULE_LICENSE("GPL");