1 /*
2 * fs/f2fs/super.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/statfs.h>
15 #include <linux/buffer_head.h>
16 #include <linux/backing-dev.h>
17 #include <linux/kthread.h>
18 #include <linux/parser.h>
19 #include <linux/mount.h>
20 #include <linux/seq_file.h>
21 #include <linux/proc_fs.h>
22 #include <linux/random.h>
23 #include <linux/exportfs.h>
24 #include <linux/blkdev.h>
25 #include <linux/f2fs_fs.h>
26 #include <linux/sysfs.h>
27
28 #include "f2fs.h"
29 #include "node.h"
30 #include "segment.h"
31 #include "xattr.h"
32 #include "gc.h"
33 #include "trace.h"
34
35 #define CREATE_TRACE_POINTS
36 #include <trace/events/f2fs.h>
37
38 static struct proc_dir_entry *f2fs_proc_root;
39 static struct kmem_cache *f2fs_inode_cachep;
40 static struct kset *f2fs_kset;
41
42 #ifdef CONFIG_F2FS_FAULT_INJECTION
43
44 char *fault_name[FAULT_MAX] = {
45 [FAULT_KMALLOC] = "kmalloc",
46 [FAULT_PAGE_ALLOC] = "page alloc",
47 [FAULT_ALLOC_NID] = "alloc nid",
48 [FAULT_ORPHAN] = "orphan",
49 [FAULT_BLOCK] = "no more block",
50 [FAULT_DIR_DEPTH] = "too big dir depth",
51 [FAULT_EVICT_INODE] = "evict_inode fail",
52 [FAULT_IO] = "IO error",
53 [FAULT_CHECKPOINT] = "checkpoint error",
54 };
55
56 static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
57 unsigned int rate)
58 {
59 struct f2fs_fault_info *ffi = &sbi->fault_info;
60
61 if (rate) {
62 atomic_set(&ffi->inject_ops, 0);
63 ffi->inject_rate = rate;
64 ffi->inject_type = (1 << FAULT_MAX) - 1;
65 } else {
66 memset(ffi, 0, sizeof(struct f2fs_fault_info));
67 }
68 }
69 #endif
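/*
 * Illustrative usage: mounting with -o fault_injection=1000 enables every
 * fault type above and makes roughly one in 1000 injectable operations fail;
 * the rate and the type bitmask can also be tuned at runtime through the
 * inject_rate and inject_type sysfs attributes defined later in this file.
 */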
70
71 /* f2fs-wide shrinker description */
72 static struct shrinker f2fs_shrinker_info = {
73 .scan_objects = f2fs_shrink_scan,
74 .count_objects = f2fs_shrink_count,
75 .seeks = DEFAULT_SEEKS,
76 };
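/*
 * Note: this shrinker is registered once for the whole module; under memory
 * pressure f2fs_shrink_count()/f2fs_shrink_scan() (see shrinker.c) walk all
 * mounted instances and reclaim clean in-memory metadata such as extent
 * cache entries and cached NAT entries, not file data pages.
 */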
77
78 enum {
79 Opt_gc_background,
80 Opt_disable_roll_forward,
81 Opt_norecovery,
82 Opt_discard,
83 Opt_nodiscard,
84 Opt_noheap,
85 Opt_user_xattr,
86 Opt_nouser_xattr,
87 Opt_acl,
88 Opt_noacl,
89 Opt_active_logs,
90 Opt_disable_ext_identify,
91 Opt_inline_xattr,
92 Opt_inline_data,
93 Opt_inline_dentry,
94 Opt_noinline_dentry,
95 Opt_flush_merge,
96 Opt_noflush_merge,
97 Opt_nobarrier,
98 Opt_fastboot,
99 Opt_extent_cache,
100 Opt_noextent_cache,
101 Opt_noinline_data,
102 Opt_data_flush,
103 Opt_mode,
104 Opt_io_size_bits,
105 Opt_fault_injection,
106 Opt_lazytime,
107 Opt_nolazytime,
108 Opt_err,
109 };
110
111 static match_table_t f2fs_tokens = {
112 {Opt_gc_background, "background_gc=%s"},
113 {Opt_disable_roll_forward, "disable_roll_forward"},
114 {Opt_norecovery, "norecovery"},
115 {Opt_discard, "discard"},
116 {Opt_nodiscard, "nodiscard"},
117 {Opt_noheap, "no_heap"},
118 {Opt_user_xattr, "user_xattr"},
119 {Opt_nouser_xattr, "nouser_xattr"},
120 {Opt_acl, "acl"},
121 {Opt_noacl, "noacl"},
122 {Opt_active_logs, "active_logs=%u"},
123 {Opt_disable_ext_identify, "disable_ext_identify"},
124 {Opt_inline_xattr, "inline_xattr"},
125 {Opt_inline_data, "inline_data"},
126 {Opt_inline_dentry, "inline_dentry"},
127 {Opt_noinline_dentry, "noinline_dentry"},
128 {Opt_flush_merge, "flush_merge"},
129 {Opt_noflush_merge, "noflush_merge"},
130 {Opt_nobarrier, "nobarrier"},
131 {Opt_fastboot, "fastboot"},
132 {Opt_extent_cache, "extent_cache"},
133 {Opt_noextent_cache, "noextent_cache"},
134 {Opt_noinline_data, "noinline_data"},
135 {Opt_data_flush, "data_flush"},
136 {Opt_mode, "mode=%s"},
137 {Opt_io_size_bits, "io_bits=%u"},
138 {Opt_fault_injection, "fault_injection=%u"},
139 {Opt_lazytime, "lazytime"},
140 {Opt_nolazytime, "nolazytime"},
141 {Opt_err, NULL},
142 };
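/*
 * Illustrative example: a mount line combining several of the options above
 * might look like
 *
 *   mount -t f2fs -o background_gc=on,discard,active_logs=6,mode=lfs,io_bits=7 \
 *         /dev/sdb1 /mnt/f2fs
 *
 * parse_options() below splits this comma-separated string with strsep() and
 * maps each token onto the corresponding mount flag via match_token().
 */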
143
144 /* Sysfs support for f2fs */
145 enum {
146 GC_THREAD, /* struct f2fs_gc_thread */
147 SM_INFO, /* struct f2fs_sm_info */
148 NM_INFO, /* struct f2fs_nm_info */
149 F2FS_SBI, /* struct f2fs_sb_info */
150 #ifdef CONFIG_F2FS_FAULT_INJECTION
151 FAULT_INFO_RATE, /* struct f2fs_fault_info */
152 FAULT_INFO_TYPE, /* struct f2fs_fault_info */
153 #endif
154 };
155
156 struct f2fs_attr {
157 struct attribute attr;
158 ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
159 ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
160 const char *, size_t);
161 int struct_type;
162 int offset;
163 };
164
165 static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
166 {
167 if (struct_type == GC_THREAD)
168 return (unsigned char *)sbi->gc_thread;
169 else if (struct_type == SM_INFO)
170 return (unsigned char *)SM_I(sbi);
171 else if (struct_type == NM_INFO)
172 return (unsigned char *)NM_I(sbi);
173 else if (struct_type == F2FS_SBI)
174 return (unsigned char *)sbi;
175 #ifdef CONFIG_F2FS_FAULT_INJECTION
176 else if (struct_type == FAULT_INFO_RATE ||
177 struct_type == FAULT_INFO_TYPE)
178 return (unsigned char *)&sbi->fault_info;
179 #endif
180 return NULL;
181 }
182
183 static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
184 struct f2fs_sb_info *sbi, char *buf)
185 {
186 struct super_block *sb = sbi->sb;
187
188 if (!sb->s_bdev->bd_part)
189 return snprintf(buf, PAGE_SIZE, "0\n");
190
191 return snprintf(buf, PAGE_SIZE, "%llu\n",
192 (unsigned long long)(sbi->kbytes_written +
193 BD_PART_WRITTEN(sbi)));
194 }
195
196 static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
197 struct f2fs_sb_info *sbi, char *buf)
198 {
199 unsigned char *ptr = NULL;
200 unsigned int *ui;
201
202 ptr = __struct_ptr(sbi, a->struct_type);
203 if (!ptr)
204 return -EINVAL;
205
206 ui = (unsigned int *)(ptr + a->offset);
207
208 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
209 }
210
211 static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
212 struct f2fs_sb_info *sbi,
213 const char *buf, size_t count)
214 {
215 unsigned char *ptr;
216 unsigned long t;
217 unsigned int *ui;
218 ssize_t ret;
219
220 ptr = __struct_ptr(sbi, a->struct_type);
221 if (!ptr)
222 return -EINVAL;
223
224 ui = (unsigned int *)(ptr + a->offset);
225
226 ret = kstrtoul(skip_spaces(buf), 0, &t);
227 if (ret < 0)
228 return ret;
229 #ifdef CONFIG_F2FS_FAULT_INJECTION
230 if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
231 return -EINVAL;
232 #endif
233 *ui = t;
234 return count;
235 }
236
237 static ssize_t f2fs_attr_show(struct kobject *kobj,
238 struct attribute *attr, char *buf)
239 {
240 struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
241 s_kobj);
242 struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
243
244 return a->show ? a->show(a, sbi, buf) : 0;
245 }
246
247 static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
248 const char *buf, size_t len)
249 {
250 struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
251 s_kobj);
252 struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
253
254 return a->store ? a->store(a, sbi, buf, len) : 0;
255 }
256
257 static void f2fs_sb_release(struct kobject *kobj)
258 {
259 struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
260 s_kobj);
261 complete(&sbi->s_kobj_unregister);
262 }
263
264 #define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \
265 static struct f2fs_attr f2fs_attr_##_name = { \
266 .attr = {.name = __stringify(_name), .mode = _mode }, \
267 .show = _show, \
268 .store = _store, \
269 .struct_type = _struct_type, \
270 .offset = _offset \
271 }
272
273 #define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \
274 F2FS_ATTR_OFFSET(struct_type, name, 0644, \
275 f2fs_sbi_show, f2fs_sbi_store, \
276 offsetof(struct struct_name, elname))
277
278 #define F2FS_GENERAL_RO_ATTR(name) \
279 static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
280
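/*
 * Each F2FS_RW_ATTR() below becomes a file under /sys/fs/f2fs/<disk>/ whose
 * show/store handlers read or write an unsigned int located at
 * offsetof(struct_name, elname) inside the structure selected by struct_type
 * (resolved through __struct_ptr() above).  For example, writing "30000" to
 * /sys/fs/f2fs/<disk>/gc_min_sleep_time updates
 * sbi->gc_thread->min_sleep_time.
 */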
281 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
282 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
283 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
284 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
285 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
286 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
287 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
288 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
289 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
290 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
291 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
292 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
293 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
294 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
295 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
296 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
297 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
298 #ifdef CONFIG_F2FS_FAULT_INJECTION
299 F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
300 F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
301 #endif
302 F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
303
304 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
305 static struct attribute *f2fs_attrs[] = {
306 ATTR_LIST(gc_min_sleep_time),
307 ATTR_LIST(gc_max_sleep_time),
308 ATTR_LIST(gc_no_gc_sleep_time),
309 ATTR_LIST(gc_idle),
310 ATTR_LIST(reclaim_segments),
311 ATTR_LIST(max_small_discards),
312 ATTR_LIST(batched_trim_sections),
313 ATTR_LIST(ipu_policy),
314 ATTR_LIST(min_ipu_util),
315 ATTR_LIST(min_fsync_blocks),
316 ATTR_LIST(max_victim_search),
317 ATTR_LIST(dir_level),
318 ATTR_LIST(ram_thresh),
319 ATTR_LIST(ra_nid_pages),
320 ATTR_LIST(dirty_nats_ratio),
321 ATTR_LIST(cp_interval),
322 ATTR_LIST(idle_interval),
323 #ifdef CONFIG_F2FS_FAULT_INJECTION
324 ATTR_LIST(inject_rate),
325 ATTR_LIST(inject_type),
326 #endif
327 ATTR_LIST(lifetime_write_kbytes),
328 NULL,
329 };
330
331 static const struct sysfs_ops f2fs_attr_ops = {
332 .show = f2fs_attr_show,
333 .store = f2fs_attr_store,
334 };
335
336 static struct kobj_type f2fs_ktype = {
337 .default_attrs = f2fs_attrs,
338 .sysfs_ops = &f2fs_attr_ops,
339 .release = f2fs_sb_release,
340 };
341
342 void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
343 {
344 struct va_format vaf;
345 va_list args;
346
347 va_start(args, fmt);
348 vaf.fmt = fmt;
349 vaf.va = &args;
350 printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
351 va_end(args);
352 }
353
354 static void init_once(void *foo)
355 {
356 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
357
358 inode_init_once(&fi->vfs_inode);
359 }
360
361 static int parse_options(struct super_block *sb, char *options)
362 {
363 struct f2fs_sb_info *sbi = F2FS_SB(sb);
364 struct request_queue *q;
365 substring_t args[MAX_OPT_ARGS];
366 char *p, *name;
367 int arg = 0;
368
369 if (!options)
370 return 0;
371
372 while ((p = strsep(&options, ",")) != NULL) {
373 int token;
374 if (!*p)
375 continue;
376 /*
377 * Initialize args struct so we know whether arg was
378 * found; some options take optional arguments.
379 */
380 args[0].to = args[0].from = NULL;
381 token = match_token(p, f2fs_tokens, args);
382
383 switch (token) {
384 case Opt_gc_background:
385 name = match_strdup(&args[0]);
386
387 if (!name)
388 return -ENOMEM;
389 if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
390 set_opt(sbi, BG_GC);
391 clear_opt(sbi, FORCE_FG_GC);
392 } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
393 clear_opt(sbi, BG_GC);
394 clear_opt(sbi, FORCE_FG_GC);
395 } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
396 set_opt(sbi, BG_GC);
397 set_opt(sbi, FORCE_FG_GC);
398 } else {
399 kfree(name);
400 return -EINVAL;
401 }
402 kfree(name);
403 break;
404 case Opt_disable_roll_forward:
405 set_opt(sbi, DISABLE_ROLL_FORWARD);
406 break;
407 case Opt_norecovery:
408 			/* this option mounts f2fs read-only */
409 set_opt(sbi, DISABLE_ROLL_FORWARD);
410 if (!f2fs_readonly(sb))
411 return -EINVAL;
412 break;
413 case Opt_discard:
414 q = bdev_get_queue(sb->s_bdev);
415 if (blk_queue_discard(q)) {
416 set_opt(sbi, DISCARD);
417 } else if (!f2fs_sb_mounted_blkzoned(sb)) {
418 f2fs_msg(sb, KERN_WARNING,
419 "mounting with \"discard\" option, but "
420 "the device does not support discard");
421 }
422 break;
423 case Opt_nodiscard:
424 if (f2fs_sb_mounted_blkzoned(sb)) {
425 f2fs_msg(sb, KERN_WARNING,
426 "discard is required for zoned block devices");
427 return -EINVAL;
428 }
429 clear_opt(sbi, DISCARD);
430 break;
431 case Opt_noheap:
432 set_opt(sbi, NOHEAP);
433 break;
434 #ifdef CONFIG_F2FS_FS_XATTR
435 case Opt_user_xattr:
436 set_opt(sbi, XATTR_USER);
437 break;
438 case Opt_nouser_xattr:
439 clear_opt(sbi, XATTR_USER);
440 break;
441 case Opt_inline_xattr:
442 set_opt(sbi, INLINE_XATTR);
443 break;
444 #else
445 case Opt_user_xattr:
446 f2fs_msg(sb, KERN_INFO,
447 "user_xattr options not supported");
448 break;
449 case Opt_nouser_xattr:
450 f2fs_msg(sb, KERN_INFO,
451 "nouser_xattr options not supported");
452 break;
453 case Opt_inline_xattr:
454 f2fs_msg(sb, KERN_INFO,
455 "inline_xattr options not supported");
456 break;
457 #endif
458 #ifdef CONFIG_F2FS_FS_POSIX_ACL
459 case Opt_acl:
460 set_opt(sbi, POSIX_ACL);
461 break;
462 case Opt_noacl:
463 clear_opt(sbi, POSIX_ACL);
464 break;
465 #else
466 case Opt_acl:
467 f2fs_msg(sb, KERN_INFO, "acl options not supported");
468 break;
469 case Opt_noacl:
470 f2fs_msg(sb, KERN_INFO, "noacl options not supported");
471 break;
472 #endif
473 case Opt_active_logs:
474 if (args->from && match_int(args, &arg))
475 return -EINVAL;
476 if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
477 return -EINVAL;
478 sbi->active_logs = arg;
479 break;
480 case Opt_disable_ext_identify:
481 set_opt(sbi, DISABLE_EXT_IDENTIFY);
482 break;
483 case Opt_inline_data:
484 set_opt(sbi, INLINE_DATA);
485 break;
486 case Opt_inline_dentry:
487 set_opt(sbi, INLINE_DENTRY);
488 break;
489 case Opt_noinline_dentry:
490 clear_opt(sbi, INLINE_DENTRY);
491 break;
492 case Opt_flush_merge:
493 set_opt(sbi, FLUSH_MERGE);
494 break;
495 case Opt_noflush_merge:
496 clear_opt(sbi, FLUSH_MERGE);
497 break;
498 case Opt_nobarrier:
499 set_opt(sbi, NOBARRIER);
500 break;
501 case Opt_fastboot:
502 set_opt(sbi, FASTBOOT);
503 break;
504 case Opt_extent_cache:
505 set_opt(sbi, EXTENT_CACHE);
506 break;
507 case Opt_noextent_cache:
508 clear_opt(sbi, EXTENT_CACHE);
509 break;
510 case Opt_noinline_data:
511 clear_opt(sbi, INLINE_DATA);
512 break;
513 case Opt_data_flush:
514 set_opt(sbi, DATA_FLUSH);
515 break;
516 case Opt_mode:
517 name = match_strdup(&args[0]);
518
519 if (!name)
520 return -ENOMEM;
521 if (strlen(name) == 8 &&
522 !strncmp(name, "adaptive", 8)) {
523 if (f2fs_sb_mounted_blkzoned(sb)) {
524 f2fs_msg(sb, KERN_WARNING,
525 "adaptive mode is not allowed with "
526 "zoned block device feature");
527 kfree(name);
528 return -EINVAL;
529 }
530 set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
531 } else if (strlen(name) == 3 &&
532 !strncmp(name, "lfs", 3)) {
533 set_opt_mode(sbi, F2FS_MOUNT_LFS);
534 } else {
535 kfree(name);
536 return -EINVAL;
537 }
538 kfree(name);
539 break;
540 case Opt_io_size_bits:
541 if (args->from && match_int(args, &arg))
542 return -EINVAL;
543 if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
544 f2fs_msg(sb, KERN_WARNING,
545 "Not support %d, larger than %d",
546 1 << arg, BIO_MAX_PAGES);
547 return -EINVAL;
548 }
549 sbi->write_io_size_bits = arg;
550 break;
551 case Opt_fault_injection:
552 if (args->from && match_int(args, &arg))
553 return -EINVAL;
554 #ifdef CONFIG_F2FS_FAULT_INJECTION
555 f2fs_build_fault_attr(sbi, arg);
556 #else
557 f2fs_msg(sb, KERN_INFO,
558 "FAULT_INJECTION was not selected");
559 #endif
560 break;
561 case Opt_lazytime:
562 sb->s_flags |= MS_LAZYTIME;
563 break;
564 case Opt_nolazytime:
565 sb->s_flags &= ~MS_LAZYTIME;
566 break;
567 default:
568 f2fs_msg(sb, KERN_ERR,
569 "Unrecognized mount option \"%s\" or missing value",
570 p);
571 return -EINVAL;
572 }
573 }
574
575 if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
576 f2fs_msg(sb, KERN_ERR,
577 "Should set mode=lfs with %uKB-sized IO",
578 F2FS_IO_SIZE_KB(sbi));
579 return -EINVAL;
580 }
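	/*
	 * Note: io_bits=N pads and merges write IOs to units of 2^N blocks
	 * (e.g. io_bits=7 with 4KB blocks gives a 512KB unit, capped at
	 * BIO_MAX_PAGES); that padding model only makes sense with the
	 * append-only LFS allocator, hence the check above.
	 */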
581 return 0;
582 }
583
584 static struct inode *f2fs_alloc_inode(struct super_block *sb)
585 {
586 struct f2fs_inode_info *fi;
587
588 fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
589 if (!fi)
590 return NULL;
591
592 init_once((void *) fi);
593
594 /* Initialize f2fs-specific inode info */
595 fi->vfs_inode.i_version = 1;
596 atomic_set(&fi->dirty_pages, 0);
597 fi->i_current_depth = 1;
598 fi->i_advise = 0;
599 init_rwsem(&fi->i_sem);
600 INIT_LIST_HEAD(&fi->dirty_list);
601 INIT_LIST_HEAD(&fi->gdirty_list);
602 INIT_LIST_HEAD(&fi->inmem_pages);
603 mutex_init(&fi->inmem_lock);
604 init_rwsem(&fi->dio_rwsem[READ]);
605 init_rwsem(&fi->dio_rwsem[WRITE]);
606
607 /* Will be used by directory only */
608 fi->i_dir_level = F2FS_SB(sb)->dir_level;
609 return &fi->vfs_inode;
610 }
611
612 static int f2fs_drop_inode(struct inode *inode)
613 {
614 /*
615 * This is to avoid a deadlock condition like below.
616 * writeback_single_inode(inode)
617 * - f2fs_write_data_page
618 * - f2fs_gc -> iput -> evict
619 * - inode_wait_for_writeback(inode)
620 */
621 if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) {
622 if (!inode->i_nlink && !is_bad_inode(inode)) {
623 			/* to avoid a simultaneous evict_inode call */
624 atomic_inc(&inode->i_count);
625 spin_unlock(&inode->i_lock);
626
627 			/* any remaining atomic pages should be discarded */
628 if (f2fs_is_atomic_file(inode))
629 drop_inmem_pages(inode);
630
631 			/* fi->extent_tree should be kept for writepage */
632 f2fs_destroy_extent_node(inode);
633
634 sb_start_intwrite(inode->i_sb);
635 f2fs_i_size_write(inode, 0);
636
637 if (F2FS_HAS_BLOCKS(inode))
638 f2fs_truncate(inode);
639
640 sb_end_intwrite(inode->i_sb);
641
642 fscrypt_put_encryption_info(inode, NULL);
643 spin_lock(&inode->i_lock);
644 atomic_dec(&inode->i_count);
645 }
646 return 0;
647 }
648
649 return generic_drop_inode(inode);
650 }
651
652 int f2fs_inode_dirtied(struct inode *inode, bool sync)
653 {
654 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
655 int ret = 0;
656
657 spin_lock(&sbi->inode_lock[DIRTY_META]);
658 if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
659 ret = 1;
660 } else {
661 set_inode_flag(inode, FI_DIRTY_INODE);
662 stat_inc_dirty_inode(sbi, DIRTY_META);
663 }
664 if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
665 list_add_tail(&F2FS_I(inode)->gdirty_list,
666 &sbi->inode_list[DIRTY_META]);
667 inc_page_count(sbi, F2FS_DIRTY_IMETA);
668 }
669 spin_unlock(&sbi->inode_lock[DIRTY_META]);
670 return ret;
671 }
672
673 void f2fs_inode_synced(struct inode *inode)
674 {
675 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
676
677 spin_lock(&sbi->inode_lock[DIRTY_META]);
678 if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) {
679 spin_unlock(&sbi->inode_lock[DIRTY_META]);
680 return;
681 }
682 if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
683 list_del_init(&F2FS_I(inode)->gdirty_list);
684 dec_page_count(sbi, F2FS_DIRTY_IMETA);
685 }
686 clear_inode_flag(inode, FI_DIRTY_INODE);
687 clear_inode_flag(inode, FI_AUTO_RECOVER);
688 stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
689 spin_unlock(&sbi->inode_lock[DIRTY_META]);
690 }
691
692 /*
693 * f2fs_dirty_inode() is called from __mark_inode_dirty()
694 *
695 * We should mark the inode dirty here so that it is written back through ->write_inode().
696 */
697 static void f2fs_dirty_inode(struct inode *inode, int flags)
698 {
699 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
700
701 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
702 inode->i_ino == F2FS_META_INO(sbi))
703 return;
704
705 if (flags == I_DIRTY_TIME)
706 return;
707
708 if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
709 clear_inode_flag(inode, FI_AUTO_RECOVER);
710
711 f2fs_inode_dirtied(inode, false);
712 }
713
714 static void f2fs_i_callback(struct rcu_head *head)
715 {
716 struct inode *inode = container_of(head, struct inode, i_rcu);
717 kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
718 }
719
720 static void f2fs_destroy_inode(struct inode *inode)
721 {
722 call_rcu(&inode->i_rcu, f2fs_i_callback);
723 }
724
725 static void destroy_percpu_info(struct f2fs_sb_info *sbi)
726 {
727 percpu_counter_destroy(&sbi->alloc_valid_block_count);
728 percpu_counter_destroy(&sbi->total_valid_inode_count);
729 }
730
731 static void destroy_device_list(struct f2fs_sb_info *sbi)
732 {
733 int i;
734
735 for (i = 0; i < sbi->s_ndevs; i++) {
736 blkdev_put(FDEV(i).bdev, FMODE_EXCL);
737 #ifdef CONFIG_BLK_DEV_ZONED
738 kfree(FDEV(i).blkz_type);
739 #endif
740 }
741 kfree(sbi->devs);
742 }
743
744 static void f2fs_put_super(struct super_block *sb)
745 {
746 struct f2fs_sb_info *sbi = F2FS_SB(sb);
747
748 if (sbi->s_proc) {
749 remove_proc_entry("segment_info", sbi->s_proc);
750 remove_proc_entry("segment_bits", sbi->s_proc);
751 remove_proc_entry(sb->s_id, f2fs_proc_root);
752 }
753 kobject_del(&sbi->s_kobj);
754
755 stop_gc_thread(sbi);
756
757 /* prevent remaining shrinker jobs */
758 mutex_lock(&sbi->umount_mutex);
759
760 /*
761 	 * We don't need to do a checkpoint when the superblock is clean.
762 	 * But if the previous checkpoint was not done by a clean umount, we
763 	 * need to do a clean checkpoint again.
764 */
765 if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
766 !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
767 struct cp_control cpc = {
768 .reason = CP_UMOUNT,
769 };
770 write_checkpoint(sbi, &cpc);
771 }
772
773 	/* write_checkpoint can update stat information */
774 f2fs_destroy_stats(sbi);
775
776 /*
777 	 * Normally the superblock is clean, so we need to release the ino entries here.
778 	 * In addition, EIO will skip the checkpoint, so we need this as well.
779 */
780 release_ino_entry(sbi, true);
781
782 f2fs_leave_shrinker(sbi);
783 mutex_unlock(&sbi->umount_mutex);
784
785 	/* in our cp_error case, we can wait for any writeback page */
786 f2fs_flush_merged_bios(sbi);
787
788 iput(sbi->node_inode);
789 iput(sbi->meta_inode);
790
791 /* destroy f2fs internal modules */
792 destroy_node_manager(sbi);
793 destroy_segment_manager(sbi);
794
795 kfree(sbi->ckpt);
796 kobject_put(&sbi->s_kobj);
797 wait_for_completion(&sbi->s_kobj_unregister);
798
799 sb->s_fs_info = NULL;
800 if (sbi->s_chksum_driver)
801 crypto_free_shash(sbi->s_chksum_driver);
802 kfree(sbi->raw_super);
803
804 destroy_device_list(sbi);
805
806 destroy_percpu_info(sbi);
807 kfree(sbi);
808 }
809
810 int f2fs_sync_fs(struct super_block *sb, int sync)
811 {
812 struct f2fs_sb_info *sbi = F2FS_SB(sb);
813 int err = 0;
814
815 trace_f2fs_sync_fs(sb, sync);
816
817 if (sync) {
818 struct cp_control cpc;
819
820 cpc.reason = __get_cp_reason(sbi);
821
822 mutex_lock(&sbi->gc_mutex);
823 err = write_checkpoint(sbi, &cpc);
824 mutex_unlock(&sbi->gc_mutex);
825 }
826 f2fs_trace_ios(NULL, 1);
827
828 return err;
829 }
830
831 static int f2fs_freeze(struct super_block *sb)
832 {
833 if (f2fs_readonly(sb))
834 return 0;
835
836 /* IO error happened before */
837 if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
838 return -EIO;
839
840 /* must be clean, since sync_filesystem() was already called */
841 if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
842 return -EINVAL;
843 return 0;
844 }
845
846 static int f2fs_unfreeze(struct super_block *sb)
847 {
848 return 0;
849 }
850
851 static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
852 {
853 struct super_block *sb = dentry->d_sb;
854 struct f2fs_sb_info *sbi = F2FS_SB(sb);
855 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
856 block_t total_count, user_block_count, start_count, ovp_count;
857
858 total_count = le64_to_cpu(sbi->raw_super->block_count);
859 user_block_count = sbi->user_block_count;
860 start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
861 ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
862 buf->f_type = F2FS_SUPER_MAGIC;
863 buf->f_bsize = sbi->blocksize;
864
865 buf->f_blocks = total_count - start_count;
866 buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
867 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
868
869 buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
870 buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
871 buf->f_bavail);
872
873 buf->f_namelen = F2FS_NAME_LEN;
874 buf->f_fsid.val[0] = (u32)id;
875 buf->f_fsid.val[1] = (u32)(id >> 32);
876
877 return 0;
878 }
879
880 static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
881 {
882 struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
883
884 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
885 if (test_opt(sbi, FORCE_FG_GC))
886 seq_printf(seq, ",background_gc=%s", "sync");
887 else
888 seq_printf(seq, ",background_gc=%s", "on");
889 } else {
890 seq_printf(seq, ",background_gc=%s", "off");
891 }
892 if (test_opt(sbi, DISABLE_ROLL_FORWARD))
893 seq_puts(seq, ",disable_roll_forward");
894 if (test_opt(sbi, DISCARD))
895 seq_puts(seq, ",discard");
896 if (test_opt(sbi, NOHEAP))
897 seq_puts(seq, ",no_heap_alloc");
898 #ifdef CONFIG_F2FS_FS_XATTR
899 if (test_opt(sbi, XATTR_USER))
900 seq_puts(seq, ",user_xattr");
901 else
902 seq_puts(seq, ",nouser_xattr");
903 if (test_opt(sbi, INLINE_XATTR))
904 seq_puts(seq, ",inline_xattr");
905 #endif
906 #ifdef CONFIG_F2FS_FS_POSIX_ACL
907 if (test_opt(sbi, POSIX_ACL))
908 seq_puts(seq, ",acl");
909 else
910 seq_puts(seq, ",noacl");
911 #endif
912 if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
913 seq_puts(seq, ",disable_ext_identify");
914 if (test_opt(sbi, INLINE_DATA))
915 seq_puts(seq, ",inline_data");
916 else
917 seq_puts(seq, ",noinline_data");
918 if (test_opt(sbi, INLINE_DENTRY))
919 seq_puts(seq, ",inline_dentry");
920 else
921 seq_puts(seq, ",noinline_dentry");
922 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
923 seq_puts(seq, ",flush_merge");
924 if (test_opt(sbi, NOBARRIER))
925 seq_puts(seq, ",nobarrier");
926 if (test_opt(sbi, FASTBOOT))
927 seq_puts(seq, ",fastboot");
928 if (test_opt(sbi, EXTENT_CACHE))
929 seq_puts(seq, ",extent_cache");
930 else
931 seq_puts(seq, ",noextent_cache");
932 if (test_opt(sbi, DATA_FLUSH))
933 seq_puts(seq, ",data_flush");
934
935 seq_puts(seq, ",mode=");
936 if (test_opt(sbi, ADAPTIVE))
937 seq_puts(seq, "adaptive");
938 else if (test_opt(sbi, LFS))
939 seq_puts(seq, "lfs");
940 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
941 if (F2FS_IO_SIZE_BITS(sbi))
942 seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
943
944 return 0;
945 }
946
947 static int segment_info_seq_show(struct seq_file *seq, void *offset)
948 {
949 struct super_block *sb = seq->private;
950 struct f2fs_sb_info *sbi = F2FS_SB(sb);
951 unsigned int total_segs =
952 le32_to_cpu(sbi->raw_super->segment_count_main);
953 int i;
954
955 seq_puts(seq, "format: segment_type|valid_blocks\n"
956 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
957
958 for (i = 0; i < total_segs; i++) {
959 struct seg_entry *se = get_seg_entry(sbi, i);
960
961 if ((i % 10) == 0)
962 seq_printf(seq, "%-10d", i);
963 seq_printf(seq, "%d|%-3u", se->type,
964 get_valid_blocks(sbi, i, 1));
965 if ((i % 10) == 9 || i == (total_segs - 1))
966 seq_putc(seq, '\n');
967 else
968 seq_putc(seq, ' ');
969 }
970
971 return 0;
972 }
973
974 static int segment_bits_seq_show(struct seq_file *seq, void *offset)
975 {
976 struct super_block *sb = seq->private;
977 struct f2fs_sb_info *sbi = F2FS_SB(sb);
978 unsigned int total_segs =
979 le32_to_cpu(sbi->raw_super->segment_count_main);
980 int i, j;
981
982 seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
983 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
984
985 for (i = 0; i < total_segs; i++) {
986 struct seg_entry *se = get_seg_entry(sbi, i);
987
988 seq_printf(seq, "%-10d", i);
989 seq_printf(seq, "%d|%-3u|", se->type,
990 get_valid_blocks(sbi, i, 1));
991 for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
992 seq_printf(seq, " %.2x", se->cur_valid_map[j]);
993 seq_putc(seq, '\n');
994 }
995 return 0;
996 }
997
998 #define F2FS_PROC_FILE_DEF(_name) \
999 static int _name##_open_fs(struct inode *inode, struct file *file) \
1000 { \
1001 return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
1002 } \
1003 \
1004 static const struct file_operations f2fs_seq_##_name##_fops = { \
1005 .open = _name##_open_fs, \
1006 .read = seq_read, \
1007 .llseek = seq_lseek, \
1008 .release = single_release, \
1009 };
1010
1011 F2FS_PROC_FILE_DEF(segment_info);
1012 F2FS_PROC_FILE_DEF(segment_bits);
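/*
 * With the proc root created at module init time, the two definitions above
 * show up as /proc/fs/f2fs/<disk>/segment_info and
 * /proc/fs/f2fs/<disk>/segment_bits (hooked up in f2fs_fill_super() below).
 */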
1013
1014 static void default_options(struct f2fs_sb_info *sbi)
1015 {
1016 /* init some FS parameters */
1017 sbi->active_logs = NR_CURSEG_TYPE;
1018
1019 set_opt(sbi, BG_GC);
1020 set_opt(sbi, INLINE_DATA);
1021 set_opt(sbi, INLINE_DENTRY);
1022 set_opt(sbi, EXTENT_CACHE);
1023 sbi->sb->s_flags |= MS_LAZYTIME;
1024 set_opt(sbi, FLUSH_MERGE);
1025 if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
1026 set_opt_mode(sbi, F2FS_MOUNT_LFS);
1027 set_opt(sbi, DISCARD);
1028 } else {
1029 set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
1030 }
1031
1032 #ifdef CONFIG_F2FS_FS_XATTR
1033 set_opt(sbi, XATTR_USER);
1034 #endif
1035 #ifdef CONFIG_F2FS_FS_POSIX_ACL
1036 set_opt(sbi, POSIX_ACL);
1037 #endif
1038
1039 #ifdef CONFIG_F2FS_FAULT_INJECTION
1040 f2fs_build_fault_attr(sbi, 0);
1041 #endif
1042 }
1043
1044 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
1045 {
1046 struct f2fs_sb_info *sbi = F2FS_SB(sb);
1047 struct f2fs_mount_info org_mount_opt;
1048 int err, active_logs;
1049 bool need_restart_gc = false;
1050 bool need_stop_gc = false;
1051 bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
1052 #ifdef CONFIG_F2FS_FAULT_INJECTION
1053 struct f2fs_fault_info ffi = sbi->fault_info;
1054 #endif
1055
1056 /*
1057 * Save the old mount options in case we
1058 * need to restore them.
1059 */
1060 org_mount_opt = sbi->mount_opt;
1061 active_logs = sbi->active_logs;
1062
1063 /* recover superblocks we couldn't write due to previous RO mount */
1064 if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
1065 err = f2fs_commit_super(sbi, false);
1066 f2fs_msg(sb, KERN_INFO,
1067 "Try to recover all the superblocks, ret: %d", err);
1068 if (!err)
1069 clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1070 }
1071
1072 sbi->mount_opt.opt = 0;
1073 default_options(sbi);
1074
1075 /* parse mount options */
1076 err = parse_options(sb, data);
1077 if (err)
1078 goto restore_opts;
1079
1080 /*
1081 	 * If both the previous and the new state of the filesystem are RO,
1082 	 * skip checking the GC and FLUSH_MERGE conditions.
1083 */
1084 if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
1085 goto skip;
1086
1087 /* disallow enable/disable extent_cache dynamically */
1088 if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
1089 err = -EINVAL;
1090 f2fs_msg(sbi->sb, KERN_WARNING,
1091 "switch extent_cache option is not allowed");
1092 goto restore_opts;
1093 }
1094
1095 /*
1096 	 * We stop the GC thread if the FS is mounted as RO or if
1097 	 * background_gc=off is passed as a mount option.
1098 	 * Also sync the filesystem.
1099 */
1100 if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
1101 if (sbi->gc_thread) {
1102 stop_gc_thread(sbi);
1103 need_restart_gc = true;
1104 }
1105 } else if (!sbi->gc_thread) {
1106 err = start_gc_thread(sbi);
1107 if (err)
1108 goto restore_opts;
1109 need_stop_gc = true;
1110 }
1111
1112 if (*flags & MS_RDONLY) {
1113 writeback_inodes_sb(sb, WB_REASON_SYNC);
1114 sync_inodes_sb(sb);
1115
1116 set_sbi_flag(sbi, SBI_IS_DIRTY);
1117 set_sbi_flag(sbi, SBI_IS_CLOSE);
1118 f2fs_sync_fs(sb, 1);
1119 clear_sbi_flag(sbi, SBI_IS_CLOSE);
1120 }
1121
1122 /*
1123 	 * We stop the issue-flush thread if the FS is mounted as RO
1124 	 * or if flush_merge is not passed as a mount option.
1125 */
1126 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
1127 clear_opt(sbi, FLUSH_MERGE);
1128 destroy_flush_cmd_control(sbi, false);
1129 } else {
1130 err = create_flush_cmd_control(sbi);
1131 if (err)
1132 goto restore_gc;
1133 }
1134 skip:
1135 /* Update the POSIXACL Flag */
1136 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1137 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
1138
1139 return 0;
1140 restore_gc:
1141 if (need_restart_gc) {
1142 if (start_gc_thread(sbi))
1143 f2fs_msg(sbi->sb, KERN_WARNING,
1144 "background gc thread has stopped");
1145 } else if (need_stop_gc) {
1146 stop_gc_thread(sbi);
1147 }
1148 restore_opts:
1149 sbi->mount_opt = org_mount_opt;
1150 sbi->active_logs = active_logs;
1151 #ifdef CONFIG_F2FS_FAULT_INJECTION
1152 sbi->fault_info = ffi;
1153 #endif
1154 return err;
1155 }
1156
1157 static struct super_operations f2fs_sops = {
1158 .alloc_inode = f2fs_alloc_inode,
1159 .drop_inode = f2fs_drop_inode,
1160 .destroy_inode = f2fs_destroy_inode,
1161 .write_inode = f2fs_write_inode,
1162 .dirty_inode = f2fs_dirty_inode,
1163 .show_options = f2fs_show_options,
1164 .evict_inode = f2fs_evict_inode,
1165 .put_super = f2fs_put_super,
1166 .sync_fs = f2fs_sync_fs,
1167 .freeze_fs = f2fs_freeze,
1168 .unfreeze_fs = f2fs_unfreeze,
1169 .statfs = f2fs_statfs,
1170 .remount_fs = f2fs_remount,
1171 };
1172
1173 #ifdef CONFIG_F2FS_FS_ENCRYPTION
1174 static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
1175 {
1176 return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
1177 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
1178 ctx, len, NULL);
1179 }
1180
1181 static int f2fs_key_prefix(struct inode *inode, u8 **key)
1182 {
1183 *key = F2FS_I_SB(inode)->key_prefix;
1184 return F2FS_I_SB(inode)->key_prefix_size;
1185 }
1186
1187 static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
1188 void *fs_data)
1189 {
1190 return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
1191 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
1192 ctx, len, fs_data, XATTR_CREATE);
1193 }
1194
1195 static unsigned f2fs_max_namelen(struct inode *inode)
1196 {
1197 return S_ISLNK(inode->i_mode) ?
1198 inode->i_sb->s_blocksize : F2FS_NAME_LEN;
1199 }
1200
1201 static struct fscrypt_operations f2fs_cryptops = {
1202 .get_context = f2fs_get_context,
1203 .key_prefix = f2fs_key_prefix,
1204 .set_context = f2fs_set_context,
1205 .is_encrypted = f2fs_encrypted_inode,
1206 .empty_dir = f2fs_empty_dir,
1207 .max_namelen = f2fs_max_namelen,
1208 };
1209 #else
1210 static struct fscrypt_operations f2fs_cryptops = {
1211 .is_encrypted = f2fs_encrypted_inode,
1212 };
1213 #endif
1214
1215 static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
1216 u64 ino, u32 generation)
1217 {
1218 struct f2fs_sb_info *sbi = F2FS_SB(sb);
1219 struct inode *inode;
1220
1221 if (check_nid_range(sbi, ino))
1222 return ERR_PTR(-ESTALE);
1223
1224 /*
1225 * f2fs_iget isn't quite right if the inode is currently unallocated!
1226 * However f2fs_iget currently does appropriate checks to handle stale
1227 * inodes so everything is OK.
1228 */
1229 inode = f2fs_iget(sb, ino);
1230 if (IS_ERR(inode))
1231 return ERR_CAST(inode);
1232 if (unlikely(generation && inode->i_generation != generation)) {
1233 /* we didn't find the right inode.. */
1234 iput(inode);
1235 return ERR_PTR(-ESTALE);
1236 }
1237 return inode;
1238 }
1239
1240 static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
1241 int fh_len, int fh_type)
1242 {
1243 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1244 f2fs_nfs_get_inode);
1245 }
1246
1247 static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
1248 int fh_len, int fh_type)
1249 {
1250 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1251 f2fs_nfs_get_inode);
1252 }
1253
1254 static const struct export_operations f2fs_export_ops = {
1255 .fh_to_dentry = f2fs_fh_to_dentry,
1256 .fh_to_parent = f2fs_fh_to_parent,
1257 .get_parent = f2fs_get_parent,
1258 };
1259
1260 static loff_t max_file_blocks(void)
1261 {
1262 loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
1263 loff_t leaf_count = ADDRS_PER_BLOCK;
1264
1265 /* two direct node blocks */
1266 result += (leaf_count * 2);
1267
1268 /* two indirect node blocks */
1269 leaf_count *= NIDS_PER_BLOCK;
1270 result += (leaf_count * 2);
1271
1272 /* one double indirect node block */
1273 leaf_count *= NIDS_PER_BLOCK;
1274 result += leaf_count;
1275
1276 return result;
1277 }
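/*
 * Rough check, assuming the default 4KB block size: DEF_ADDRS_PER_INODE is
 * 923, F2FS_INLINE_XATTR_ADDRS is 50 and ADDRS_PER_BLOCK == NIDS_PER_BLOCK
 * == 1018, so the sum above is 873 + 2*1018 + 2*1018^2 + 1018^3, i.e. about
 * 1.06e9 blocks, or a maximum file size of roughly 3.94 TiB.
 */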
1278
1279 static int __f2fs_commit_super(struct buffer_head *bh,
1280 struct f2fs_super_block *super)
1281 {
1282 lock_buffer(bh);
1283 if (super)
1284 memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
1285 set_buffer_uptodate(bh);
1286 set_buffer_dirty(bh);
1287 unlock_buffer(bh);
1288
1289 	/* it's a rare case, so we can do FUA all the time */
1290 return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA);
1291 }
1292
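/*
 * The checks below verify that the metadata areas recorded in the raw
 * superblock are laid out back to back:
 *
 *   segment0/CP | SIT | NAT | SSA | MAIN
 *
 * i.e. each area starts exactly where the previous one (start address plus
 * segment_count << log_blocks_per_seg) ends, and MAIN does not extend past
 * the end of the whole segment space.
 */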
1293 static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
1294 struct buffer_head *bh)
1295 {
1296 struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
1297 (bh->b_data + F2FS_SUPER_OFFSET);
1298 struct super_block *sb = sbi->sb;
1299 u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1300 u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
1301 u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
1302 u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
1303 u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1304 u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1305 u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
1306 u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
1307 u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
1308 u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
1309 u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
1310 u32 segment_count = le32_to_cpu(raw_super->segment_count);
1311 u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
1312 u64 main_end_blkaddr = main_blkaddr +
1313 (segment_count_main << log_blocks_per_seg);
1314 u64 seg_end_blkaddr = segment0_blkaddr +
1315 (segment_count << log_blocks_per_seg);
1316
1317 if (segment0_blkaddr != cp_blkaddr) {
1318 f2fs_msg(sb, KERN_INFO,
1319 "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
1320 segment0_blkaddr, cp_blkaddr);
1321 return true;
1322 }
1323
1324 if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
1325 sit_blkaddr) {
1326 f2fs_msg(sb, KERN_INFO,
1327 "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
1328 cp_blkaddr, sit_blkaddr,
1329 segment_count_ckpt << log_blocks_per_seg);
1330 return true;
1331 }
1332
1333 if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
1334 nat_blkaddr) {
1335 f2fs_msg(sb, KERN_INFO,
1336 "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
1337 sit_blkaddr, nat_blkaddr,
1338 segment_count_sit << log_blocks_per_seg);
1339 return true;
1340 }
1341
1342 if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
1343 ssa_blkaddr) {
1344 f2fs_msg(sb, KERN_INFO,
1345 "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
1346 nat_blkaddr, ssa_blkaddr,
1347 segment_count_nat << log_blocks_per_seg);
1348 return true;
1349 }
1350
1351 if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
1352 main_blkaddr) {
1353 f2fs_msg(sb, KERN_INFO,
1354 "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
1355 ssa_blkaddr, main_blkaddr,
1356 segment_count_ssa << log_blocks_per_seg);
1357 return true;
1358 }
1359
1360 if (main_end_blkaddr > seg_end_blkaddr) {
1361 f2fs_msg(sb, KERN_INFO,
1362 "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
1363 main_blkaddr,
1364 segment0_blkaddr +
1365 (segment_count << log_blocks_per_seg),
1366 segment_count_main << log_blocks_per_seg);
1367 return true;
1368 } else if (main_end_blkaddr < seg_end_blkaddr) {
1369 int err = 0;
1370 char *res;
1371
1372 /* fix in-memory information all the time */
1373 raw_super->segment_count = cpu_to_le32((main_end_blkaddr -
1374 segment0_blkaddr) >> log_blocks_per_seg);
1375
1376 if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
1377 set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1378 res = "internally";
1379 } else {
1380 err = __f2fs_commit_super(bh, NULL);
1381 res = err ? "failed" : "done";
1382 }
1383 f2fs_msg(sb, KERN_INFO,
1384 "Fix alignment : %s, start(%u) end(%u) block(%u)",
1385 res, main_blkaddr,
1386 segment0_blkaddr +
1387 (segment_count << log_blocks_per_seg),
1388 segment_count_main << log_blocks_per_seg);
1389 if (err)
1390 return true;
1391 }
1392 return false;
1393 }
1394
1395 static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
1396 struct buffer_head *bh)
1397 {
1398 struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
1399 (bh->b_data + F2FS_SUPER_OFFSET);
1400 struct super_block *sb = sbi->sb;
1401 unsigned int blocksize;
1402
1403 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
1404 f2fs_msg(sb, KERN_INFO,
1405 "Magic Mismatch, valid(0x%x) - read(0x%x)",
1406 F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
1407 return 1;
1408 }
1409
1410 /* Currently, support only 4KB page cache size */
1411 if (F2FS_BLKSIZE != PAGE_SIZE) {
1412 f2fs_msg(sb, KERN_INFO,
1413 "Invalid page_cache_size (%lu), supports only 4KB\n",
1414 PAGE_SIZE);
1415 return 1;
1416 }
1417
1418 /* Currently, support only 4KB block size */
1419 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
1420 if (blocksize != F2FS_BLKSIZE) {
1421 f2fs_msg(sb, KERN_INFO,
1422 "Invalid blocksize (%u), supports only 4KB\n",
1423 blocksize);
1424 return 1;
1425 }
1426
1427 /* check log blocks per segment */
1428 if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
1429 f2fs_msg(sb, KERN_INFO,
1430 "Invalid log blocks per segment (%u)\n",
1431 le32_to_cpu(raw_super->log_blocks_per_seg));
1432 return 1;
1433 }
1434
1435 /* Currently, support 512/1024/2048/4096 bytes sector size */
1436 if (le32_to_cpu(raw_super->log_sectorsize) >
1437 F2FS_MAX_LOG_SECTOR_SIZE ||
1438 le32_to_cpu(raw_super->log_sectorsize) <
1439 F2FS_MIN_LOG_SECTOR_SIZE) {
1440 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
1441 le32_to_cpu(raw_super->log_sectorsize));
1442 return 1;
1443 }
1444 if (le32_to_cpu(raw_super->log_sectors_per_block) +
1445 le32_to_cpu(raw_super->log_sectorsize) !=
1446 F2FS_MAX_LOG_SECTOR_SIZE) {
1447 f2fs_msg(sb, KERN_INFO,
1448 "Invalid log sectors per block(%u) log sectorsize(%u)",
1449 le32_to_cpu(raw_super->log_sectors_per_block),
1450 le32_to_cpu(raw_super->log_sectorsize));
1451 return 1;
1452 }
1453
1454 /* check reserved ino info */
1455 if (le32_to_cpu(raw_super->node_ino) != 1 ||
1456 le32_to_cpu(raw_super->meta_ino) != 2 ||
1457 le32_to_cpu(raw_super->root_ino) != 3) {
1458 f2fs_msg(sb, KERN_INFO,
1459 "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
1460 le32_to_cpu(raw_super->node_ino),
1461 le32_to_cpu(raw_super->meta_ino),
1462 le32_to_cpu(raw_super->root_ino));
1463 return 1;
1464 }
1465
1466 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
1467 if (sanity_check_area_boundary(sbi, bh))
1468 return 1;
1469
1470 return 0;
1471 }
1472
1473 int sanity_check_ckpt(struct f2fs_sb_info *sbi)
1474 {
1475 unsigned int total, fsmeta;
1476 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1477 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1478 unsigned int ovp_segments, reserved_segments;
1479
1480 total = le32_to_cpu(raw_super->segment_count);
1481 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
1482 fsmeta += le32_to_cpu(raw_super->segment_count_sit);
1483 fsmeta += le32_to_cpu(raw_super->segment_count_nat);
1484 fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
1485 fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
1486
1487 if (unlikely(fsmeta >= total))
1488 return 1;
1489
1490 ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1491 reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
1492
1493 if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
1494 ovp_segments == 0 || reserved_segments == 0)) {
1495 f2fs_msg(sbi->sb, KERN_ERR,
1496 "Wrong layout: check mkfs.f2fs version");
1497 return 1;
1498 }
1499
1500 if (unlikely(f2fs_cp_error(sbi))) {
1501 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
1502 return 1;
1503 }
1504 return 0;
1505 }
1506
1507 static void init_sb_info(struct f2fs_sb_info *sbi)
1508 {
1509 struct f2fs_super_block *raw_super = sbi->raw_super;
1510 int i;
1511
1512 sbi->log_sectors_per_block =
1513 le32_to_cpu(raw_super->log_sectors_per_block);
1514 sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
1515 sbi->blocksize = 1 << sbi->log_blocksize;
1516 sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
1517 sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
1518 sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
1519 sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
1520 sbi->total_sections = le32_to_cpu(raw_super->section_count);
1521 sbi->total_node_count =
1522 (le32_to_cpu(raw_super->segment_count_nat) / 2)
1523 * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
1524 sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
1525 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
1526 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
1527 sbi->cur_victim_sec = NULL_SECNO;
1528 sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
1529
1530 sbi->dir_level = DEF_DIR_LEVEL;
1531 sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
1532 sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
1533 clear_sbi_flag(sbi, SBI_NEED_FSCK);
1534
1535 for (i = 0; i < NR_COUNT_TYPE; i++)
1536 atomic_set(&sbi->nr_pages[i], 0);
1537
1538 INIT_LIST_HEAD(&sbi->s_list);
1539 mutex_init(&sbi->umount_mutex);
1540 mutex_init(&sbi->wio_mutex[NODE]);
1541 mutex_init(&sbi->wio_mutex[DATA]);
1542 spin_lock_init(&sbi->cp_lock);
1543
1544 #ifdef CONFIG_F2FS_FS_ENCRYPTION
1545 memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX,
1546 F2FS_KEY_DESC_PREFIX_SIZE);
1547 sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE;
1548 #endif
1549 }
1550
1551 static int init_percpu_info(struct f2fs_sb_info *sbi)
1552 {
1553 int err;
1554
1555 err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
1556 if (err)
1557 return err;
1558
1559 return percpu_counter_init(&sbi->total_valid_inode_count, 0,
1560 GFP_KERNEL);
1561 }
1562
1563 #ifdef CONFIG_BLK_DEV_ZONED
1564 static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
1565 {
1566 struct block_device *bdev = FDEV(devi).bdev;
1567 sector_t nr_sectors = bdev->bd_part->nr_sects;
1568 sector_t sector = 0;
1569 struct blk_zone *zones;
1570 unsigned int i, nr_zones;
1571 unsigned int n = 0;
1572 int err = -EIO;
1573
1574 if (!f2fs_sb_mounted_blkzoned(sbi->sb))
1575 return 0;
1576
1577 if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
1578 SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
1579 return -EINVAL;
1580 sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
1581 if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
1582 __ilog2_u32(sbi->blocks_per_blkz))
1583 return -EINVAL;
1584 sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
1585 FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
1586 sbi->log_blocks_per_blkz;
1587 if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
1588 FDEV(devi).nr_blkz++;
1589
1590 FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
1591 if (!FDEV(devi).blkz_type)
1592 return -ENOMEM;
1593
1594 #define F2FS_REPORT_NR_ZONES 4096
1595
1596 zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
1597 GFP_KERNEL);
1598 if (!zones)
1599 return -ENOMEM;
1600
1601 /* Get block zones type */
1602 while (zones && sector < nr_sectors) {
1603
1604 nr_zones = F2FS_REPORT_NR_ZONES;
1605 err = blkdev_report_zones(bdev, sector,
1606 zones, &nr_zones,
1607 GFP_KERNEL);
1608 if (err)
1609 break;
1610 if (!nr_zones) {
1611 err = -EIO;
1612 break;
1613 }
1614
1615 for (i = 0; i < nr_zones; i++) {
1616 FDEV(devi).blkz_type[n] = zones[i].type;
1617 sector += zones[i].len;
1618 n++;
1619 }
1620 }
1621
1622 kfree(zones);
1623
1624 return err;
1625 }
1626 #endif
1627
1628 /*
1629  * Read the f2fs raw super block.
1630  * Because we have two copies of the super block, read both of them
1631  * to get the first valid one. If either of them is broken, we pass a
1632  * recovery flag back to the caller.
1633 */
1634 static int read_raw_super_block(struct f2fs_sb_info *sbi,
1635 struct f2fs_super_block **raw_super,
1636 int *valid_super_block, int *recovery)
1637 {
1638 struct super_block *sb = sbi->sb;
1639 int block;
1640 struct buffer_head *bh;
1641 struct f2fs_super_block *super;
1642 int err = 0;
1643
1644 super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
1645 if (!super)
1646 return -ENOMEM;
1647
1648 for (block = 0; block < 2; block++) {
1649 bh = sb_bread(sb, block);
1650 if (!bh) {
1651 f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
1652 block + 1);
1653 err = -EIO;
1654 continue;
1655 }
1656
1657 /* sanity checking of raw super */
1658 if (sanity_check_raw_super(sbi, bh)) {
1659 f2fs_msg(sb, KERN_ERR,
1660 "Can't find valid F2FS filesystem in %dth superblock",
1661 block + 1);
1662 err = -EINVAL;
1663 brelse(bh);
1664 continue;
1665 }
1666
1667 if (!*raw_super) {
1668 memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
1669 sizeof(*super));
1670 *valid_super_block = block;
1671 *raw_super = super;
1672 }
1673 brelse(bh);
1674 }
1675
1676 	/* Failed to read any of the superblocks */
1677 if (err < 0)
1678 *recovery = 1;
1679
1680 /* No valid superblock */
1681 if (!*raw_super)
1682 kfree(super);
1683 else
1684 err = 0;
1685
1686 return err;
1687 }
1688
1689 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
1690 {
1691 struct buffer_head *bh;
1692 int err;
1693
1694 if ((recover && f2fs_readonly(sbi->sb)) ||
1695 bdev_read_only(sbi->sb->s_bdev)) {
1696 set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1697 return -EROFS;
1698 }
1699
1700 /* write back-up superblock first */
1701 bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1);
1702 if (!bh)
1703 return -EIO;
1704 err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
1705 brelse(bh);
1706
1707 /* if we are in recovery path, skip writing valid superblock */
1708 if (recover || err)
1709 return err;
1710
1711 /* write current valid superblock */
1712 bh = sb_getblk(sbi->sb, sbi->valid_super_block);
1713 if (!bh)
1714 return -EIO;
1715 err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
1716 brelse(bh);
1717 return err;
1718 }
1719
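/*
 * For a multi-device image, the devices recorded in the raw superblock are
 * concatenated into one linear block address space: device 0 starts at
 * segment0_blkaddr, and each following device starts right after the end
 * block of the previous one, as computed below.
 */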
1720 static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1721 {
1722 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1723 int i;
1724
1725 for (i = 0; i < MAX_DEVICES; i++) {
1726 if (!RDEV(i).path[0])
1727 return 0;
1728
1729 if (i == 0) {
1730 sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) *
1731 MAX_DEVICES, GFP_KERNEL);
1732 if (!sbi->devs)
1733 return -ENOMEM;
1734 }
1735
1736 memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
1737 FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
1738 if (i == 0) {
1739 FDEV(i).start_blk = 0;
1740 FDEV(i).end_blk = FDEV(i).start_blk +
1741 (FDEV(i).total_segments <<
1742 sbi->log_blocks_per_seg) - 1 +
1743 le32_to_cpu(raw_super->segment0_blkaddr);
1744 } else {
1745 FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
1746 FDEV(i).end_blk = FDEV(i).start_blk +
1747 (FDEV(i).total_segments <<
1748 sbi->log_blocks_per_seg) - 1;
1749 }
1750
1751 FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
1752 sbi->sb->s_mode, sbi->sb->s_type);
1753 if (IS_ERR(FDEV(i).bdev))
1754 return PTR_ERR(FDEV(i).bdev);
1755
1756 /* to release errored devices */
1757 sbi->s_ndevs = i + 1;
1758
1759 #ifdef CONFIG_BLK_DEV_ZONED
1760 if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
1761 !f2fs_sb_mounted_blkzoned(sbi->sb)) {
1762 f2fs_msg(sbi->sb, KERN_ERR,
1763 "Zoned block device feature not enabled\n");
1764 return -EINVAL;
1765 }
1766 if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
1767 if (init_blkz_info(sbi, i)) {
1768 f2fs_msg(sbi->sb, KERN_ERR,
1769 "Failed to initialize F2FS blkzone information");
1770 return -EINVAL;
1771 }
1772 f2fs_msg(sbi->sb, KERN_INFO,
1773 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
1774 i, FDEV(i).path,
1775 FDEV(i).total_segments,
1776 FDEV(i).start_blk, FDEV(i).end_blk,
1777 bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
1778 "Host-aware" : "Host-managed");
1779 continue;
1780 }
1781 #endif
1782 f2fs_msg(sbi->sb, KERN_INFO,
1783 "Mount Device [%2d]: %20s, %8u, %8x - %8x",
1784 i, FDEV(i).path,
1785 FDEV(i).total_segments,
1786 FDEV(i).start_blk, FDEV(i).end_blk);
1787 }
1788 f2fs_msg(sbi->sb, KERN_INFO,
1789 "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
1790 return 0;
1791 }
1792
1793 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1794 {
1795 struct f2fs_sb_info *sbi;
1796 struct f2fs_super_block *raw_super;
1797 struct inode *root;
1798 int err;
1799 bool retry = true, need_fsck = false;
1800 char *options = NULL;
1801 int recovery, i, valid_super_block;
1802 struct curseg_info *seg_i;
1803
1804 try_onemore:
1805 err = -EINVAL;
1806 raw_super = NULL;
1807 valid_super_block = -1;
1808 recovery = 0;
1809
1810 /* allocate memory for f2fs-specific super block info */
1811 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
1812 if (!sbi)
1813 return -ENOMEM;
1814
1815 sbi->sb = sb;
1816
1817 /* Load the checksum driver */
1818 sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
1819 if (IS_ERR(sbi->s_chksum_driver)) {
1820 f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
1821 err = PTR_ERR(sbi->s_chksum_driver);
1822 sbi->s_chksum_driver = NULL;
1823 goto free_sbi;
1824 }
1825
1826 /* set a block size */
1827 if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
1828 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
1829 goto free_sbi;
1830 }
1831
1832 err = read_raw_super_block(sbi, &raw_super, &valid_super_block,
1833 &recovery);
1834 if (err)
1835 goto free_sbi;
1836
1837 sb->s_fs_info = sbi;
1838 sbi->raw_super = raw_super;
1839
1840 /*
1841 * The BLKZONED feature indicates that the drive was formatted with
1842 * zone alignment optimization. This is optional for host-aware
1843 * devices, but mandatory for host-managed zoned block devices.
1844 */
1845 #ifndef CONFIG_BLK_DEV_ZONED
1846 if (f2fs_sb_mounted_blkzoned(sb)) {
1847 f2fs_msg(sb, KERN_ERR,
1848 "Zoned block device support is not enabled\n");
1849 goto free_sb_buf;
1850 }
1851 #endif
1852 default_options(sbi);
1853 /* parse mount options */
1854 options = kstrdup((const char *)data, GFP_KERNEL);
1855 if (data && !options) {
1856 err = -ENOMEM;
1857 goto free_sb_buf;
1858 }
1859
1860 err = parse_options(sb, options);
1861 if (err)
1862 goto free_options;
1863
1864 sbi->max_file_blocks = max_file_blocks();
1865 sb->s_maxbytes = sbi->max_file_blocks <<
1866 le32_to_cpu(raw_super->log_blocksize);
1867 sb->s_max_links = F2FS_LINK_MAX;
1868 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1869
1870 sb->s_op = &f2fs_sops;
1871 sb->s_cop = &f2fs_cryptops;
1872 sb->s_xattr = f2fs_xattr_handlers;
1873 sb->s_export_op = &f2fs_export_ops;
1874 sb->s_magic = F2FS_SUPER_MAGIC;
1875 sb->s_time_gran = 1;
1876 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1877 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
1878 memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
1879
1880 /* init f2fs-specific super block info */
1881 sbi->valid_super_block = valid_super_block;
1882 mutex_init(&sbi->gc_mutex);
1883 mutex_init(&sbi->cp_mutex);
1884 init_rwsem(&sbi->node_write);
1885
1886 /* disallow all the data/node/meta page writes */
1887 set_sbi_flag(sbi, SBI_POR_DOING);
1888 spin_lock_init(&sbi->stat_lock);
1889
1890 init_rwsem(&sbi->read_io.io_rwsem);
1891 sbi->read_io.sbi = sbi;
1892 sbi->read_io.bio = NULL;
1893 for (i = 0; i < NR_PAGE_TYPE; i++) {
1894 init_rwsem(&sbi->write_io[i].io_rwsem);
1895 sbi->write_io[i].sbi = sbi;
1896 sbi->write_io[i].bio = NULL;
1897 }
1898
1899 init_rwsem(&sbi->cp_rwsem);
1900 init_waitqueue_head(&sbi->cp_wait);
1901 init_sb_info(sbi);
1902
1903 err = init_percpu_info(sbi);
1904 if (err)
1905 goto free_options;
1906
1907 if (F2FS_IO_SIZE(sbi) > 1) {
1908 sbi->write_io_dummy =
1909 mempool_create_page_pool(F2FS_IO_SIZE(sbi) - 1, 0);
1910 if (!sbi->write_io_dummy)
1911 goto free_options;
1912 }
1913
1914 /* get an inode for meta space */
1915 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
1916 if (IS_ERR(sbi->meta_inode)) {
1917 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
1918 err = PTR_ERR(sbi->meta_inode);
1919 goto free_io_dummy;
1920 }
1921
1922 err = get_valid_checkpoint(sbi);
1923 if (err) {
1924 f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
1925 goto free_meta_inode;
1926 }
1927
1928 /* Initialize device list */
1929 err = f2fs_scan_devices(sbi);
1930 if (err) {
1931 f2fs_msg(sb, KERN_ERR, "Failed to find devices");
1932 goto free_devices;
1933 }
1934
1935 sbi->total_valid_node_count =
1936 le32_to_cpu(sbi->ckpt->valid_node_count);
1937 percpu_counter_set(&sbi->total_valid_inode_count,
1938 le32_to_cpu(sbi->ckpt->valid_inode_count));
1939 sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
1940 sbi->total_valid_block_count =
1941 le64_to_cpu(sbi->ckpt->valid_block_count);
1942 sbi->last_valid_block_count = sbi->total_valid_block_count;
1943
1944 for (i = 0; i < NR_INODE_TYPE; i++) {
1945 INIT_LIST_HEAD(&sbi->inode_list[i]);
1946 spin_lock_init(&sbi->inode_lock[i]);
1947 }
1948
1949 init_extent_cache_info(sbi);
1950
1951 init_ino_entry_info(sbi);
1952
1953 /* setup f2fs internal modules */
1954 err = build_segment_manager(sbi);
1955 if (err) {
1956 f2fs_msg(sb, KERN_ERR,
1957 "Failed to initialize F2FS segment manager");
1958 goto free_sm;
1959 }
1960 err = build_node_manager(sbi);
1961 if (err) {
1962 f2fs_msg(sb, KERN_ERR,
1963 "Failed to initialize F2FS node manager");
1964 goto free_nm;
1965 }
1966
1967 /* For write statistics */
1968 if (sb->s_bdev->bd_part)
1969 sbi->sectors_written_start =
1970 (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
1971
1972 /* Read accumulated write IO statistics, if they exist */
1973 seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
1974 if (__exist_node_summaries(sbi))
1975 sbi->kbytes_written =
1976 le64_to_cpu(seg_i->journal->info.kbytes_written);
1977
1978 build_gc_manager(sbi);
1979
1980 /* get an inode for node space */
1981 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
1982 if (IS_ERR(sbi->node_inode)) {
1983 f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
1984 err = PTR_ERR(sbi->node_inode);
1985 goto free_nm;
1986 }
1987
1988 f2fs_join_shrinker(sbi);
1989
1990 /* if there are any orphan nodes, free them */
1991 err = recover_orphan_inodes(sbi);
1992 if (err)
1993 goto free_node_inode;
1994
1995 /* read root inode and dentry */
1996 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
1997 if (IS_ERR(root)) {
1998 f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
1999 err = PTR_ERR(root);
2000 goto free_node_inode;
2001 }
2002 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2003 iput(root);
2004 err = -EINVAL;
2005 goto free_node_inode;
2006 }
2007
2008 sb->s_root = d_make_root(root); /* allocate root dentry */
2009 if (!sb->s_root) {
2010 err = -ENOMEM;
2011 goto free_root_inode;
2012 }
2013
2014 err = f2fs_build_stats(sbi);
2015 if (err)
2016 goto free_root_inode;
2017
2018 if (f2fs_proc_root)
2019 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
2020
2021 if (sbi->s_proc) {
2022 proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
2023 &f2fs_seq_segment_info_fops, sb);
2024 proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
2025 &f2fs_seq_segment_bits_fops, sb);
2026 }
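/*
 * These entries appear under /proc/fs/f2fs/<dev>/; for instance
 * "cat /proc/fs/f2fs/sda1/segment_info" dumps the valid block count of
 * each segment, which is useful when watching GC behaviour on a live
 * filesystem.
 */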
2027
2028 sbi->s_kobj.kset = f2fs_kset;
2029 init_completion(&sbi->s_kobj_unregister);
2030 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
2031 "%s", sb->s_id);
2032 if (err)
2033 goto free_proc;
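/*
 * The kobject joins the "f2fs" kset created at module init, so the
 * per-filesystem tunables (GC sleep times, ipu_policy, etc.) become
 * visible under /sys/fs/f2fs/<dev>/ from this point on.
 */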
2034
2035 /* recover fsynced data */
2036 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
2037 /*
2038 * The mount should fail when the device is read-only and the
2039 * previous checkpoint was not taken by a clean system shutdown.
2040 */
2041 if (bdev_read_only(sb->s_bdev) &&
2042 !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
2043 err = -EROFS;
2044 goto free_kobj;
2045 }
2046
2047 if (need_fsck)
2048 set_sbi_flag(sbi, SBI_NEED_FSCK);
2049
2050 if (!retry)
2051 goto skip_recovery;
2052
2053 err = recover_fsync_data(sbi, false);
2054 if (err < 0) {
2055 need_fsck = true;
2056 f2fs_msg(sb, KERN_ERR,
2057 "Cannot recover all fsync data errno=%d", err);
2058 goto free_kobj;
2059 }
2060 } else {
2061 err = recover_fsync_data(sbi, true);
2062
2063 if (!f2fs_readonly(sb) && err > 0) {
2064 err = -EINVAL;
2065 f2fs_msg(sb, KERN_ERR,
2066 "Need to recover fsync data");
2067 goto free_kobj;
2068 }
2069 }
2070 skip_recovery:
2071 /* recover_fsync_data() cleared this already */
2072 clear_sbi_flag(sbi, SBI_POR_DOING);
2073
2074 /*
2075 * If the filesystem is not mounted read-only,
2076 * start the gc_thread.
2077 */
2078 if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
2079 /* After POR, we can run the background GC thread. */
2080 err = start_gc_thread(sbi);
2081 if (err)
2082 goto free_kobj;
2083 }
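/*
 * Whether the thread is started is controlled by the background_gc mount
 * option, e.g. "mount -o background_gc=on" (or "sync"); with
 * background_gc=off, BG_GC stays clear and cleaning only happens in the
 * foreground allocation path.
 */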
2084 kfree(options);
2085
2086 /* recover broken superblock */
2087 if (recovery) {
2088 err = f2fs_commit_super(sbi, true);
2089 f2fs_msg(sb, KERN_INFO,
2090 "Try to recover %dth superblock, ret: %d",
2091 sbi->valid_super_block ? 1 : 2, err);
2092 }
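/*
 * f2fs keeps two superblock copies; valid_super_block records which one
 * was read successfully, and f2fs_commit_super(sbi, true) rewrites the
 * damaged partner so the pair is consistent again.
 */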
2093
2094 f2fs_update_time(sbi, CP_TIME);
2095 f2fs_update_time(sbi, REQ_TIME);
2096 return 0;
2097
2098 free_kobj:
2099 f2fs_sync_inode_meta(sbi);
2100 kobject_del(&sbi->s_kobj);
2101 kobject_put(&sbi->s_kobj);
2102 wait_for_completion(&sbi->s_kobj_unregister);
2103 free_proc:
2104 if (sbi->s_proc) {
2105 remove_proc_entry("segment_info", sbi->s_proc);
2106 remove_proc_entry("segment_bits", sbi->s_proc);
2107 remove_proc_entry(sb->s_id, f2fs_proc_root);
2108 }
2109 f2fs_destroy_stats(sbi);
2110 free_root_inode:
2111 dput(sb->s_root);
2112 sb->s_root = NULL;
2113 free_node_inode:
2114 truncate_inode_pages_final(NODE_MAPPING(sbi));
2115 mutex_lock(&sbi->umount_mutex);
2116 release_ino_entry(sbi, true);
2117 f2fs_leave_shrinker(sbi);
2118 /*
2119 * Some dirty meta pages can be left behind when recover_orphan_inodes()
2120 * fails with EIO. iput(node_inode) could then trigger balance_fs_bg(),
2121 * followed by write_checkpoint() through f2fs_write_node_pages(), which
2122 * falls into an infinite loop in sync_meta_pages(), hence the truncation below.
2123 */
2124 truncate_inode_pages_final(META_MAPPING(sbi));
2125 iput(sbi->node_inode);
2126 mutex_unlock(&sbi->umount_mutex);
2127 free_nm:
2128 destroy_node_manager(sbi);
2129 free_sm:
2130 destroy_segment_manager(sbi);
2131 free_devices:
2132 destroy_device_list(sbi);
2133 kfree(sbi->ckpt);
2134 free_meta_inode:
2135 make_bad_inode(sbi->meta_inode);
2136 iput(sbi->meta_inode);
2137 free_io_dummy:
2138 mempool_destroy(sbi->write_io_dummy);
2139 free_options:
2140 destroy_percpu_info(sbi);
2141 kfree(options);
2142 free_sb_buf:
2143 kfree(raw_super);
2144 free_sbi:
2145 if (sbi->s_chksum_driver)
2146 crypto_free_shash(sbi->s_chksum_driver);
2147 kfree(sbi);
2148
2149 /* give only one more chance */
2150 if (retry) {
2151 retry = false;
2152 shrink_dcache_sb(sb);
2153 goto try_onemore;
2154 }
2155 return err;
2156 }
2157
2158 static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
2159 const char *dev_name, void *data)
2160 {
2161 return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
2162 }
2163
2164 static void kill_f2fs_super(struct super_block *sb)
2165 {
2166 if (sb->s_root)
2167 set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
2168 kill_block_super(sb);
2169 }
2170
2171 static struct file_system_type f2fs_fs_type = {
2172 .owner = THIS_MODULE,
2173 .name = "f2fs",
2174 .mount = f2fs_mount,
2175 .kill_sb = kill_f2fs_super,
2176 .fs_flags = FS_REQUIRES_DEV,
2177 };
2178 MODULE_ALIAS_FS("f2fs");
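/*
 * MODULE_ALIAS_FS lets the kernel autoload this module on the first
 * mount by type, e.g. "mount -t f2fs /dev/mmcblk0p1 /mnt/flash";
 * mount_bdev() then calls back into f2fs_fill_super() for that device.
 */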
2179
2180 static int __init init_inodecache(void)
2181 {
2182 f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
2183 sizeof(struct f2fs_inode_info), 0,
2184 SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
2185 if (!f2fs_inode_cachep)
2186 return -ENOMEM;
2187 return 0;
2188 }
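/*
 * The inode cache above is created with SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT
 * so its pages are counted as reclaimable and charged to the memory
 * cgroup of the allocating task, matching other major filesystems.
 */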
2189
2190 static void destroy_inodecache(void)
2191 {
2192 /*
2193 * Make sure all delayed rcu free inodes are flushed before we
2194 * destroy cache.
2195 */
2196 rcu_barrier();
2197 kmem_cache_destroy(f2fs_inode_cachep);
2198 }
2199
2200 static int __init init_f2fs_fs(void)
2201 {
2202 int err;
2203
2204 f2fs_build_trace_ios();
2205
2206 err = init_inodecache();
2207 if (err)
2208 goto fail;
2209 err = create_node_manager_caches();
2210 if (err)
2211 goto free_inodecache;
2212 err = create_segment_manager_caches();
2213 if (err)
2214 goto free_node_manager_caches;
2215 err = create_checkpoint_caches();
2216 if (err)
2217 goto free_segment_manager_caches;
2218 err = create_extent_cache();
2219 if (err)
2220 goto free_checkpoint_caches;
2221 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
2222 if (!f2fs_kset) {
2223 err = -ENOMEM;
2224 goto free_extent_cache;
2225 }
2226 err = register_shrinker(&f2fs_shrinker_info);
2227 if (err)
2228 goto free_kset;
2229
2230 err = register_filesystem(&f2fs_fs_type);
2231 if (err)
2232 goto free_shrinker;
2233 err = f2fs_create_root_stats();
2234 if (err)
2235 goto free_filesystem;
2236 f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
2237 return 0;
2238
2239 free_filesystem:
2240 unregister_filesystem(&f2fs_fs_type);
2241 free_shrinker:
2242 unregister_shrinker(&f2fs_shrinker_info);
2243 free_kset:
2244 kset_unregister(f2fs_kset);
2245 free_extent_cache:
2246 destroy_extent_cache();
2247 free_checkpoint_caches:
2248 destroy_checkpoint_caches();
2249 free_segment_manager_caches:
2250 destroy_segment_manager_caches();
2251 free_node_manager_caches:
2252 destroy_node_manager_caches();
2253 free_inodecache:
2254 destroy_inodecache();
2255 fail:
2256 return err;
2257 }
2258
2259 static void __exit exit_f2fs_fs(void)
2260 {
2261 remove_proc_entry("fs/f2fs", NULL);
2262 f2fs_destroy_root_stats();
2263 unregister_filesystem(&f2fs_fs_type);
2264 unregister_shrinker(&f2fs_shrinker_info);
2265 kset_unregister(f2fs_kset);
2266 destroy_extent_cache();
2267 destroy_checkpoint_caches();
2268 destroy_segment_manager_caches();
2269 destroy_node_manager_caches();
2270 destroy_inodecache();
2271 f2fs_destroy_trace_ios();
2272 }
2273
2274 module_init(init_f2fs_fs)
2275 module_exit(exit_f2fs_fs)
2276
2277 MODULE_AUTHOR("Samsung Electronics's Praesto Team");
2278 MODULE_DESCRIPTION("Flash Friendly File System");
2279 MODULE_LICENSE("GPL");
2280