]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - fs/f2fs/super.c
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
[mirror_ubuntu-jammy-kernel.git] / fs / f2fs / super.c
1 /*
2 * fs/f2fs/super.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11 #include <linux/module.h>
12 #include <linux/init.h>
13 #include <linux/fs.h>
14 #include <linux/statfs.h>
15 #include <linux/buffer_head.h>
16 #include <linux/backing-dev.h>
17 #include <linux/kthread.h>
18 #include <linux/parser.h>
19 #include <linux/mount.h>
20 #include <linux/seq_file.h>
21 #include <linux/proc_fs.h>
22 #include <linux/random.h>
23 #include <linux/exportfs.h>
24 #include <linux/blkdev.h>
25 #include <linux/f2fs_fs.h>
26 #include <linux/sysfs.h>
27
28 #include "f2fs.h"
29 #include "node.h"
30 #include "segment.h"
31 #include "xattr.h"
32 #include "gc.h"
33 #include "trace.h"
34
35 #define CREATE_TRACE_POINTS
36 #include <trace/events/f2fs.h>
37
/* slab cache that f2fs_alloc_inode() draws struct f2fs_inode_info from */
static struct kmem_cache *f2fs_inode_cachep;
/* parent /proc directory; per-superblock entries are removed from it at umount */
static struct proc_dir_entry *f2fs_proc_root;
/* NOTE(review): presumably the kset owning the per-superblock kobjects; its users are outside this excerpt */
static struct kset *f2fs_kset;
41
42 #ifdef CONFIG_F2FS_FAULT_INJECTION
43
44 char *fault_name[FAULT_MAX] = {
45 [FAULT_KMALLOC] = "kmalloc",
46 [FAULT_PAGE_ALLOC] = "page alloc",
47 [FAULT_ALLOC_NID] = "alloc nid",
48 [FAULT_ORPHAN] = "orphan",
49 [FAULT_BLOCK] = "no more block",
50 [FAULT_DIR_DEPTH] = "too big dir depth",
51 [FAULT_EVICT_INODE] = "evict_inode fail",
52 [FAULT_IO] = "IO error",
53 [FAULT_CHECKPOINT] = "checkpoint error",
54 };
55
56 static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
57 unsigned int rate)
58 {
59 struct f2fs_fault_info *ffi = &sbi->fault_info;
60
61 if (rate) {
62 atomic_set(&ffi->inject_ops, 0);
63 ffi->inject_rate = rate;
64 ffi->inject_type = (1 << FAULT_MAX) - 1;
65 } else {
66 memset(ffi, 0, sizeof(struct f2fs_fault_info));
67 }
68 }
69 #endif
70
71 /* f2fs-wide shrinker description */
72 static struct shrinker f2fs_shrinker_info = {
73 .scan_objects = f2fs_shrink_scan,
74 .count_objects = f2fs_shrink_count,
75 .seeks = DEFAULT_SEEKS,
76 };
77
/*
 * Mount option identifiers returned by match_token() for f2fs_tokens.
 * Opt_err is the catch-all for anything that matched no pattern.
 */
enum {
	Opt_gc_background,
	Opt_disable_roll_forward,
	Opt_norecovery,
	Opt_discard,
	Opt_nodiscard,
	Opt_noheap,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,
	Opt_active_logs,
	Opt_disable_ext_identify,
	Opt_inline_xattr,
	Opt_noinline_xattr,
	Opt_inline_data,
	Opt_inline_dentry,
	Opt_noinline_dentry,
	Opt_flush_merge,
	Opt_noflush_merge,
	Opt_nobarrier,
	Opt_fastboot,
	Opt_extent_cache,
	Opt_noextent_cache,
	Opt_noinline_data,
	Opt_data_flush,
	Opt_mode,
	Opt_io_size_bits,
	Opt_fault_injection,
	Opt_lazytime,
	Opt_nolazytime,
	Opt_err,
};
111
112 static match_table_t f2fs_tokens = {
113 {Opt_gc_background, "background_gc=%s"},
114 {Opt_disable_roll_forward, "disable_roll_forward"},
115 {Opt_norecovery, "norecovery"},
116 {Opt_discard, "discard"},
117 {Opt_nodiscard, "nodiscard"},
118 {Opt_noheap, "no_heap"},
119 {Opt_user_xattr, "user_xattr"},
120 {Opt_nouser_xattr, "nouser_xattr"},
121 {Opt_acl, "acl"},
122 {Opt_noacl, "noacl"},
123 {Opt_active_logs, "active_logs=%u"},
124 {Opt_disable_ext_identify, "disable_ext_identify"},
125 {Opt_inline_xattr, "inline_xattr"},
126 {Opt_noinline_xattr, "noinline_xattr"},
127 {Opt_inline_data, "inline_data"},
128 {Opt_inline_dentry, "inline_dentry"},
129 {Opt_noinline_dentry, "noinline_dentry"},
130 {Opt_flush_merge, "flush_merge"},
131 {Opt_noflush_merge, "noflush_merge"},
132 {Opt_nobarrier, "nobarrier"},
133 {Opt_fastboot, "fastboot"},
134 {Opt_extent_cache, "extent_cache"},
135 {Opt_noextent_cache, "noextent_cache"},
136 {Opt_noinline_data, "noinline_data"},
137 {Opt_data_flush, "data_flush"},
138 {Opt_mode, "mode=%s"},
139 {Opt_io_size_bits, "io_bits=%u"},
140 {Opt_fault_injection, "fault_injection=%u"},
141 {Opt_lazytime, "lazytime"},
142 {Opt_nolazytime, "nolazytime"},
143 {Opt_err, NULL},
144 };
145
/*
 * Sysfs support for f2fs.
 *
 * Each sysfs attribute names the structure its value lives in; the
 * value below selects that structure in __struct_ptr().
 */
enum {
	GC_THREAD,		/* struct f2fs_gc_thread */
	SM_INFO,		/* struct f2fs_sm_info */
	DCC_INFO,		/* struct discard_cmd_control */
	NM_INFO,		/* struct f2fs_nm_info */
	F2FS_SBI,		/* struct f2fs_sb_info */
#ifdef CONFIG_F2FS_FAULT_INJECTION
	FAULT_INFO_RATE,	/* struct f2fs_fault_info */
	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
#endif
};
158
159 struct f2fs_attr {
160 struct attribute attr;
161 ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
162 ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
163 const char *, size_t);
164 int struct_type;
165 int offset;
166 };
167
168 static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
169 {
170 if (struct_type == GC_THREAD)
171 return (unsigned char *)sbi->gc_thread;
172 else if (struct_type == SM_INFO)
173 return (unsigned char *)SM_I(sbi);
174 else if (struct_type == DCC_INFO)
175 return (unsigned char *)SM_I(sbi)->dcc_info;
176 else if (struct_type == NM_INFO)
177 return (unsigned char *)NM_I(sbi);
178 else if (struct_type == F2FS_SBI)
179 return (unsigned char *)sbi;
180 #ifdef CONFIG_F2FS_FAULT_INJECTION
181 else if (struct_type == FAULT_INFO_RATE ||
182 struct_type == FAULT_INFO_TYPE)
183 return (unsigned char *)&sbi->fault_info;
184 #endif
185 return NULL;
186 }
187
188 static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
189 struct f2fs_sb_info *sbi, char *buf)
190 {
191 struct super_block *sb = sbi->sb;
192
193 if (!sb->s_bdev->bd_part)
194 return snprintf(buf, PAGE_SIZE, "0\n");
195
196 return snprintf(buf, PAGE_SIZE, "%llu\n",
197 (unsigned long long)(sbi->kbytes_written +
198 BD_PART_WRITTEN(sbi)));
199 }
200
201 static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
202 struct f2fs_sb_info *sbi, char *buf)
203 {
204 unsigned char *ptr = NULL;
205 unsigned int *ui;
206
207 ptr = __struct_ptr(sbi, a->struct_type);
208 if (!ptr)
209 return -EINVAL;
210
211 ui = (unsigned int *)(ptr + a->offset);
212
213 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
214 }
215
216 static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
217 struct f2fs_sb_info *sbi,
218 const char *buf, size_t count)
219 {
220 unsigned char *ptr;
221 unsigned long t;
222 unsigned int *ui;
223 ssize_t ret;
224
225 ptr = __struct_ptr(sbi, a->struct_type);
226 if (!ptr)
227 return -EINVAL;
228
229 ui = (unsigned int *)(ptr + a->offset);
230
231 ret = kstrtoul(skip_spaces(buf), 0, &t);
232 if (ret < 0)
233 return ret;
234 #ifdef CONFIG_F2FS_FAULT_INJECTION
235 if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
236 return -EINVAL;
237 #endif
238 *ui = t;
239 return count;
240 }
241
242 static ssize_t f2fs_attr_show(struct kobject *kobj,
243 struct attribute *attr, char *buf)
244 {
245 struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
246 s_kobj);
247 struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
248
249 return a->show ? a->show(a, sbi, buf) : 0;
250 }
251
252 static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr,
253 const char *buf, size_t len)
254 {
255 struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
256 s_kobj);
257 struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr);
258
259 return a->store ? a->store(a, sbi, buf, len) : 0;
260 }
261
262 static void f2fs_sb_release(struct kobject *kobj)
263 {
264 struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info,
265 s_kobj);
266 complete(&sbi->s_kobj_unregister);
267 }
268
/*
 * Define "f2fs_attr_<name>": a sysfs attribute called <name> with the
 * given mode and handlers, bound to byte offset <off> inside the
 * structure selected by <type>.
 */
#define F2FS_ATTR_OFFSET(type, name, mode, show_fn, store_fn, off)	\
static struct f2fs_attr f2fs_attr_##name = {				\
	.attr		= { .name = __stringify(name), .mode = mode },	\
	.show		= show_fn,					\
	.store		= store_fn,					\
	.struct_type	= type,						\
	.offset		= off						\
}

/*
 * Read/write (0644) attribute served by the generic f2fs_sbi_show /
 * f2fs_sbi_store handlers on member <elname> of struct <struct_name>.
 */
#define F2FS_RW_ATTR(struct_type, struct_name, name, elname)		\
	F2FS_ATTR_OFFSET(struct_type, name, 0644,			\
		f2fs_sbi_show, f2fs_sbi_store,				\
		offsetof(struct struct_name, elname))

/* Read-only (0444) attribute served by a dedicated <name>_show handler. */
#define F2FS_GENERAL_RO_ATTR(name)					\
static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
285
286 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
287 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
288 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
289 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
290 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
291 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
292 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
293 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
294 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
295 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
296 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
297 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
298 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
299 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
300 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
301 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
302 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
303 #ifdef CONFIG_F2FS_FAULT_INJECTION
304 F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
305 F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
306 #endif
307 F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
308
309 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
310 static struct attribute *f2fs_attrs[] = {
311 ATTR_LIST(gc_min_sleep_time),
312 ATTR_LIST(gc_max_sleep_time),
313 ATTR_LIST(gc_no_gc_sleep_time),
314 ATTR_LIST(gc_idle),
315 ATTR_LIST(reclaim_segments),
316 ATTR_LIST(max_small_discards),
317 ATTR_LIST(batched_trim_sections),
318 ATTR_LIST(ipu_policy),
319 ATTR_LIST(min_ipu_util),
320 ATTR_LIST(min_fsync_blocks),
321 ATTR_LIST(max_victim_search),
322 ATTR_LIST(dir_level),
323 ATTR_LIST(ram_thresh),
324 ATTR_LIST(ra_nid_pages),
325 ATTR_LIST(dirty_nats_ratio),
326 ATTR_LIST(cp_interval),
327 ATTR_LIST(idle_interval),
328 #ifdef CONFIG_F2FS_FAULT_INJECTION
329 ATTR_LIST(inject_rate),
330 ATTR_LIST(inject_type),
331 #endif
332 ATTR_LIST(lifetime_write_kbytes),
333 NULL,
334 };
335
336 static const struct sysfs_ops f2fs_attr_ops = {
337 .show = f2fs_attr_show,
338 .store = f2fs_attr_store,
339 };
340
341 static struct kobj_type f2fs_ktype = {
342 .default_attrs = f2fs_attrs,
343 .sysfs_ops = &f2fs_attr_ops,
344 .release = f2fs_sb_release,
345 };
346
347 void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
348 {
349 struct va_format vaf;
350 va_list args;
351
352 va_start(args, fmt);
353 vaf.fmt = fmt;
354 vaf.va = &args;
355 printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
356 va_end(args);
357 }
358
359 static void init_once(void *foo)
360 {
361 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
362
363 inode_init_once(&fi->vfs_inode);
364 }
365
366 static int parse_options(struct super_block *sb, char *options)
367 {
368 struct f2fs_sb_info *sbi = F2FS_SB(sb);
369 struct request_queue *q;
370 substring_t args[MAX_OPT_ARGS];
371 char *p, *name;
372 int arg = 0;
373
374 if (!options)
375 return 0;
376
377 while ((p = strsep(&options, ",")) != NULL) {
378 int token;
379 if (!*p)
380 continue;
381 /*
382 * Initialize args struct so we know whether arg was
383 * found; some options take optional arguments.
384 */
385 args[0].to = args[0].from = NULL;
386 token = match_token(p, f2fs_tokens, args);
387
388 switch (token) {
389 case Opt_gc_background:
390 name = match_strdup(&args[0]);
391
392 if (!name)
393 return -ENOMEM;
394 if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
395 set_opt(sbi, BG_GC);
396 clear_opt(sbi, FORCE_FG_GC);
397 } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
398 clear_opt(sbi, BG_GC);
399 clear_opt(sbi, FORCE_FG_GC);
400 } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
401 set_opt(sbi, BG_GC);
402 set_opt(sbi, FORCE_FG_GC);
403 } else {
404 kfree(name);
405 return -EINVAL;
406 }
407 kfree(name);
408 break;
409 case Opt_disable_roll_forward:
410 set_opt(sbi, DISABLE_ROLL_FORWARD);
411 break;
412 case Opt_norecovery:
413 /* this option mounts f2fs with ro */
414 set_opt(sbi, DISABLE_ROLL_FORWARD);
415 if (!f2fs_readonly(sb))
416 return -EINVAL;
417 break;
418 case Opt_discard:
419 q = bdev_get_queue(sb->s_bdev);
420 if (blk_queue_discard(q)) {
421 set_opt(sbi, DISCARD);
422 } else if (!f2fs_sb_mounted_blkzoned(sb)) {
423 f2fs_msg(sb, KERN_WARNING,
424 "mounting with \"discard\" option, but "
425 "the device does not support discard");
426 }
427 break;
428 case Opt_nodiscard:
429 if (f2fs_sb_mounted_blkzoned(sb)) {
430 f2fs_msg(sb, KERN_WARNING,
431 "discard is required for zoned block devices");
432 return -EINVAL;
433 }
434 clear_opt(sbi, DISCARD);
435 break;
436 case Opt_noheap:
437 set_opt(sbi, NOHEAP);
438 break;
439 #ifdef CONFIG_F2FS_FS_XATTR
440 case Opt_user_xattr:
441 set_opt(sbi, XATTR_USER);
442 break;
443 case Opt_nouser_xattr:
444 clear_opt(sbi, XATTR_USER);
445 break;
446 case Opt_inline_xattr:
447 set_opt(sbi, INLINE_XATTR);
448 break;
449 case Opt_noinline_xattr:
450 clear_opt(sbi, INLINE_XATTR);
451 break;
452 #else
453 case Opt_user_xattr:
454 f2fs_msg(sb, KERN_INFO,
455 "user_xattr options not supported");
456 break;
457 case Opt_nouser_xattr:
458 f2fs_msg(sb, KERN_INFO,
459 "nouser_xattr options not supported");
460 break;
461 case Opt_inline_xattr:
462 f2fs_msg(sb, KERN_INFO,
463 "inline_xattr options not supported");
464 break;
465 case Opt_noinline_xattr:
466 f2fs_msg(sb, KERN_INFO,
467 "noinline_xattr options not supported");
468 break;
469 #endif
470 #ifdef CONFIG_F2FS_FS_POSIX_ACL
471 case Opt_acl:
472 set_opt(sbi, POSIX_ACL);
473 break;
474 case Opt_noacl:
475 clear_opt(sbi, POSIX_ACL);
476 break;
477 #else
478 case Opt_acl:
479 f2fs_msg(sb, KERN_INFO, "acl options not supported");
480 break;
481 case Opt_noacl:
482 f2fs_msg(sb, KERN_INFO, "noacl options not supported");
483 break;
484 #endif
485 case Opt_active_logs:
486 if (args->from && match_int(args, &arg))
487 return -EINVAL;
488 if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
489 return -EINVAL;
490 sbi->active_logs = arg;
491 break;
492 case Opt_disable_ext_identify:
493 set_opt(sbi, DISABLE_EXT_IDENTIFY);
494 break;
495 case Opt_inline_data:
496 set_opt(sbi, INLINE_DATA);
497 break;
498 case Opt_inline_dentry:
499 set_opt(sbi, INLINE_DENTRY);
500 break;
501 case Opt_noinline_dentry:
502 clear_opt(sbi, INLINE_DENTRY);
503 break;
504 case Opt_flush_merge:
505 set_opt(sbi, FLUSH_MERGE);
506 break;
507 case Opt_noflush_merge:
508 clear_opt(sbi, FLUSH_MERGE);
509 break;
510 case Opt_nobarrier:
511 set_opt(sbi, NOBARRIER);
512 break;
513 case Opt_fastboot:
514 set_opt(sbi, FASTBOOT);
515 break;
516 case Opt_extent_cache:
517 set_opt(sbi, EXTENT_CACHE);
518 break;
519 case Opt_noextent_cache:
520 clear_opt(sbi, EXTENT_CACHE);
521 break;
522 case Opt_noinline_data:
523 clear_opt(sbi, INLINE_DATA);
524 break;
525 case Opt_data_flush:
526 set_opt(sbi, DATA_FLUSH);
527 break;
528 case Opt_mode:
529 name = match_strdup(&args[0]);
530
531 if (!name)
532 return -ENOMEM;
533 if (strlen(name) == 8 &&
534 !strncmp(name, "adaptive", 8)) {
535 if (f2fs_sb_mounted_blkzoned(sb)) {
536 f2fs_msg(sb, KERN_WARNING,
537 "adaptive mode is not allowed with "
538 "zoned block device feature");
539 kfree(name);
540 return -EINVAL;
541 }
542 set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
543 } else if (strlen(name) == 3 &&
544 !strncmp(name, "lfs", 3)) {
545 set_opt_mode(sbi, F2FS_MOUNT_LFS);
546 } else {
547 kfree(name);
548 return -EINVAL;
549 }
550 kfree(name);
551 break;
552 case Opt_io_size_bits:
553 if (args->from && match_int(args, &arg))
554 return -EINVAL;
555 if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
556 f2fs_msg(sb, KERN_WARNING,
557 "Not support %d, larger than %d",
558 1 << arg, BIO_MAX_PAGES);
559 return -EINVAL;
560 }
561 sbi->write_io_size_bits = arg;
562 break;
563 case Opt_fault_injection:
564 if (args->from && match_int(args, &arg))
565 return -EINVAL;
566 #ifdef CONFIG_F2FS_FAULT_INJECTION
567 f2fs_build_fault_attr(sbi, arg);
568 set_opt(sbi, FAULT_INJECTION);
569 #else
570 f2fs_msg(sb, KERN_INFO,
571 "FAULT_INJECTION was not selected");
572 #endif
573 break;
574 case Opt_lazytime:
575 sb->s_flags |= MS_LAZYTIME;
576 break;
577 case Opt_nolazytime:
578 sb->s_flags &= ~MS_LAZYTIME;
579 break;
580 default:
581 f2fs_msg(sb, KERN_ERR,
582 "Unrecognized mount option \"%s\" or missing value",
583 p);
584 return -EINVAL;
585 }
586 }
587
588 if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
589 f2fs_msg(sb, KERN_ERR,
590 "Should set mode=lfs with %uKB-sized IO",
591 F2FS_IO_SIZE_KB(sbi));
592 return -EINVAL;
593 }
594 return 0;
595 }
596
597 static struct inode *f2fs_alloc_inode(struct super_block *sb)
598 {
599 struct f2fs_inode_info *fi;
600
601 fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
602 if (!fi)
603 return NULL;
604
605 init_once((void *) fi);
606
607 /* Initialize f2fs-specific inode info */
608 fi->vfs_inode.i_version = 1;
609 atomic_set(&fi->dirty_pages, 0);
610 fi->i_current_depth = 1;
611 fi->i_advise = 0;
612 init_rwsem(&fi->i_sem);
613 INIT_LIST_HEAD(&fi->dirty_list);
614 INIT_LIST_HEAD(&fi->gdirty_list);
615 INIT_LIST_HEAD(&fi->inmem_pages);
616 mutex_init(&fi->inmem_lock);
617 init_rwsem(&fi->dio_rwsem[READ]);
618 init_rwsem(&fi->dio_rwsem[WRITE]);
619
620 /* Will be used by directory only */
621 fi->i_dir_level = F2FS_SB(sb)->dir_level;
622 return &fi->vfs_inode;
623 }
624
625 static int f2fs_drop_inode(struct inode *inode)
626 {
627 int ret;
628 /*
629 * This is to avoid a deadlock condition like below.
630 * writeback_single_inode(inode)
631 * - f2fs_write_data_page
632 * - f2fs_gc -> iput -> evict
633 * - inode_wait_for_writeback(inode)
634 */
635 if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) {
636 if (!inode->i_nlink && !is_bad_inode(inode)) {
637 /* to avoid evict_inode call simultaneously */
638 atomic_inc(&inode->i_count);
639 spin_unlock(&inode->i_lock);
640
641 /* some remained atomic pages should discarded */
642 if (f2fs_is_atomic_file(inode))
643 drop_inmem_pages(inode);
644
645 /* should remain fi->extent_tree for writepage */
646 f2fs_destroy_extent_node(inode);
647
648 sb_start_intwrite(inode->i_sb);
649 f2fs_i_size_write(inode, 0);
650
651 if (F2FS_HAS_BLOCKS(inode))
652 f2fs_truncate(inode);
653
654 sb_end_intwrite(inode->i_sb);
655
656 fscrypt_put_encryption_info(inode, NULL);
657 spin_lock(&inode->i_lock);
658 atomic_dec(&inode->i_count);
659 }
660 trace_f2fs_drop_inode(inode, 0);
661 return 0;
662 }
663 ret = generic_drop_inode(inode);
664 trace_f2fs_drop_inode(inode, ret);
665 return ret;
666 }
667
668 int f2fs_inode_dirtied(struct inode *inode, bool sync)
669 {
670 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
671 int ret = 0;
672
673 spin_lock(&sbi->inode_lock[DIRTY_META]);
674 if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
675 ret = 1;
676 } else {
677 set_inode_flag(inode, FI_DIRTY_INODE);
678 stat_inc_dirty_inode(sbi, DIRTY_META);
679 }
680 if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
681 list_add_tail(&F2FS_I(inode)->gdirty_list,
682 &sbi->inode_list[DIRTY_META]);
683 inc_page_count(sbi, F2FS_DIRTY_IMETA);
684 }
685 spin_unlock(&sbi->inode_lock[DIRTY_META]);
686 return ret;
687 }
688
689 void f2fs_inode_synced(struct inode *inode)
690 {
691 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
692
693 spin_lock(&sbi->inode_lock[DIRTY_META]);
694 if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) {
695 spin_unlock(&sbi->inode_lock[DIRTY_META]);
696 return;
697 }
698 if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
699 list_del_init(&F2FS_I(inode)->gdirty_list);
700 dec_page_count(sbi, F2FS_DIRTY_IMETA);
701 }
702 clear_inode_flag(inode, FI_DIRTY_INODE);
703 clear_inode_flag(inode, FI_AUTO_RECOVER);
704 stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
705 spin_unlock(&sbi->inode_lock[DIRTY_META]);
706 }
707
708 /*
709 * f2fs_dirty_inode() is called from __mark_inode_dirty()
710 *
711 * We should call set_dirty_inode to write the dirty inode through write_inode.
712 */
713 static void f2fs_dirty_inode(struct inode *inode, int flags)
714 {
715 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
716
717 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
718 inode->i_ino == F2FS_META_INO(sbi))
719 return;
720
721 if (flags == I_DIRTY_TIME)
722 return;
723
724 if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
725 clear_inode_flag(inode, FI_AUTO_RECOVER);
726
727 f2fs_inode_dirtied(inode, false);
728 }
729
730 static void f2fs_i_callback(struct rcu_head *head)
731 {
732 struct inode *inode = container_of(head, struct inode, i_rcu);
733 kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
734 }
735
736 static void f2fs_destroy_inode(struct inode *inode)
737 {
738 call_rcu(&inode->i_rcu, f2fs_i_callback);
739 }
740
741 static void destroy_percpu_info(struct f2fs_sb_info *sbi)
742 {
743 percpu_counter_destroy(&sbi->alloc_valid_block_count);
744 percpu_counter_destroy(&sbi->total_valid_inode_count);
745 }
746
747 static void destroy_device_list(struct f2fs_sb_info *sbi)
748 {
749 int i;
750
751 for (i = 0; i < sbi->s_ndevs; i++) {
752 blkdev_put(FDEV(i).bdev, FMODE_EXCL);
753 #ifdef CONFIG_BLK_DEV_ZONED
754 kfree(FDEV(i).blkz_type);
755 #endif
756 }
757 kfree(sbi->devs);
758 }
759
760 static void f2fs_put_super(struct super_block *sb)
761 {
762 struct f2fs_sb_info *sbi = F2FS_SB(sb);
763
764 if (sbi->s_proc) {
765 remove_proc_entry("segment_info", sbi->s_proc);
766 remove_proc_entry("segment_bits", sbi->s_proc);
767 remove_proc_entry(sb->s_id, f2fs_proc_root);
768 }
769 kobject_del(&sbi->s_kobj);
770
771 stop_gc_thread(sbi);
772
773 /* prevent remaining shrinker jobs */
774 mutex_lock(&sbi->umount_mutex);
775
776 /*
777 * We don't need to do checkpoint when superblock is clean.
778 * But, the previous checkpoint was not done by umount, it needs to do
779 * clean checkpoint again.
780 */
781 if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
782 !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
783 struct cp_control cpc = {
784 .reason = CP_UMOUNT,
785 };
786 write_checkpoint(sbi, &cpc);
787 }
788
789 /* be sure to wait for any on-going discard commands */
790 f2fs_wait_discard_bio(sbi, NULL_ADDR);
791
792 /* write_checkpoint can update stat informaion */
793 f2fs_destroy_stats(sbi);
794
795 /*
796 * normally superblock is clean, so we need to release this.
797 * In addition, EIO will skip do checkpoint, we need this as well.
798 */
799 release_ino_entry(sbi, true);
800
801 f2fs_leave_shrinker(sbi);
802 mutex_unlock(&sbi->umount_mutex);
803
804 /* our cp_error case, we can wait for any writeback page */
805 f2fs_flush_merged_bios(sbi);
806
807 iput(sbi->node_inode);
808 iput(sbi->meta_inode);
809
810 /* destroy f2fs internal modules */
811 destroy_node_manager(sbi);
812 destroy_segment_manager(sbi);
813
814 kfree(sbi->ckpt);
815 kobject_put(&sbi->s_kobj);
816 wait_for_completion(&sbi->s_kobj_unregister);
817
818 sb->s_fs_info = NULL;
819 if (sbi->s_chksum_driver)
820 crypto_free_shash(sbi->s_chksum_driver);
821 kfree(sbi->raw_super);
822
823 destroy_device_list(sbi);
824 mempool_destroy(sbi->write_io_dummy);
825 destroy_percpu_info(sbi);
826 kfree(sbi);
827 }
828
829 int f2fs_sync_fs(struct super_block *sb, int sync)
830 {
831 struct f2fs_sb_info *sbi = F2FS_SB(sb);
832 int err = 0;
833
834 trace_f2fs_sync_fs(sb, sync);
835
836 if (sync) {
837 struct cp_control cpc;
838
839 cpc.reason = __get_cp_reason(sbi);
840
841 mutex_lock(&sbi->gc_mutex);
842 err = write_checkpoint(sbi, &cpc);
843 mutex_unlock(&sbi->gc_mutex);
844 }
845 f2fs_trace_ios(NULL, 1);
846
847 return err;
848 }
849
850 static int f2fs_freeze(struct super_block *sb)
851 {
852 if (f2fs_readonly(sb))
853 return 0;
854
855 /* IO error happened before */
856 if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
857 return -EIO;
858
859 /* must be clean, since sync_filesystem() was already called */
860 if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
861 return -EINVAL;
862 return 0;
863 }
864
/* super_operations->unfreeze_fs: nothing to undo, always succeeds. */
static int f2fs_unfreeze(struct super_block *sb)
{
	(void)sb;
	return 0;
}
869
870 static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
871 {
872 struct super_block *sb = dentry->d_sb;
873 struct f2fs_sb_info *sbi = F2FS_SB(sb);
874 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
875 block_t total_count, user_block_count, start_count, ovp_count;
876
877 total_count = le64_to_cpu(sbi->raw_super->block_count);
878 user_block_count = sbi->user_block_count;
879 start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
880 ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
881 buf->f_type = F2FS_SUPER_MAGIC;
882 buf->f_bsize = sbi->blocksize;
883
884 buf->f_blocks = total_count - start_count;
885 buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
886 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
887
888 buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
889 buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
890 buf->f_bavail);
891
892 buf->f_namelen = F2FS_NAME_LEN;
893 buf->f_fsid.val[0] = (u32)id;
894 buf->f_fsid.val[1] = (u32)(id >> 32);
895
896 return 0;
897 }
898
899 static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
900 {
901 struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
902
903 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
904 if (test_opt(sbi, FORCE_FG_GC))
905 seq_printf(seq, ",background_gc=%s", "sync");
906 else
907 seq_printf(seq, ",background_gc=%s", "on");
908 } else {
909 seq_printf(seq, ",background_gc=%s", "off");
910 }
911 if (test_opt(sbi, DISABLE_ROLL_FORWARD))
912 seq_puts(seq, ",disable_roll_forward");
913 if (test_opt(sbi, DISCARD))
914 seq_puts(seq, ",discard");
915 if (test_opt(sbi, NOHEAP))
916 seq_puts(seq, ",no_heap_alloc");
917 #ifdef CONFIG_F2FS_FS_XATTR
918 if (test_opt(sbi, XATTR_USER))
919 seq_puts(seq, ",user_xattr");
920 else
921 seq_puts(seq, ",nouser_xattr");
922 if (test_opt(sbi, INLINE_XATTR))
923 seq_puts(seq, ",inline_xattr");
924 else
925 seq_puts(seq, ",noinline_xattr");
926 #endif
927 #ifdef CONFIG_F2FS_FS_POSIX_ACL
928 if (test_opt(sbi, POSIX_ACL))
929 seq_puts(seq, ",acl");
930 else
931 seq_puts(seq, ",noacl");
932 #endif
933 if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
934 seq_puts(seq, ",disable_ext_identify");
935 if (test_opt(sbi, INLINE_DATA))
936 seq_puts(seq, ",inline_data");
937 else
938 seq_puts(seq, ",noinline_data");
939 if (test_opt(sbi, INLINE_DENTRY))
940 seq_puts(seq, ",inline_dentry");
941 else
942 seq_puts(seq, ",noinline_dentry");
943 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
944 seq_puts(seq, ",flush_merge");
945 if (test_opt(sbi, NOBARRIER))
946 seq_puts(seq, ",nobarrier");
947 if (test_opt(sbi, FASTBOOT))
948 seq_puts(seq, ",fastboot");
949 if (test_opt(sbi, EXTENT_CACHE))
950 seq_puts(seq, ",extent_cache");
951 else
952 seq_puts(seq, ",noextent_cache");
953 if (test_opt(sbi, DATA_FLUSH))
954 seq_puts(seq, ",data_flush");
955
956 seq_puts(seq, ",mode=");
957 if (test_opt(sbi, ADAPTIVE))
958 seq_puts(seq, "adaptive");
959 else if (test_opt(sbi, LFS))
960 seq_puts(seq, "lfs");
961 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
962 if (F2FS_IO_SIZE_BITS(sbi))
963 seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
964 #ifdef CONFIG_F2FS_FAULT_INJECTION
965 if (test_opt(sbi, FAULT_INJECTION))
966 seq_puts(seq, ",fault_injection");
967 #endif
968
969 return 0;
970 }
971
972 static int segment_info_seq_show(struct seq_file *seq, void *offset)
973 {
974 struct super_block *sb = seq->private;
975 struct f2fs_sb_info *sbi = F2FS_SB(sb);
976 unsigned int total_segs =
977 le32_to_cpu(sbi->raw_super->segment_count_main);
978 int i;
979
980 seq_puts(seq, "format: segment_type|valid_blocks\n"
981 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
982
983 for (i = 0; i < total_segs; i++) {
984 struct seg_entry *se = get_seg_entry(sbi, i);
985
986 if ((i % 10) == 0)
987 seq_printf(seq, "%-10d", i);
988 seq_printf(seq, "%d|%-3u", se->type,
989 get_valid_blocks(sbi, i, 1));
990 if ((i % 10) == 9 || i == (total_segs - 1))
991 seq_putc(seq, '\n');
992 else
993 seq_putc(seq, ' ');
994 }
995
996 return 0;
997 }
998
999 static int segment_bits_seq_show(struct seq_file *seq, void *offset)
1000 {
1001 struct super_block *sb = seq->private;
1002 struct f2fs_sb_info *sbi = F2FS_SB(sb);
1003 unsigned int total_segs =
1004 le32_to_cpu(sbi->raw_super->segment_count_main);
1005 int i, j;
1006
1007 seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
1008 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
1009
1010 for (i = 0; i < total_segs; i++) {
1011 struct seg_entry *se = get_seg_entry(sbi, i);
1012
1013 seq_printf(seq, "%-10d", i);
1014 seq_printf(seq, "%d|%-3u|", se->type,
1015 get_valid_blocks(sbi, i, 1));
1016 for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
1017 seq_printf(seq, " %.2x", se->cur_valid_map[j]);
1018 seq_putc(seq, '\n');
1019 }
1020 return 0;
1021 }
1022
1023 #define F2FS_PROC_FILE_DEF(_name) \
1024 static int _name##_open_fs(struct inode *inode, struct file *file) \
1025 { \
1026 return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
1027 } \
1028 \
1029 static const struct file_operations f2fs_seq_##_name##_fops = { \
1030 .open = _name##_open_fs, \
1031 .read = seq_read, \
1032 .llseek = seq_lseek, \
1033 .release = single_release, \
1034 };
1035
1036 F2FS_PROC_FILE_DEF(segment_info);
1037 F2FS_PROC_FILE_DEF(segment_bits);
1038
1039 static void default_options(struct f2fs_sb_info *sbi)
1040 {
1041 /* init some FS parameters */
1042 sbi->active_logs = NR_CURSEG_TYPE;
1043
1044 set_opt(sbi, BG_GC);
1045 set_opt(sbi, INLINE_XATTR);
1046 set_opt(sbi, INLINE_DATA);
1047 set_opt(sbi, INLINE_DENTRY);
1048 set_opt(sbi, EXTENT_CACHE);
1049 sbi->sb->s_flags |= MS_LAZYTIME;
1050 set_opt(sbi, FLUSH_MERGE);
1051 if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
1052 set_opt_mode(sbi, F2FS_MOUNT_LFS);
1053 set_opt(sbi, DISCARD);
1054 } else {
1055 set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
1056 }
1057
1058 #ifdef CONFIG_F2FS_FS_XATTR
1059 set_opt(sbi, XATTR_USER);
1060 #endif
1061 #ifdef CONFIG_F2FS_FS_POSIX_ACL
1062 set_opt(sbi, POSIX_ACL);
1063 #endif
1064
1065 #ifdef CONFIG_F2FS_FAULT_INJECTION
1066 f2fs_build_fault_attr(sbi, 0);
1067 #endif
1068 }
1069
1070 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
1071 {
1072 struct f2fs_sb_info *sbi = F2FS_SB(sb);
1073 struct f2fs_mount_info org_mount_opt;
1074 int err, active_logs;
1075 bool need_restart_gc = false;
1076 bool need_stop_gc = false;
1077 bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
1078 #ifdef CONFIG_F2FS_FAULT_INJECTION
1079 struct f2fs_fault_info ffi = sbi->fault_info;
1080 #endif
1081
1082 /*
1083 * Save the old mount options in case we
1084 * need to restore them.
1085 */
1086 org_mount_opt = sbi->mount_opt;
1087 active_logs = sbi->active_logs;
1088
1089 /* recover superblocks we couldn't write due to previous RO mount */
1090 if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
1091 err = f2fs_commit_super(sbi, false);
1092 f2fs_msg(sb, KERN_INFO,
1093 "Try to recover all the superblocks, ret: %d", err);
1094 if (!err)
1095 clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1096 }
1097
1098 sbi->mount_opt.opt = 0;
1099 default_options(sbi);
1100
1101 /* parse mount options */
1102 err = parse_options(sb, data);
1103 if (err)
1104 goto restore_opts;
1105
1106 /*
1107 * Previous and new state of filesystem is RO,
1108 * so skip checking GC and FLUSH_MERGE conditions.
1109 */
1110 if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
1111 goto skip;
1112
1113 /* disallow enable/disable extent_cache dynamically */
1114 if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
1115 err = -EINVAL;
1116 f2fs_msg(sbi->sb, KERN_WARNING,
1117 "switch extent_cache option is not allowed");
1118 goto restore_opts;
1119 }
1120
1121 /*
1122 * We stop the GC thread if FS is mounted as RO
1123 * or if background_gc = off is passed in mount
1124 * option. Also sync the filesystem.
1125 */
1126 if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
1127 if (sbi->gc_thread) {
1128 stop_gc_thread(sbi);
1129 need_restart_gc = true;
1130 }
1131 } else if (!sbi->gc_thread) {
1132 err = start_gc_thread(sbi);
1133 if (err)
1134 goto restore_opts;
1135 need_stop_gc = true;
1136 }
1137
1138 if (*flags & MS_RDONLY) {
1139 writeback_inodes_sb(sb, WB_REASON_SYNC);
1140 sync_inodes_sb(sb);
1141
1142 set_sbi_flag(sbi, SBI_IS_DIRTY);
1143 set_sbi_flag(sbi, SBI_IS_CLOSE);
1144 f2fs_sync_fs(sb, 1);
1145 clear_sbi_flag(sbi, SBI_IS_CLOSE);
1146 }
1147
1148 /*
1149 * We stop issue flush thread if FS is mounted as RO
1150 * or if flush_merge is not passed in mount option.
1151 */
1152 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
1153 clear_opt(sbi, FLUSH_MERGE);
1154 destroy_flush_cmd_control(sbi, false);
1155 } else {
1156 err = create_flush_cmd_control(sbi);
1157 if (err)
1158 goto restore_gc;
1159 }
1160 skip:
1161 /* Update the POSIXACL Flag */
1162 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1163 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
1164
1165 return 0;
1166 restore_gc:
1167 if (need_restart_gc) {
1168 if (start_gc_thread(sbi))
1169 f2fs_msg(sbi->sb, KERN_WARNING,
1170 "background gc thread has stopped");
1171 } else if (need_stop_gc) {
1172 stop_gc_thread(sbi);
1173 }
1174 restore_opts:
1175 sbi->mount_opt = org_mount_opt;
1176 sbi->active_logs = active_logs;
1177 #ifdef CONFIG_F2FS_FAULT_INJECTION
1178 sbi->fault_info = ffi;
1179 #endif
1180 return err;
1181 }
1182
1183 static struct super_operations f2fs_sops = {
1184 .alloc_inode = f2fs_alloc_inode,
1185 .drop_inode = f2fs_drop_inode,
1186 .destroy_inode = f2fs_destroy_inode,
1187 .write_inode = f2fs_write_inode,
1188 .dirty_inode = f2fs_dirty_inode,
1189 .show_options = f2fs_show_options,
1190 .evict_inode = f2fs_evict_inode,
1191 .put_super = f2fs_put_super,
1192 .sync_fs = f2fs_sync_fs,
1193 .freeze_fs = f2fs_freeze,
1194 .unfreeze_fs = f2fs_unfreeze,
1195 .statfs = f2fs_statfs,
1196 .remount_fs = f2fs_remount,
1197 };
1198
1199 #ifdef CONFIG_F2FS_FS_ENCRYPTION
1200 static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
1201 {
1202 return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
1203 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
1204 ctx, len, NULL);
1205 }
1206
1207 static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
1208 void *fs_data)
1209 {
1210 return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
1211 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
1212 ctx, len, fs_data, XATTR_CREATE);
1213 }
1214
1215 static unsigned f2fs_max_namelen(struct inode *inode)
1216 {
1217 return S_ISLNK(inode->i_mode) ?
1218 inode->i_sb->s_blocksize : F2FS_NAME_LEN;
1219 }
1220
1221 static const struct fscrypt_operations f2fs_cryptops = {
1222 .key_prefix = "f2fs:",
1223 .get_context = f2fs_get_context,
1224 .set_context = f2fs_set_context,
1225 .is_encrypted = f2fs_encrypted_inode,
1226 .empty_dir = f2fs_empty_dir,
1227 .max_namelen = f2fs_max_namelen,
1228 };
1229 #else
1230 static const struct fscrypt_operations f2fs_cryptops = {
1231 .is_encrypted = f2fs_encrypted_inode,
1232 };
1233 #endif
1234
1235 static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
1236 u64 ino, u32 generation)
1237 {
1238 struct f2fs_sb_info *sbi = F2FS_SB(sb);
1239 struct inode *inode;
1240
1241 if (check_nid_range(sbi, ino))
1242 return ERR_PTR(-ESTALE);
1243
1244 /*
1245 * f2fs_iget isn't quite right if the inode is currently unallocated!
1246 * However f2fs_iget currently does appropriate checks to handle stale
1247 * inodes so everything is OK.
1248 */
1249 inode = f2fs_iget(sb, ino);
1250 if (IS_ERR(inode))
1251 return ERR_CAST(inode);
1252 if (unlikely(generation && inode->i_generation != generation)) {
1253 /* we didn't find the right inode.. */
1254 iput(inode);
1255 return ERR_PTR(-ESTALE);
1256 }
1257 return inode;
1258 }
1259
1260 static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
1261 int fh_len, int fh_type)
1262 {
1263 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1264 f2fs_nfs_get_inode);
1265 }
1266
1267 static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
1268 int fh_len, int fh_type)
1269 {
1270 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1271 f2fs_nfs_get_inode);
1272 }
1273
1274 static const struct export_operations f2fs_export_ops = {
1275 .fh_to_dentry = f2fs_fh_to_dentry,
1276 .fh_to_parent = f2fs_fh_to_parent,
1277 .get_parent = f2fs_get_parent,
1278 };
1279
1280 static loff_t max_file_blocks(void)
1281 {
1282 loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
1283 loff_t leaf_count = ADDRS_PER_BLOCK;
1284
1285 /* two direct node blocks */
1286 result += (leaf_count * 2);
1287
1288 /* two indirect node blocks */
1289 leaf_count *= NIDS_PER_BLOCK;
1290 result += (leaf_count * 2);
1291
1292 /* one double indirect node block */
1293 leaf_count *= NIDS_PER_BLOCK;
1294 result += leaf_count;
1295
1296 return result;
1297 }
1298
1299 static int __f2fs_commit_super(struct buffer_head *bh,
1300 struct f2fs_super_block *super)
1301 {
1302 lock_buffer(bh);
1303 if (super)
1304 memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
1305 set_buffer_uptodate(bh);
1306 set_buffer_dirty(bh);
1307 unlock_buffer(bh);
1308
1309 /* it's rare case, we can do fua all the time */
1310 return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA);
1311 }
1312
1313 static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
1314 struct buffer_head *bh)
1315 {
1316 struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
1317 (bh->b_data + F2FS_SUPER_OFFSET);
1318 struct super_block *sb = sbi->sb;
1319 u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1320 u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
1321 u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
1322 u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
1323 u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1324 u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1325 u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
1326 u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
1327 u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
1328 u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
1329 u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
1330 u32 segment_count = le32_to_cpu(raw_super->segment_count);
1331 u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
1332 u64 main_end_blkaddr = main_blkaddr +
1333 (segment_count_main << log_blocks_per_seg);
1334 u64 seg_end_blkaddr = segment0_blkaddr +
1335 (segment_count << log_blocks_per_seg);
1336
1337 if (segment0_blkaddr != cp_blkaddr) {
1338 f2fs_msg(sb, KERN_INFO,
1339 "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
1340 segment0_blkaddr, cp_blkaddr);
1341 return true;
1342 }
1343
1344 if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
1345 sit_blkaddr) {
1346 f2fs_msg(sb, KERN_INFO,
1347 "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
1348 cp_blkaddr, sit_blkaddr,
1349 segment_count_ckpt << log_blocks_per_seg);
1350 return true;
1351 }
1352
1353 if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
1354 nat_blkaddr) {
1355 f2fs_msg(sb, KERN_INFO,
1356 "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
1357 sit_blkaddr, nat_blkaddr,
1358 segment_count_sit << log_blocks_per_seg);
1359 return true;
1360 }
1361
1362 if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
1363 ssa_blkaddr) {
1364 f2fs_msg(sb, KERN_INFO,
1365 "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
1366 nat_blkaddr, ssa_blkaddr,
1367 segment_count_nat << log_blocks_per_seg);
1368 return true;
1369 }
1370
1371 if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
1372 main_blkaddr) {
1373 f2fs_msg(sb, KERN_INFO,
1374 "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
1375 ssa_blkaddr, main_blkaddr,
1376 segment_count_ssa << log_blocks_per_seg);
1377 return true;
1378 }
1379
1380 if (main_end_blkaddr > seg_end_blkaddr) {
1381 f2fs_msg(sb, KERN_INFO,
1382 "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
1383 main_blkaddr,
1384 segment0_blkaddr +
1385 (segment_count << log_blocks_per_seg),
1386 segment_count_main << log_blocks_per_seg);
1387 return true;
1388 } else if (main_end_blkaddr < seg_end_blkaddr) {
1389 int err = 0;
1390 char *res;
1391
1392 /* fix in-memory information all the time */
1393 raw_super->segment_count = cpu_to_le32((main_end_blkaddr -
1394 segment0_blkaddr) >> log_blocks_per_seg);
1395
1396 if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
1397 set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1398 res = "internally";
1399 } else {
1400 err = __f2fs_commit_super(bh, NULL);
1401 res = err ? "failed" : "done";
1402 }
1403 f2fs_msg(sb, KERN_INFO,
1404 "Fix alignment : %s, start(%u) end(%u) block(%u)",
1405 res, main_blkaddr,
1406 segment0_blkaddr +
1407 (segment_count << log_blocks_per_seg),
1408 segment_count_main << log_blocks_per_seg);
1409 if (err)
1410 return true;
1411 }
1412 return false;
1413 }
1414
1415 static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
1416 struct buffer_head *bh)
1417 {
1418 struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
1419 (bh->b_data + F2FS_SUPER_OFFSET);
1420 struct super_block *sb = sbi->sb;
1421 unsigned int blocksize;
1422
1423 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
1424 f2fs_msg(sb, KERN_INFO,
1425 "Magic Mismatch, valid(0x%x) - read(0x%x)",
1426 F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
1427 return 1;
1428 }
1429
1430 /* Currently, support only 4KB page cache size */
1431 if (F2FS_BLKSIZE != PAGE_SIZE) {
1432 f2fs_msg(sb, KERN_INFO,
1433 "Invalid page_cache_size (%lu), supports only 4KB\n",
1434 PAGE_SIZE);
1435 return 1;
1436 }
1437
1438 /* Currently, support only 4KB block size */
1439 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
1440 if (blocksize != F2FS_BLKSIZE) {
1441 f2fs_msg(sb, KERN_INFO,
1442 "Invalid blocksize (%u), supports only 4KB\n",
1443 blocksize);
1444 return 1;
1445 }
1446
1447 /* check log blocks per segment */
1448 if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
1449 f2fs_msg(sb, KERN_INFO,
1450 "Invalid log blocks per segment (%u)\n",
1451 le32_to_cpu(raw_super->log_blocks_per_seg));
1452 return 1;
1453 }
1454
1455 /* Currently, support 512/1024/2048/4096 bytes sector size */
1456 if (le32_to_cpu(raw_super->log_sectorsize) >
1457 F2FS_MAX_LOG_SECTOR_SIZE ||
1458 le32_to_cpu(raw_super->log_sectorsize) <
1459 F2FS_MIN_LOG_SECTOR_SIZE) {
1460 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
1461 le32_to_cpu(raw_super->log_sectorsize));
1462 return 1;
1463 }
1464 if (le32_to_cpu(raw_super->log_sectors_per_block) +
1465 le32_to_cpu(raw_super->log_sectorsize) !=
1466 F2FS_MAX_LOG_SECTOR_SIZE) {
1467 f2fs_msg(sb, KERN_INFO,
1468 "Invalid log sectors per block(%u) log sectorsize(%u)",
1469 le32_to_cpu(raw_super->log_sectors_per_block),
1470 le32_to_cpu(raw_super->log_sectorsize));
1471 return 1;
1472 }
1473
1474 /* check reserved ino info */
1475 if (le32_to_cpu(raw_super->node_ino) != 1 ||
1476 le32_to_cpu(raw_super->meta_ino) != 2 ||
1477 le32_to_cpu(raw_super->root_ino) != 3) {
1478 f2fs_msg(sb, KERN_INFO,
1479 "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
1480 le32_to_cpu(raw_super->node_ino),
1481 le32_to_cpu(raw_super->meta_ino),
1482 le32_to_cpu(raw_super->root_ino));
1483 return 1;
1484 }
1485
1486 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
1487 if (sanity_check_area_boundary(sbi, bh))
1488 return 1;
1489
1490 return 0;
1491 }
1492
1493 int sanity_check_ckpt(struct f2fs_sb_info *sbi)
1494 {
1495 unsigned int total, fsmeta;
1496 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1497 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1498 unsigned int ovp_segments, reserved_segments;
1499
1500 total = le32_to_cpu(raw_super->segment_count);
1501 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
1502 fsmeta += le32_to_cpu(raw_super->segment_count_sit);
1503 fsmeta += le32_to_cpu(raw_super->segment_count_nat);
1504 fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
1505 fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
1506
1507 if (unlikely(fsmeta >= total))
1508 return 1;
1509
1510 ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1511 reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
1512
1513 if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
1514 ovp_segments == 0 || reserved_segments == 0)) {
1515 f2fs_msg(sbi->sb, KERN_ERR,
1516 "Wrong layout: check mkfs.f2fs version");
1517 return 1;
1518 }
1519
1520 if (unlikely(f2fs_cp_error(sbi))) {
1521 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
1522 return 1;
1523 }
1524 return 0;
1525 }
1526
1527 static void init_sb_info(struct f2fs_sb_info *sbi)
1528 {
1529 struct f2fs_super_block *raw_super = sbi->raw_super;
1530 int i;
1531
1532 sbi->log_sectors_per_block =
1533 le32_to_cpu(raw_super->log_sectors_per_block);
1534 sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
1535 sbi->blocksize = 1 << sbi->log_blocksize;
1536 sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
1537 sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
1538 sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
1539 sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
1540 sbi->total_sections = le32_to_cpu(raw_super->section_count);
1541 sbi->total_node_count =
1542 (le32_to_cpu(raw_super->segment_count_nat) / 2)
1543 * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
1544 sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
1545 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
1546 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
1547 sbi->cur_victim_sec = NULL_SECNO;
1548 sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
1549
1550 sbi->dir_level = DEF_DIR_LEVEL;
1551 sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
1552 sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
1553 clear_sbi_flag(sbi, SBI_NEED_FSCK);
1554
1555 for (i = 0; i < NR_COUNT_TYPE; i++)
1556 atomic_set(&sbi->nr_pages[i], 0);
1557
1558 INIT_LIST_HEAD(&sbi->s_list);
1559 mutex_init(&sbi->umount_mutex);
1560 mutex_init(&sbi->wio_mutex[NODE]);
1561 mutex_init(&sbi->wio_mutex[DATA]);
1562 spin_lock_init(&sbi->cp_lock);
1563 }
1564
1565 static int init_percpu_info(struct f2fs_sb_info *sbi)
1566 {
1567 int err;
1568
1569 err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
1570 if (err)
1571 return err;
1572
1573 return percpu_counter_init(&sbi->total_valid_inode_count, 0,
1574 GFP_KERNEL);
1575 }
1576
1577 #ifdef CONFIG_BLK_DEV_ZONED
1578 static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
1579 {
1580 struct block_device *bdev = FDEV(devi).bdev;
1581 sector_t nr_sectors = bdev->bd_part->nr_sects;
1582 sector_t sector = 0;
1583 struct blk_zone *zones;
1584 unsigned int i, nr_zones;
1585 unsigned int n = 0;
1586 int err = -EIO;
1587
1588 if (!f2fs_sb_mounted_blkzoned(sbi->sb))
1589 return 0;
1590
1591 if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
1592 SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
1593 return -EINVAL;
1594 sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
1595 if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
1596 __ilog2_u32(sbi->blocks_per_blkz))
1597 return -EINVAL;
1598 sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
1599 FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
1600 sbi->log_blocks_per_blkz;
1601 if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
1602 FDEV(devi).nr_blkz++;
1603
1604 FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
1605 if (!FDEV(devi).blkz_type)
1606 return -ENOMEM;
1607
1608 #define F2FS_REPORT_NR_ZONES 4096
1609
1610 zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
1611 GFP_KERNEL);
1612 if (!zones)
1613 return -ENOMEM;
1614
1615 /* Get block zones type */
1616 while (zones && sector < nr_sectors) {
1617
1618 nr_zones = F2FS_REPORT_NR_ZONES;
1619 err = blkdev_report_zones(bdev, sector,
1620 zones, &nr_zones,
1621 GFP_KERNEL);
1622 if (err)
1623 break;
1624 if (!nr_zones) {
1625 err = -EIO;
1626 break;
1627 }
1628
1629 for (i = 0; i < nr_zones; i++) {
1630 FDEV(devi).blkz_type[n] = zones[i].type;
1631 sector += zones[i].len;
1632 n++;
1633 }
1634 }
1635
1636 kfree(zones);
1637
1638 return err;
1639 }
1640 #endif
1641
1642 /*
1643 * Read f2fs raw super block.
1644 * Because we have two copies of super block, so read both of them
1645 * to get the first valid one. If any one of them is broken, we pass
1646 * them recovery flag back to the caller.
1647 */
1648 static int read_raw_super_block(struct f2fs_sb_info *sbi,
1649 struct f2fs_super_block **raw_super,
1650 int *valid_super_block, int *recovery)
1651 {
1652 struct super_block *sb = sbi->sb;
1653 int block;
1654 struct buffer_head *bh;
1655 struct f2fs_super_block *super;
1656 int err = 0;
1657
1658 super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
1659 if (!super)
1660 return -ENOMEM;
1661
1662 for (block = 0; block < 2; block++) {
1663 bh = sb_bread(sb, block);
1664 if (!bh) {
1665 f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
1666 block + 1);
1667 err = -EIO;
1668 continue;
1669 }
1670
1671 /* sanity checking of raw super */
1672 if (sanity_check_raw_super(sbi, bh)) {
1673 f2fs_msg(sb, KERN_ERR,
1674 "Can't find valid F2FS filesystem in %dth superblock",
1675 block + 1);
1676 err = -EINVAL;
1677 brelse(bh);
1678 continue;
1679 }
1680
1681 if (!*raw_super) {
1682 memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
1683 sizeof(*super));
1684 *valid_super_block = block;
1685 *raw_super = super;
1686 }
1687 brelse(bh);
1688 }
1689
1690 /* Fail to read any one of the superblocks*/
1691 if (err < 0)
1692 *recovery = 1;
1693
1694 /* No valid superblock */
1695 if (!*raw_super)
1696 kfree(super);
1697 else
1698 err = 0;
1699
1700 return err;
1701 }
1702
1703 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
1704 {
1705 struct buffer_head *bh;
1706 int err;
1707
1708 if ((recover && f2fs_readonly(sbi->sb)) ||
1709 bdev_read_only(sbi->sb->s_bdev)) {
1710 set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1711 return -EROFS;
1712 }
1713
1714 /* write back-up superblock first */
1715 bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1);
1716 if (!bh)
1717 return -EIO;
1718 err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
1719 brelse(bh);
1720
1721 /* if we are in recovery path, skip writing valid superblock */
1722 if (recover || err)
1723 return err;
1724
1725 /* write current valid superblock */
1726 bh = sb_getblk(sbi->sb, sbi->valid_super_block);
1727 if (!bh)
1728 return -EIO;
1729 err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
1730 brelse(bh);
1731 return err;
1732 }
1733
1734 static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
1735 {
1736 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1737 unsigned int max_devices = MAX_DEVICES;
1738 int i;
1739
1740 /* Initialize single device information */
1741 if (!RDEV(0).path[0]) {
1742 if (!bdev_is_zoned(sbi->sb->s_bdev))
1743 return 0;
1744 max_devices = 1;
1745 }
1746
1747 /*
1748 * Initialize multiple devices information, or single
1749 * zoned block device information.
1750 */
1751 sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
1752 GFP_KERNEL);
1753 if (!sbi->devs)
1754 return -ENOMEM;
1755
1756 for (i = 0; i < max_devices; i++) {
1757
1758 if (i > 0 && !RDEV(i).path[0])
1759 break;
1760
1761 if (max_devices == 1) {
1762 /* Single zoned block device mount */
1763 FDEV(0).bdev =
1764 blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
1765 sbi->sb->s_mode, sbi->sb->s_type);
1766 } else {
1767 /* Multi-device mount */
1768 memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
1769 FDEV(i).total_segments =
1770 le32_to_cpu(RDEV(i).total_segments);
1771 if (i == 0) {
1772 FDEV(i).start_blk = 0;
1773 FDEV(i).end_blk = FDEV(i).start_blk +
1774 (FDEV(i).total_segments <<
1775 sbi->log_blocks_per_seg) - 1 +
1776 le32_to_cpu(raw_super->segment0_blkaddr);
1777 } else {
1778 FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
1779 FDEV(i).end_blk = FDEV(i).start_blk +
1780 (FDEV(i).total_segments <<
1781 sbi->log_blocks_per_seg) - 1;
1782 }
1783 FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
1784 sbi->sb->s_mode, sbi->sb->s_type);
1785 }
1786 if (IS_ERR(FDEV(i).bdev))
1787 return PTR_ERR(FDEV(i).bdev);
1788
1789 /* to release errored devices */
1790 sbi->s_ndevs = i + 1;
1791
1792 #ifdef CONFIG_BLK_DEV_ZONED
1793 if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
1794 !f2fs_sb_mounted_blkzoned(sbi->sb)) {
1795 f2fs_msg(sbi->sb, KERN_ERR,
1796 "Zoned block device feature not enabled\n");
1797 return -EINVAL;
1798 }
1799 if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
1800 if (init_blkz_info(sbi, i)) {
1801 f2fs_msg(sbi->sb, KERN_ERR,
1802 "Failed to initialize F2FS blkzone information");
1803 return -EINVAL;
1804 }
1805 if (max_devices == 1)
1806 break;
1807 f2fs_msg(sbi->sb, KERN_INFO,
1808 "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
1809 i, FDEV(i).path,
1810 FDEV(i).total_segments,
1811 FDEV(i).start_blk, FDEV(i).end_blk,
1812 bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
1813 "Host-aware" : "Host-managed");
1814 continue;
1815 }
1816 #endif
1817 f2fs_msg(sbi->sb, KERN_INFO,
1818 "Mount Device [%2d]: %20s, %8u, %8x - %8x",
1819 i, FDEV(i).path,
1820 FDEV(i).total_segments,
1821 FDEV(i).start_blk, FDEV(i).end_blk);
1822 }
1823 f2fs_msg(sbi->sb, KERN_INFO,
1824 "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
1825 return 0;
1826 }
1827
1828 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1829 {
1830 struct f2fs_sb_info *sbi;
1831 struct f2fs_super_block *raw_super;
1832 struct inode *root;
1833 int err;
1834 bool retry = true, need_fsck = false;
1835 char *options = NULL;
1836 int recovery, i, valid_super_block;
1837 struct curseg_info *seg_i;
1838
1839 try_onemore:
1840 err = -EINVAL;
1841 raw_super = NULL;
1842 valid_super_block = -1;
1843 recovery = 0;
1844
1845 /* allocate memory for f2fs-specific super block info */
1846 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
1847 if (!sbi)
1848 return -ENOMEM;
1849
1850 sbi->sb = sb;
1851
1852 /* Load the checksum driver */
1853 sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
1854 if (IS_ERR(sbi->s_chksum_driver)) {
1855 f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
1856 err = PTR_ERR(sbi->s_chksum_driver);
1857 sbi->s_chksum_driver = NULL;
1858 goto free_sbi;
1859 }
1860
1861 /* set a block size */
1862 if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
1863 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
1864 goto free_sbi;
1865 }
1866
1867 err = read_raw_super_block(sbi, &raw_super, &valid_super_block,
1868 &recovery);
1869 if (err)
1870 goto free_sbi;
1871
1872 sb->s_fs_info = sbi;
1873 sbi->raw_super = raw_super;
1874
1875 /*
1876 * The BLKZONED feature indicates that the drive was formatted with
1877 * zone alignment optimization. This is optional for host-aware
1878 * devices, but mandatory for host-managed zoned block devices.
1879 */
1880 #ifndef CONFIG_BLK_DEV_ZONED
1881 if (f2fs_sb_mounted_blkzoned(sb)) {
1882 f2fs_msg(sb, KERN_ERR,
1883 "Zoned block device support is not enabled\n");
1884 goto free_sb_buf;
1885 }
1886 #endif
1887 default_options(sbi);
1888 /* parse mount options */
1889 options = kstrdup((const char *)data, GFP_KERNEL);
1890 if (data && !options) {
1891 err = -ENOMEM;
1892 goto free_sb_buf;
1893 }
1894
1895 err = parse_options(sb, options);
1896 if (err)
1897 goto free_options;
1898
1899 sbi->max_file_blocks = max_file_blocks();
1900 sb->s_maxbytes = sbi->max_file_blocks <<
1901 le32_to_cpu(raw_super->log_blocksize);
1902 sb->s_max_links = F2FS_LINK_MAX;
1903 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1904
1905 sb->s_op = &f2fs_sops;
1906 sb->s_cop = &f2fs_cryptops;
1907 sb->s_xattr = f2fs_xattr_handlers;
1908 sb->s_export_op = &f2fs_export_ops;
1909 sb->s_magic = F2FS_SUPER_MAGIC;
1910 sb->s_time_gran = 1;
1911 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1912 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
1913 memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
1914
1915 /* init f2fs-specific super block info */
1916 sbi->valid_super_block = valid_super_block;
1917 mutex_init(&sbi->gc_mutex);
1918 mutex_init(&sbi->cp_mutex);
1919 init_rwsem(&sbi->node_write);
1920
1921 /* disallow all the data/node/meta page writes */
1922 set_sbi_flag(sbi, SBI_POR_DOING);
1923 spin_lock_init(&sbi->stat_lock);
1924
1925 init_rwsem(&sbi->read_io.io_rwsem);
1926 sbi->read_io.sbi = sbi;
1927 sbi->read_io.bio = NULL;
1928 for (i = 0; i < NR_PAGE_TYPE; i++) {
1929 init_rwsem(&sbi->write_io[i].io_rwsem);
1930 sbi->write_io[i].sbi = sbi;
1931 sbi->write_io[i].bio = NULL;
1932 }
1933
1934 init_rwsem(&sbi->cp_rwsem);
1935 init_waitqueue_head(&sbi->cp_wait);
1936 init_sb_info(sbi);
1937
1938 err = init_percpu_info(sbi);
1939 if (err)
1940 goto free_options;
1941
1942 if (F2FS_IO_SIZE(sbi) > 1) {
1943 sbi->write_io_dummy =
1944 mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
1945 if (!sbi->write_io_dummy)
1946 goto free_options;
1947 }
1948
1949 /* get an inode for meta space */
1950 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
1951 if (IS_ERR(sbi->meta_inode)) {
1952 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
1953 err = PTR_ERR(sbi->meta_inode);
1954 goto free_io_dummy;
1955 }
1956
1957 err = get_valid_checkpoint(sbi);
1958 if (err) {
1959 f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
1960 goto free_meta_inode;
1961 }
1962
1963 /* Initialize device list */
1964 err = f2fs_scan_devices(sbi);
1965 if (err) {
1966 f2fs_msg(sb, KERN_ERR, "Failed to find devices");
1967 goto free_devices;
1968 }
1969
1970 sbi->total_valid_node_count =
1971 le32_to_cpu(sbi->ckpt->valid_node_count);
1972 percpu_counter_set(&sbi->total_valid_inode_count,
1973 le32_to_cpu(sbi->ckpt->valid_inode_count));
1974 sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
1975 sbi->total_valid_block_count =
1976 le64_to_cpu(sbi->ckpt->valid_block_count);
1977 sbi->last_valid_block_count = sbi->total_valid_block_count;
1978
1979 for (i = 0; i < NR_INODE_TYPE; i++) {
1980 INIT_LIST_HEAD(&sbi->inode_list[i]);
1981 spin_lock_init(&sbi->inode_lock[i]);
1982 }
1983
1984 init_extent_cache_info(sbi);
1985
1986 init_ino_entry_info(sbi);
1987
1988 /* setup f2fs internal modules */
1989 err = build_segment_manager(sbi);
1990 if (err) {
1991 f2fs_msg(sb, KERN_ERR,
1992 "Failed to initialize F2FS segment manager");
1993 goto free_sm;
1994 }
1995 err = build_node_manager(sbi);
1996 if (err) {
1997 f2fs_msg(sb, KERN_ERR,
1998 "Failed to initialize F2FS node manager");
1999 goto free_nm;
2000 }
2001
2002 /* For write statistics */
2003 if (sb->s_bdev->bd_part)
2004 sbi->sectors_written_start =
2005 (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
2006
2007 /* Read accumulated write IO statistics if exists */
2008 seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
2009 if (__exist_node_summaries(sbi))
2010 sbi->kbytes_written =
2011 le64_to_cpu(seg_i->journal->info.kbytes_written);
2012
2013 build_gc_manager(sbi);
2014
2015 /* get an inode for node space */
2016 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
2017 if (IS_ERR(sbi->node_inode)) {
2018 f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
2019 err = PTR_ERR(sbi->node_inode);
2020 goto free_nm;
2021 }
2022
2023 f2fs_join_shrinker(sbi);
2024
2025 /* if there are nt orphan nodes free them */
2026 err = recover_orphan_inodes(sbi);
2027 if (err)
2028 goto free_node_inode;
2029
2030 /* read root inode and dentry */
2031 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
2032 if (IS_ERR(root)) {
2033 f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
2034 err = PTR_ERR(root);
2035 goto free_node_inode;
2036 }
2037 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2038 iput(root);
2039 err = -EINVAL;
2040 goto free_node_inode;
2041 }
2042
2043 sb->s_root = d_make_root(root); /* allocate root dentry */
2044 if (!sb->s_root) {
2045 err = -ENOMEM;
2046 goto free_root_inode;
2047 }
2048
2049 err = f2fs_build_stats(sbi);
2050 if (err)
2051 goto free_root_inode;
2052
2053 if (f2fs_proc_root)
2054 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
2055
2056 if (sbi->s_proc) {
2057 proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
2058 &f2fs_seq_segment_info_fops, sb);
2059 proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
2060 &f2fs_seq_segment_bits_fops, sb);
2061 }
2062
2063 sbi->s_kobj.kset = f2fs_kset;
2064 init_completion(&sbi->s_kobj_unregister);
2065 err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
2066 "%s", sb->s_id);
2067 if (err)
2068 goto free_proc;
2069
2070 /* recover fsynced data */
2071 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
2072 /*
2073 * mount should be failed, when device has readonly mode, and
2074 * previous checkpoint was not done by clean system shutdown.
2075 */
2076 if (bdev_read_only(sb->s_bdev) &&
2077 !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
2078 err = -EROFS;
2079 goto free_kobj;
2080 }
2081
2082 if (need_fsck)
2083 set_sbi_flag(sbi, SBI_NEED_FSCK);
2084
2085 if (!retry)
2086 goto skip_recovery;
2087
2088 err = recover_fsync_data(sbi, false);
2089 if (err < 0) {
2090 need_fsck = true;
2091 f2fs_msg(sb, KERN_ERR,
2092 "Cannot recover all fsync data errno=%d", err);
2093 goto free_kobj;
2094 }
2095 } else {
2096 err = recover_fsync_data(sbi, true);
2097
2098 if (!f2fs_readonly(sb) && err > 0) {
2099 err = -EINVAL;
2100 f2fs_msg(sb, KERN_ERR,
2101 "Need to recover fsync data");
2102 goto free_kobj;
2103 }
2104 }
2105 skip_recovery:
2106 /* recover_fsync_data() cleared this already */
2107 clear_sbi_flag(sbi, SBI_POR_DOING);
2108
2109 /*
2110 * If filesystem is not mounted as read-only then
2111 * do start the gc_thread.
2112 */
2113 if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
2114 /* After POR, we can run background GC thread.*/
2115 err = start_gc_thread(sbi);
2116 if (err)
2117 goto free_kobj;
2118 }
2119 kfree(options);
2120
2121 /* recover broken superblock */
2122 if (recovery) {
2123 err = f2fs_commit_super(sbi, true);
2124 f2fs_msg(sb, KERN_INFO,
2125 "Try to recover %dth superblock, ret: %d",
2126 sbi->valid_super_block ? 1 : 2, err);
2127 }
2128
2129 f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
2130 cur_cp_version(F2FS_CKPT(sbi)));
2131 f2fs_update_time(sbi, CP_TIME);
2132 f2fs_update_time(sbi, REQ_TIME);
2133 return 0;
2134
2135 free_kobj:
2136 f2fs_sync_inode_meta(sbi);
2137 kobject_del(&sbi->s_kobj);
2138 kobject_put(&sbi->s_kobj);
2139 wait_for_completion(&sbi->s_kobj_unregister);
2140 free_proc:
2141 if (sbi->s_proc) {
2142 remove_proc_entry("segment_info", sbi->s_proc);
2143 remove_proc_entry("segment_bits", sbi->s_proc);
2144 remove_proc_entry(sb->s_id, f2fs_proc_root);
2145 }
2146 f2fs_destroy_stats(sbi);
2147 free_root_inode:
2148 dput(sb->s_root);
2149 sb->s_root = NULL;
2150 free_node_inode:
2151 truncate_inode_pages_final(NODE_MAPPING(sbi));
2152 mutex_lock(&sbi->umount_mutex);
2153 release_ino_entry(sbi, true);
2154 f2fs_leave_shrinker(sbi);
2155 /*
2156 * Some dirty meta pages can be produced by recover_orphan_inodes()
2157 * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
2158 * followed by write_checkpoint() through f2fs_write_node_pages(), which
2159 * falls into an infinite loop in sync_meta_pages().
2160 */
2161 truncate_inode_pages_final(META_MAPPING(sbi));
2162 iput(sbi->node_inode);
2163 mutex_unlock(&sbi->umount_mutex);
2164 free_nm:
2165 destroy_node_manager(sbi);
2166 free_sm:
2167 destroy_segment_manager(sbi);
2168 free_devices:
2169 destroy_device_list(sbi);
2170 kfree(sbi->ckpt);
2171 free_meta_inode:
2172 make_bad_inode(sbi->meta_inode);
2173 iput(sbi->meta_inode);
2174 free_io_dummy:
2175 mempool_destroy(sbi->write_io_dummy);
2176 free_options:
2177 destroy_percpu_info(sbi);
2178 kfree(options);
2179 free_sb_buf:
2180 kfree(raw_super);
2181 free_sbi:
2182 if (sbi->s_chksum_driver)
2183 crypto_free_shash(sbi->s_chksum_driver);
2184 kfree(sbi);
2185
2186 /* give only one another chance */
2187 if (retry) {
2188 retry = false;
2189 shrink_dcache_sb(sb);
2190 goto try_onemore;
2191 }
2192 return err;
2193 }
2194
2195 static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
2196 const char *dev_name, void *data)
2197 {
2198 return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
2199 }
2200
2201 static void kill_f2fs_super(struct super_block *sb)
2202 {
2203 if (sb->s_root)
2204 set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
2205 kill_block_super(sb);
2206 }
2207
2208 static struct file_system_type f2fs_fs_type = {
2209 .owner = THIS_MODULE,
2210 .name = "f2fs",
2211 .mount = f2fs_mount,
2212 .kill_sb = kill_f2fs_super,
2213 .fs_flags = FS_REQUIRES_DEV,
2214 };
2215 MODULE_ALIAS_FS("f2fs");
2216
2217 static int __init init_inodecache(void)
2218 {
2219 f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
2220 sizeof(struct f2fs_inode_info), 0,
2221 SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
2222 if (!f2fs_inode_cachep)
2223 return -ENOMEM;
2224 return 0;
2225 }
2226
2227 static void destroy_inodecache(void)
2228 {
2229 /*
2230 * Make sure all delayed rcu free inodes are flushed before we
2231 * destroy cache.
2232 */
2233 rcu_barrier();
2234 kmem_cache_destroy(f2fs_inode_cachep);
2235 }
2236
2237 static int __init init_f2fs_fs(void)
2238 {
2239 int err;
2240
2241 f2fs_build_trace_ios();
2242
2243 err = init_inodecache();
2244 if (err)
2245 goto fail;
2246 err = create_node_manager_caches();
2247 if (err)
2248 goto free_inodecache;
2249 err = create_segment_manager_caches();
2250 if (err)
2251 goto free_node_manager_caches;
2252 err = create_checkpoint_caches();
2253 if (err)
2254 goto free_segment_manager_caches;
2255 err = create_extent_cache();
2256 if (err)
2257 goto free_checkpoint_caches;
2258 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
2259 if (!f2fs_kset) {
2260 err = -ENOMEM;
2261 goto free_extent_cache;
2262 }
2263 err = register_shrinker(&f2fs_shrinker_info);
2264 if (err)
2265 goto free_kset;
2266
2267 err = register_filesystem(&f2fs_fs_type);
2268 if (err)
2269 goto free_shrinker;
2270 err = f2fs_create_root_stats();
2271 if (err)
2272 goto free_filesystem;
2273 f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
2274 return 0;
2275
2276 free_filesystem:
2277 unregister_filesystem(&f2fs_fs_type);
2278 free_shrinker:
2279 unregister_shrinker(&f2fs_shrinker_info);
2280 free_kset:
2281 kset_unregister(f2fs_kset);
2282 free_extent_cache:
2283 destroy_extent_cache();
2284 free_checkpoint_caches:
2285 destroy_checkpoint_caches();
2286 free_segment_manager_caches:
2287 destroy_segment_manager_caches();
2288 free_node_manager_caches:
2289 destroy_node_manager_caches();
2290 free_inodecache:
2291 destroy_inodecache();
2292 fail:
2293 return err;
2294 }
2295
2296 static void __exit exit_f2fs_fs(void)
2297 {
2298 remove_proc_entry("fs/f2fs", NULL);
2299 f2fs_destroy_root_stats();
2300 unregister_filesystem(&f2fs_fs_type);
2301 unregister_shrinker(&f2fs_shrinker_info);
2302 kset_unregister(f2fs_kset);
2303 destroy_extent_cache();
2304 destroy_checkpoint_caches();
2305 destroy_segment_manager_caches();
2306 destroy_node_manager_caches();
2307 destroy_inodecache();
2308 f2fs_destroy_trace_ios();
2309 }
2310
2311 module_init(init_f2fs_fs)
2312 module_exit(exit_f2fs_fs)
2313
2314 MODULE_AUTHOR("Samsung Electronics's Praesto Team");
2315 MODULE_DESCRIPTION("Flash Friendly File System");
2316 MODULE_LICENSE("GPL");
2317