]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - fs/f2fs/segment.c
f2fs: change atomic and volatile write policies
[mirror_ubuntu-jammy-kernel.git] / fs / f2fs / segment.c
1 /*
2 * fs/f2fs/segment.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/vmalloc.h>
18 #include <linux/swap.h>
19
20 #include "f2fs.h"
21 #include "segment.h"
22 #include "node.h"
23 #include <trace/events/f2fs.h>
24
/* Locate the first zero bit of x by running __reverse_ffs() on ~x. */
#define __reverse_ffz(x) __reverse_ffs(~(x))

/*
 * Slab caches used by this file: discard_entry_slab backs struct
 * discard_entry and inmem_entry_slab backs struct inmem_pages.
 * NOTE(review): sit_entry_set_slab has no user in the part of the file
 * reviewed here -- presumably it backs SIT entry sets; confirm.
 */
static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;
30
/*
 * __reverse_ffs is modelled on __ffs() from
 * include/asm-generic/bitops/__ffs.h, adapted to the bit order produced by
 * f2fs_set_bit, where MSB and LSB are swapped inside every byte: bytes are
 * searched from the lowest one upwards, but within a byte the most
 * significant bit counts as position 0.
 *
 * Returns the position of the first set bit of @word in that order. The
 * callers in this file only pass a non-zero @word.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int pos = 0;

	/* Narrow the search down to the lowest non-empty byte. */
#if BITS_PER_LONG == 64
	if (!(word & 0xffffffff)) {
		word >>= 32;
		pos += 32;
	}
#endif
	if (!(word & 0xffff)) {
		word >>= 16;
		pos += 16;
	}
	if (!(word & 0xff)) {
		word >>= 8;
		pos += 8;
	}

	/* Inside the byte the upper half comes first. */
	if (word & 0xf0)
		word >>= 4;
	else
		pos += 4;

	if (word & 0xc)
		word >>= 2;
	else
		pos += 2;

	if (!(word & 0x2))
		pos += 1;

	return pos;
}
65
66 /*
67 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
68 * f2fs_set_bit makes MSB and LSB reversed in a byte.
69 * Example:
70 * LSB <--> MSB
71 * f2fs_set_bit(0, bitmap) => 0000 0001
72 * f2fs_set_bit(7, bitmap) => 1000 0000
73 */
74 static unsigned long __find_rev_next_bit(const unsigned long *addr,
75 unsigned long size, unsigned long offset)
76 {
77 const unsigned long *p = addr + BIT_WORD(offset);
78 unsigned long result = offset & ~(BITS_PER_LONG - 1);
79 unsigned long tmp;
80 unsigned long mask, submask;
81 unsigned long quot, rest;
82
83 if (offset >= size)
84 return size;
85
86 size -= result;
87 offset %= BITS_PER_LONG;
88 if (!offset)
89 goto aligned;
90
91 tmp = *(p++);
92 quot = (offset >> 3) << 3;
93 rest = offset & 0x7;
94 mask = ~0UL << quot;
95 submask = (unsigned char)(0xff << rest) >> rest;
96 submask <<= quot;
97 mask &= submask;
98 tmp &= mask;
99 if (size < BITS_PER_LONG)
100 goto found_first;
101 if (tmp)
102 goto found_middle;
103
104 size -= BITS_PER_LONG;
105 result += BITS_PER_LONG;
106 aligned:
107 while (size & ~(BITS_PER_LONG-1)) {
108 tmp = *(p++);
109 if (tmp)
110 goto found_middle;
111 result += BITS_PER_LONG;
112 size -= BITS_PER_LONG;
113 }
114 if (!size)
115 return result;
116 tmp = *p;
117 found_first:
118 tmp &= (~0UL >> (BITS_PER_LONG - size));
119 if (tmp == 0UL) /* Are any bits set? */
120 return result + size; /* Nope. */
121 found_middle:
122 return result + __reverse_ffs(tmp);
123 }
124
/*
 * Zero-bit counterpart of __find_rev_next_bit(): scan the f2fs-ordered
 * bitmap @addr, which holds @size bits, for the first clear bit at or after
 * @offset and return its index. @size is returned when @offset >= @size or
 * when the scan runs out of bits without finding a clear one.
 */
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;
	unsigned long mask, submask;
	unsigned long quot, rest;

	if (offset >= size)
		return size;

	size -= result;
	offset %= BITS_PER_LONG;
	if (!offset)
		goto aligned;

	tmp = *(p++);
	/* quot: bit offset of the byte holding @offset; rest: bit inside it */
	quot = (offset >> 3) << 3;
	rest = offset & 0x7;
	/*
	 * Build a mask of everything that precedes @offset: all bits of the
	 * lower bytes plus the leading (high-order) `rest` bits of the byte
	 * holding @offset. OR-ing it in makes those positions look set so the
	 * search cannot stop on them.
	 */
	mask = ~(~0UL << quot);
	submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
	submask <<= quot;
	mask += submask;
	tmp |= mask;
	if (size < BITS_PER_LONG)
		goto found_first;
	if (~tmp)
		goto found_middle;

	size -= BITS_PER_LONG;
	result += BITS_PER_LONG;
aligned:
	/* whole words */
	while (size & ~(BITS_PER_LONG - 1)) {
		tmp = *(p++);
		if (~tmp)
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = *p;

found_first:
	/* treat everything beyond the remaining @size bits as set */
	tmp |= ~0UL << size;
	if (tmp == ~0UL)        /* Are any bits zero? */
		return result + size;   /* Nope. */
found_middle:
	return result + __reverse_ffz(tmp);
}
176
177 void register_inmem_page(struct inode *inode, struct page *page)
178 {
179 struct f2fs_inode_info *fi = F2FS_I(inode);
180 struct inmem_pages *new;
181 int err;
182
183 SetPagePrivate(page);
184
185 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
186
187 /* add atomic page indices to the list */
188 new->page = page;
189 INIT_LIST_HEAD(&new->list);
190 retry:
191 /* increase reference count with clean state */
192 mutex_lock(&fi->inmem_lock);
193 err = radix_tree_insert(&fi->inmem_root, page->index, new);
194 if (err == -EEXIST) {
195 mutex_unlock(&fi->inmem_lock);
196 kmem_cache_free(inmem_entry_slab, new);
197 return;
198 } else if (err) {
199 mutex_unlock(&fi->inmem_lock);
200 goto retry;
201 }
202 get_page(page);
203 list_add_tail(&new->list, &fi->inmem_pages);
204 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
205 mutex_unlock(&fi->inmem_lock);
206 }
207
208 void invalidate_inmem_page(struct inode *inode, struct page *page)
209 {
210 struct f2fs_inode_info *fi = F2FS_I(inode);
211 struct inmem_pages *cur;
212
213 mutex_lock(&fi->inmem_lock);
214 cur = radix_tree_lookup(&fi->inmem_root, page->index);
215 if (cur) {
216 radix_tree_delete(&fi->inmem_root, cur->page->index);
217 f2fs_put_page(cur->page, 0);
218 list_del(&cur->list);
219 kmem_cache_free(inmem_entry_slab, cur);
220 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
221 }
222 mutex_unlock(&fi->inmem_lock);
223 }
224
225 void commit_inmem_pages(struct inode *inode, bool abort)
226 {
227 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
228 struct f2fs_inode_info *fi = F2FS_I(inode);
229 struct inmem_pages *cur, *tmp;
230 bool submit_bio = false;
231 struct f2fs_io_info fio = {
232 .type = DATA,
233 .rw = WRITE_SYNC | REQ_PRIO,
234 };
235
236 /*
237 * The abort is true only when f2fs_evict_inode is called.
238 * Basically, the f2fs_evict_inode doesn't produce any data writes, so
239 * that we don't need to call f2fs_balance_fs.
240 * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
241 * inode becomes free by iget_locked in f2fs_iget.
242 */
243 if (!abort) {
244 f2fs_balance_fs(sbi);
245 f2fs_lock_op(sbi);
246 }
247
248 mutex_lock(&fi->inmem_lock);
249 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
250 if (!abort) {
251 lock_page(cur->page);
252 if (cur->page->mapping == inode->i_mapping) {
253 f2fs_wait_on_page_writeback(cur->page, DATA);
254 if (clear_page_dirty_for_io(cur->page))
255 inode_dec_dirty_pages(inode);
256 do_write_data_page(cur->page, &fio);
257 submit_bio = true;
258 }
259 f2fs_put_page(cur->page, 1);
260 } else {
261 put_page(cur->page);
262 }
263 radix_tree_delete(&fi->inmem_root, cur->page->index);
264 list_del(&cur->list);
265 kmem_cache_free(inmem_entry_slab, cur);
266 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
267 }
268 mutex_unlock(&fi->inmem_lock);
269
270 if (!abort) {
271 f2fs_unlock_op(sbi);
272 if (submit_bio)
273 f2fs_submit_merged_bio(sbi, DATA, WRITE);
274 }
275 }
276
277 /*
278 * This function balances dirty node and dentry pages.
279 * In addition, it controls garbage collection.
280 */
281 void f2fs_balance_fs(struct f2fs_sb_info *sbi)
282 {
283 /*
284 * We should do GC or end up with checkpoint, if there are so many dirty
285 * dir/node pages without enough free segments.
286 */
287 if (has_not_enough_free_secs(sbi, 0)) {
288 mutex_lock(&sbi->gc_mutex);
289 f2fs_gc(sbi);
290 }
291 }
292
293 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
294 {
295 /* check the # of cached NAT entries and prefree segments */
296 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
297 excess_prefree_segs(sbi) ||
298 !available_free_memory(sbi, INO_ENTRIES))
299 f2fs_sync_fs(sbi->sb, true);
300 }
301
302 static int issue_flush_thread(void *data)
303 {
304 struct f2fs_sb_info *sbi = data;
305 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
306 wait_queue_head_t *q = &fcc->flush_wait_queue;
307 repeat:
308 if (kthread_should_stop())
309 return 0;
310
311 if (!llist_empty(&fcc->issue_list)) {
312 struct bio *bio = bio_alloc(GFP_NOIO, 0);
313 struct flush_cmd *cmd, *next;
314 int ret;
315
316 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
317 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
318
319 bio->bi_bdev = sbi->sb->s_bdev;
320 ret = submit_bio_wait(WRITE_FLUSH, bio);
321
322 llist_for_each_entry_safe(cmd, next,
323 fcc->dispatch_list, llnode) {
324 cmd->ret = ret;
325 complete(&cmd->wait);
326 }
327 bio_put(bio);
328 fcc->dispatch_list = NULL;
329 }
330
331 wait_event_interruptible(*q,
332 kthread_should_stop() || !llist_empty(&fcc->issue_list));
333 goto repeat;
334 }
335
/*
 * Issue a cache flush to the block device of @sbi.
 *
 * Returns 0 immediately when the NOBARRIER mount option is set. Without
 * FLUSH_MERGE the flush is issued directly with blkdev_issue_flush() and its
 * result is returned. With FLUSH_MERGE an on-stack flush_cmd is queued on
 * fcc->issue_list and the caller sleeps until it is completed; the value
 * stored in cmd.ret is returned.
 */
int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
	struct flush_cmd cmd;

	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
					test_opt(sbi, FLUSH_MERGE));

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE))
		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);

	init_completion(&cmd.wait);

	/* hand the request over to the flush thread */
	llist_add(&cmd.llnode, &fcc->issue_list);

	/* only wake the thread when it has no batch in flight */
	if (!fcc->dispatch_list)
		wake_up(&fcc->flush_wait_queue);

	/* cmd lives on this stack frame, so wait before returning */
	wait_for_completion(&cmd.wait);

	return cmd.ret;
}
361
362 int create_flush_cmd_control(struct f2fs_sb_info *sbi)
363 {
364 dev_t dev = sbi->sb->s_bdev->bd_dev;
365 struct flush_cmd_control *fcc;
366 int err = 0;
367
368 fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
369 if (!fcc)
370 return -ENOMEM;
371 init_waitqueue_head(&fcc->flush_wait_queue);
372 init_llist_head(&fcc->issue_list);
373 SM_I(sbi)->cmd_control_info = fcc;
374 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
375 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
376 if (IS_ERR(fcc->f2fs_issue_flush)) {
377 err = PTR_ERR(fcc->f2fs_issue_flush);
378 kfree(fcc);
379 SM_I(sbi)->cmd_control_info = NULL;
380 return err;
381 }
382
383 return err;
384 }
385
386 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
387 {
388 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
389
390 if (fcc && fcc->f2fs_issue_flush)
391 kthread_stop(fcc->f2fs_issue_flush);
392 kfree(fcc);
393 SM_I(sbi)->cmd_control_info = NULL;
394 }
395
396 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
397 enum dirty_type dirty_type)
398 {
399 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
400
401 /* need not be added */
402 if (IS_CURSEG(sbi, segno))
403 return;
404
405 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
406 dirty_i->nr_dirty[dirty_type]++;
407
408 if (dirty_type == DIRTY) {
409 struct seg_entry *sentry = get_seg_entry(sbi, segno);
410 enum dirty_type t = sentry->type;
411
412 if (unlikely(t >= DIRTY)) {
413 f2fs_bug_on(sbi, 1);
414 return;
415 }
416 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
417 dirty_i->nr_dirty[t]++;
418 }
419 }
420
421 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
422 enum dirty_type dirty_type)
423 {
424 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
425
426 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
427 dirty_i->nr_dirty[dirty_type]--;
428
429 if (dirty_type == DIRTY) {
430 struct seg_entry *sentry = get_seg_entry(sbi, segno);
431 enum dirty_type t = sentry->type;
432
433 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
434 dirty_i->nr_dirty[t]--;
435
436 if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
437 clear_bit(GET_SECNO(sbi, segno),
438 dirty_i->victim_secmap);
439 }
440 }
441
442 /*
443 * Should not occur error such as -ENOMEM.
444 * Adding dirty entry into seglist is not critical operation.
445 * If a given segment is one of current working segments, it won't be added.
446 */
447 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
448 {
449 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
450 unsigned short valid_blocks;
451
452 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
453 return;
454
455 mutex_lock(&dirty_i->seglist_lock);
456
457 valid_blocks = get_valid_blocks(sbi, segno, 0);
458
459 if (valid_blocks == 0) {
460 __locate_dirty_segment(sbi, segno, PRE);
461 __remove_dirty_segment(sbi, segno, DIRTY);
462 } else if (valid_blocks < sbi->blocks_per_seg) {
463 __locate_dirty_segment(sbi, segno, DIRTY);
464 } else {
465 /* Recovery routine with SSR needs this */
466 __remove_dirty_segment(sbi, segno, DIRTY);
467 }
468
469 mutex_unlock(&dirty_i->seglist_lock);
470 }
471
472 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
473 block_t blkstart, block_t blklen)
474 {
475 sector_t start = SECTOR_FROM_BLOCK(blkstart);
476 sector_t len = SECTOR_FROM_BLOCK(blklen);
477 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
478 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
479 }
480
481 void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
482 {
483 if (f2fs_issue_discard(sbi, blkaddr, 1)) {
484 struct page *page = grab_meta_page(sbi, blkaddr);
485 /* zero-filled page */
486 set_page_dirty(page);
487 f2fs_put_page(page, 1);
488 }
489 }
490
491 static void __add_discard_entry(struct f2fs_sb_info *sbi,
492 struct cp_control *cpc, unsigned int start, unsigned int end)
493 {
494 struct list_head *head = &SM_I(sbi)->discard_list;
495 struct discard_entry *new, *last;
496
497 if (!list_empty(head)) {
498 last = list_last_entry(head, struct discard_entry, list);
499 if (START_BLOCK(sbi, cpc->trim_start) + start ==
500 last->blkaddr + last->len) {
501 last->len += end - start;
502 goto done;
503 }
504 }
505
506 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
507 INIT_LIST_HEAD(&new->list);
508 new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
509 new->len = end - start;
510 list_add_tail(&new->list, head);
511 done:
512 SM_I(sbi)->nr_discards += end - start;
513 cpc->trimmed += end - start;
514 }
515
/*
 * Collect discard candidates inside segment cpc->trim_start.
 *
 * Nothing is done unless the checkpoint reason is CP_DISCARD ("force") or
 * the DISCARD mount option is set. Otherwise every run of blocks that is
 * valid neither in the current nor in the checkpointed bitmap, and that is
 * at least cpc->trim_minlen blocks long, is handed to __add_discard_entry().
 */
static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	/* number of longs covering one SIT valid-block bitmap */
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long dmap[entries];
	/* end starts at -1 so that the first search begins at bit 0 */
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason == CP_DISCARD);
	int i;

	if (!force && !test_opt(sbi, DISCARD))
		return;

	if (force && !se->valid_blocks) {
		struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
		/*
		 * if this segment is registered in the prefree list, then
		 * we should skip adding a discard candidate, and let the
		 * checkpoint do that later.
		 */
		mutex_lock(&dirty_i->seglist_lock);
		if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
			mutex_unlock(&dirty_i->seglist_lock);
			/* still accounted as trimmed for the caller */
			cpc->trimmed += sbi->blocks_per_seg;
			return;
		}
		mutex_unlock(&dirty_i->seglist_lock);

		/* empty and not prefree: discard the whole segment */
		__add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
		return;
	}

	/* zero block will be discarded through the prefree list */
	if (!se->valid_blocks || se->valid_blocks == max_blocks)
		return;

	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	/* dmap: blocks valid in neither the current nor the checkpoint map */
	for (i = 0; i < entries; i++)
		dmap[i] = ~(cur_map[i] | ckpt_map[i]);

	/* unless forced, stop once nr_discards exceeds max_discards */
	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);

		/* run [start, end) is too short to be worth a discard */
		if (end - start < cpc->trim_minlen)
			continue;

		__add_discard_entry(sbi, cpc, start, end);
	}
}
571
572 void release_discard_addrs(struct f2fs_sb_info *sbi)
573 {
574 struct list_head *head = &(SM_I(sbi)->discard_list);
575 struct discard_entry *entry, *this;
576
577 /* drop caches */
578 list_for_each_entry_safe(entry, this, head, list) {
579 list_del(&entry->list);
580 kmem_cache_free(discard_entry_slab, entry);
581 }
582 }
583
584 /*
585 * Should call clear_prefree_segments after checkpoint is done.
586 */
587 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
588 {
589 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
590 unsigned int segno;
591
592 mutex_lock(&dirty_i->seglist_lock);
593 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
594 __set_test_and_free(sbi, segno);
595 mutex_unlock(&dirty_i->seglist_lock);
596 }
597
598 void clear_prefree_segments(struct f2fs_sb_info *sbi)
599 {
600 struct list_head *head = &(SM_I(sbi)->discard_list);
601 struct discard_entry *entry, *this;
602 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
603 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
604 unsigned int start = 0, end = -1;
605
606 mutex_lock(&dirty_i->seglist_lock);
607
608 while (1) {
609 int i;
610 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
611 if (start >= MAIN_SEGS(sbi))
612 break;
613 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
614 start + 1);
615
616 for (i = start; i < end; i++)
617 clear_bit(i, prefree_map);
618
619 dirty_i->nr_dirty[PRE] -= end - start;
620
621 if (!test_opt(sbi, DISCARD))
622 continue;
623
624 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
625 (end - start) << sbi->log_blocks_per_seg);
626 }
627 mutex_unlock(&dirty_i->seglist_lock);
628
629 /* send small discards */
630 list_for_each_entry_safe(entry, this, head, list) {
631 f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
632 list_del(&entry->list);
633 SM_I(sbi)->nr_discards -= entry->len;
634 kmem_cache_free(discard_entry_slab, entry);
635 }
636 }
637
638 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
639 {
640 struct sit_info *sit_i = SIT_I(sbi);
641
642 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
643 sit_i->dirty_sentries++;
644 return false;
645 }
646
647 return true;
648 }
649
650 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
651 unsigned int segno, int modified)
652 {
653 struct seg_entry *se = get_seg_entry(sbi, segno);
654 se->type = type;
655 if (modified)
656 __mark_sit_entry_dirty(sbi, segno);
657 }
658
/*
 * Apply a valid-block delta to the SIT entry that covers @blkaddr.
 *
 * @del is added to the segment's valid block count; the callers in this
 * file pass 1 when a block becomes valid and -1 when it is invalidated. The
 * block's bit in cur_valid_map is set (del > 0) or cleared (otherwise), the
 * entry is marked dirty, and the filesystem-wide and per-section counters
 * are adjusted by the same delta.
 */
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	/* the new count must fit in an unsigned short and in one segment */
	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	/* stamp the segment and record the same time as max_mtime */
	se->mtime = get_mtime(sbi);
	SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		/* the block must not have been valid already */
		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
	} else {
		/* the block must have been valid */
		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
	}
	/* only blocks that are not valid in the checkpoint map count here */
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}
697
/*
 * Account for a block that moved from @old to @new: @new is marked valid,
 * @old is marked invalid unless it maps to NULL_SEGNO, and the dirty state
 * of both segments is re-evaluated afterwards.
 * NOTE(review): no lock is taken here -- presumably the caller holds
 * sentry_lock, as invalidate_blocks() does around the same helpers; confirm.
 */
void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
	update_sit_entry(sbi, new, 1);
	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
		update_sit_entry(sbi, old, -1);

	/* locate_dirty_segment() itself ignores NULL_SEGNO */
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}
707
/*
 * Mark the block at @addr as no longer valid.
 *
 * @addr must not be NULL_ADDR. NEW_ADDR is ignored. For any other address
 * the SIT entry is updated with a delta of -1 and the segment's dirty state
 * is re-evaluated, both under sentry_lock.
 */
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	/* add it into sit main buffer */
	mutex_lock(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	mutex_unlock(&sit_i->sentry_lock);
}
727
728 /*
729 * This function should be resided under the curseg_mutex lock
730 */
731 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
732 struct f2fs_summary *sum)
733 {
734 struct curseg_info *curseg = CURSEG_I(sbi, type);
735 void *addr = curseg->sum_blk;
736 addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
737 memcpy(addr, sum, sizeof(struct f2fs_summary));
738 }
739
740 /*
741 * Calculate the number of current summary pages for writing
742 */
743 int npages_for_summary_flush(struct f2fs_sb_info *sbi)
744 {
745 int valid_sum_count = 0;
746 int i, sum_in_page;
747
748 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
749 if (sbi->ckpt->alloc_type[i] == SSR)
750 valid_sum_count += sbi->blocks_per_seg;
751 else
752 valid_sum_count += curseg_blkoff(sbi, i);
753 }
754
755 sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
756 SUM_FOOTER_SIZE) / SUMMARY_SIZE;
757 if (valid_sum_count <= sum_in_page)
758 return 1;
759 else if ((valid_sum_count - sum_in_page) <=
760 (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
761 return 2;
762 return 3;
763 }
764
765 /*
766 * Caller should put this summary page
767 */
768 struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
769 {
770 return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
771 }
772
773 static void write_sum_page(struct f2fs_sb_info *sbi,
774 struct f2fs_summary_block *sum_blk, block_t blk_addr)
775 {
776 struct page *page = grab_meta_page(sbi, blk_addr);
777 void *kaddr = page_address(page);
778 memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
779 set_page_dirty(page);
780 f2fs_put_page(page, 1);
781 }
782
783 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
784 {
785 struct curseg_info *curseg = CURSEG_I(sbi, type);
786 unsigned int segno = curseg->segno + 1;
787 struct free_segmap_info *free_i = FREE_I(sbi);
788
789 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
790 return !test_bit(segno, free_i->free_segmap);
791 return 0;
792 }
793
/*
 * Find a new segment from the free segments bitmap to right order
 * This function should be returned with success, otherwise BUG
 *
 * @newseg:  in: segment number used as the search hint; out: the segment
 *           that was picked and marked in use.
 * @new_sec: when false, a free segment in the section of *newseg is
 *           preferred, provided *newseg is not the last one of its section.
 * @dir:     ALLOC_RIGHT wraps the section search around to section 0;
 *           otherwise the search falls back to scanning leftwards.
 *
 * The whole search runs under free_i->segmap_lock (write side).
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	write_lock(&free_i->segmap_lock);

	/* first try the remaining segments of the current section */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
					MAIN_SEGS(sbi), *newseg + 1);
		/* accept it only if it still lies inside this section */
		if (segno - *newseg < sbi->segs_per_sec -
					(*newseg % sbi->segs_per_sec))
			goto got_it;
	}
find_other_zone:
	/* look for a free section at or to the right of hint */
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			/* wrap around and search from the first section */
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/* walk leftwards until a free section is found */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		/* reached section 0: take the first free section instead */
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	/* is any current segment already working in this zone? */
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		/* only one retry: the next pass accepts whatever it finds */
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	write_unlock(&free_i->segmap_lock);
}
886
887 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
888 {
889 struct curseg_info *curseg = CURSEG_I(sbi, type);
890 struct summary_footer *sum_footer;
891
892 curseg->segno = curseg->next_segno;
893 curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
894 curseg->next_blkoff = 0;
895 curseg->next_segno = NULL_SEGNO;
896
897 sum_footer = &(curseg->sum_blk->footer);
898 memset(sum_footer, 0, sizeof(struct summary_footer));
899 if (IS_DATASEG(type))
900 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
901 if (IS_NODESEG(type))
902 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
903 __set_sit_entry_type(sbi, type, curseg->segno, modified);
904 }
905
906 /*
907 * Allocate a current working segment.
908 * This function always allocates a free segment in LFS manner.
909 */
910 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
911 {
912 struct curseg_info *curseg = CURSEG_I(sbi, type);
913 unsigned int segno = curseg->segno;
914 int dir = ALLOC_LEFT;
915
916 write_sum_page(sbi, curseg->sum_blk,
917 GET_SUM_BLOCK(sbi, segno));
918 if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
919 dir = ALLOC_RIGHT;
920
921 if (test_opt(sbi, NOHEAP))
922 dir = ALLOC_RIGHT;
923
924 get_new_segment(sbi, &segno, new_sec, dir);
925 curseg->next_segno = segno;
926 reset_curseg(sbi, type, 1);
927 curseg->alloc_type = LFS;
928 }
929
930 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
931 struct curseg_info *seg, block_t start)
932 {
933 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
934 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
935 unsigned long target_map[entries];
936 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
937 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
938 int i, pos;
939
940 for (i = 0; i < entries; i++)
941 target_map[i] = ckpt_map[i] | cur_map[i];
942
943 pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
944
945 seg->next_blkoff = pos;
946 }
947
948 /*
949 * If a segment is written by LFS manner, next block offset is just obtained
950 * by increasing the current block offset. However, if a segment is written by
951 * SSR manner, next block offset obtained by calling __next_free_blkoff
952 */
953 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
954 struct curseg_info *seg)
955 {
956 if (seg->alloc_type == SSR)
957 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
958 else
959 seg->next_blkoff++;
960 }
961
/*
 * This function always allocates a used segment(from dirty seglist) by SSR
 * manner, so it should recover the existing segment information of valid blocks
 *
 * The segment to switch to is taken from curseg->next_segno. With @reuse
 * set, the summary entries already stored on disk for that segment are
 * loaded into the in-memory summary block.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	/* save the summary of the segment we are leaving */
	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	/* a current segment must not stay on the dirty lists */
	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	/* start writing at the first free block of the reused segment */
	__next_free_blkoff(sbi, curseg, 0);

	if (reuse) {
		sum_page = get_sum_page(sbi, new_segno);
		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
		/* only the entry area is copied, SUM_ENTRY_SIZE bytes */
		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
		f2fs_put_page(sum_page, 1);
	}
}
994
995 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
996 {
997 struct curseg_info *curseg = CURSEG_I(sbi, type);
998 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
999
1000 if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
1001 return v_ops->get_victim(sbi,
1002 &(curseg)->next_segno, BG_GC, type, SSR);
1003
1004 /* For data segments, let's do SSR more intensively */
1005 for (; type >= CURSEG_HOT_DATA; type--)
1006 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
1007 BG_GC, type, SSR))
1008 return 1;
1009 return 0;
1010 }
1011
1012 /*
1013 * flush out current segment and replace it with new segment
1014 * This function should be returned with success, otherwise BUG
1015 */
1016 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1017 int type, bool force)
1018 {
1019 struct curseg_info *curseg = CURSEG_I(sbi, type);
1020
1021 if (force)
1022 new_curseg(sbi, type, true);
1023 else if (type == CURSEG_WARM_NODE)
1024 new_curseg(sbi, type, false);
1025 else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
1026 new_curseg(sbi, type, false);
1027 else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
1028 change_curseg(sbi, type, true);
1029 else
1030 new_curseg(sbi, type, false);
1031
1032 stat_inc_seg_type(sbi, curseg);
1033 }
1034
1035 void allocate_new_segments(struct f2fs_sb_info *sbi)
1036 {
1037 struct curseg_info *curseg;
1038 unsigned int old_curseg;
1039 int i;
1040
1041 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1042 curseg = CURSEG_I(sbi, i);
1043 old_curseg = curseg->segno;
1044 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
1045 locate_dirty_segment(sbi, old_curseg);
1046 }
1047 }
1048
/* Default segment allocation policy, backed by allocate_segment_by_default(). */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};
1052
1053 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1054 {
1055 __u64 start = range->start >> sbi->log_blocksize;
1056 __u64 end = start + (range->len >> sbi->log_blocksize) - 1;
1057 unsigned int start_segno, end_segno;
1058 struct cp_control cpc;
1059
1060 if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
1061 range->len < sbi->blocksize)
1062 return -EINVAL;
1063
1064 cpc.trimmed = 0;
1065 if (end <= MAIN_BLKADDR(sbi))
1066 goto out;
1067
1068 /* start/end segment number in main_area */
1069 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
1070 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1071 GET_SEGNO(sbi, end);
1072 cpc.reason = CP_DISCARD;
1073 cpc.trim_start = start_segno;
1074 cpc.trim_end = end_segno;
1075 cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
1076
1077 /* do checkpoint to issue discard commands safely */
1078 mutex_lock(&sbi->gc_mutex);
1079 write_checkpoint(sbi, &cpc);
1080 mutex_unlock(&sbi->gc_mutex);
1081 out:
1082 range->len = cpc.trimmed << sbi->log_blocksize;
1083 return 0;
1084 }
1085
1086 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1087 {
1088 struct curseg_info *curseg = CURSEG_I(sbi, type);
1089 if (curseg->next_blkoff < sbi->blocks_per_seg)
1090 return true;
1091 return false;
1092 }
1093
1094 static int __get_segment_type_2(struct page *page, enum page_type p_type)
1095 {
1096 if (p_type == DATA)
1097 return CURSEG_HOT_DATA;
1098 else
1099 return CURSEG_HOT_NODE;
1100 }
1101
1102 static int __get_segment_type_4(struct page *page, enum page_type p_type)
1103 {
1104 if (p_type == DATA) {
1105 struct inode *inode = page->mapping->host;
1106
1107 if (S_ISDIR(inode->i_mode))
1108 return CURSEG_HOT_DATA;
1109 else
1110 return CURSEG_COLD_DATA;
1111 } else {
1112 if (IS_DNODE(page) && is_cold_node(page))
1113 return CURSEG_WARM_NODE;
1114 else
1115 return CURSEG_COLD_NODE;
1116 }
1117 }
1118
1119 static int __get_segment_type_6(struct page *page, enum page_type p_type)
1120 {
1121 if (p_type == DATA) {
1122 struct inode *inode = page->mapping->host;
1123
1124 if (S_ISDIR(inode->i_mode))
1125 return CURSEG_HOT_DATA;
1126 else if (is_cold_data(page) || file_is_cold(inode))
1127 return CURSEG_COLD_DATA;
1128 else
1129 return CURSEG_WARM_DATA;
1130 } else {
1131 if (IS_DNODE(page))
1132 return is_cold_node(page) ? CURSEG_WARM_NODE :
1133 CURSEG_HOT_NODE;
1134 else
1135 return CURSEG_COLD_NODE;
1136 }
1137 }
1138
1139 static int __get_segment_type(struct page *page, enum page_type p_type)
1140 {
1141 switch (F2FS_P_SB(page)->active_logs) {
1142 case 2:
1143 return __get_segment_type_2(page, p_type);
1144 case 4:
1145 return __get_segment_type_4(page, p_type);
1146 }
1147 /* NR_CURSEG_TYPE(6) logs by default */
1148 f2fs_bug_on(F2FS_P_SB(page),
1149 F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1150 return __get_segment_type_6(page, p_type);
1151 }
1152
1153 void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1154 block_t old_blkaddr, block_t *new_blkaddr,
1155 struct f2fs_summary *sum, int type)
1156 {
1157 struct sit_info *sit_i = SIT_I(sbi);
1158 struct curseg_info *curseg;
1159
1160 curseg = CURSEG_I(sbi, type);
1161
1162 mutex_lock(&curseg->curseg_mutex);
1163
1164 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1165
1166 /*
1167 * __add_sum_entry should be resided under the curseg_mutex
1168 * because, this function updates a summary entry in the
1169 * current summary block.
1170 */
1171 __add_sum_entry(sbi, type, sum);
1172
1173 mutex_lock(&sit_i->sentry_lock);
1174 __refresh_next_blkoff(sbi, curseg);
1175
1176 stat_inc_block_count(sbi, curseg);
1177
1178 if (!__has_curseg_space(sbi, type))
1179 sit_i->s_ops->allocate_segment(sbi, type, false);
1180 /*
1181 * SIT information should be updated before segment allocation,
1182 * since SSR needs latest valid block information.
1183 */
1184 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1185
1186 mutex_unlock(&sit_i->sentry_lock);
1187
1188 if (page && IS_NODESEG(type))
1189 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
1190
1191 mutex_unlock(&curseg->curseg_mutex);
1192 }
1193
1194 static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
1195 block_t old_blkaddr, block_t *new_blkaddr,
1196 struct f2fs_summary *sum, struct f2fs_io_info *fio)
1197 {
1198 int type = __get_segment_type(page, fio->type);
1199
1200 allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
1201
1202 /* writeout dirty page into bdev */
1203 f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
1204 }
1205
1206 void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
1207 {
1208 struct f2fs_io_info fio = {
1209 .type = META,
1210 .rw = WRITE_SYNC | REQ_META | REQ_PRIO
1211 };
1212
1213 set_page_writeback(page);
1214 f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1215 }
1216
1217 void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
1218 struct f2fs_io_info *fio,
1219 unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
1220 {
1221 struct f2fs_summary sum;
1222 set_summary(&sum, nid, 0, 0);
1223 do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
1224 }
1225
1226 void write_data_page(struct page *page, struct dnode_of_data *dn,
1227 block_t *new_blkaddr, struct f2fs_io_info *fio)
1228 {
1229 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1230 struct f2fs_summary sum;
1231 struct node_info ni;
1232
1233 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1234 get_node_info(sbi, dn->nid, &ni);
1235 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1236
1237 do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
1238 }
1239
1240 void rewrite_data_page(struct page *page, block_t old_blkaddr,
1241 struct f2fs_io_info *fio)
1242 {
1243 f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
1244 }
1245
1246 void recover_data_page(struct f2fs_sb_info *sbi,
1247 struct page *page, struct f2fs_summary *sum,
1248 block_t old_blkaddr, block_t new_blkaddr)
1249 {
1250 struct sit_info *sit_i = SIT_I(sbi);
1251 struct curseg_info *curseg;
1252 unsigned int segno, old_cursegno;
1253 struct seg_entry *se;
1254 int type;
1255
1256 segno = GET_SEGNO(sbi, new_blkaddr);
1257 se = get_seg_entry(sbi, segno);
1258 type = se->type;
1259
1260 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
1261 if (old_blkaddr == NULL_ADDR)
1262 type = CURSEG_COLD_DATA;
1263 else
1264 type = CURSEG_WARM_DATA;
1265 }
1266 curseg = CURSEG_I(sbi, type);
1267
1268 mutex_lock(&curseg->curseg_mutex);
1269 mutex_lock(&sit_i->sentry_lock);
1270
1271 old_cursegno = curseg->segno;
1272
1273 /* change the current segment */
1274 if (segno != curseg->segno) {
1275 curseg->next_segno = segno;
1276 change_curseg(sbi, type, true);
1277 }
1278
1279 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
1280 __add_sum_entry(sbi, type, sum);
1281
1282 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
1283 locate_dirty_segment(sbi, old_cursegno);
1284
1285 mutex_unlock(&sit_i->sentry_lock);
1286 mutex_unlock(&curseg->curseg_mutex);
1287 }
1288
1289 static inline bool is_merged_page(struct f2fs_sb_info *sbi,
1290 struct page *page, enum page_type type)
1291 {
1292 enum page_type btype = PAGE_TYPE_OF_BIO(type);
1293 struct f2fs_bio_info *io = &sbi->write_io[btype];
1294 struct bio_vec *bvec;
1295 int i;
1296
1297 down_read(&io->io_rwsem);
1298 if (!io->bio)
1299 goto out;
1300
1301 bio_for_each_segment_all(bvec, io->bio, i) {
1302 if (page == bvec->bv_page) {
1303 up_read(&io->io_rwsem);
1304 return true;
1305 }
1306 }
1307
1308 out:
1309 up_read(&io->io_rwsem);
1310 return false;
1311 }
1312
1313 void f2fs_wait_on_page_writeback(struct page *page,
1314 enum page_type type)
1315 {
1316 if (PageWriteback(page)) {
1317 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1318
1319 if (is_merged_page(sbi, page, type))
1320 f2fs_submit_merged_bio(sbi, type, WRITE);
1321 wait_on_page_writeback(page);
1322 }
1323 }
1324
1325 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
1326 {
1327 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1328 struct curseg_info *seg_i;
1329 unsigned char *kaddr;
1330 struct page *page;
1331 block_t start;
1332 int i, j, offset;
1333
1334 start = start_sum_block(sbi);
1335
1336 page = get_meta_page(sbi, start++);
1337 kaddr = (unsigned char *)page_address(page);
1338
1339 /* Step 1: restore nat cache */
1340 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1341 memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1342
1343 /* Step 2: restore sit cache */
1344 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1345 memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1346 SUM_JOURNAL_SIZE);
1347 offset = 2 * SUM_JOURNAL_SIZE;
1348
1349 /* Step 3: restore summary entries */
1350 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1351 unsigned short blk_off;
1352 unsigned int segno;
1353
1354 seg_i = CURSEG_I(sbi, i);
1355 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1356 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1357 seg_i->next_segno = segno;
1358 reset_curseg(sbi, i, 0);
1359 seg_i->alloc_type = ckpt->alloc_type[i];
1360 seg_i->next_blkoff = blk_off;
1361
1362 if (seg_i->alloc_type == SSR)
1363 blk_off = sbi->blocks_per_seg;
1364
1365 for (j = 0; j < blk_off; j++) {
1366 struct f2fs_summary *s;
1367 s = (struct f2fs_summary *)(kaddr + offset);
1368 seg_i->sum_blk->entries[j] = *s;
1369 offset += SUMMARY_SIZE;
1370 if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1371 SUM_FOOTER_SIZE)
1372 continue;
1373
1374 f2fs_put_page(page, 1);
1375 page = NULL;
1376
1377 page = get_meta_page(sbi, start++);
1378 kaddr = (unsigned char *)page_address(page);
1379 offset = 0;
1380 }
1381 }
1382 f2fs_put_page(page, 1);
1383 return 0;
1384 }
1385
1386 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1387 {
1388 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1389 struct f2fs_summary_block *sum;
1390 struct curseg_info *curseg;
1391 struct page *new;
1392 unsigned short blk_off;
1393 unsigned int segno = 0;
1394 block_t blk_addr = 0;
1395
1396 /* get segment number and block addr */
1397 if (IS_DATASEG(type)) {
1398 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1399 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1400 CURSEG_HOT_DATA]);
1401 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1402 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1403 else
1404 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1405 } else {
1406 segno = le32_to_cpu(ckpt->cur_node_segno[type -
1407 CURSEG_HOT_NODE]);
1408 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1409 CURSEG_HOT_NODE]);
1410 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1411 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1412 type - CURSEG_HOT_NODE);
1413 else
1414 blk_addr = GET_SUM_BLOCK(sbi, segno);
1415 }
1416
1417 new = get_meta_page(sbi, blk_addr);
1418 sum = (struct f2fs_summary_block *)page_address(new);
1419
1420 if (IS_NODESEG(type)) {
1421 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
1422 struct f2fs_summary *ns = &sum->entries[0];
1423 int i;
1424 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1425 ns->version = 0;
1426 ns->ofs_in_node = 0;
1427 }
1428 } else {
1429 int err;
1430
1431 err = restore_node_summary(sbi, segno, sum);
1432 if (err) {
1433 f2fs_put_page(new, 1);
1434 return err;
1435 }
1436 }
1437 }
1438
1439 /* set uncompleted segment to curseg */
1440 curseg = CURSEG_I(sbi, type);
1441 mutex_lock(&curseg->curseg_mutex);
1442 memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1443 curseg->next_segno = segno;
1444 reset_curseg(sbi, type, 0);
1445 curseg->alloc_type = ckpt->alloc_type[type];
1446 curseg->next_blkoff = blk_off;
1447 mutex_unlock(&curseg->curseg_mutex);
1448 f2fs_put_page(new, 1);
1449 return 0;
1450 }
1451
1452 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1453 {
1454 int type = CURSEG_HOT_DATA;
1455 int err;
1456
1457 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1458 /* restore for compacted data summary */
1459 if (read_compacted_summaries(sbi))
1460 return -EINVAL;
1461 type = CURSEG_HOT_NODE;
1462 }
1463
1464 for (; type <= CURSEG_COLD_NODE; type++) {
1465 err = read_normal_summaries(sbi, type);
1466 if (err)
1467 return err;
1468 }
1469
1470 return 0;
1471 }
1472
1473 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1474 {
1475 struct page *page;
1476 unsigned char *kaddr;
1477 struct f2fs_summary *summary;
1478 struct curseg_info *seg_i;
1479 int written_size = 0;
1480 int i, j;
1481
1482 page = grab_meta_page(sbi, blkaddr++);
1483 kaddr = (unsigned char *)page_address(page);
1484
1485 /* Step 1: write nat cache */
1486 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1487 memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1488 written_size += SUM_JOURNAL_SIZE;
1489
1490 /* Step 2: write sit cache */
1491 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1492 memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1493 SUM_JOURNAL_SIZE);
1494 written_size += SUM_JOURNAL_SIZE;
1495
1496 /* Step 3: write summary entries */
1497 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1498 unsigned short blkoff;
1499 seg_i = CURSEG_I(sbi, i);
1500 if (sbi->ckpt->alloc_type[i] == SSR)
1501 blkoff = sbi->blocks_per_seg;
1502 else
1503 blkoff = curseg_blkoff(sbi, i);
1504
1505 for (j = 0; j < blkoff; j++) {
1506 if (!page) {
1507 page = grab_meta_page(sbi, blkaddr++);
1508 kaddr = (unsigned char *)page_address(page);
1509 written_size = 0;
1510 }
1511 summary = (struct f2fs_summary *)(kaddr + written_size);
1512 *summary = seg_i->sum_blk->entries[j];
1513 written_size += SUMMARY_SIZE;
1514
1515 if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1516 SUM_FOOTER_SIZE)
1517 continue;
1518
1519 set_page_dirty(page);
1520 f2fs_put_page(page, 1);
1521 page = NULL;
1522 }
1523 }
1524 if (page) {
1525 set_page_dirty(page);
1526 f2fs_put_page(page, 1);
1527 }
1528 }
1529
1530 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1531 block_t blkaddr, int type)
1532 {
1533 int i, end;
1534 if (IS_DATASEG(type))
1535 end = type + NR_CURSEG_DATA_TYPE;
1536 else
1537 end = type + NR_CURSEG_NODE_TYPE;
1538
1539 for (i = type; i < end; i++) {
1540 struct curseg_info *sum = CURSEG_I(sbi, i);
1541 mutex_lock(&sum->curseg_mutex);
1542 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1543 mutex_unlock(&sum->curseg_mutex);
1544 }
1545 }
1546
1547 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1548 {
1549 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1550 write_compacted_summaries(sbi, start_blk);
1551 else
1552 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1553 }
1554
1555 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1556 {
1557 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1558 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1559 }
1560
1561 int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1562 unsigned int val, int alloc)
1563 {
1564 int i;
1565
1566 if (type == NAT_JOURNAL) {
1567 for (i = 0; i < nats_in_cursum(sum); i++) {
1568 if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1569 return i;
1570 }
1571 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1572 return update_nats_in_cursum(sum, 1);
1573 } else if (type == SIT_JOURNAL) {
1574 for (i = 0; i < sits_in_cursum(sum); i++)
1575 if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1576 return i;
1577 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1578 return update_sits_in_cursum(sum, 1);
1579 }
1580 return -1;
1581 }
1582
1583 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1584 unsigned int segno)
1585 {
1586 return get_meta_page(sbi, current_sit_addr(sbi, segno));
1587 }
1588
1589 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1590 unsigned int start)
1591 {
1592 struct sit_info *sit_i = SIT_I(sbi);
1593 struct page *src_page, *dst_page;
1594 pgoff_t src_off, dst_off;
1595 void *src_addr, *dst_addr;
1596
1597 src_off = current_sit_addr(sbi, start);
1598 dst_off = next_sit_addr(sbi, src_off);
1599
1600 /* get current sit block page without lock */
1601 src_page = get_meta_page(sbi, src_off);
1602 dst_page = grab_meta_page(sbi, dst_off);
1603 f2fs_bug_on(sbi, PageDirty(src_page));
1604
1605 src_addr = page_address(src_page);
1606 dst_addr = page_address(dst_page);
1607 memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1608
1609 set_page_dirty(dst_page);
1610 f2fs_put_page(src_page, 1);
1611
1612 set_to_next_sit(sit_i, start);
1613
1614 return dst_page;
1615 }
1616
1617 static struct sit_entry_set *grab_sit_entry_set(void)
1618 {
1619 struct sit_entry_set *ses =
1620 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1621
1622 ses->entry_cnt = 0;
1623 INIT_LIST_HEAD(&ses->set_list);
1624 return ses;
1625 }
1626
1627 static void release_sit_entry_set(struct sit_entry_set *ses)
1628 {
1629 list_del(&ses->set_list);
1630 kmem_cache_free(sit_entry_set_slab, ses);
1631 }
1632
1633 static void adjust_sit_entry_set(struct sit_entry_set *ses,
1634 struct list_head *head)
1635 {
1636 struct sit_entry_set *next = ses;
1637
1638 if (list_is_last(&ses->set_list, head))
1639 return;
1640
1641 list_for_each_entry_continue(next, head, set_list)
1642 if (ses->entry_cnt <= next->entry_cnt)
1643 break;
1644
1645 list_move_tail(&ses->set_list, &next->set_list);
1646 }
1647
1648 static void add_sit_entry(unsigned int segno, struct list_head *head)
1649 {
1650 struct sit_entry_set *ses;
1651 unsigned int start_segno = START_SEGNO(segno);
1652
1653 list_for_each_entry(ses, head, set_list) {
1654 if (ses->start_segno == start_segno) {
1655 ses->entry_cnt++;
1656 adjust_sit_entry_set(ses, head);
1657 return;
1658 }
1659 }
1660
1661 ses = grab_sit_entry_set();
1662
1663 ses->start_segno = start_segno;
1664 ses->entry_cnt++;
1665 list_add(&ses->set_list, head);
1666 }
1667
1668 static void add_sits_in_set(struct f2fs_sb_info *sbi)
1669 {
1670 struct f2fs_sm_info *sm_info = SM_I(sbi);
1671 struct list_head *set_list = &sm_info->sit_entry_set;
1672 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1673 unsigned int segno;
1674
1675 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
1676 add_sit_entry(segno, set_list);
1677 }
1678
1679 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1680 {
1681 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1682 struct f2fs_summary_block *sum = curseg->sum_blk;
1683 int i;
1684
1685 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1686 unsigned int segno;
1687 bool dirtied;
1688
1689 segno = le32_to_cpu(segno_in_journal(sum, i));
1690 dirtied = __mark_sit_entry_dirty(sbi, segno);
1691
1692 if (!dirtied)
1693 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1694 }
1695 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1696 }
1697
1698 /*
1699 * CP calls this function, which flushes SIT entries including sit_journal,
1700 * and moves prefree segs to free segs.
1701 */
1702 void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1703 {
1704 struct sit_info *sit_i = SIT_I(sbi);
1705 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1706 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1707 struct f2fs_summary_block *sum = curseg->sum_blk;
1708 struct sit_entry_set *ses, *tmp;
1709 struct list_head *head = &SM_I(sbi)->sit_entry_set;
1710 bool to_journal = true;
1711 struct seg_entry *se;
1712
1713 mutex_lock(&curseg->curseg_mutex);
1714 mutex_lock(&sit_i->sentry_lock);
1715
1716 /*
1717 * add and account sit entries of dirty bitmap in sit entry
1718 * set temporarily
1719 */
1720 add_sits_in_set(sbi);
1721
1722 /*
1723 * if there are no enough space in journal to store dirty sit
1724 * entries, remove all entries from journal and add and account
1725 * them in sit entry set.
1726 */
1727 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1728 remove_sits_in_journal(sbi);
1729
1730 if (!sit_i->dirty_sentries)
1731 goto out;
1732
1733 /*
1734 * there are two steps to flush sit entries:
1735 * #1, flush sit entries to journal in current cold data summary block.
1736 * #2, flush sit entries to sit page.
1737 */
1738 list_for_each_entry_safe(ses, tmp, head, set_list) {
1739 struct page *page = NULL;
1740 struct f2fs_sit_block *raw_sit = NULL;
1741 unsigned int start_segno = ses->start_segno;
1742 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1743 (unsigned long)MAIN_SEGS(sbi));
1744 unsigned int segno = start_segno;
1745
1746 if (to_journal &&
1747 !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1748 to_journal = false;
1749
1750 if (!to_journal) {
1751 page = get_next_sit_page(sbi, start_segno);
1752 raw_sit = page_address(page);
1753 }
1754
1755 /* flush dirty sit entries in region of current sit set */
1756 for_each_set_bit_from(segno, bitmap, end) {
1757 int offset, sit_offset;
1758
1759 se = get_seg_entry(sbi, segno);
1760
1761 /* add discard candidates */
1762 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
1763 cpc->trim_start = segno;
1764 add_discard_addrs(sbi, cpc);
1765 }
1766
1767 if (to_journal) {
1768 offset = lookup_journal_in_cursum(sum,
1769 SIT_JOURNAL, segno, 1);
1770 f2fs_bug_on(sbi, offset < 0);
1771 segno_in_journal(sum, offset) =
1772 cpu_to_le32(segno);
1773 seg_info_to_raw_sit(se,
1774 &sit_in_journal(sum, offset));
1775 } else {
1776 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1777 seg_info_to_raw_sit(se,
1778 &raw_sit->entries[sit_offset]);
1779 }
1780
1781 __clear_bit(segno, bitmap);
1782 sit_i->dirty_sentries--;
1783 ses->entry_cnt--;
1784 }
1785
1786 if (!to_journal)
1787 f2fs_put_page(page, 1);
1788
1789 f2fs_bug_on(sbi, ses->entry_cnt);
1790 release_sit_entry_set(ses);
1791 }
1792
1793 f2fs_bug_on(sbi, !list_empty(head));
1794 f2fs_bug_on(sbi, sit_i->dirty_sentries);
1795 out:
1796 if (cpc->reason == CP_DISCARD) {
1797 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
1798 add_discard_addrs(sbi, cpc);
1799 }
1800 mutex_unlock(&sit_i->sentry_lock);
1801 mutex_unlock(&curseg->curseg_mutex);
1802
1803 set_prefree_as_free_segments(sbi);
1804 }
1805
1806 static int build_sit_info(struct f2fs_sb_info *sbi)
1807 {
1808 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1809 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1810 struct sit_info *sit_i;
1811 unsigned int sit_segs, start;
1812 char *src_bitmap, *dst_bitmap;
1813 unsigned int bitmap_size;
1814
1815 /* allocate memory for SIT information */
1816 sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1817 if (!sit_i)
1818 return -ENOMEM;
1819
1820 SM_I(sbi)->sit_info = sit_i;
1821
1822 sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1823 if (!sit_i->sentries)
1824 return -ENOMEM;
1825
1826 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1827 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1828 if (!sit_i->dirty_sentries_bitmap)
1829 return -ENOMEM;
1830
1831 for (start = 0; start < MAIN_SEGS(sbi); start++) {
1832 sit_i->sentries[start].cur_valid_map
1833 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1834 sit_i->sentries[start].ckpt_valid_map
1835 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1836 if (!sit_i->sentries[start].cur_valid_map
1837 || !sit_i->sentries[start].ckpt_valid_map)
1838 return -ENOMEM;
1839 }
1840
1841 if (sbi->segs_per_sec > 1) {
1842 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1843 sizeof(struct sec_entry));
1844 if (!sit_i->sec_entries)
1845 return -ENOMEM;
1846 }
1847
1848 /* get information related with SIT */
1849 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1850
1851 /* setup SIT bitmap from ckeckpoint pack */
1852 bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1853 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1854
1855 dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1856 if (!dst_bitmap)
1857 return -ENOMEM;
1858
1859 /* init SIT information */
1860 sit_i->s_ops = &default_salloc_ops;
1861
1862 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1863 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1864 sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1865 sit_i->sit_bitmap = dst_bitmap;
1866 sit_i->bitmap_size = bitmap_size;
1867 sit_i->dirty_sentries = 0;
1868 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1869 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1870 sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1871 mutex_init(&sit_i->sentry_lock);
1872 return 0;
1873 }
1874
1875 static int build_free_segmap(struct f2fs_sb_info *sbi)
1876 {
1877 struct free_segmap_info *free_i;
1878 unsigned int bitmap_size, sec_bitmap_size;
1879
1880 /* allocate memory for free segmap information */
1881 free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1882 if (!free_i)
1883 return -ENOMEM;
1884
1885 SM_I(sbi)->free_info = free_i;
1886
1887 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1888 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1889 if (!free_i->free_segmap)
1890 return -ENOMEM;
1891
1892 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1893 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1894 if (!free_i->free_secmap)
1895 return -ENOMEM;
1896
1897 /* set all segments as dirty temporarily */
1898 memset(free_i->free_segmap, 0xff, bitmap_size);
1899 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1900
1901 /* init free segmap information */
1902 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1903 free_i->free_segments = 0;
1904 free_i->free_sections = 0;
1905 rwlock_init(&free_i->segmap_lock);
1906 return 0;
1907 }
1908
1909 static int build_curseg(struct f2fs_sb_info *sbi)
1910 {
1911 struct curseg_info *array;
1912 int i;
1913
1914 array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1915 if (!array)
1916 return -ENOMEM;
1917
1918 SM_I(sbi)->curseg_array = array;
1919
1920 for (i = 0; i < NR_CURSEG_TYPE; i++) {
1921 mutex_init(&array[i].curseg_mutex);
1922 array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1923 if (!array[i].sum_blk)
1924 return -ENOMEM;
1925 array[i].segno = NULL_SEGNO;
1926 array[i].next_blkoff = 0;
1927 }
1928 return restore_curseg_summaries(sbi);
1929 }
1930
1931 static void build_sit_entries(struct f2fs_sb_info *sbi)
1932 {
1933 struct sit_info *sit_i = SIT_I(sbi);
1934 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1935 struct f2fs_summary_block *sum = curseg->sum_blk;
1936 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1937 unsigned int i, start, end;
1938 unsigned int readed, start_blk = 0;
1939 int nrpages = MAX_BIO_BLOCKS(sbi);
1940
1941 do {
1942 readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
1943
1944 start = start_blk * sit_i->sents_per_block;
1945 end = (start_blk + readed) * sit_i->sents_per_block;
1946
1947 for (; start < end && start < MAIN_SEGS(sbi); start++) {
1948 struct seg_entry *se = &sit_i->sentries[start];
1949 struct f2fs_sit_block *sit_blk;
1950 struct f2fs_sit_entry sit;
1951 struct page *page;
1952
1953 mutex_lock(&curseg->curseg_mutex);
1954 for (i = 0; i < sits_in_cursum(sum); i++) {
1955 if (le32_to_cpu(segno_in_journal(sum, i))
1956 == start) {
1957 sit = sit_in_journal(sum, i);
1958 mutex_unlock(&curseg->curseg_mutex);
1959 goto got_it;
1960 }
1961 }
1962 mutex_unlock(&curseg->curseg_mutex);
1963
1964 page = get_current_sit_page(sbi, start);
1965 sit_blk = (struct f2fs_sit_block *)page_address(page);
1966 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1967 f2fs_put_page(page, 1);
1968 got_it:
1969 check_block_count(sbi, start, &sit);
1970 seg_info_from_raw_sit(se, &sit);
1971 if (sbi->segs_per_sec > 1) {
1972 struct sec_entry *e = get_sec_entry(sbi, start);
1973 e->valid_blocks += se->valid_blocks;
1974 }
1975 }
1976 start_blk += readed;
1977 } while (start_blk < sit_blk_cnt);
1978 }
1979
1980 static void init_free_segmap(struct f2fs_sb_info *sbi)
1981 {
1982 unsigned int start;
1983 int type;
1984
1985 for (start = 0; start < MAIN_SEGS(sbi); start++) {
1986 struct seg_entry *sentry = get_seg_entry(sbi, start);
1987 if (!sentry->valid_blocks)
1988 __set_free(sbi, start);
1989 }
1990
1991 /* set use the current segments */
1992 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1993 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1994 __set_test_and_inuse(sbi, curseg_t->segno);
1995 }
1996 }
1997
1998 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1999 {
2000 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2001 struct free_segmap_info *free_i = FREE_I(sbi);
2002 unsigned int segno = 0, offset = 0;
2003 unsigned short valid_blocks;
2004
2005 while (1) {
2006 /* find dirty segment based on free segmap */
2007 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
2008 if (segno >= MAIN_SEGS(sbi))
2009 break;
2010 offset = segno + 1;
2011 valid_blocks = get_valid_blocks(sbi, segno, 0);
2012 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
2013 continue;
2014 if (valid_blocks > sbi->blocks_per_seg) {
2015 f2fs_bug_on(sbi, 1);
2016 continue;
2017 }
2018 mutex_lock(&dirty_i->seglist_lock);
2019 __locate_dirty_segment(sbi, segno, DIRTY);
2020 mutex_unlock(&dirty_i->seglist_lock);
2021 }
2022 }
2023
2024 static int init_victim_secmap(struct f2fs_sb_info *sbi)
2025 {
2026 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2027 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
2028
2029 dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
2030 if (!dirty_i->victim_secmap)
2031 return -ENOMEM;
2032 return 0;
2033 }
2034
2035 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
2036 {
2037 struct dirty_seglist_info *dirty_i;
2038 unsigned int bitmap_size, i;
2039
2040 /* allocate memory for dirty segments list information */
2041 dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
2042 if (!dirty_i)
2043 return -ENOMEM;
2044
2045 SM_I(sbi)->dirty_info = dirty_i;
2046 mutex_init(&dirty_i->seglist_lock);
2047
2048 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
2049
2050 for (i = 0; i < NR_DIRTY_TYPE; i++) {
2051 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
2052 if (!dirty_i->dirty_segmap[i])
2053 return -ENOMEM;
2054 }
2055
2056 init_dirty_segmap(sbi);
2057 return init_victim_secmap(sbi);
2058 }
2059
2060 /*
2061 * Update min, max modified time for cost-benefit GC algorithm
2062 */
2063 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
2064 {
2065 struct sit_info *sit_i = SIT_I(sbi);
2066 unsigned int segno;
2067
2068 mutex_lock(&sit_i->sentry_lock);
2069
2070 sit_i->min_mtime = LLONG_MAX;
2071
2072 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2073 unsigned int i;
2074 unsigned long long mtime = 0;
2075
2076 for (i = 0; i < sbi->segs_per_sec; i++)
2077 mtime += get_seg_entry(sbi, segno + i)->mtime;
2078
2079 mtime = div_u64(mtime, sbi->segs_per_sec);
2080
2081 if (sit_i->min_mtime > mtime)
2082 sit_i->min_mtime = mtime;
2083 }
2084 sit_i->max_mtime = get_mtime(sbi);
2085 mutex_unlock(&sit_i->sentry_lock);
2086 }
2087
2088 int build_segment_manager(struct f2fs_sb_info *sbi)
2089 {
2090 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2091 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2092 struct f2fs_sm_info *sm_info;
2093 int err;
2094
2095 sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2096 if (!sm_info)
2097 return -ENOMEM;
2098
2099 /* init sm info */
2100 sbi->sm_info = sm_info;
2101 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2102 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2103 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2104 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2105 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2106 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2107 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2108 sm_info->rec_prefree_segments = sm_info->main_segments *
2109 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2110 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2111 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2112 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2113
2114 INIT_LIST_HEAD(&sm_info->discard_list);
2115 sm_info->nr_discards = 0;
2116 sm_info->max_discards = 0;
2117
2118 INIT_LIST_HEAD(&sm_info->sit_entry_set);
2119
2120 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2121 err = create_flush_cmd_control(sbi);
2122 if (err)
2123 return err;
2124 }
2125
2126 err = build_sit_info(sbi);
2127 if (err)
2128 return err;
2129 err = build_free_segmap(sbi);
2130 if (err)
2131 return err;
2132 err = build_curseg(sbi);
2133 if (err)
2134 return err;
2135
2136 /* reinit free segmap based on SIT */
2137 build_sit_entries(sbi);
2138
2139 init_free_segmap(sbi);
2140 err = build_dirty_segmap(sbi);
2141 if (err)
2142 return err;
2143
2144 init_min_max_mtime(sbi);
2145 return 0;
2146 }
2147
2148 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2149 enum dirty_type dirty_type)
2150 {
2151 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2152
2153 mutex_lock(&dirty_i->seglist_lock);
2154 kfree(dirty_i->dirty_segmap[dirty_type]);
2155 dirty_i->nr_dirty[dirty_type] = 0;
2156 mutex_unlock(&dirty_i->seglist_lock);
2157 }
2158
2159 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2160 {
2161 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2162 kfree(dirty_i->victim_secmap);
2163 }
2164
2165 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2166 {
2167 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2168 int i;
2169
2170 if (!dirty_i)
2171 return;
2172
2173 /* discard pre-free/dirty segments list */
2174 for (i = 0; i < NR_DIRTY_TYPE; i++)
2175 discard_dirty_segmap(sbi, i);
2176
2177 destroy_victim_secmap(sbi);
2178 SM_I(sbi)->dirty_info = NULL;
2179 kfree(dirty_i);
2180 }
2181
2182 static void destroy_curseg(struct f2fs_sb_info *sbi)
2183 {
2184 struct curseg_info *array = SM_I(sbi)->curseg_array;
2185 int i;
2186
2187 if (!array)
2188 return;
2189 SM_I(sbi)->curseg_array = NULL;
2190 for (i = 0; i < NR_CURSEG_TYPE; i++)
2191 kfree(array[i].sum_blk);
2192 kfree(array);
2193 }
2194
2195 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2196 {
2197 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2198 if (!free_i)
2199 return;
2200 SM_I(sbi)->free_info = NULL;
2201 kfree(free_i->free_segmap);
2202 kfree(free_i->free_secmap);
2203 kfree(free_i);
2204 }
2205
2206 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2207 {
2208 struct sit_info *sit_i = SIT_I(sbi);
2209 unsigned int start;
2210
2211 if (!sit_i)
2212 return;
2213
2214 if (sit_i->sentries) {
2215 for (start = 0; start < MAIN_SEGS(sbi); start++) {
2216 kfree(sit_i->sentries[start].cur_valid_map);
2217 kfree(sit_i->sentries[start].ckpt_valid_map);
2218 }
2219 }
2220 vfree(sit_i->sentries);
2221 vfree(sit_i->sec_entries);
2222 kfree(sit_i->dirty_sentries_bitmap);
2223
2224 SM_I(sbi)->sit_info = NULL;
2225 kfree(sit_i->sit_bitmap);
2226 kfree(sit_i);
2227 }
2228
2229 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2230 {
2231 struct f2fs_sm_info *sm_info = SM_I(sbi);
2232
2233 if (!sm_info)
2234 return;
2235 destroy_flush_cmd_control(sbi);
2236 destroy_dirty_segmap(sbi);
2237 destroy_curseg(sbi);
2238 destroy_free_segmap(sbi);
2239 destroy_sit_info(sbi);
2240 sbi->sm_info = NULL;
2241 kfree(sm_info);
2242 }
2243
2244 int __init create_segment_manager_caches(void)
2245 {
2246 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2247 sizeof(struct discard_entry));
2248 if (!discard_entry_slab)
2249 goto fail;
2250
2251 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2252 sizeof(struct sit_entry_set));
2253 if (!sit_entry_set_slab)
2254 goto destory_discard_entry;
2255
2256 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2257 sizeof(struct inmem_pages));
2258 if (!inmem_entry_slab)
2259 goto destroy_sit_entry_set;
2260 return 0;
2261
2262 destroy_sit_entry_set:
2263 kmem_cache_destroy(sit_entry_set_slab);
2264 destory_discard_entry:
2265 kmem_cache_destroy(discard_entry_slab);
2266 fail:
2267 return -ENOMEM;
2268 }
2269
2270 void destroy_segment_manager_caches(void)
2271 {
2272 kmem_cache_destroy(sit_entry_set_slab);
2273 kmem_cache_destroy(discard_entry_slab);
2274 kmem_cache_destroy(inmem_entry_slab);
2275 }