/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *ino_entry_slab;
static struct kmem_cache *inode_entry_slab;

/*
 * We guarantee no failure on the returned page.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page = NULL;
repeat:
	page = grab_cache_page(mapping, index);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	f2fs_wait_on_page_writeback(page, META);
	SetPageUptodate(page);
	return page;
}

/*
 * We guarantee no failure on the returned page.
 */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;
repeat:
	page = grab_cache_page(mapping, index);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	if (PageUptodate(page))
		goto out;

	if (f2fs_submit_page_bio(sbi, page, index,
				READ_SYNC | REQ_META | REQ_PRIO))
		goto repeat;

	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
out:
	return page;
}

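/*
 * Like get_meta_page(), but start readahead of the surrounding meta
 * area (up to MAX_BIO_BLOCKS pages, META_POR type) when the wanted
 * page is not already cached and up to date.
 */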
struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index)
{
	bool readahead = false;
	struct page *page;

	page = find_get_page(META_MAPPING(sbi), index);
	if (!page || !PageUptodate(page))
		readahead = true;
	f2fs_put_page(page, 0);

	if (readahead)
		ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
	return get_meta_page(sbi, index);
}

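/*
 * Upper bound of the block index for each meta area type; it caps the
 * readahead window in ra_meta_pages(). Note that META_SSA and META_CP
 * return 0 here, which effectively disables readahead for those types.
 */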
static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
{
	switch (type) {
	case META_NAT:
		return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
	case META_SIT:
		return SIT_BLK_CNT(sbi);
	case META_SSA:
	case META_CP:
		return 0;
	case META_POR:
		return MAX_BLKADDR(sbi);
	default:
		BUG();
	}
}

/*
 * Readahead CP/NAT/SIT/SSA/POR pages
 */
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
{
	block_t prev_blk_addr = 0;
	struct page *page;
	block_t blkno = start;
	block_t max_blks = get_max_meta_blks(sbi, type);

	struct f2fs_io_info fio = {
		.type = META,
		.rw = READ_SYNC | REQ_META | REQ_PRIO
	};

	for (; nrpages-- > 0; blkno++) {
		block_t blk_addr;

		switch (type) {
		case META_NAT:
			/* get nat block addr */
			if (unlikely(blkno >= max_blks))
				blkno = 0;
			blk_addr = current_nat_addr(sbi,
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
			if (unlikely(blkno >= max_blks))
				goto out;
			blk_addr = current_sit_addr(sbi,
					blkno * SIT_ENTRY_PER_BLOCK);
			if (blkno != start && prev_blk_addr + 1 != blk_addr)
				goto out;
			prev_blk_addr = blk_addr;
			break;
		case META_SSA:
		case META_CP:
		case META_POR:
			if (unlikely(blkno >= max_blks))
				goto out;
			if (unlikely(blkno < SEG0_BLKADDR(sbi)))
				goto out;
			blk_addr = blkno;
			break;
		default:
			BUG();
		}

		page = grab_cache_page(META_MAPPING(sbi), blk_addr);
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

		f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
		f2fs_put_page(page, 0);
	}
out:
	f2fs_submit_merged_bio(sbi, META, READ);
	return blkno - start;
}

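/*
 * ->writepage for the meta mapping. The page is redirtied instead of
 * written while recovery is running (por_doing), when invoked from
 * reclaim, or after a checkpoint error, keeping meta writeback under
 * checkpoint control.
 */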
static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);

	trace_f2fs_writepage(page, META);

	if (unlikely(sbi->por_doing))
		goto redirty_out;
	if (wbc->for_reclaim)
		goto redirty_out;
	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;

	f2fs_wait_on_page_writeback(page, META);
	write_meta_page(sbi, page);
	dec_page_count(sbi, F2FS_DIRTY_META);
	unlock_page(page);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

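/*
 * ->writepages for the meta mapping: skip kupdate-style and too-small
 * writeback requests, and batch the dirty meta pages under cp_mutex so
 * the flush cannot race with an ongoing checkpoint.
 */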
static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	long diff, written;

	trace_f2fs_writepages(mapping->host, wbc, META);

	/* collect a number of dirty meta pages and write together */
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
		goto skip_write;

	/* hold cp_mutex so meta writeback does not race with checkpoint */
	mutex_lock(&sbi->cp_mutex);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
	mutex_unlock(&sbi->cp_mutex);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
	return 0;
}

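/*
 * Write up to nr_to_write dirty meta pages found via the dirty page
 * tag, then submit the merged WRITE bio. Returns the number of pages
 * written.
 */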
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
						long nr_to_write)
{
	struct address_space *mapping = META_MAPPING(sbi);
	pgoff_t index = 0, end = LONG_MAX;
	struct pagevec pvec;
	long nwritten = 0;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};

	pagevec_init(&pvec, 0);

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (unlikely(nr_pages == 0))
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			if (f2fs_write_meta_page(page, &wbc)) {
				unlock_page(page);
				break;
			}
			nwritten++;
			if (unlikely(nwritten >= nr_to_write))
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (nwritten)
		f2fs_submit_merged_bio(sbi, type, WRITE);

	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
	trace_f2fs_set_page_dirty(page, META);

	SetPageUptodate(page);
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
		return 1;
	}
	return 0;
}

const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
};

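/*
 * Per-type ino entry management (orphan/append/update): each entry is
 * kept in a radix tree for lookup and on a list for traversal, both
 * under im->ino_lock. The GFP_ATOMIC allocation simply retries after
 * dropping the lock if it fails.
 */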
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;
retry:
	spin_lock(&im->ino_lock);

	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
		if (!e) {
			spin_unlock(&im->ino_lock);
			goto retry;
		}
		if (radix_tree_insert(&im->ino_root, ino, e)) {
			spin_unlock(&im->ino_lock);
			kmem_cache_free(ino_entry_slab, e);
			goto retry;
		}
		memset(e, 0, sizeof(struct ino_entry));
		e->ino = ino;

		list_add_tail(&e->list, &im->ino_list);
		if (type != ORPHAN_INO)
			im->ino_num++;
	}
	spin_unlock(&im->ino_lock);
}

static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e) {
		list_del(&e->list);
		radix_tree_delete(&im->ino_root, ino);
		im->ino_num--;
		spin_unlock(&im->ino_lock);
		kmem_cache_free(ino_entry_slab, e);
		return;
	}
	spin_unlock(&im->ino_lock);
}

void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, type);
}

void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
}

/* mode should be APPEND_INO or UPDATE_INO */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	spin_unlock(&im->ino_lock);
	return e ? true : false;
}

void release_dirty_inode(struct f2fs_sb_info *sbi)
{
	struct ino_entry *e, *tmp;
	int i;

	for (i = APPEND_INO; i <= UPDATE_INO; i++) {
		struct inode_management *im = &sbi->im[i];

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
			list_del(&e->list);
			radix_tree_delete(&im->ino_root, e->ino);
			kmem_cache_free(ino_entry_slab, e);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

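/*
 * Reserve a slot for one more orphan inode; returns -ENOSPC once
 * ino_num reaches sbi->max_orphans, since the CP pack can only hold a
 * bounded number of orphan entries.
 */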
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];
	int err = 0;

	spin_lock(&im->ino_lock);
	if (unlikely(im->ino_num >= sbi->max_orphans))
		err = -ENOSPC;
	else
		im->ino_num++;
	spin_unlock(&im->ino_lock);

	return err;
}

void release_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
}

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* add new orphan ino entry into list */
	__add_ino_entry(sbi, ino, ORPHAN_INO);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode = f2fs_iget(sbi->sb, ino);
	f2fs_bug_on(sbi, IS_ERR(inode));
	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);
}

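/*
 * On mount, walk the orphan blocks of the valid CP pack and release
 * every inode recorded there; clearing nlink makes the iput() in
 * recover_orphan_inode() truncate and free it.
 */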
void recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
	block_t start_blk, orphan_blkaddr, i, j;

	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
		return;

	sbi->por_doing = true;

	start_blk = __start_cp_addr(sbi) + 1 +
		le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
	orphan_blkaddr = __start_sum_addr(sbi) - 1;

	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);

	for (i = 0; i < orphan_blkaddr; i++) {
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
			recover_orphan_inode(sbi, ino);
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
	sbi->por_doing = false;
}

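/*
 * Pack the orphan ino list into f2fs_orphan_block pages starting at
 * start_blk. All the needed pages are grabbed up front, so the
 * find_get_page() below must not fail.
 */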
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct list_head *head;
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
	unsigned short index;
	unsigned short orphan_blocks;
	struct page *page = NULL;
	struct ino_entry *orphan = NULL;
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);

	for (index = 0; index < orphan_blocks; index++)
		grab_meta_page(sbi, start_blk + index);

	index = 1;
	spin_lock(&im->ino_lock);
	head = &im->ino_list;

	/* loop over each orphan inode entry and write it into the orphan blocks */
	list_for_each_entry(orphan, head, list) {
		if (!page) {
			page = find_get_page(META_MAPPING(sbi), start_blk++);
			f2fs_bug_on(sbi, !page);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
			f2fs_put_page(page, 0);
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
			/*
			 * this orphan block is full of 1020 entries,
			 * so we need to flush the current orphan block
			 * and bring another one into memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
	}

	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}

	spin_unlock(&im->ino_lock);
}

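/*
 * Validate one CP pack: read its first and last blocks, verify the CRC
 * of each, and require the two version numbers to match. Returns the
 * first cp page on success, NULL otherwise.
 */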
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1, *cp_page_2 = NULL;
	unsigned long blk_size = sbi->blocksize;
	struct f2fs_checkpoint *cp_block;
	unsigned long long cur_version = 0, pre_version = 0;
	size_t crc_offset;
	__u32 crc = 0;

	/* Read the 1st cp block in this CP pack */
	cp_page_1 = get_meta_page(sbi, cp_addr);

	/* get the version number */
	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
	crc_offset = le32_to_cpu(cp_block->checksum_offset);
	if (crc_offset >= blk_size)
		goto invalid_cp1;

	crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
	if (!f2fs_crc_valid(crc, cp_block, crc_offset))
		goto invalid_cp1;

	pre_version = cur_cp_version(cp_block);

	/* Read the 2nd cp block in this CP pack */
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	cp_page_2 = get_meta_page(sbi, cp_addr);

	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
	crc_offset = le32_to_cpu(cp_block->checksum_offset);
	if (crc_offset >= blk_size)
		goto invalid_cp2;

	crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
	if (!f2fs_crc_valid(crc, cp_block, crc_offset))
		goto invalid_cp2;

	cur_version = cur_cp_version(cp_block);

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
	unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
	block_t cp_blk_no;
	int i;

	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding the valid cp block involves reading both
	 * sets (cp pack 1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

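/*
 * Inodes with dirty dentry pages are tracked on sbi->dir_inode_list so
 * block_operations() can flush them before a checkpoint; -EEXIST means
 * the inode is already on the list.
 */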
static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
		return -EEXIST;

	set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
	F2FS_I(inode)->dirty_dir = new;
	list_add_tail(&new->list, &sbi->dir_inode_list);
	stat_inc_dirty_dir(sbi);
	return 0;
}

void update_dirty_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dir_inode_entry *new;
	int ret = 0;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
		return;

	if (!S_ISDIR(inode->i_mode)) {
		inode_inc_dirty_pages(inode);
		goto out;
	}

	new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
	new->inode = inode;
	INIT_LIST_HEAD(&new->list);

	spin_lock(&sbi->dir_inode_lock);
	ret = __add_dirty_inode(inode, new);
	inode_inc_dirty_pages(inode);
	spin_unlock(&sbi->dir_inode_lock);

	if (ret)
		kmem_cache_free(inode_entry_slab, new);
out:
	SetPagePrivate(page);
}

void add_dirty_dir_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dir_inode_entry *new =
			f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
	int ret = 0;

	new->inode = inode;
	INIT_LIST_HEAD(&new->list);

	spin_lock(&sbi->dir_inode_lock);
	ret = __add_dirty_inode(inode, new);
	spin_unlock(&sbi->dir_inode_lock);

	if (ret)
		kmem_cache_free(inode_entry_slab, new);
}

void remove_dirty_dir_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dir_inode_entry *entry;

	if (!S_ISDIR(inode->i_mode))
		return;

	spin_lock(&sbi->dir_inode_lock);
	if (get_dirty_pages(inode) ||
			!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
		spin_unlock(&sbi->dir_inode_lock);
		return;
	}

	entry = F2FS_I(inode)->dirty_dir;
	list_del(&entry->list);
	F2FS_I(inode)->dirty_dir = NULL;
	clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
	stat_dec_dirty_dir(sbi);
	spin_unlock(&sbi->dir_inode_lock);
	kmem_cache_free(inode_entry_slab, entry);

	/* Only from the recovery routine */
	if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
		clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
		iput(inode);
	}
}

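/*
 * Flush dirty dentry pages of every inode on dir_inode_list until the
 * list drains, bailing out early on a checkpoint error.
 */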
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
	struct list_head *head;
	struct dir_inode_entry *entry;
	struct inode *inode;
retry:
	if (unlikely(f2fs_cp_error(sbi)))
		return;

	spin_lock(&sbi->dir_inode_lock);

	head = &sbi->dir_inode_list;
	if (list_empty(head)) {
		spin_unlock(&sbi->dir_inode_lock);
		return;
	}
	entry = list_entry(head->next, struct dir_inode_entry, list);
	inode = igrab(entry->inode);
	spin_unlock(&sbi->dir_inode_lock);
	if (inode) {
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
	} else {
		/*
		 * We should submit the bio, since several dentry pages
		 * in the freeing inode are still being written back.
		 */
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
	}
	goto retry;
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
static int block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;
	int err = 0;

	blk_start_plug(&plug);

retry_flush_dents:
	f2fs_lock_all(sbi);
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		f2fs_unlock_all(sbi);
		sync_dirty_dir_inodes(sbi);
		if (unlikely(f2fs_cp_error(sbi))) {
			err = -EIO;
			goto out;
		}
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush.
	 */
retry_flush_nodes:
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		sync_node_pages(sbi, 0, &wbc);
		if (unlikely(f2fs_cp_error(sbi))) {
			f2fs_unlock_all(sbi);
			err = -EIO;
			goto out;
		}
		goto retry_flush_nodes;
	}
out:
	blk_finish_plug(&plug);
	return err;
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

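/*
 * Sleep on cp_wait until the F2FS_WRITEBACK page count drops to zero,
 * i.e. all pages submitted so far have finished writeback.
 */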
static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

		if (!get_pages(sbi, F2FS_WRITEBACK))
			break;

		io_schedule();
	}
	finish_wait(&sbi->cp_wait, &wait);
}

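/*
 * Write one CP pack in order from start_blk: the checkpoint block, the
 * cp payload blocks, orphan blocks (if any), data summaries, node
 * summaries (on umount only), and a trailing copy of the checkpoint
 * block.
 */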
static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	nid_t last_nid = nm_i->next_scan_nid;
	block_t start_blk;
	struct page *cp_page;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
	void *kaddr;
	int i;
	int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);

	/*
	 * This avoids conducting wrong roll-forward operations and uses
	 * meta pages, so it should be called prior to sync_meta_pages below.
	 */
	discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));

	/* Flush all the NAT/SIT pages */
	while (get_pages(sbi, F2FS_DIRTY_META)) {
		sync_meta_pages(sbi, META, LONG_MAX);
		if (unlikely(f2fs_cp_error(sbi)))
			return;
	}

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp + n data seg summary + orphan inode blocks */
	data_sum_blocks = npages_for_summary_flush(sbi);
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);

	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);

	if (cpc->reason == CP_UMOUNT) {
		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
	} else {
		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);
	}

	if (orphan_num)
		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (sbi->need_fsck)
		set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

	crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
				= cpu_to_le32(crc32);

	start_blk = __start_cp_addr(sbi);

	/* write out checkpoint buffer at block 0 */
	cp_page = grab_meta_page(sbi, start_blk++);
	kaddr = page_address(cp_page);
	memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
	set_page_dirty(cp_page);
	f2fs_put_page(cp_page, 1);

	for (i = 1; i < 1 + cp_payload_blks; i++) {
		cp_page = grab_meta_page(sbi, start_blk++);
		kaddr = page_address(cp_page);
		memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
				(1 << sbi->log_blocksize));
		set_page_dirty(cp_page);
		f2fs_put_page(cp_page, 1);
	}

	if (orphan_num) {
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;
	if (cpc->reason == CP_UMOUNT) {
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* write out the trailing checkpoint block */
	cp_page = grab_meta_page(sbi, start_blk);
	kaddr = page_address(cp_page);
	memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
	set_page_dirty(cp_page);
	f2fs_put_page(cp_page, 1);

	/* wait for previously submitted node/meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return;

	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	sbi->alloc_valid_block_count = 0;

	/* Here, we only have one bio having CP pack */
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

	/* wait for previously submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	release_dirty_inode(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return;

	clear_prefree_segments(sbi);
	F2FS_RESET_SB_DIRT(sbi);
}

/*
 * We guarantee that this checkpoint procedure will not fail.
 */
void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

	mutex_lock(&sbi->cp_mutex);

	if (!sbi->s_dirty && cpc->reason != CP_DISCARD)
		goto out;
	if (unlikely(f2fs_cp_error(sbi)))
		goto out;
	if (block_operations(sbi))
		goto out;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");

	f2fs_submit_merged_bio(sbi, DATA, WRITE);
	f2fs_submit_merged_bio(sbi, NODE, WRITE);
	f2fs_submit_merged_bio(sbi, META, WRITE);

	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written in the correct place
	 */
	ckpt_ver = cur_cp_version(ckpt);
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi);
	flush_sit_entries(sbi, cpc);

	/* write the checkpoint pack */
	do_checkpoint(sbi, cpc);

	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);
out:
	mutex_unlock(&sbi->cp_mutex);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
}

void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
	}

	/*
	 * Considering 512 blocks in a segment, 8 blocks are needed for the
	 * cp blocks and log segment summaries. The remaining blocks are used
	 * to keep orphan entries. With one reserved segment for the cp pack,
	 * we can hold at most 1020 * 504 orphan entries.
	 */
	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
			NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
}

int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		return -ENOMEM;
	inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
			sizeof(struct dir_inode_entry));
	if (!inode_entry_slab) {
		kmem_cache_destroy(ino_entry_slab);
		return -ENOMEM;
	}
	return 0;
}

void destroy_checkpoint_caches(void)
{
	kmem_cache_destroy(ino_entry_slab);
	kmem_cache_destroy(inode_entry_slab);
}