]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
f2fs: allow write page cache when writing cp
authorYunlei He <heyunlei@huawei.com>
Mon, 13 Mar 2017 12:22:18 +0000 (20:22 +0800)
committerJaegeuk Kim <jaegeuk@kernel.org>
Sat, 25 Mar 2017 07:19:37 +0000 (00:19 -0700)
This patch allows writing data to regular files while writing a
new checkpoint.

We relax three limitations for write_begin path:
1. data allocation
2. node allocation
3. variables in checkpoint

Signed-off-by: Yunlei He <heyunlei@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/node.c
fs/f2fs/super.c

index adcc2c370df9898aa571517d2dff4d770f15965f..9aba0bb340a09df2d1ec42d999814de3b8b9f724 100644 (file)
@@ -944,6 +944,19 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
        return 0;
 }
 
+static void __prepare_cp_block(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       nid_t last_nid = nm_i->next_scan_nid;
+
+       next_free_nid(sbi, &last_nid);
+       ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
+       ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
+       ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
+       ckpt->next_free_nid = cpu_to_le32(last_nid);
+}
+
 /*
  * Freeze all the FS-operations for checkpoint.
  */
@@ -970,7 +983,14 @@ retry_flush_dents:
                goto retry_flush_dents;
        }
 
+       /*
+        * POR: we should ensure that there are no dirty node pages
+        * until finishing nat/sit flush. inode->i_blocks can be updated.
+        */
+       down_write(&sbi->node_change);
+
        if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
+               up_write(&sbi->node_change);
                f2fs_unlock_all(sbi);
                err = f2fs_sync_inode_meta(sbi);
                if (err)
@@ -978,10 +998,6 @@ retry_flush_dents:
                goto retry_flush_dents;
        }
 
-       /*
-        * POR: we should ensure that there are no dirty node pages
-        * until finishing nat/sit flush.
-        */
 retry_flush_nodes:
        down_write(&sbi->node_write);
 
@@ -989,11 +1005,19 @@ retry_flush_nodes:
                up_write(&sbi->node_write);
                err = sync_node_pages(sbi, &wbc);
                if (err) {
+                       up_write(&sbi->node_change);
                        f2fs_unlock_all(sbi);
                        goto out;
                }
                goto retry_flush_nodes;
        }
+
+       /*
+        * sbi->node_change is used only for AIO write_begin path which produces
+        * dirty node blocks and some checkpoint values by block allocation.
+        */
+       __prepare_cp_block(sbi);
+       up_write(&sbi->node_change);
 out:
        blk_finish_plug(&plug);
        return err;
@@ -1061,7 +1085,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
-       nid_t last_nid = nm_i->next_scan_nid;
        block_t start_blk;
        unsigned int data_sum_blocks, orphan_blocks;
        __u32 crc32 = 0;
@@ -1078,14 +1101,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                        return -EIO;
        }
 
-       next_free_nid(sbi, &last_nid);
-
        /*
         * modify checkpoint
         * version number is already updated
         */
        ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
-       ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
        ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
        for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
                ckpt->cur_node_segno[i] =
@@ -1104,10 +1124,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                                curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
        }
 
-       ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
-       ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
-       ckpt->next_free_nid = cpu_to_le32(last_nid);
-
        /* 2 cp  + n data seg summary + orphan inode blocks */
        data_sum_blocks = npages_for_summary_flush(sbi, false);
        spin_lock(&sbi->cp_lock);
index 2c8485bb6eb1e3aa4bba8dfcbb0120eb67601e41..090413236b2780bed3fdf087483c27b10cb0815e 100644 (file)
@@ -787,6 +787,21 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
        return err;
 }
 
+static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
+{
+       if (flag == F2FS_GET_BLOCK_PRE_AIO) {
+               if (lock)
+                       down_read(&sbi->node_change);
+               else
+                       up_read(&sbi->node_change);
+       } else {
+               if (lock)
+                       f2fs_lock_op(sbi);
+               else
+                       f2fs_unlock_op(sbi);
+       }
+}
+
 /*
  * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
  * f2fs_map_blocks structure.
@@ -829,7 +844,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 
 next_dnode:
        if (create)
-               f2fs_lock_op(sbi);
+               __do_map_lock(sbi, flag, true);
 
        /* When reading holes, we need its node page */
        set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -939,7 +954,7 @@ skip:
        f2fs_put_dnode(&dn);
 
        if (create) {
-               f2fs_unlock_op(sbi);
+               __do_map_lock(sbi, flag, false);
                f2fs_balance_fs(sbi, dn.node_changed);
        }
        goto next_dnode;
@@ -948,7 +963,7 @@ sync_out:
        f2fs_put_dnode(&dn);
 unlock_out:
        if (create) {
-               f2fs_unlock_op(sbi);
+               __do_map_lock(sbi, flag, false);
                f2fs_balance_fs(sbi, dn.node_changed);
        }
 out:
@@ -1688,7 +1703,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
 
        if (f2fs_has_inline_data(inode) ||
                        (pos & PAGE_MASK) >= i_size_read(inode)) {
-               f2fs_lock_op(sbi);
+               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
                locked = true;
        }
 restart:
@@ -1724,7 +1739,8 @@ restart:
                        err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
                        if (err || dn.data_blkaddr == NULL_ADDR) {
                                f2fs_put_dnode(&dn);
-                               f2fs_lock_op(sbi);
+                               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
+                                                               true);
                                locked = true;
                                goto restart;
                        }
@@ -1738,7 +1754,7 @@ out:
        f2fs_put_dnode(&dn);
 unlock_out:
        if (locked)
-               f2fs_unlock_op(sbi);
+               __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
        return err;
 }
 
index 340d62c29a98614f7eead68fcb97b94c308b15e3..6fbdcac01d9af1a300b88bd10457b06c0107605b 100644 (file)
@@ -830,6 +830,7 @@ struct f2fs_sb_info {
        struct mutex cp_mutex;                  /* checkpoint procedure lock */
        struct rw_semaphore cp_rwsem;           /* blocking FS operations */
        struct rw_semaphore node_write;         /* locking node writes */
+       struct rw_semaphore node_change;        /* locking node change */
        wait_queue_head_t cp_wait;
        unsigned long last_time[MAX_TIME];      /* to store time in jiffies */
        long interval_time[MAX_TIME];           /* to store thresholds */
index 29dc996b573c7ecfbbb748a79ce8378784f635dc..6e87178d34a2fd865979eacad276cead84781f0a 100644 (file)
@@ -2448,10 +2448,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
                f2fs_put_page(page, 1);
        }
 
-       f2fs_bug_on(sbi, set->entry_cnt);
-
-       radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
-       kmem_cache_free(nat_entry_set_slab, set);
+       /* Allow dirty nats by node block allocation in write_begin */
+       if (!set->entry_cnt) {
+               radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
+               kmem_cache_free(nat_entry_set_slab, set);
+       }
 }
 
 /*
@@ -2496,8 +2497,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                __flush_nat_entry_set(sbi, set, cpc);
 
        up_write(&nm_i->nat_tree_lock);
-
-       f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
+       /* Allow dirty nats by node block allocation in write_begin */
 }
 
 static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
index 17126a2897ad836d65ca8c11636afa351c0c4938..49434f951acecb800727997c312d35188d48314d 100644 (file)
@@ -1918,6 +1918,7 @@ try_onemore:
        mutex_init(&sbi->gc_mutex);
        mutex_init(&sbi->cp_mutex);
        init_rwsem(&sbi->node_write);
+       init_rwsem(&sbi->node_change);
 
        /* disallow all the data/node/meta page writes */
        set_sbi_flag(sbi, SBI_POR_DOING);