git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/f2fs/data.c
wrappers for ->i_mutex access

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 972eab7ac07193da485df3efc6b6c11c99dacd97..5c06db17e41fa267f5b270061d2959b2a36803e4 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -225,7 +225,8 @@ void set_data_blkaddr(struct dnode_of_data *dn)
        /* Get physical address of data block */
        addr_array = blkaddr_in_node(rn);
        addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
-       set_page_dirty(node_page);
+       if (set_page_dirty(node_page))
+               dn->node_changed = true;
 }
 
 int reserve_new_block(struct dnode_of_data *dn)
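
The first hunk threads the return value of set_page_dirty() into dn->node_changed: set_page_dirty() returns true only when the page actually transitions from clean to dirty, so the flag records whether this call dirtied a node page at all. The rest of this diff passes that flag to f2fs_balance_fs(), which grows a boolean gate. A minimal sketch of the gated form, assuming the gc_mutex/f2fs_gc() internals (they are not part of this diff):

        void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
        {
                if (!need)
                        return;         /* nothing was dirtied, skip the check */

                /* trigger foreground GC only when free sections run low */
                if (has_not_enough_free_secs(sbi, 0)) {
                        mutex_lock(&sbi->gc_mutex);
                        f2fs_gc(sbi, false);    /* assumed (sbi, sync) signature */
                }
        }
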
@@ -412,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode,
        struct page *page;
        struct dnode_of_data dn;
        int err;
-repeat:
+
        page = f2fs_grab_cache_page(mapping, index, true);
        if (!page) {
                /*
@@ -441,12 +442,11 @@ repeat:
        } else {
                f2fs_put_page(page, 1);
 
-               page = get_read_data_page(inode, index, READ_SYNC, true);
+               /* if ipage exists, blkaddr should be NEW_ADDR */
+               f2fs_bug_on(F2FS_I_SB(inode), ipage);
+               page = get_lock_data_page(inode, index, true);
                if (IS_ERR(page))
-                       goto repeat;
-
-               /* wait for read completion */
-               lock_page(page);
+                       return page;
        }
 got_it:
        if (new_i_size && i_size_read(inode) <
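
The repeat loop in get_new_data_page() disappears because get_lock_data_page() already issues the read, waits for completion, and returns the page locked (or an ERR_PTR), while the new f2fs_bug_on() asserts the invariant in the added comment: when the caller passed an inode page, the block address must be NEW_ADDR. The caller contract this leans on, in brief:

        page = get_lock_data_page(inode, index, true);
        if (IS_ERR(page))
                return page;    /* -EIO, -ENOMEM, ... propagate directly */
        /* here: page is locked, uptodate and referenced */
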
@@ -494,14 +494,10 @@ alloc:
        if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
                i_size_write(dn->inode,
                                ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
-
-       /* direct IO doesn't use extent cache to maximize the performance */
-       f2fs_drop_largest_extent(dn->inode, fofs);
-
        return 0;
 }
 
-static void __allocate_data_blocks(struct inode *inode, loff_t offset,
+static int __allocate_data_blocks(struct inode *inode, loff_t offset,
                                                        size_t count)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -510,14 +506,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
        u64 len = F2FS_BYTES_TO_BLK(count);
        bool allocated;
        u64 end_offset;
+       int err = 0;
 
        while (len) {
-               f2fs_balance_fs(sbi);
                f2fs_lock_op(sbi);
 
                /* When reading holes, we need its node page */
                set_new_dnode(&dn, inode, NULL, NULL, 0);
-               if (get_dnode_of_data(&dn, start, ALLOC_NODE))
+               err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+               if (err)
                        goto out;
 
                allocated = false;
@@ -526,12 +523,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
                while (dn.ofs_in_node < end_offset && len) {
                        block_t blkaddr;
 
-                       if (unlikely(f2fs_cp_error(sbi)))
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               err = -EIO;
                                goto sync_out;
+                       }
 
                        blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
                        if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
-                               if (__allocate_data_block(&dn))
+                               err = __allocate_data_block(&dn);
+                               if (err)
                                        goto sync_out;
                                allocated = true;
                        }
@@ -545,8 +545,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
 
                f2fs_put_dnode(&dn);
                f2fs_unlock_op(sbi);
+
+               f2fs_balance_fs(sbi, dn.node_changed);
        }
-       return;
+       return err;
 
 sync_out:
        if (allocated)
@@ -554,7 +556,8 @@ sync_out:
        f2fs_put_dnode(&dn);
 out:
        f2fs_unlock_op(sbi);
-       return;
+       f2fs_balance_fs(sbi, dn.node_changed);
+       return err;
 }
 
 /*
@@ -566,7 +569,7 @@ out:
  *     b. do not use extent cache for better performance
  *     c. give the block addresses to blockdev
  */
-static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
+int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                                                int create, int flag)
 {
        unsigned int maxblocks = map->m_len;
@@ -577,6 +580,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        int err = 0, ofs = 1;
        struct extent_info ei;
        bool allocated = false;
+       block_t blkaddr;
 
        map->m_len = 0;
        map->m_flags = 0;
@@ -592,7 +596,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        }
 
        if (create)
-               f2fs_lock_op(F2FS_I_SB(inode));
+               f2fs_lock_op(sbi);
 
        /* When reading holes, we need its node page */
        set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -640,12 +644,21 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        pgofs++;
 
 get_next:
+       if (map->m_len >= maxblocks)
+               goto sync_out;
+
        if (dn.ofs_in_node >= end_offset) {
                if (allocated)
                        sync_inode_page(&dn);
                allocated = false;
                f2fs_put_dnode(&dn);
 
+               if (create) {
+                       f2fs_unlock_op(sbi);
+                       f2fs_balance_fs(sbi, dn.node_changed);
+                       f2fs_lock_op(sbi);
+               }
+
                set_new_dnode(&dn, inode, NULL, NULL, 0);
                err = get_dnode_of_data(&dn, pgofs, mode);
                if (err) {
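
Dropping and retaking the op lock at each node-page boundary is the point of this hunk: f2fs_balance_fs() may kick off garbage collection, which can end in a checkpoint and therefore must not run under f2fs_lock_op() (this diff consistently moves every balance call outside the lock). Isolated, the idiom is:

        f2fs_put_dnode(&dn);
        f2fs_unlock_op(sbi);                    /* never balance under the op lock */
        f2fs_balance_fs(sbi, dn.node_changed);  /* only if we dirtied node pages */
        f2fs_lock_op(sbi);                      /* re-enter before the next lookup */
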
@@ -657,52 +670,53 @@ get_next:
                end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
        }
 
-       if (maxblocks > map->m_len) {
-               block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+       blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
 
-               if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
-                       if (create) {
-                               if (unlikely(f2fs_cp_error(sbi))) {
-                                       err = -EIO;
-                                       goto sync_out;
-                               }
-                               err = __allocate_data_block(&dn);
-                               if (err)
-                                       goto sync_out;
-                               allocated = true;
-                               map->m_flags |= F2FS_MAP_NEW;
-                               blkaddr = dn.data_blkaddr;
-                       } else {
-                               /*
-                                * we only merge preallocated unwritten blocks
-                                * for fiemap.
-                                */
-                               if (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                               blkaddr != NEW_ADDR)
-                                       goto sync_out;
+       if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
+               if (create) {
+                       if (unlikely(f2fs_cp_error(sbi))) {
+                               err = -EIO;
+                               goto sync_out;
                        }
+                       err = __allocate_data_block(&dn);
+                       if (err)
+                               goto sync_out;
+                       allocated = true;
+                       map->m_flags |= F2FS_MAP_NEW;
+                       blkaddr = dn.data_blkaddr;
+               } else {
+                       /*
+                        * we only merge preallocated unwritten blocks
+                        * for fiemap.
+                        */
+                       if (flag != F2FS_GET_BLOCK_FIEMAP ||
+                                       blkaddr != NEW_ADDR)
+                               goto sync_out;
                }
+       }
 
-               /* Give more consecutive addresses for the readahead */
-               if ((map->m_pblk != NEW_ADDR &&
-                               blkaddr == (map->m_pblk + ofs)) ||
-                               (map->m_pblk == NEW_ADDR &&
-                               blkaddr == NEW_ADDR)) {
-                       ofs++;
-                       dn.ofs_in_node++;
-                       pgofs++;
-                       map->m_len++;
-                       goto get_next;
-               }
+       /* Give more consecutive addresses for the readahead */
+       if ((map->m_pblk != NEW_ADDR &&
+                       blkaddr == (map->m_pblk + ofs)) ||
+                       (map->m_pblk == NEW_ADDR &&
+                       blkaddr == NEW_ADDR)) {
+               ofs++;
+               dn.ofs_in_node++;
+               pgofs++;
+               map->m_len++;
+               goto get_next;
        }
+
 sync_out:
        if (allocated)
                sync_inode_page(&dn);
 put_out:
        f2fs_put_dnode(&dn);
 unlock_out:
-       if (create)
-               f2fs_unlock_op(F2FS_I_SB(inode));
+       if (create) {
+               f2fs_unlock_op(sbi);
+               f2fs_balance_fs(sbi, dn.node_changed);
+       }
 out:
        trace_f2fs_map_blocks(inode, map, err);
        return err;
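
The de-indentation above does not change the merge rule: an extent keeps growing while the next block is physically consecutive with the run (map->m_pblk + ofs), or while both the run and the new block are preallocated (NEW_ADDR). Restated as a predicate:

        /* iteration ofs merges into the current extent iff */
        bool mergeable =
                (map->m_pblk != NEW_ADDR && blkaddr == map->m_pblk + ofs) ||
                (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR);
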
@@ -742,6 +756,10 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
 static int get_data_block_bmap(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
 {
+       /* Block number must be less than F2FS max_file_blocks */
+       if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
+               return -EFBIG;
+
        return __get_data_block(inode, iblock, bh_result, create,
                                                F2FS_GET_BLOCK_BMAP);
 }
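
get_data_block_bmap() now fails fast with -EFBIG for block numbers at or beyond max_file_blocks rather than descending the node tree for an address that can never exist. Note that generic bmap plumbing tends to fold errors into a zero block number on the way back to userspace, so the practical win is skipping the lookup. A hypothetical userspace probe of the bound:

        /* needs <linux/fs.h>, <sys/ioctl.h>, <limits.h>; FIBMAP needs CAP_SYS_RAWIO */
        int blk = INT_MAX;                      /* well past max_file_blocks */
        if (ioctl(fd, FIBMAP, &blk) == 0)
                printf("mapped to %d\n", blk);  /* expect 0: unmapped or error */
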
@@ -761,10 +779,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 {
        struct buffer_head map_bh;
        sector_t start_blk, last_blk;
-       loff_t isize = i_size_read(inode);
+       loff_t isize;
        u64 logical = 0, phys = 0, size = 0;
        u32 flags = 0;
-       bool past_eof = false, whole_file = false;
        int ret = 0;
 
        ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
@@ -777,18 +794,21 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                        return ret;
        }
 
-       mutex_lock(&inode->i_mutex);
+       inode_lock(inode);
 
-       if (len >= isize) {
-               whole_file = true;
-               len = isize;
-       }
+       isize = i_size_read(inode);
+       if (start >= isize)
+               goto out;
+
+       if (start + len > isize)
+               len = isize - start;
 
        if (logical_to_blk(inode, len) == 0)
                len = blk_to_logical(inode, 1);
 
        start_blk = logical_to_blk(inode, start);
        last_blk = logical_to_blk(inode, start + len - 1);
+
 next:
        memset(&map_bh, 0, sizeof(struct buffer_head));
        map_bh.b_size = len;
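
The rewritten prologue takes inode_lock() and clamps the request against i_size before walking anything: a range starting at or past EOF returns no extents, and a range crossing EOF is trimmed. That is what later lets the loop treat a hole at or past isize as the end-of-extents marker. A hypothetical userspace view of the first rule:

        /* needs <linux/fs.h>, <linux/fiemap.h>, <sys/ioctl.h> */
        struct fiemap fm = {
                .fm_start = file_size + 4096,   /* hypothetical: past i_size */
                .fm_length = 8192,
                .fm_extent_count = 0,           /* just count, no extent array */
        };
        ioctl(fd, FS_IOC_FIEMAP, &fm);          /* fm.fm_mapped_extents == 0 */
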
@@ -800,59 +820,37 @@ next:
 
        /* HOLE */
        if (!buffer_mapped(&map_bh)) {
-               start_blk++;
-
-               if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
-                       past_eof = 1;
-
-               if (past_eof && size) {
-                       flags |= FIEMAP_EXTENT_LAST;
-                       ret = fiemap_fill_next_extent(fieinfo, logical,
-                                       phys, size, flags);
-               } else if (size) {
-                       ret = fiemap_fill_next_extent(fieinfo, logical,
-                                       phys, size, flags);
-                       size = 0;
-               }
+               /* Go through holes until we pass the EOF */
+               if (blk_to_logical(inode, start_blk++) < isize)
+                       goto prep_next;
+               /* A hole beyond isize means there are no more extents.
+                * The premise is that filesystems don't punch holes
+                * beyond isize while keeping the size unchanged.
+                */
+               flags |= FIEMAP_EXTENT_LAST;
+       }
 
-               /* if we have holes up to/past EOF then we're done */
-               if (start_blk > last_blk || past_eof || ret)
-                       goto out;
-       } else {
-               if (start_blk > last_blk && !whole_file) {
-                       ret = fiemap_fill_next_extent(fieinfo, logical,
-                                       phys, size, flags);
-                       goto out;
-               }
+       if (size) {
+               if (f2fs_encrypted_inode(inode))
+                       flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
 
-               /*
-                * if size != 0 then we know we already have an extent
-                * to add, so add it.
-                */
-               if (size) {
-                       ret = fiemap_fill_next_extent(fieinfo, logical,
-                                       phys, size, flags);
-                       if (ret)
-                               goto out;
-               }
+               ret = fiemap_fill_next_extent(fieinfo, logical,
+                               phys, size, flags);
+       }
 
-               logical = blk_to_logical(inode, start_blk);
-               phys = blk_to_logical(inode, map_bh.b_blocknr);
-               size = map_bh.b_size;
-               flags = 0;
-               if (buffer_unwritten(&map_bh))
-                       flags = FIEMAP_EXTENT_UNWRITTEN;
+       if (start_blk > last_blk || ret)
+               goto out;
 
-               start_blk += logical_to_blk(inode, size);
+       logical = blk_to_logical(inode, start_blk);
+       phys = blk_to_logical(inode, map_bh.b_blocknr);
+       size = map_bh.b_size;
+       flags = 0;
+       if (buffer_unwritten(&map_bh))
+               flags = FIEMAP_EXTENT_UNWRITTEN;
 
-               /*
-                * If we are past the EOF, then we need to make sure as
-                * soon as we find a hole that the last extent we found
-                * is marked with FIEMAP_EXTENT_LAST
-                */
-               if (!past_eof && logical + size >= isize)
-                       past_eof = true;
-       }
+       start_blk += logical_to_blk(inode, size);
+
+prep_next:
        cond_resched();
        if (fatal_signal_pending(current))
                ret = -EINTR;
@@ -862,7 +860,7 @@ out:
        if (ret == 1)
                ret = 0;
 
-       mutex_unlock(&inode->i_mutex);
+       inode_unlock(inode);
        return ret;
 }
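
The mutex_lock(&inode->i_mutex)/mutex_unlock(&inode->i_mutex) pair becomes inode_lock()/inode_unlock(), which is what the subject line at the top of this page ("wrappers for ->i_mutex access") refers to. At this point in history the wrappers are thin aliases; their likely shape in include/linux/fs.h (not shown in this diff):

        static inline void inode_lock(struct inode *inode)
        {
                mutex_lock(&inode->i_mutex);
        }

        static inline void inode_unlock(struct inode *inode)
        {
                mutex_unlock(&inode->i_mutex);
        }
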
 
@@ -1083,6 +1081,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
         */
        if (unlikely(fio->blk_addr != NEW_ADDR &&
                        !is_cold_data(page) &&
+                       !IS_ATOMIC_WRITTEN_PAGE(page) &&
                        need_inplace_update(inode))) {
                rewrite_data_page(fio);
                set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
@@ -1179,10 +1178,11 @@ out:
        if (err)
                ClearPageUptodate(page);
        unlock_page(page);
-       if (need_balance_fs)
-               f2fs_balance_fs(sbi);
-       if (wbc->for_reclaim)
+       f2fs_balance_fs(sbi, need_balance_fs);
+       if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
                f2fs_submit_merged_bio(sbi, DATA, WRITE);
+               remove_dirty_inode(inode);
+       }
        return 0;
 
 redirty_out:
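
Two things change in this epilogue: the balance call is now always emitted but internally gated by need_balance_fs, and the merged DATA bio is also flushed when a checkpoint error is detected, with remove_dirty_inode() pulling the inode off the dirty list so a failed filesystem stops accumulating writeback state. The resulting flow, annotated:

        f2fs_balance_fs(sbi, need_balance_fs);  /* no-op when nothing changed */
        if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
                f2fs_submit_merged_bio(sbi, DATA, WRITE); /* flush pending writes */
                remove_dirty_inode(inode);      /* drop from the dirty-inode list */
        }
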
@@ -1354,6 +1354,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
                        available_free_memory(sbi, DIRTY_DENTS))
                goto skip_write;
 
+       /* skip writing during file defragment */
+       if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
+               goto skip_write;
+
        /* during POR, we don't need to trigger writepage at all. */
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto skip_write;
@@ -1369,7 +1373,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
        if (locked)
                mutex_unlock(&sbi->writepages);
 
-       remove_dirty_dir_inode(inode);
+       remove_dirty_inode(inode);
 
        wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
        return ret;
@@ -1382,13 +1386,85 @@ skip_write:
 static void f2fs_write_failed(struct address_space *mapping, loff_t to)
 {
        struct inode *inode = mapping->host;
+       loff_t i_size = i_size_read(inode);
 
-       if (to > inode->i_size) {
-               truncate_pagecache(inode, inode->i_size);
-               truncate_blocks(inode, inode->i_size, true);
+       if (to > i_size) {
+               truncate_pagecache(inode, i_size);
+               truncate_blocks(inode, i_size, true);
        }
 }
 
+static int prepare_write_begin(struct f2fs_sb_info *sbi,
+                       struct page *page, loff_t pos, unsigned len,
+                       block_t *blk_addr, bool *node_changed)
+{
+       struct inode *inode = page->mapping->host;
+       pgoff_t index = page->index;
+       struct dnode_of_data dn;
+       struct page *ipage;
+       bool locked = false;
+       struct extent_info ei;
+       int err = 0;
+
+       if (f2fs_has_inline_data(inode) ||
+                       (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+               f2fs_lock_op(sbi);
+               locked = true;
+       }
+restart:
+       /* check inline_data */
+       ipage = get_node_page(sbi, inode->i_ino);
+       if (IS_ERR(ipage)) {
+               err = PTR_ERR(ipage);
+               goto unlock_out;
+       }
+
+       set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+       if (f2fs_has_inline_data(inode)) {
+               if (pos + len <= MAX_INLINE_DATA) {
+                       read_inline_data(page, ipage);
+                       set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+                       sync_inode_page(&dn);
+               } else {
+                       err = f2fs_convert_inline_page(&dn, page);
+                       if (err)
+                               goto out;
+                       if (dn.data_blkaddr == NULL_ADDR)
+                               err = f2fs_get_block(&dn, index);
+               }
+       } else if (locked) {
+               err = f2fs_get_block(&dn, index);
+       } else {
+               if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+                       dn.data_blkaddr = ei.blk + index - ei.fofs;
+               } else {
+                       bool restart = false;
+
+                       /* hole case */
+                       err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
+                       if (err || (!err && dn.data_blkaddr == NULL_ADDR))
+                               restart = true;
+                       if (restart) {
+                               f2fs_put_dnode(&dn);
+                               f2fs_lock_op(sbi);
+                               locked = true;
+                               goto restart;
+                       }
+               }
+       }
+
+       /* convert_inline_page can make node_changed */
+       *blk_addr = dn.data_blkaddr;
+       *node_changed = dn.node_changed;
+out:
+       f2fs_put_dnode(&dn);
+unlock_out:
+       if (locked)
+               f2fs_unlock_op(sbi);
+       return err;
+}
+
 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
                loff_t pos, unsigned len, unsigned flags,
                struct page **pagep, void **fsdata)
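
prepare_write_begin() concentrates the "what backs this page?" decision that f2fs_write_begin() used to inline. It takes f2fs_lock_op() up front only in the two cases that may allocate (inline data, or a write extending past EOF); otherwise it tries a lock-free extent-cache hit and falls back to restart: with the lock held when it finds a hole. The fallback, condensed (note that the hunk's err || (!err && ...) test carries a redundant !err):

        if (f2fs_lookup_extent_cache(inode, index, &ei)) {
                dn.data_blkaddr = ei.blk + index - ei.fofs; /* cache hit, no lock */
        } else {
                err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
                if (err || dn.data_blkaddr == NULL_ADDR) {
                        /* hole: allocation needed, so retry under f2fs_lock_op() */
                        f2fs_put_dnode(&dn);
                        f2fs_lock_op(sbi);
                        locked = true;
                        goto restart;
                }
        }
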
@@ -1396,15 +1472,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
        struct inode *inode = mapping->host;
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct page *page = NULL;
-       struct page *ipage;
        pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
-       struct dnode_of_data dn;
+       bool need_balance = false;
+       block_t blkaddr = NULL_ADDR;
        int err = 0;
 
        trace_f2fs_write_begin(inode, pos, len, flags);
 
-       f2fs_balance_fs(sbi);
-
        /*
         * We should check this at this moment to avoid deadlock on inode page
         * and #0 page. The locking rule for inline_data conversion should be:
@@ -1424,41 +1498,27 @@ repeat:
 
        *pagep = page;
 
-       f2fs_lock_op(sbi);
-
-       /* check inline_data */
-       ipage = get_node_page(sbi, inode->i_ino);
-       if (IS_ERR(ipage)) {
-               err = PTR_ERR(ipage);
-               goto unlock_fail;
-       }
-
-       set_new_dnode(&dn, inode, ipage, ipage, 0);
+       err = prepare_write_begin(sbi, page, pos, len,
+                                       &blkaddr, &need_balance);
+       if (err)
+               goto fail;
 
-       if (f2fs_has_inline_data(inode)) {
-               if (pos + len <= MAX_INLINE_DATA) {
-                       read_inline_data(page, ipage);
-                       set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
-                       sync_inode_page(&dn);
-                       goto put_next;
+       if (need_balance && has_not_enough_free_secs(sbi, 0)) {
+               unlock_page(page);
+               f2fs_balance_fs(sbi, true);
+               lock_page(page);
+               if (page->mapping != mapping) {
+                       /* The page got truncated from under us */
+                       f2fs_put_page(page, 1);
+                       goto repeat;
                }
-               err = f2fs_convert_inline_page(&dn, page);
-               if (err)
-                       goto put_fail;
        }
 
-       err = f2fs_get_block(&dn, index);
-       if (err)
-               goto put_fail;
-put_next:
-       f2fs_put_dnode(&dn);
-       f2fs_unlock_op(sbi);
-
        f2fs_wait_on_page_writeback(page, DATA);
 
        /* wait for GCed encrypted page writeback */
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
-               f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
+               f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
 
        if (len == PAGE_CACHE_SIZE)
                goto out_update;
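
The unlock/balance/relock sequence in the hunk above is the standard page-revalidation idiom: the page lock cannot be held across f2fs_balance_fs() (it may block on GC), and once the lock is dropped truncation can race in, so after relocking the code checks page->mapping and retries the lookup if the page was torn down. Isolated:

        unlock_page(page);              /* can't hold the page lock across GC */
        f2fs_balance_fs(sbi, true);
        lock_page(page);
        if (page->mapping != mapping) { /* raced with truncate: page is stale */
                f2fs_put_page(page, 1);
                goto repeat;            /* grab a fresh page and start over */
        }
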
@@ -1474,14 +1534,14 @@ put_next:
                goto out_update;
        }
 
-       if (dn.data_blkaddr == NEW_ADDR) {
+       if (blkaddr == NEW_ADDR) {
                zero_user_segment(page, 0, PAGE_CACHE_SIZE);
        } else {
                struct f2fs_io_info fio = {
                        .sbi = sbi,
                        .type = DATA,
                        .rw = READ_SYNC,
-                       .blk_addr = dn.data_blkaddr,
+                       .blk_addr = blkaddr,
                        .page = page,
                        .encrypted_page = NULL,
                };
@@ -1512,10 +1572,6 @@ out_clear:
        clear_cold_data(page);
        return 0;
 
-put_fail:
-       f2fs_put_dnode(&dn);
-unlock_fail:
-       f2fs_unlock_op(sbi);
 fail:
        f2fs_put_page(page, 1);
        f2fs_write_failed(mapping, pos + len);
@@ -1540,6 +1596,7 @@ static int f2fs_write_end(struct file *file,
        }
 
        f2fs_put_page(page, 1);
+       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
        return copied;
 }
 
@@ -1567,11 +1624,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        int err;
 
        /* we don't need to use inline_data strictly */
-       if (f2fs_has_inline_data(inode)) {
-               err = f2fs_convert_inline_inode(inode);
-               if (err)
-                       return err;
-       }
+       err = f2fs_convert_inline_inode(inode);
+       if (err)
+               return err;
 
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
                return 0;
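
The f2fs_has_inline_data() guard disappears on the assumption that f2fs_convert_inline_inode() itself returns 0 cheaply when there is nothing to convert; direct I/O then never has to reason about an inline payload. The contract this relies on (assumed, not shown in this diff; do_convert is a hypothetical stand-in for the real conversion body):

        int f2fs_convert_inline_inode(struct inode *inode)
        {
                if (!f2fs_has_inline_data(inode))
                        return 0;       /* nothing to convert: cheap early exit */
                /* ...allocate a data page and move the inline payload out... */
                return do_convert(inode);       /* hypothetical helper */
        }
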
@@ -1583,11 +1638,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 
        if (iov_iter_rw(iter) == WRITE) {
-               __allocate_data_blocks(inode, offset, count);
-               if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
-                       err = -EIO;
+               err = __allocate_data_blocks(inode, offset, count);
+               if (err)
                        goto out;
-               }
        }
 
        err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);