ext4: let ext4_group_add_blocks() handle 0 blocks quickly

[mirror_ubuntu-bionic-kernel.git] / fs / ext4 / mballoc.c
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c

index 859f2ae8864e6af2b85dc62135f7f89cb0258d4f..b6ef4da39ce3c1b013cacf0e97c247549ba2aafd 100644 (file)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -75,8 +75,8 @@
   *
   * The inode preallocation space is used looking at the _logical_ start
   * block. If only the logical file block falls within the range of prealloc
- * space we will consume the particular prealloc space. This make sure that
- * that the we have contiguous physical blocks representing the file blocks
+ * space we will consume the particular prealloc space. This makes sure that
+ * we have contiguous physical blocks representing the file blocks
   *
   * The important thing to be noted in case of inode prealloc space is that
   * we don't modify the values associated to inode prealloc space except
@@ -84,7 +84,7 @@
   *
   * If we are not able to find blocks in the inode prealloc space and if we
   * have the group allocation flag set then we look at the locality group
- * prealloc space. These are per CPU prealloc list repreasented as
+ * prealloc space. These are per CPU prealloc list represented as
   *
   * ext4_sb_info.s_locality_groups[smp_processor_id()]
   *
@@ -128,12 +128,13 @@
   * we are doing a group prealloc we try to normalize the request to
   * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
   * 512 blocks. This can be tuned via
- * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in
+ * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
   * terms of number of blocks. If we have mounted the file system with -O
   * stripe=<value> option the group prealloc request is normalized to the
- * stripe value (sbi->s_stripe)
+ * smallest multiple of the stripe value (sbi->s_stripe) which is
+ * not less than the default mb_group_prealloc.
   *
- * The regular allocator(using the buddy cache) supports few tunables.
+ * The regular allocator (using the buddy cache) supports a few tunables.
   *
   * /sys/fs/ext4/<partition>/mb_min_to_scan
   * /sys/fs/ext4/<partition>/mb_max_to_scan
@@ -152,7 +153,7 @@
   * best extent in the found extents. Searching for the blocks starts with
   * the group specified as the goal value in allocation context via
   * ac_g_ex. Each group is first checked based on the criteria whether it
- * can used for allocation. ext4_mb_good_group explains how the groups are
+ * can be used for allocation. ext4_mb_good_group explains how the groups are
   * checked.
   *
   * Both the prealloc space are getting populated as above. So for the first
@@ -1125,7 +1126,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
         grp = ext4_get_group_info(sb, group);
  
         e4b->bd_blkbits = sb->s_blocksize_bits;
-       e4b->bd_info = ext4_get_group_info(sb, group);
+       e4b->bd_info = grp;
         e4b->bd_sb = sb;
         e4b->bd_group = group;
         e4b->bd_buddy_page = NULL;
@@ -2279,8 +2280,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
  
  exit_group_info:
         /* If a meta_group_info table has been allocated, release it now */
-       if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
+       if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
                 kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
+               sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
+       }
  exit_meta_group_info:
         return -ENOMEM;
  } /* ext4_mb_add_groupinfo */
@@ -2404,14 +2407,14 @@ static int ext4_groupinfo_create_slab(size_t size)
                                         slab_size, 0, SLAB_RECLAIM_ACCOUNT,
                                         NULL);
  
+       ext4_groupinfo_caches[cache_index] = cachep;
+
         mutex_unlock(&ext4_grpinfo_slab_create_mutex);
         if (!cachep) {
                 printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
                 return -ENOMEM;
         }
  
-       ext4_groupinfo_caches[cache_index] = cachep;
-
         return 0;
  }
  
@@ -2472,6 +2475,18 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
         sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
         sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
+       /*
+        * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
+        * to the lowest multiple of s_stripe which is at least
+        * the s_mb_group_prealloc as determined above. We want
+        * the preallocation size to be an exact multiple of the
+        * RAID stripe size so that preallocations don't fragment
+        * the stripes.
+        */
+       if (sbi->s_stripe > 1) {
+               sbi->s_mb_group_prealloc = roundup(
+                       sbi->s_mb_group_prealloc, sbi->s_stripe);
+       }
  
         sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
         if (sbi->s_locality_groups == NULL) {
@@ -2566,7 +2581,7 @@ int ext4_mb_release(struct super_block *sb)
                                 atomic_read(&sbi->s_mb_lost_chunks));
                 printk(KERN_INFO
                        "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
-                               sbi->s_mb_buddies_generated++,
+                               sbi->s_mb_buddies_generated,
                                 sbi->s_mb_generation_time);
                 printk(KERN_INFO
                        "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
@@ -2628,6 +2643,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                 rb_erase(&entry->node, &(db->bb_free_root));
                 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
  
+               /*
+                * Clear the trimmed flag for the group so that the next
+                * ext4_trim_fs can trim it.
+                * If the volume is mounted with -o discard, online discard
+                * is supported and the free blocks will be trimmed online.
+                */
+               if (!test_opt(sb, DISCARD))
+                       EXT4_MB_GRP_CLEAR_TRIMMED(db);
+
                 if (!db->bb_free_root.rb_node) {
                         /* No more items in the per group rb tree
                          * balance refcounts from ext4_mb_free_metadata()
@@ -2830,8 +2854,9 @@ out_err:
  
  /*
   * here we normalize request for locality group
- * Group request are normalized to s_strip size if we set the same via mount
- * option. If not we set it to s_mb_group_prealloc which can be configured via
+ * Group requests are normalized to s_mb_group_prealloc, which
+ * ext4_mb_init() rounds up to a multiple of s_stripe when a stripe is set.
+ * s_mb_group_prealloc can be configured via
   * /sys/fs/ext4/<partition>/mb_group_prealloc
   *
   * XXX: should we try to preallocate more than the group has now?
@@ -2842,10 +2867,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
         struct ext4_locality_group *lg = ac->ac_lg;
  
         BUG_ON(lg == NULL);
-       if (EXT4_SB(sb)->s_stripe)
-               ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
-       else
-               ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
+       ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
         mb_debug(1, "#%u: goal %u blocks for locality group\n",
                 current->pid, ac->ac_g_ex.fe_len);
  }
@@ -3578,8 +3600,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                 free += next - bit;
  
                 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
-               trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
-                                              grp_blk_start + bit, next - bit);
+               trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
+                                              next - bit);
                 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
                 bit = next + 1;
         }
@@ -3608,7 +3630,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
         ext4_group_t group;
         ext4_grpblk_t bit;
  
-       trace_ext4_mb_release_group_pa(sb, pa);
+       trace_ext4_mb_release_group_pa(pa);
         BUG_ON(pa->pa_deleted == 0);
         ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
         BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -4448,7 +4470,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
   * @inode:             inode
   * @block:             start physical block to free
   * @count:             number of blocks to count
- * @metadata:          Are these metadata blocks
+ * @flags:             flags used by ext4_free_blocks
   */
  void ext4_free_blocks(handle_t *handle, struct inode *inode,
                       struct buffer_head *bh, ext4_fsblk_t block,
@@ -4637,7 +4659,7 @@ do_more:
         }
         ext4_mark_super_dirty(sb);
  error_return:
-       if (freed)
+       if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
                 dquot_free_block(inode, freed);
         brelse(bitmap_bh);
         ext4_std_error(sb, err);
@@ -4645,7 +4667,7 @@ error_return:
  }
  
  /**
- * ext4_add_groupblocks() -- Add given blocks to an existing group
+ * ext4_group_add_blocks() -- Add given blocks to an existing group
   * @handle:                    handle to this transaction
   * @sb:                                super block
   * @block:                     start physcial block to add to the block group
@@ -4653,7 +4675,7 @@ error_return:
   *
   * This marks the blocks as free in the bitmap and buddy.
   */
-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
                          ext4_fsblk_t block, unsigned long count)
  {
         struct buffer_head *bitmap_bh = NULL;
@@ -4666,25 +4688,35 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
         struct ext4_buddy e4b;
         int err = 0, ret, blk_free_count;
         ext4_grpblk_t blocks_freed;
-       struct ext4_group_info *grp;
  
         ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
  
+       if (count == 0)
+               return 0;
+
         ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
-       grp = ext4_get_group_info(sb, block_group);
         /*
          * Check to see if we are freeing blocks across a group
          * boundary.
          */
-       if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
+       if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
+               ext4_warning(sb, "too much blocks added to group %u\n",
+                            block_group);
+               err = -EINVAL;
                 goto error_return;
+       }
  
         bitmap_bh = ext4_read_block_bitmap(sb, block_group);
-       if (!bitmap_bh)
+       if (!bitmap_bh) {
+               err = -EIO;
                 goto error_return;
+       }
+
         desc = ext4_get_group_desc(sb, block_group, &gd_bh);
-       if (!desc)
+       if (!desc) {
+               err = -EIO;
                 goto error_return;
+       }
  
         if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
             in_range(ext4_inode_bitmap(sb, desc), block, count) ||
@@ -4694,6 +4726,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
                 ext4_error(sb, "Adding blocks in system zones - "
                            "Block = %llu, count = %lu",
                            block, count);
+               err = -EINVAL;
                 goto error_return;
         }
  
@@ -4762,7 +4795,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
  error_return:
         brelse(bitmap_bh);
         ext4_std_error(sb, err);
-       return;
+       return err;
  }
  
  /**
@@ -4782,6 +4815,8 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count,
  {
         struct ext4_free_extent ex;
  
+       trace_ext4_trim_extent(sb, group, start, count);
+
         assert_spin_locked(ext4_group_lock_ptr(sb, group));
  
         ex.fe_start = start;
@@ -4802,7 +4837,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count,
  /**
   * ext4_trim_all_free -- function to trim all free space in alloc. group
   * @sb:                        super block for file system
- * @e4b:               ext4 buddy
+ * @group:             group to be trimmed
   * @start:             first group block to examine
   * @max:               last group block to examine
   * @minblocks:         minimum extent block count
@@ -4823,10 +4858,12 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
                    ext4_grpblk_t minblocks)
  {
         void *bitmap;
-       ext4_grpblk_t next, count = 0;
+       ext4_grpblk_t next, count = 0, free_count = 0;
         struct ext4_buddy e4b;
         int ret;
  
+       trace_ext4_trim_all_free(sb, group, start, max);
+
         ret = ext4_mb_load_buddy(sb, group, &e4b);
         if (ret) {
                 ext4_error(sb, "Error in loading buddy "
@@ -4836,6 +4873,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
         bitmap = e4b.bd_bitmap;
  
         ext4_lock_group(sb, group);
+       if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
+           minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
+               goto out;
+
         start = (e4b.bd_info->bb_first_free > start) ?
                 e4b.bd_info->bb_first_free : start;
  
@@ -4850,6 +4891,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
                                          next - start, group, &e4b);
                         count += next - start;
                 }
+               free_count += next - start;
                 start = next + 1;
  
                 if (fatal_signal_pending(current)) {
@@ -4863,9 +4905,13 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
                         ext4_lock_group(sb, group);
                 }
  
-               if ((e4b.bd_info->bb_free - count) < minblocks)
+               if ((e4b.bd_info->bb_free - free_count) < minblocks)
                         break;
         }
+
+       if (!ret)
+               EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+out:
         ext4_unlock_group(sb, group);
         ext4_mb_unload_buddy(&e4b);
  
@@ -4904,6 +4950,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
  
         if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
                 return -EINVAL;
+       if (start + len <= first_data_blk)
+               goto out;
         if (start < first_data_blk) {
                 len -= first_data_blk - start;
                 start = first_data_blk;
@@ -4952,5 +5000,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
         }
         range->len = trimmed * sb->s_blocksize;
  
+       if (!ret)
+               atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
+
+out:
         return ret;
  }