fs/btrfs/discard.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 #include <linux/jiffies.h>
   4 #include <linux/kernel.h>
   5 #include <linux/ktime.h>
   6 #include <linux/list.h>
   7 #include <linux/math64.h>
   8 #include <linux/sizes.h>
   9 #include <linux/workqueue.h>
  10 #include "ctree.h"
  11 #include "block-group.h"
  12 #include "discard.h"
  13 #include "free-space-cache.h"
  14
  15 /*
  16  * This contains the logic to handle async discard.
  17  *
  18  * Async discard manages trimming of free space outside of transaction commit.
  19  * Discarding is done by managing the block_groups on a LRU list based on free
  20  * space recency.  Two passes are used to first prioritize discarding extents
  21  * and then allow for trimming in the bitmap the best opportunity to coalesce.
  22  * The block_groups are maintained on multiple lists to allow for multiple
  23  * passes with different discard filter requirements.  A delayed work item is
  24  * used to manage discarding with timeout determined by a max of the delay
  25  * incurred by the iops rate limit, the byte rate limit, and the max delay of
  26  * BTRFS_DISCARD_MAX_DELAY.
  27  *
  28  * Note, this only keeps track of block_groups that are explicitly for data.
  29  * Mixed block_groups are not supported.
  30  *
  31  * The first list is special to manage discarding of fully free block groups.
  32  * This is necessary because we issue a final trim for a full free block group
  33  * after forgetting it.  When a block group becomes unused, instead of directly
  34  * being added to the unused_bgs list, we add it to this first list.  Then
  35  * from there, if it becomes fully discarded, we place it onto the unused_bgs
  36  * list.
  37  *
  38  * The in-memory free space cache serves as the backing state for discard.
  39  * Consequently this means there is no persistence.  We opt to load all the
  40  * block groups in as not discarded, so the mount case degenerates to the
  41  * crashing case.
  42  *
  43  * As the free space cache uses bitmaps, there exists a tradeoff between
  44  * ease/efficiency for find_free_extent() and the accuracy of discard state.
  45  * Here we opt to let untrimmed regions merge with everything while only letting
  46  * trimmed regions merge with other trimmed regions.  This can cause
  47  * overtrimming, but the coalescing benefit seems to be worth it.  Additionally,
  48  * bitmap state is tracked as a whole.  If we're able to fully trim a bitmap,
  49  * the trimmed flag is set on the bitmap.  Otherwise, if an allocation comes in,
  50  * this resets the state and we will retry trimming the whole bitmap.  This is a
  51  * tradeoff between discard state accuracy and the cost of accounting.
  52  */
  53
  54 /* This is an initial delay to give some chance for block reuse */
  55 #define BTRFS_DISCARD_DELAY             (120ULL * NSEC_PER_SEC)
  56 #define BTRFS_DISCARD_UNUSED_DELAY      (10ULL * NSEC_PER_SEC)
  57
  58 /* Target completion latency of discarding all discardable extents */
  59 #define BTRFS_DISCARD_TARGET_MSEC       (6 * 60 * 60UL * MSEC_PER_SEC)
  60 #define BTRFS_DISCARD_MIN_DELAY_MSEC    (1UL)
  61 #define BTRFS_DISCARD_MAX_DELAY_MSEC    (1000UL)
  62 #define BTRFS_DISCARD_MAX_IOPS          (10U)
  63
  64 /* Montonically decreasing minimum length filters after index 0 */
  65 static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
  66         0,
  67         BTRFS_ASYNC_DISCARD_MAX_FILTER,
  68         BTRFS_ASYNC_DISCARD_MIN_FILTER
  69 };
  70
  71 static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
  72                                           struct btrfs_block_group *block_group)
  73 {
  74         return &discard_ctl->discard_list[block_group->discard_index];
  75 }
  76
  77 static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
  78                                   struct btrfs_block_group *block_group)
  79 {
  80         if (!btrfs_run_discard_work(discard_ctl))
  81                 return;
  82
  83         if (list_empty(&block_group->discard_list) ||
  84             block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
  85                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
  86                         block_group->discard_index = BTRFS_DISCARD_INDEX_START;
  87                 block_group->discard_eligible_time = (ktime_get_ns() +
  88                                                       BTRFS_DISCARD_DELAY);
  89                 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
  90         }
  91
  92         list_move_tail(&block_group->discard_list,
  93                        get_discard_list(discard_ctl, block_group));
  94 }
  95
  96 static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
  97                                 struct btrfs_block_group *block_group)
  98 {
  99         if (!btrfs_is_block_group_data_only(block_group))
 100                 return;
 101
 102         spin_lock(&discard_ctl->lock);
 103         __add_to_discard_list(discard_ctl, block_group);
 104         spin_unlock(&discard_ctl->lock);
 105 }
 106
 107 static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
 108                                        struct btrfs_block_group *block_group)
 109 {
 110         spin_lock(&discard_ctl->lock);
 111
 112         if (!btrfs_run_discard_work(discard_ctl)) {
 113                 spin_unlock(&discard_ctl->lock);
 114                 return;
 115         }
 116
 117         list_del_init(&block_group->discard_list);
 118
 119         block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
 120         block_group->discard_eligible_time = (ktime_get_ns() +
 121                                               BTRFS_DISCARD_UNUSED_DELAY);
 122         block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
 123         list_add_tail(&block_group->discard_list,
 124                       &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
 125
 126         spin_unlock(&discard_ctl->lock);
 127 }
 128
 129 static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
 130                                      struct btrfs_block_group *block_group)
 131 {
 132         bool running = false;
 133
 134         spin_lock(&discard_ctl->lock);
 135
 136         if (block_group == discard_ctl->block_group) {
 137                 running = true;
 138                 discard_ctl->block_group = NULL;
 139         }
 140
 141         block_group->discard_eligible_time = 0;
 142         list_del_init(&block_group->discard_list);
 143
 144         spin_unlock(&discard_ctl->lock);
 145
 146         return running;
 147 }
 148
 149 /**
 150  * find_next_block_group - find block_group that's up next for discarding
 151  * @discard_ctl: discard control
 152  * @now: current time
 153  *
 154  * Iterate over the discard lists to find the next block_group up for
 155  * discarding checking the discard_eligible_time of block_group.
 156  */
 157 static struct btrfs_block_group *find_next_block_group(
 158                                         struct btrfs_discard_ctl *discard_ctl,
 159                                         u64 now)
 160 {
 161         struct btrfs_block_group *ret_block_group = NULL, *block_group;
 162         int i;
 163
 164         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
 165                 struct list_head *discard_list = &discard_ctl->discard_list[i];
 166
 167                 if (!list_empty(discard_list)) {
 168                         block_group = list_first_entry(discard_list,
 169                                                        struct btrfs_block_group,
 170                                                        discard_list);
 171
 172                         if (!ret_block_group)
 173                                 ret_block_group = block_group;
 174
 175                         if (ret_block_group->discard_eligible_time < now)
 176                                 break;
 177
 178                         if (ret_block_group->discard_eligible_time >
 179                             block_group->discard_eligible_time)
 180                                 ret_block_group = block_group;
 181                 }
 182         }
 183
 184         return ret_block_group;
 185 }
 186
 187 /**
 188  * peek_discard_list - wrap find_next_block_group()
 189  * @discard_ctl: discard control
 190  * @discard_state: the discard_state of the block_group after state management
 191  * @discard_index: the discard_index of the block_group after state management
 192  *
 193  * This wraps find_next_block_group() and sets the block_group to be in use.
 194  * discard_state's control flow is managed here.  Variables related to
 195  * discard_state are reset here as needed (eg discard_cursor).  @discard_state
 196  * and @discard_index are remembered as it may change while we're discarding,
 197  * but we want the discard to execute in the context determined here.
 198  */
 199 static struct btrfs_block_group *peek_discard_list(
 200                                         struct btrfs_discard_ctl *discard_ctl,
 201                                         enum btrfs_discard_state *discard_state,
 202                                         int *discard_index, u64 now)
 203 {
 204         struct btrfs_block_group *block_group;
 205
 206         spin_lock(&discard_ctl->lock);
 207 again:
 208         block_group = find_next_block_group(discard_ctl, now);
 209
 210         if (block_group && now >= block_group->discard_eligible_time) {
 211                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
 212                     block_group->used != 0) {
 213                         if (btrfs_is_block_group_data_only(block_group))
 214                                 __add_to_discard_list(discard_ctl, block_group);
 215                         else
 216                                 list_del_init(&block_group->discard_list);
 217                         goto again;
 218                 }
 219                 if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
 220                         block_group->discard_cursor = block_group->start;
 221                         block_group->discard_state = BTRFS_DISCARD_EXTENTS;
 222                 }
 223                 discard_ctl->block_group = block_group;
 224         }
 225         if (block_group) {
 226                 *discard_state = block_group->discard_state;
 227                 *discard_index = block_group->discard_index;
 228         }
 229         spin_unlock(&discard_ctl->lock);
 230
 231         return block_group;
 232 }
 233
 234 /**
 235  * btrfs_discard_check_filter - updates a block groups filters
 236  * @block_group: block group of interest
 237  * @bytes: recently freed region size after coalescing
 238  *
 239  * Async discard maintains multiple lists with progressively smaller filters
 240  * to prioritize discarding based on size.  Should a free space that matches
 241  * a larger filter be returned to the free_space_cache, prioritize that discard
 242  * by moving @block_group to the proper filter.
 243  */
 244 void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
 245                                 u64 bytes)
 246 {
 247         struct btrfs_discard_ctl *discard_ctl;
 248
 249         if (!block_group ||
 250             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 251                 return;
 252
 253         discard_ctl = &block_group->fs_info->discard_ctl;
 254
 255         if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
 256             bytes >= discard_minlen[block_group->discard_index - 1]) {
 257                 int i;
 258
 259                 remove_from_discard_list(discard_ctl, block_group);
 260
 261                 for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
 262                      i++) {
 263                         if (bytes >= discard_minlen[i]) {
 264                                 block_group->discard_index = i;
 265                                 add_to_discard_list(discard_ctl, block_group);
 266                                 break;
 267                         }
 268                 }
 269         }
 270 }
 271
 272 /**
 273  * btrfs_update_discard_index - moves a block group along the discard lists
 274  * @discard_ctl: discard control
 275  * @block_group: block_group of interest
 276  *
 277  * Increment @block_group's discard_index.  If it falls of the list, let it be.
 278  * Otherwise add it back to the appropriate list.
 279  */
 280 static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
 281                                        struct btrfs_block_group *block_group)
 282 {
 283         block_group->discard_index++;
 284         if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
 285                 block_group->discard_index = 1;
 286                 return;
 287         }
 288
 289         add_to_discard_list(discard_ctl, block_group);
 290 }
 291
 292 /**
 293  * btrfs_discard_cancel_work - remove a block_group from the discard lists
 294  * @discard_ctl: discard control
 295  * @block_group: block_group of interest
 296  *
 297  * This removes @block_group from the discard lists.  If necessary, it waits on
 298  * the current work and then reschedules the delayed work.
 299  */
 300 void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
 301                                struct btrfs_block_group *block_group)
 302 {
 303         if (remove_from_discard_list(discard_ctl, block_group)) {
 304                 cancel_delayed_work_sync(&discard_ctl->work);
 305                 btrfs_discard_schedule_work(discard_ctl, true);
 306         }
 307 }
 308
 309 /**
 310  * btrfs_discard_queue_work - handles queuing the block_groups
 311  * @discard_ctl: discard control
 312  * @block_group: block_group of interest
 313  *
 314  * This maintains the LRU order of the discard lists.
 315  */
 316 void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
 317                               struct btrfs_block_group *block_group)
 318 {
 319         if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 320                 return;
 321
 322         if (block_group->used == 0)
 323                 add_to_discard_unused_list(discard_ctl, block_group);
 324         else
 325                 add_to_discard_list(discard_ctl, block_group);
 326
 327         if (!delayed_work_pending(&discard_ctl->work))
 328                 btrfs_discard_schedule_work(discard_ctl, false);
 329 }
 330
 331 static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 332                                           u64 now, bool override)
 333 {
 334         struct btrfs_block_group *block_group;
 335
 336         if (!btrfs_run_discard_work(discard_ctl))
 337                 return;
 338         if (!override && delayed_work_pending(&discard_ctl->work))
 339                 return;
 340
 341         block_group = find_next_block_group(discard_ctl, now);
 342         if (block_group) {
 343                 u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
 344                 u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
 345
 346                 /*
 347                  * A single delayed workqueue item is responsible for
 348                  * discarding, so we can manage the bytes rate limit by keeping
 349                  * track of the previous discard.
 350                  */
 351                 if (kbps_limit && discard_ctl->prev_discard) {
 352                         u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
 353                         u64 bps_delay = div64_u64(discard_ctl->prev_discard *
 354                                                   NSEC_PER_SEC, bps_limit);
 355
 356                         delay = max(delay, bps_delay);
 357                 }
 358
 359                 /*
 360                  * This timeout is to hopefully prevent immediate discarding
 361                  * in a recently allocated block group.
 362                  */
 363                 if (now < block_group->discard_eligible_time) {
 364                         u64 bg_timeout = block_group->discard_eligible_time - now;
 365
 366                         delay = max(delay, bg_timeout);
 367                 }
 368
 369                 if (override && discard_ctl->prev_discard) {
 370                         u64 elapsed = now - discard_ctl->prev_discard_time;
 371
 372                         if (delay > elapsed)
 373                                 delay -= elapsed;
 374                         else
 375                                 delay = 0;
 376                 }
 377
 378                 mod_delayed_work(discard_ctl->discard_workers,
 379                                  &discard_ctl->work, nsecs_to_jiffies(delay));
 380         }
 381 }
 382
 383 /*
 384  * btrfs_discard_schedule_work - responsible for scheduling the discard work
 385  * @discard_ctl:  discard control
 386  * @override:     override the current timer
 387  *
 388  * Discards are issued by a delayed workqueue item.  @override is used to
 389  * update the current delay as the baseline delay interval is reevaluated on
 390  * transaction commit.  This is also maxed with any other rate limit.
 391  */
 392 void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 393                                  bool override)
 394 {
 395         const u64 now = ktime_get_ns();
 396
 397         spin_lock(&discard_ctl->lock);
 398         __btrfs_discard_schedule_work(discard_ctl, now, override);
 399         spin_unlock(&discard_ctl->lock);
 400 }
 401
 402 /**
 403  * btrfs_finish_discard_pass - determine next step of a block_group
 404  * @discard_ctl: discard control
 405  * @block_group: block_group of interest
 406  *
 407  * This determines the next step for a block group after it's finished going
 408  * through a pass on a discard list.  If it is unused and fully trimmed, we can
 409  * mark it unused and send it to the unused_bgs path.  Otherwise, pass it onto
 410  * the appropriate filter list or let it fall off.
 411  */
 412 static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
 413                                       struct btrfs_block_group *block_group)
 414 {
 415         remove_from_discard_list(discard_ctl, block_group);
 416
 417         if (block_group->used == 0) {
 418                 if (btrfs_is_free_space_trimmed(block_group))
 419                         btrfs_mark_bg_unused(block_group);
 420                 else
 421                         add_to_discard_unused_list(discard_ctl, block_group);
 422         } else {
 423                 btrfs_update_discard_index(discard_ctl, block_group);
 424         }
 425 }
 426
 427 /**
 428  * btrfs_discard_workfn - discard work function
 429  * @work: work
 430  *
 431  * This finds the next block_group to start discarding and then discards a
 432  * single region.  It does this in a two-pass fashion: first extents and second
 433  * bitmaps.  Completely discarded block groups are sent to the unused_bgs path.
 434  */
 435 static void btrfs_discard_workfn(struct work_struct *work)
 436 {
 437         struct btrfs_discard_ctl *discard_ctl;
 438         struct btrfs_block_group *block_group;
 439         enum btrfs_discard_state discard_state;
 440         int discard_index = 0;
 441         u64 trimmed = 0;
 442         u64 minlen = 0;
 443         u64 now = ktime_get_ns();
 444
 445         discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
 446
 447         block_group = peek_discard_list(discard_ctl, &discard_state,
 448                                         &discard_index, now);
 449         if (!block_group || !btrfs_run_discard_work(discard_ctl))
 450                 return;
 451         if (now < block_group->discard_eligible_time) {
 452                 btrfs_discard_schedule_work(discard_ctl, false);
 453                 return;
 454         }
 455
 456         /* Perform discarding */
 457         minlen = discard_minlen[discard_index];
 458
 459         if (discard_state == BTRFS_DISCARD_BITMAPS) {
 460                 u64 maxlen = 0;
 461
 462                 /*
 463                  * Use the previous levels minimum discard length as the max
 464                  * length filter.  In the case something is added to make a
 465                  * region go beyond the max filter, the entire bitmap is set
 466                  * back to BTRFS_TRIM_STATE_UNTRIMMED.
 467                  */
 468                 if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
 469                         maxlen = discard_minlen[discard_index - 1];
 470
 471                 btrfs_trim_block_group_bitmaps(block_group, &trimmed,
 472                                        block_group->discard_cursor,
 473                                        btrfs_block_group_end(block_group),
 474                                        minlen, maxlen, true);
 475                 discard_ctl->discard_bitmap_bytes += trimmed;
 476         } else {
 477                 btrfs_trim_block_group_extents(block_group, &trimmed,
 478                                        block_group->discard_cursor,
 479                                        btrfs_block_group_end(block_group),
 480                                        minlen, true);
 481                 discard_ctl->discard_extent_bytes += trimmed;
 482         }
 483
 484         /* Determine next steps for a block_group */
 485         if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
 486                 if (discard_state == BTRFS_DISCARD_BITMAPS) {
 487                         btrfs_finish_discard_pass(discard_ctl, block_group);
 488                 } else {
 489                         block_group->discard_cursor = block_group->start;
 490                         spin_lock(&discard_ctl->lock);
 491                         if (block_group->discard_state !=
 492                             BTRFS_DISCARD_RESET_CURSOR)
 493                                 block_group->discard_state =
 494                                                         BTRFS_DISCARD_BITMAPS;
 495                         spin_unlock(&discard_ctl->lock);
 496                 }
 497         }
 498
 499         now = ktime_get_ns();
 500         spin_lock(&discard_ctl->lock);
 501         discard_ctl->prev_discard = trimmed;
 502         discard_ctl->prev_discard_time = now;
 503         discard_ctl->block_group = NULL;
 504         __btrfs_discard_schedule_work(discard_ctl, now, false);
 505         spin_unlock(&discard_ctl->lock);
 506 }
 507
 508 /**
 509  * btrfs_run_discard_work - determines if async discard should be running
 510  * @discard_ctl: discard control
 511  *
 512  * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 513  */
 514 bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
 515 {
 516         struct btrfs_fs_info *fs_info = container_of(discard_ctl,
 517                                                      struct btrfs_fs_info,
 518                                                      discard_ctl);
 519
 520         return (!(fs_info->sb->s_flags & SB_RDONLY) &&
 521                 test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
 522 }
 523
 524 /**
 525  * btrfs_discard_calc_delay - recalculate the base delay
 526  * @discard_ctl: discard control
 527  *
 528  * Recalculate the base delay which is based off the total number of
 529  * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
 530  * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 531  */
 532 void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
 533 {
 534         s32 discardable_extents;
 535         s64 discardable_bytes;
 536         u32 iops_limit;
 537         unsigned long delay;
 538
 539         discardable_extents = atomic_read(&discard_ctl->discardable_extents);
 540         if (!discardable_extents)
 541                 return;
 542
 543         spin_lock(&discard_ctl->lock);
 544
 545         /*
 546          * The following is to fix a potential -1 discrepenancy that we're not
 547          * sure how to reproduce. But given that this is the only place that
 548          * utilizes these numbers and this is only called by from
 549          * btrfs_finish_extent_commit() which is synchronized, we can correct
 550          * here.
 551          */
 552         if (discardable_extents < 0)
 553                 atomic_add(-discardable_extents,
 554                            &discard_ctl->discardable_extents);
 555
 556         discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
 557         if (discardable_bytes < 0)
 558                 atomic64_add(-discardable_bytes,
 559                              &discard_ctl->discardable_bytes);
 560
 561         if (discardable_extents <= 0) {
 562                 spin_unlock(&discard_ctl->lock);
 563                 return;
 564         }
 565
 566         iops_limit = READ_ONCE(discard_ctl->iops_limit);
 567         if (iops_limit)
 568                 delay = MSEC_PER_SEC / iops_limit;
 569         else
 570                 delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
 571
 572         delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
 573                       BTRFS_DISCARD_MAX_DELAY_MSEC);
 574         discard_ctl->delay_ms = delay;
 575
 576         spin_unlock(&discard_ctl->lock);
 577 }
 578
 579 /**
 580  * btrfs_discard_update_discardable - propagate discard counters
 581  * @block_group: block_group of interest
 582  *
 583  * This propagates deltas of counters up to the discard_ctl.  It maintains a
 584  * current counter and a previous counter passing the delta up to the global
 585  * stat.  Then the current counter value becomes the previous counter value.
 586  */
 587 void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
 588 {
 589         struct btrfs_free_space_ctl *ctl;
 590         struct btrfs_discard_ctl *discard_ctl;
 591         s32 extents_delta;
 592         s64 bytes_delta;
 593
 594         if (!block_group ||
 595             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
 596             !btrfs_is_block_group_data_only(block_group))
 597                 return;
 598
 599         ctl = block_group->free_space_ctl;
 600         discard_ctl = &block_group->fs_info->discard_ctl;
 601
 602         lockdep_assert_held(&ctl->tree_lock);
 603         extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
 604                         ctl->discardable_extents[BTRFS_STAT_PREV];
 605         if (extents_delta) {
 606                 atomic_add(extents_delta, &discard_ctl->discardable_extents);
 607                 ctl->discardable_extents[BTRFS_STAT_PREV] =
 608                         ctl->discardable_extents[BTRFS_STAT_CURR];
 609         }
 610
 611         bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
 612                       ctl->discardable_bytes[BTRFS_STAT_PREV];
 613         if (bytes_delta) {
 614                 atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
 615                 ctl->discardable_bytes[BTRFS_STAT_PREV] =
 616                         ctl->discardable_bytes[BTRFS_STAT_CURR];
 617         }
 618 }
 619
 620 /**
 621  * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
 622  * @fs_info: fs_info of interest
 623  *
 624  * The unused_bgs list needs to be punted to the discard lists because the
 625  * order of operations is changed.  In the normal sychronous discard path, the
 626  * block groups are trimmed via a single large trim in transaction commit.  This
 627  * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
 628  * it must be done before going down the unused_bgs path.
 629  */
 630 void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
 631 {
 632         struct btrfs_block_group *block_group, *next;
 633
 634         spin_lock(&fs_info->unused_bgs_lock);
 635         /* We enabled async discard, so punt all to the queue */
 636         list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
 637                                  bg_list) {
 638                 list_del_init(&block_group->bg_list);
 639                 btrfs_put_block_group(block_group);
 640                 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
 641         }
 642         spin_unlock(&fs_info->unused_bgs_lock);
 643 }
 644
 645 /**
 646  * btrfs_discard_purge_list - purge discard lists
 647  * @discard_ctl: discard control
 648  *
 649  * If we are disabling async discard, we may have intercepted block groups that
 650  * are completely free and ready for the unused_bgs path.  As discarding will
 651  * now happen in transaction commit or not at all, we can safely mark the
 652  * corresponding block groups as unused and they will be sent on their merry
 653  * way to the unused_bgs list.
 654  */
 655 static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
 656 {
 657         struct btrfs_block_group *block_group, *next;
 658         int i;
 659
 660         spin_lock(&discard_ctl->lock);
 661         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
 662                 list_for_each_entry_safe(block_group, next,
 663                                          &discard_ctl->discard_list[i],
 664                                          discard_list) {
 665                         list_del_init(&block_group->discard_list);
 666                         spin_unlock(&discard_ctl->lock);
 667                         if (block_group->used == 0)
 668                                 btrfs_mark_bg_unused(block_group);
 669                         spin_lock(&discard_ctl->lock);
 670                 }
 671         }
 672         spin_unlock(&discard_ctl->lock);
 673 }
 674
 675 void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
 676 {
 677         if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
 678                 btrfs_discard_cleanup(fs_info);
 679                 return;
 680         }
 681
 682         btrfs_discard_punt_unused_bgs_list(fs_info);
 683
 684         set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 685 }
 686
 687 void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
 688 {
 689         clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 690 }
 691
 692 void btrfs_discard_init(struct btrfs_fs_info *fs_info)
 693 {
 694         struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
 695         int i;
 696
 697         spin_lock_init(&discard_ctl->lock);
 698         INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
 699
 700         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
 701                 INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
 702
 703         discard_ctl->prev_discard = 0;
 704         discard_ctl->prev_discard_time = 0;
 705         atomic_set(&discard_ctl->discardable_extents, 0);
 706         atomic64_set(&discard_ctl->discardable_bytes, 0);
 707         discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
 708         discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
 709         discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
 710         discard_ctl->kbps_limit = 0;
 711         discard_ctl->discard_extent_bytes = 0;
 712         discard_ctl->discard_bitmap_bytes = 0;
 713         atomic64_set(&discard_ctl->discard_bytes_saved, 0);
 714 }
 715
 716 void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
 717 {
 718         btrfs_discard_stop(fs_info);
 719         cancel_delayed_work_sync(&fs_info->discard_ctl.work);
 720         btrfs_discard_purge_list(&fs_info->discard_ctl);
 721 }