fs/btrfs/transaction.c

   1 /*
   2  * Copyright (C) 2007 Oracle.  All rights reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License v2 as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public
  14  * License along with this program; if not, write to the
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16  * Boston, MA 021110-1307, USA.
  17  */
  18
  19 #include <linux/fs.h>
  20 #include <linux/slab.h>
  21 #include <linux/sched.h>
  22 #include <linux/writeback.h>
  23 #include <linux/pagemap.h>
  24 #include <linux/blkdev.h>
  25 #include "ctree.h"
  26 #include "disk-io.h"
  27 #include "transaction.h"
  28 #include "locking.h"
  29 #include "tree-log.h"
  30
  31 #define BTRFS_ROOT_TRANS_TAG 0
  32
  33 static noinline void put_transaction(struct btrfs_transaction *transaction)
  34 {
  35         WARN_ON(atomic_read(&transaction->use_count) == 0);
  36         if (atomic_dec_and_test(&transaction->use_count)) {
  37                 BUG_ON(!list_empty(&transaction->list));
  38                 memset(transaction, 0, sizeof(*transaction));
  39                 kmem_cache_free(btrfs_transaction_cachep, transaction);
  40         }
  41 }
  42
  43 static noinline void switch_commit_root(struct btrfs_root *root)
  44 {
  45         free_extent_buffer(root->commit_root);
  46         root->commit_root = btrfs_root_node(root);
  47 }
  48
  49 /*
  50  * either allocate a new transaction or hop into the existing one
  51  */
  52 static noinline int join_transaction(struct btrfs_root *root, int nofail)
  53 {
  54         struct btrfs_transaction *cur_trans;
  55
  56         spin_lock(&root->fs_info->trans_lock);
  57         if (root->fs_info->trans_no_join) {
  58                 if (!nofail) {
  59                         spin_unlock(&root->fs_info->trans_lock);
  60                         return -EBUSY;
  61                 }
  62         }
  63
  64         cur_trans = root->fs_info->running_transaction;
  65         if (cur_trans) {
  66                 atomic_inc(&cur_trans->use_count);
  67                 atomic_inc(&cur_trans->num_writers);
  68                 cur_trans->num_joined++;
  69                 spin_unlock(&root->fs_info->trans_lock);
  70                 return 0;
  71         }
  72         spin_unlock(&root->fs_info->trans_lock);
  73
  74         cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
  75         if (!cur_trans)
  76                 return -ENOMEM;
  77         spin_lock(&root->fs_info->trans_lock);
  78         if (root->fs_info->running_transaction) {
  79                 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
  80                 cur_trans = root->fs_info->running_transaction;
  81                 atomic_inc(&cur_trans->use_count);
  82                 atomic_inc(&cur_trans->num_writers);
  83                 cur_trans->num_joined++;
  84                 spin_unlock(&root->fs_info->trans_lock);
  85                 return 0;
  86         }
  87         atomic_set(&cur_trans->num_writers, 1);
  88         cur_trans->num_joined = 0;
  89         init_waitqueue_head(&cur_trans->writer_wait);
  90         init_waitqueue_head(&cur_trans->commit_wait);
  91         cur_trans->in_commit = 0;
  92         cur_trans->blocked = 0;
  93         /*
  94          * One for this trans handle, one so it will live on until we
  95          * commit the transaction.
  96          */
  97         atomic_set(&cur_trans->use_count, 2);
  98         cur_trans->commit_done = 0;
  99         cur_trans->start_time = get_seconds();
 100
 101         cur_trans->delayed_refs.root = RB_ROOT;
 102         cur_trans->delayed_refs.num_entries = 0;
 103         cur_trans->delayed_refs.num_heads_ready = 0;
 104         cur_trans->delayed_refs.num_heads = 0;
 105         cur_trans->delayed_refs.flushing = 0;
 106         cur_trans->delayed_refs.run_delayed_start = 0;
 107         spin_lock_init(&cur_trans->commit_lock);
 108         spin_lock_init(&cur_trans->delayed_refs.lock);
 109
 110         INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 111         list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
 112         extent_io_tree_init(&cur_trans->dirty_pages,
 113                              root->fs_info->btree_inode->i_mapping,
 114                              GFP_NOFS);
 115         root->fs_info->generation++;
 116         cur_trans->transid = root->fs_info->generation;
 117         root->fs_info->running_transaction = cur_trans;
 118         spin_unlock(&root->fs_info->trans_lock);
 119
 120         return 0;
 121 }
 122
 123 /*
 124  * this does all the record keeping required to make sure that a reference
 125  * counted root is properly recorded in a given transaction.  This is required
 126  * to make sure the old root from before we joined the transaction is deleted
 127  * when the transaction commits
 128  */
 129 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
 130                                struct btrfs_root *root)
 131 {
 132         if (root->ref_cows && root->last_trans < trans->transid) {
 133                 WARN_ON(root == root->fs_info->extent_root);
 134                 WARN_ON(root->commit_root != root->node);
 135
 136                 spin_lock(&root->fs_info->fs_roots_radix_lock);
 137                 if (root->last_trans == trans->transid) {
 138                         spin_unlock(&root->fs_info->fs_roots_radix_lock);
 139                         return 0;
 140                 }
 141                 root->last_trans = trans->transid;
 142                 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
 143                            (unsigned long)root->root_key.objectid,
 144                            BTRFS_ROOT_TRANS_TAG);
 145                 spin_unlock(&root->fs_info->fs_roots_radix_lock);
 146                 btrfs_init_reloc_root(trans, root);
 147         }
 148         return 0;
 149 }
 150
 151 /* wait for commit against the current transaction to become unblocked
 152  * when this is done, it is safe to start a new transaction, but the current
 153  * transaction might not be fully on disk.
 154  */
 155 static void wait_current_trans(struct btrfs_root *root)
 156 {
 157         struct btrfs_transaction *cur_trans;
 158
 159         spin_lock(&root->fs_info->trans_lock);
 160         cur_trans = root->fs_info->running_transaction;
 161         if (cur_trans && cur_trans->blocked) {
 162                 DEFINE_WAIT(wait);
 163                 atomic_inc(&cur_trans->use_count);
 164                 spin_unlock(&root->fs_info->trans_lock);
 165                 while (1) {
 166                         prepare_to_wait(&root->fs_info->transaction_wait, &wait,
 167                                         TASK_UNINTERRUPTIBLE);
 168                         if (!cur_trans->blocked)
 169                                 break;
 170                         schedule();
 171                 }
 172                 finish_wait(&root->fs_info->transaction_wait, &wait);
 173                 put_transaction(cur_trans);
 174         } else {
 175                 spin_unlock(&root->fs_info->trans_lock);
 176         }
 177 }
 178
 179 enum btrfs_trans_type {
 180         TRANS_START,
 181         TRANS_JOIN,
 182         TRANS_USERSPACE,
 183         TRANS_JOIN_NOLOCK,
 184 };
 185
 186 static int may_wait_transaction(struct btrfs_root *root, int type)
 187 {
 188         if (root->fs_info->log_root_recovering)
 189                 return 0;
 190
 191         if (type == TRANS_USERSPACE)
 192                 return 1;
 193
 194         if (type == TRANS_START &&
 195             !atomic_read(&root->fs_info->open_ioctl_trans))
 196                 return 1;
 197
 198         return 0;
 199 }
 200
 201 static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 202                                                     u64 num_items, int type)
 203 {
 204         struct btrfs_trans_handle *h;
 205         struct btrfs_transaction *cur_trans;
 206         int retries = 0;
 207         int ret;
 208
 209         if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
 210                 return ERR_PTR(-EROFS);
 211
 212         if (current->journal_info) {
 213                 WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
 214                 h = current->journal_info;
 215                 h->use_count++;
 216                 h->orig_rsv = h->block_rsv;
 217                 h->block_rsv = NULL;
 218                 goto got_it;
 219         }
 220 again:
 221         h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
 222         if (!h)
 223                 return ERR_PTR(-ENOMEM);
 224
 225         if (may_wait_transaction(root, type))
 226                 wait_current_trans(root);
 227
 228         do {
 229                 ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
 230                 if (ret == -EBUSY)
 231                         wait_current_trans(root);
 232         } while (ret == -EBUSY);
 233
 234         if (ret < 0) {
 235                 kmem_cache_free(btrfs_trans_handle_cachep, h);
 236                 return ERR_PTR(ret);
 237         }
 238
 239         cur_trans = root->fs_info->running_transaction;
 240
 241         h->transid = cur_trans->transid;
 242         h->transaction = cur_trans;
 243         h->blocks_used = 0;
 244         h->block_group = 0;
 245         h->bytes_reserved = 0;
 246         h->delayed_ref_updates = 0;
 247         h->use_count = 1;
 248         h->block_rsv = NULL;
 249         h->orig_rsv = NULL;
 250
 251         smp_mb();
 252         if (cur_trans->blocked && may_wait_transaction(root, type)) {
 253                 btrfs_commit_transaction(h, root);
 254                 goto again;
 255         }
 256
 257         if (num_items > 0) {
 258                 ret = btrfs_trans_reserve_metadata(h, root, num_items);
 259                 if (ret == -EAGAIN && !retries) {
 260                         retries++;
 261                         btrfs_commit_transaction(h, root);
 262                         goto again;
 263                 } else if (ret == -EAGAIN) {
 264                         /*
 265                          * We have already retried and got EAGAIN, so really we
 266                          * don't have space, so set ret to -ENOSPC.
 267                          */
 268                         ret = -ENOSPC;
 269                 }
 270
 271                 if (ret < 0) {
 272                         btrfs_end_transaction(h, root);
 273                         return ERR_PTR(ret);
 274                 }
 275         }
 276
 277 got_it:
 278         btrfs_record_root_in_trans(h, root);
 279
 280         if (!current->journal_info && type != TRANS_USERSPACE)
 281                 current->journal_info = h;
 282         return h;
 283 }
 284
 285 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 286                                                    int num_items)
 287 {
 288         return start_transaction(root, num_items, TRANS_START);
 289 }
 290 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
 291 {
 292         return start_transaction(root, 0, TRANS_JOIN);
 293 }
 294
 295 struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
 296 {
 297         return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
 298 }
 299
 300 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
 301 {
 302         return start_transaction(root, 0, TRANS_USERSPACE);
 303 }
 304
 305 /* wait for a transaction commit to be fully complete */
 306 static noinline int wait_for_commit(struct btrfs_root *root,
 307                                     struct btrfs_transaction *commit)
 308 {
 309         DEFINE_WAIT(wait);
 310         while (!commit->commit_done) {
 311                 prepare_to_wait(&commit->commit_wait, &wait,
 312                                 TASK_UNINTERRUPTIBLE);
 313                 if (commit->commit_done)
 314                         break;
 315                 schedule();
 316         }
 317         finish_wait(&commit->commit_wait, &wait);
 318         return 0;
 319 }
 320
 321 int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
 322 {
 323         struct btrfs_transaction *cur_trans = NULL, *t;
 324         int ret;
 325
 326         ret = 0;
 327         if (transid) {
 328                 if (transid <= root->fs_info->last_trans_committed)
 329                         goto out;
 330
 331                 /* find specified transaction */
 332                 spin_lock(&root->fs_info->trans_lock);
 333                 list_for_each_entry(t, &root->fs_info->trans_list, list) {
 334                         if (t->transid == transid) {
 335                                 cur_trans = t;
 336                                 atomic_inc(&cur_trans->use_count);
 337                                 break;
 338                         }
 339                         if (t->transid > transid)
 340                                 break;
 341                 }
 342                 spin_unlock(&root->fs_info->trans_lock);
 343                 ret = -EINVAL;
 344                 if (!cur_trans)
 345                         goto out;  /* bad transid */
 346         } else {
 347                 /* find newest transaction that is committing | committed */
 348                 spin_lock(&root->fs_info->trans_lock);
 349                 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
 350                                             list) {
 351                         if (t->in_commit) {
 352                                 if (t->commit_done)
 353                                         goto out;
 354                                 cur_trans = t;
 355                                 atomic_inc(&cur_trans->use_count);
 356                                 break;
 357                         }
 358                 }
 359                 spin_unlock(&root->fs_info->trans_lock);
 360                 if (!cur_trans)
 361                         goto out;  /* nothing committing|committed */
 362         }
 363
 364         wait_for_commit(root, cur_trans);
 365
 366         put_transaction(cur_trans);
 367         ret = 0;
 368 out:
 369         return ret;
 370 }
 371
 372 #if 0
 373 /*
 374  * rate limit against the drop_snapshot code.  This helps to slow down new
 375  * operations if the drop_snapshot code isn't able to keep up.
 376  */
 377 static void throttle_on_drops(struct btrfs_root *root)
 378 {
 379         struct btrfs_fs_info *info = root->fs_info;
 380         int harder_count = 0;
 381
 382 harder:
 383         if (atomic_read(&info->throttles)) {
 384                 DEFINE_WAIT(wait);
 385                 int thr;
 386                 thr = atomic_read(&info->throttle_gen);
 387
 388                 do {
 389                         prepare_to_wait(&info->transaction_throttle,
 390                                         &wait, TASK_UNINTERRUPTIBLE);
 391                         if (!atomic_read(&info->throttles)) {
 392                                 finish_wait(&info->transaction_throttle, &wait);
 393                                 break;
 394                         }
 395                         schedule();
 396                         finish_wait(&info->transaction_throttle, &wait);
 397                 } while (thr == atomic_read(&info->throttle_gen));
 398                 harder_count++;
 399
 400                 if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
 401                     harder_count < 2)
 402                         goto harder;
 403
 404                 if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
 405                     harder_count < 10)
 406                         goto harder;
 407
 408                 if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
 409                     harder_count < 20)
 410                         goto harder;
 411         }
 412 }
 413 #endif
 414
 415 void btrfs_throttle(struct btrfs_root *root)
 416 {
 417         if (!atomic_read(&root->fs_info->open_ioctl_trans))
 418                 wait_current_trans(root);
 419 }
 420
 421 static int should_end_transaction(struct btrfs_trans_handle *trans,
 422                                   struct btrfs_root *root)
 423 {
 424         int ret;
 425         ret = btrfs_block_rsv_check(trans, root,
 426                                     &root->fs_info->global_block_rsv, 0, 5);
 427         return ret ? 1 : 0;
 428 }
 429
 430 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 431                                  struct btrfs_root *root)
 432 {
 433         struct btrfs_transaction *cur_trans = trans->transaction;
 434         int updates;
 435
 436         smp_mb();
 437         if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
 438                 return 1;
 439
 440         updates = trans->delayed_ref_updates;
 441         trans->delayed_ref_updates = 0;
 442         if (updates)
 443                 btrfs_run_delayed_refs(trans, root, updates);
 444
 445         return should_end_transaction(trans, root);
 446 }
 447
 448 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 449                           struct btrfs_root *root, int throttle, int lock)
 450 {
 451         struct btrfs_transaction *cur_trans = trans->transaction;
 452         struct btrfs_fs_info *info = root->fs_info;
 453         int count = 0;
 454
 455         if (--trans->use_count) {
 456                 trans->block_rsv = trans->orig_rsv;
 457                 return 0;
 458         }
 459
 460         while (count < 4) {
 461                 unsigned long cur = trans->delayed_ref_updates;
 462                 trans->delayed_ref_updates = 0;
 463                 if (cur &&
 464                     trans->transaction->delayed_refs.num_heads_ready > 64) {
 465                         trans->delayed_ref_updates = 0;
 466
 467                         /*
 468                          * do a full flush if the transaction is trying
 469                          * to close
 470                          */
 471                         if (trans->transaction->delayed_refs.flushing)
 472                                 cur = 0;
 473                         btrfs_run_delayed_refs(trans, root, cur);
 474                 } else {
 475                         break;
 476                 }
 477                 count++;
 478         }
 479
 480         btrfs_trans_release_metadata(trans, root);
 481
 482         if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
 483             should_end_transaction(trans, root)) {
 484                 trans->transaction->blocked = 1;
 485                 smp_wmb();
 486         }
 487
 488         if (lock && cur_trans->blocked && !cur_trans->in_commit) {
 489                 if (throttle)
 490                         return btrfs_commit_transaction(trans, root);
 491                 else
 492                         wake_up_process(info->transaction_kthread);
 493         }
 494
 495         WARN_ON(cur_trans != info->running_transaction);
 496         WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
 497         atomic_dec(&cur_trans->num_writers);
 498
 499         smp_mb();
 500         if (waitqueue_active(&cur_trans->writer_wait))
 501                 wake_up(&cur_trans->writer_wait);
 502         put_transaction(cur_trans);
 503
 504         if (current->journal_info == trans)
 505                 current->journal_info = NULL;
 506         memset(trans, 0, sizeof(*trans));
 507         kmem_cache_free(btrfs_trans_handle_cachep, trans);
 508
 509         if (throttle)
 510                 btrfs_run_delayed_iputs(root);
 511
 512         return 0;
 513 }
 514
 515 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
 516                           struct btrfs_root *root)
 517 {
 518         return __btrfs_end_transaction(trans, root, 0, 1);
 519 }
 520
 521 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 522                                    struct btrfs_root *root)
 523 {
 524         return __btrfs_end_transaction(trans, root, 1, 1);
 525 }
 526
 527 int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
 528                                  struct btrfs_root *root)
 529 {
 530         return __btrfs_end_transaction(trans, root, 0, 0);
 531 }
 532
 533 /*
 534  * when btree blocks are allocated, they have some corresponding bits set for
 535  * them in one of two extent_io trees.  This is used to make sure all of
 536  * those extents are sent to disk but does not wait on them
 537  */
 538 int btrfs_write_marked_extents(struct btrfs_root *root,
 539                                struct extent_io_tree *dirty_pages, int mark)
 540 {
 541         int ret;
 542         int err = 0;
 543         int werr = 0;
 544         struct page *page;
 545         struct inode *btree_inode = root->fs_info->btree_inode;
 546         u64 start = 0;
 547         u64 end;
 548         unsigned long index;
 549
 550         while (1) {
 551                 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
 552                                             mark);
 553                 if (ret)
 554                         break;
 555                 while (start <= end) {
 556                         cond_resched();
 557
 558                         index = start >> PAGE_CACHE_SHIFT;
 559                         start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
 560                         page = find_get_page(btree_inode->i_mapping, index);
 561                         if (!page)
 562                                 continue;
 563
 564                         btree_lock_page_hook(page);
 565                         if (!page->mapping) {
 566                                 unlock_page(page);
 567                                 page_cache_release(page);
 568                                 continue;
 569                         }
 570
 571                         if (PageWriteback(page)) {
 572                                 if (PageDirty(page))
 573                                         wait_on_page_writeback(page);
 574                                 else {
 575                                         unlock_page(page);
 576                                         page_cache_release(page);
 577                                         continue;
 578                                 }
 579                         }
 580                         err = write_one_page(page, 0);
 581                         if (err)
 582                                 werr = err;
 583                         page_cache_release(page);
 584                 }
 585         }
 586         if (err)
 587                 werr = err;
 588         return werr;
 589 }
 590
 591 /*
 592  * when btree blocks are allocated, they have some corresponding bits set for
 593  * them in one of two extent_io trees.  This is used to make sure all of
 594  * those extents are on disk for transaction or log commit.  We wait
 595  * on all the pages and clear them from the dirty pages state tree
 596  */
 597 int btrfs_wait_marked_extents(struct btrfs_root *root,
 598                               struct extent_io_tree *dirty_pages, int mark)
 599 {
 600         int ret;
 601         int err = 0;
 602         int werr = 0;
 603         struct page *page;
 604         struct inode *btree_inode = root->fs_info->btree_inode;
 605         u64 start = 0;
 606         u64 end;
 607         unsigned long index;
 608
 609         while (1) {
 610                 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
 611                                             mark);
 612                 if (ret)
 613                         break;
 614
 615                 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
 616                 while (start <= end) {
 617                         index = start >> PAGE_CACHE_SHIFT;
 618                         start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
 619                         page = find_get_page(btree_inode->i_mapping, index);
 620                         if (!page)
 621                                 continue;
 622                         if (PageDirty(page)) {
 623                                 btree_lock_page_hook(page);
 624                                 wait_on_page_writeback(page);
 625                                 err = write_one_page(page, 0);
 626                                 if (err)
 627                                         werr = err;
 628                         }
 629                         wait_on_page_writeback(page);
 630                         page_cache_release(page);
 631                         cond_resched();
 632                 }
 633         }
 634         if (err)
 635                 werr = err;
 636         return werr;
 637 }
 638
 639 /*
 640  * when btree blocks are allocated, they have some corresponding bits set for
 641  * them in one of two extent_io trees.  This is used to make sure all of
 642  * those extents are on disk for transaction or log commit
 643  */
 644 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 645                                 struct extent_io_tree *dirty_pages, int mark)
 646 {
 647         int ret;
 648         int ret2;
 649
 650         ret = btrfs_write_marked_extents(root, dirty_pages, mark);
 651         ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
 652         return ret || ret2;
 653 }
 654
 655 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 656                                      struct btrfs_root *root)
 657 {
 658         if (!trans || !trans->transaction) {
 659                 struct inode *btree_inode;
 660                 btree_inode = root->fs_info->btree_inode;
 661                 return filemap_write_and_wait(btree_inode->i_mapping);
 662         }
 663         return btrfs_write_and_wait_marked_extents(root,
 664                                            &trans->transaction->dirty_pages,
 665                                            EXTENT_DIRTY);
 666 }
 667
 668 /*
 669  * this is used to update the root pointer in the tree of tree roots.
 670  *
 671  * But, in the case of the extent allocation tree, updating the root
 672  * pointer may allocate blocks which may change the root of the extent
 673  * allocation tree.
 674  *
 675  * So, this loops and repeats and makes sure the cowonly root didn't
 676  * change while the root pointer was being updated in the metadata.
 677  */
 678 static int update_cowonly_root(struct btrfs_trans_handle *trans,
 679                                struct btrfs_root *root)
 680 {
 681         int ret;
 682         u64 old_root_bytenr;
 683         u64 old_root_used;
 684         struct btrfs_root *tree_root = root->fs_info->tree_root;
 685
 686         old_root_used = btrfs_root_used(&root->root_item);
 687         btrfs_write_dirty_block_groups(trans, root);
 688
 689         while (1) {
 690                 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
 691                 if (old_root_bytenr == root->node->start &&
 692                     old_root_used == btrfs_root_used(&root->root_item))
 693                         break;
 694
 695                 btrfs_set_root_node(&root->root_item, root->node);
 696                 ret = btrfs_update_root(trans, tree_root,
 697                                         &root->root_key,
 698                                         &root->root_item);
 699                 BUG_ON(ret);
 700
 701                 old_root_used = btrfs_root_used(&root->root_item);
 702                 ret = btrfs_write_dirty_block_groups(trans, root);
 703                 BUG_ON(ret);
 704         }
 705
 706         if (root != root->fs_info->extent_root)
 707                 switch_commit_root(root);
 708
 709         return 0;
 710 }
 711
 712 /*
 713  * update all the cowonly tree roots on disk
 714  */
 715 static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 716                                          struct btrfs_root *root)
 717 {
 718         struct btrfs_fs_info *fs_info = root->fs_info;
 719         struct list_head *next;
 720         struct extent_buffer *eb;
 721         int ret;
 722
 723         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 724         BUG_ON(ret);
 725
 726         eb = btrfs_lock_root_node(fs_info->tree_root);
 727         btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
 728         btrfs_tree_unlock(eb);
 729         free_extent_buffer(eb);
 730
 731         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 732         BUG_ON(ret);
 733
 734         while (!list_empty(&fs_info->dirty_cowonly_roots)) {
 735                 next = fs_info->dirty_cowonly_roots.next;
 736                 list_del_init(next);
 737                 root = list_entry(next, struct btrfs_root, dirty_list);
 738
 739                 update_cowonly_root(trans, root);
 740         }
 741
 742         down_write(&fs_info->extent_commit_sem);
 743         switch_commit_root(fs_info->extent_root);
 744         up_write(&fs_info->extent_commit_sem);
 745
 746         return 0;
 747 }
 748
 749 /*
 750  * dead roots are old snapshots that need to be deleted.  This allocates
 751  * a dirty root struct and adds it into the list of dead roots that need to
 752  * be deleted
 753  */
 754 int btrfs_add_dead_root(struct btrfs_root *root)
 755 {
 756         spin_lock(&root->fs_info->trans_lock);
 757         list_add(&root->root_list, &root->fs_info->dead_roots);
 758         spin_unlock(&root->fs_info->trans_lock);
 759         return 0;
 760 }
 761
 762 /*
 763  * update all the cowonly tree roots on disk
 764  */
 765 static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
 766                                     struct btrfs_root *root)
 767 {
 768         struct btrfs_root *gang[8];
 769         struct btrfs_fs_info *fs_info = root->fs_info;
 770         int i;
 771         int ret;
 772         int err = 0;
 773
 774         spin_lock(&fs_info->fs_roots_radix_lock);
 775         while (1) {
 776                 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
 777                                                  (void **)gang, 0,
 778                                                  ARRAY_SIZE(gang),
 779                                                  BTRFS_ROOT_TRANS_TAG);
 780                 if (ret == 0)
 781                         break;
 782                 for (i = 0; i < ret; i++) {
 783                         root = gang[i];
 784                         radix_tree_tag_clear(&fs_info->fs_roots_radix,
 785                                         (unsigned long)root->root_key.objectid,
 786                                         BTRFS_ROOT_TRANS_TAG);
 787                         spin_unlock(&fs_info->fs_roots_radix_lock);
 788
 789                         btrfs_free_log(trans, root);
 790                         btrfs_update_reloc_root(trans, root);
 791                         btrfs_orphan_commit_root(trans, root);
 792
 793                         if (root->commit_root != root->node) {
 794                                 switch_commit_root(root);
 795                                 btrfs_set_root_node(&root->root_item,
 796                                                     root->node);
 797                         }
 798
 799                         err = btrfs_update_root(trans, fs_info->tree_root,
 800                                                 &root->root_key,
 801                                                 &root->root_item);
 802                         spin_lock(&fs_info->fs_roots_radix_lock);
 803                         if (err)
 804                                 break;
 805                 }
 806         }
 807         spin_unlock(&fs_info->fs_roots_radix_lock);
 808         return err;
 809 }
 810
 811 /*
 812  * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 813  * otherwise every leaf in the btree is read and defragged.
 814  */
 815 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
 816 {
 817         struct btrfs_fs_info *info = root->fs_info;
 818         struct btrfs_trans_handle *trans;
 819         int ret;
 820         unsigned long nr;
 821
 822         if (xchg(&root->defrag_running, 1))
 823                 return 0;
 824
 825         while (1) {
 826                 trans = btrfs_start_transaction(root, 0);
 827                 if (IS_ERR(trans))
 828                         return PTR_ERR(trans);
 829
 830                 ret = btrfs_defrag_leaves(trans, root, cacheonly);
 831
 832                 nr = trans->blocks_used;
 833                 btrfs_end_transaction(trans, root);
 834                 btrfs_btree_balance_dirty(info->tree_root, nr);
 835                 cond_resched();
 836
 837                 if (root->fs_info->closing || ret != -EAGAIN)
 838                         break;
 839         }
 840         root->defrag_running = 0;
 841         return ret;
 842 }
 843
 844 #if 0
 845 /*
 846  * when dropping snapshots, we generate a ton of delayed refs, and it makes
 847  * sense not to join the transaction while it is trying to flush the current
 848  * queue of delayed refs out.
 849  *
 850  * This is used by the drop snapshot code only
      *
      * Sleeps on info->transaction_wait for as long as there is a running
      * transaction whose delayed_refs.flushing flag is set.  Always returns 0.
      *
      * NOTE: this function sits inside an #if 0 region and is not compiled.
      * NOTE(review): it locks info->trans_mutex, while the compiled code in
      * this part of the file uses fs_info->trans_lock -- check before
      * re-enabling.
 851  */
 852 static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
 853 {
 854         DEFINE_WAIT(wait);
 855
 856         mutex_lock(&info->trans_mutex);
 857         while (info->running_transaction &&
 858                info->running_transaction->delayed_refs.flushing) {
                     /* queue ourselves before dropping the mutex */
 859                 prepare_to_wait(&info->transaction_wait, &wait,
 860                                 TASK_UNINTERRUPTIBLE);
 861                 mutex_unlock(&info->trans_mutex);
 862
 863                 schedule();
 864
                     /* the loop condition is re-checked with the mutex held */
 865                 mutex_lock(&info->trans_mutex);
 866                 finish_wait(&info->transaction_wait, &wait);
 867         }
 868         mutex_unlock(&info->trans_mutex);
 869         return 0;
 870 }
 871
 872 /*
 873  * Drop one dead root: call btrfs_drop_snapshot on it until it no longer
 874  * returns -EAGAIN, then delete its root item and free the root
      *
      * Each pass runs in its own transaction handle on the tree root.  After a
      * pass that returned -EAGAIN the root item is written back with
      * btrfs_update_root() and the handle is ended before the next pass.
      * Every failure ends in BUG_ON, so the value returned is 0.
      *
      * NOTE: this function sits inside an #if 0 region and is not compiled.
      * NOTE(review): the result of btrfs_start_transaction() is dereferenced
      * without an IS_ERR() check, and btrfs_drop_snapshot() is called with
      * different arguments here than in btrfs_clean_old_snapshots() -- both
      * need attention before re-enabling.
 875  */
 876 int btrfs_drop_dead_root(struct btrfs_root *root)
 877 {
 878         struct btrfs_trans_handle *trans;
 879         struct btrfs_root *tree_root = root->fs_info->tree_root;
 880         unsigned long nr;
 881         int ret;
 882
 883         while (1) {
 884                 /*
 885                  * we don't want to jump in and create a bunch of
 886                  * delayed refs if the transaction is starting to close
 887                  */
 888                 wait_transaction_pre_flush(tree_root->fs_info);
 889                 trans = btrfs_start_transaction(tree_root, 1);
 890
 891                 /*
 892                  * we've joined a transaction, make sure it isn't
 893                  * closing right now
 894                  */
 895                 if (trans->transaction->delayed_refs.flushing) {
 896                         btrfs_end_transaction(trans, tree_root);
 897                         continue;
 898                 }
 899
 900                 ret = btrfs_drop_snapshot(trans, root);
                     /* anything but -EAGAIN ends the loop with trans still open */
 901                 if (ret != -EAGAIN)
 902                         break;
 903
                     /* more to drop: write the root item back between passes */
 904                 ret = btrfs_update_root(trans, tree_root,
 905                                         &root->root_key,
 906                                         &root->root_item);
 907                 if (ret)
 908                         break;
 909
 910                 nr = trans->blocks_used;
 911                 ret = btrfs_end_transaction(trans, tree_root);
 912                 BUG_ON(ret);
 913
 914                 btrfs_btree_balance_dirty(tree_root, nr);
 915                 cond_resched();
 916         }
             /* both break paths above land here with their ret */
 917         BUG_ON(ret);
 918
             /* trans is the handle left open by the last loop pass */
 919         ret = btrfs_del_root(trans, tree_root, &root->root_key);
 920         BUG_ON(ret);
 921
 922         nr = trans->blocks_used;
 923         ret = btrfs_end_transaction(trans, tree_root);
 924         BUG_ON(ret);
 925
 926         free_extent_buffer(root->node);
 927         free_extent_buffer(root->commit_root);
 928         kfree(root);
 929
 930         btrfs_btree_balance_dirty(tree_root, nr);
 931         return ret;
 932 }
 933 #endif
 934
 935 /*
 936  * new snapshots need to be created at a very specific time in the
 937  * transaction commit.  This does the actual creation
 938  */
 939 static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 940                                    struct btrfs_fs_info *fs_info,
 941                                    struct btrfs_pending_snapshot *pending)
 942 {
 943         struct btrfs_key key;
 944         struct btrfs_root_item *new_root_item;
 945         struct btrfs_root *tree_root = fs_info->tree_root;
 946         struct btrfs_root *root = pending->root;
 947         struct btrfs_root *parent_root;
 948         struct inode *parent_inode;
 949         struct dentry *parent;
 950         struct dentry *dentry;
 951         struct extent_buffer *tmp;
 952         struct extent_buffer *old;
 953         int ret;
 954         u64 to_reserve = 0;
 955         u64 index = 0;
 956         u64 objectid;
 957         u64 root_flags;
 958
 959         new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
 960         if (!new_root_item) {
 961                 pending->error = -ENOMEM;
 962                 goto fail;
 963         }
 964
 965         ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
 966         if (ret) {
 967                 pending->error = ret;
 968                 goto fail;
 969         }
 970
 971         btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
 972         btrfs_orphan_pre_snapshot(trans, pending, &to_reserve);
 973
 974         if (to_reserve > 0) {
 975                 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
 976                                           to_reserve);
 977                 if (ret) {
 978                         pending->error = ret;
 979                         goto fail;
 980                 }
 981         }
 982
 983         key.objectid = objectid;
 984         key.offset = (u64)-1;
 985         key.type = BTRFS_ROOT_ITEM_KEY;
 986
 987         trans->block_rsv = &pending->block_rsv;
 988
 989         dentry = pending->dentry;
 990         parent = dget_parent(dentry);
 991         parent_inode = parent->d_inode;
 992         parent_root = BTRFS_I(parent_inode)->root;
 993         btrfs_record_root_in_trans(trans, parent_root);
 994
 995         /*
 996          * insert the directory item
 997          */
 998         ret = btrfs_set_inode_index(parent_inode, &index);
 999         BUG_ON(ret);
1000         ret = btrfs_insert_dir_item(trans, parent_root,
1001                                 dentry->d_name.name, dentry->d_name.len,
1002                                 parent_inode->i_ino, &key,
1003                                 BTRFS_FT_DIR, index);
1004         BUG_ON(ret);
1005
1006         btrfs_i_size_write(parent_inode, parent_inode->i_size +
1007                                          dentry->d_name.len * 2);
1008         ret = btrfs_update_inode(trans, parent_root, parent_inode);
1009         BUG_ON(ret);
1010
1011         btrfs_record_root_in_trans(trans, root);
1012         btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
1013         memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
1014         btrfs_check_and_init_root_item(new_root_item);
1015
1016         root_flags = btrfs_root_flags(new_root_item);
1017         if (pending->readonly)
1018                 root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
1019         else
1020                 root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
1021         btrfs_set_root_flags(new_root_item, root_flags);
1022
1023         old = btrfs_lock_root_node(root);
1024         btrfs_cow_block(trans, root, old, NULL, 0, &old);
1025         btrfs_set_lock_blocking(old);
1026
1027         btrfs_copy_root(trans, root, old, &tmp, objectid);
1028         btrfs_tree_unlock(old);
1029         free_extent_buffer(old);
1030
1031         btrfs_set_root_node(new_root_item, tmp);
1032         /* record when the snapshot was created in key.offset */
1033         key.offset = trans->transid;
1034         ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
1035         btrfs_tree_unlock(tmp);
1036         free_extent_buffer(tmp);
1037         BUG_ON(ret);
1038
1039         /*
1040          * insert root back/forward references
1041          */
1042         ret = btrfs_add_root_ref(trans, tree_root, objectid,
1043                                  parent_root->root_key.objectid,
1044                                  parent_inode->i_ino, index,
1045                                  dentry->d_name.name, dentry->d_name.len);
1046         BUG_ON(ret);
1047         dput(parent);
1048
1049         key.offset = (u64)-1;
1050         pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
1051         BUG_ON(IS_ERR(pending->snap));
1052
1053         btrfs_reloc_post_snapshot(trans, pending);
1054         btrfs_orphan_post_snapshot(trans, pending);
1055 fail:
1056         kfree(new_root_item);
1057         btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
1058         return 0;
1059 }
1060
1061 /*
1062  * create all the snapshots we've scheduled for creation
1063  */
1064 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
1065                                              struct btrfs_fs_info *fs_info)
1066 {
1067         struct btrfs_pending_snapshot *pending;
1068         struct list_head *head = &trans->transaction->pending_snapshots;
1069         int ret;
1070
1071         list_for_each_entry(pending, head, list) {
1072                 ret = create_pending_snapshot(trans, fs_info, pending);
1073                 BUG_ON(ret);
1074         }
1075         return 0;
1076 }
1077
1078 static void update_super_roots(struct btrfs_root *root)
1079 {
1080         struct btrfs_root_item *root_item;
1081         struct btrfs_super_block *super;
1082
1083         super = &root->fs_info->super_copy;
1084
1085         root_item = &root->fs_info->chunk_root->root_item;
1086         super->chunk_root = root_item->bytenr;
1087         super->chunk_root_generation = root_item->generation;
1088         super->chunk_root_level = root_item->level;
1089
1090         root_item = &root->fs_info->tree_root->root_item;
1091         super->root = root_item->bytenr;
1092         super->generation = root_item->generation;
1093         super->root_level = root_item->level;
1094         if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
1095                 super->cache_generation = root_item->generation;
1096 }
1097
1098 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
1099 {
1100         int ret = 0;
1101         spin_lock(&info->trans_lock);
1102         if (info->running_transaction)
1103                 ret = info->running_transaction->in_commit;
1104         spin_unlock(&info->trans_lock);
1105         return ret;
1106 }
1107
1108 int btrfs_transaction_blocked(struct btrfs_fs_info *info)
1109 {
1110         int ret = 0;
1111         spin_lock(&info->trans_lock);
1112         if (info->running_transaction)
1113                 ret = info->running_transaction->blocked;
1114         spin_unlock(&info->trans_lock);
1115         return ret;
1116 }
1117
1118 /*
1119  * wait for the current transaction commit to start and block subsequent
1120  * transaction joins
1121  */
1122 static void wait_current_trans_commit_start(struct btrfs_root *root,
1123                                             struct btrfs_transaction *trans)
1124 {
1125         DEFINE_WAIT(wait);
1126
1127         if (trans->in_commit)
1128                 return;
1129
1130         while (1) {
1131                 prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
1132                                 TASK_UNINTERRUPTIBLE);
1133                 if (trans->in_commit) {
1134                         finish_wait(&root->fs_info->transaction_blocked_wait,
1135                                     &wait);
1136                         break;
1137                 }
1138                 schedule();
1139                 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1140         }
1141 }
1142
1143 /*
1144  * wait for the current transaction to start and then become unblocked.
1145  * caller holds ref.
1146  */
1147 static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1148                                          struct btrfs_transaction *trans)
1149 {
1150         DEFINE_WAIT(wait);
1151
1152         if (trans->commit_done || (trans->in_commit && !trans->blocked))
1153                 return;
1154
1155         while (1) {
1156                 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
1157                                 TASK_UNINTERRUPTIBLE);
1158                 if (trans->commit_done ||
1159                     (trans->in_commit && !trans->blocked)) {
1160                         finish_wait(&root->fs_info->transaction_wait,
1161                                     &wait);
1162                         break;
1163                 }
1164                 schedule();
1165                 finish_wait(&root->fs_info->transaction_wait,
1166                             &wait);
1167         }
1168 }
1169
1170 /*
1171  * commit transactions asynchronously. once btrfs_commit_transaction_async
1172  * returns, any subsequent transaction will not be allowed to join.
      *
      * One of these is allocated by btrfs_commit_transaction_async() for each
      * request and freed by do_async_commit() after the commit has run.
1173  */
1174 struct btrfs_async_commit {
             /* handle from btrfs_join_transaction(); the worker commits it */
1175         struct btrfs_trans_handle *newtrans;
             /* root handed to btrfs_commit_transaction() by the worker */
1176         struct btrfs_root *root;
             /* delayed work item that runs do_async_commit() */
1177         struct delayed_work work;
1178 };
1179
1180 static void do_async_commit(struct work_struct *work)
1181 {
1182         struct btrfs_async_commit *ac =
1183                 container_of(work, struct btrfs_async_commit, work.work);
1184
1185         btrfs_commit_transaction(ac->newtrans, ac->root);
1186         kfree(ac);
1187 }
1188
1189 int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1190                                    struct btrfs_root *root,
1191                                    int wait_for_unblock)
1192 {
1193         struct btrfs_async_commit *ac;
1194         struct btrfs_transaction *cur_trans;
1195
1196         ac = kmalloc(sizeof(*ac), GFP_NOFS);
1197         if (!ac)
1198                 return -ENOMEM;
1199
1200         INIT_DELAYED_WORK(&ac->work, do_async_commit);
1201         ac->root = root;
1202         ac->newtrans = btrfs_join_transaction(root);
1203         if (IS_ERR(ac->newtrans)) {
1204                 int err = PTR_ERR(ac->newtrans);
1205                 kfree(ac);
1206                 return err;
1207         }
1208
1209         /* take transaction reference */
1210         cur_trans = trans->transaction;
1211         atomic_inc(&cur_trans->use_count);
1212
1213         btrfs_end_transaction(trans, root);
1214         schedule_delayed_work(&ac->work, 0);
1215
1216         /* wait for transaction to start and unblock */
1217         if (wait_for_unblock)
1218                 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1219         else
1220                 wait_current_trans_commit_start(root, cur_trans);
1221         put_transaction(cur_trans);
1222
1223         return 0;
1224 }
1225
1226 /*
1227  * btrfs_transaction state sequence:
1228  *    in_commit = 0, blocked = 0  (initial)
1229  *    in_commit = 1, blocked = 1
1230  *    blocked = 0
1231  *    commit_done = 1
      *
      * Commit the transaction that @trans belongs to.
      *
      * If another task has already set in_commit on this transaction, the
      * caller ends its handle, waits for that commit with wait_for_commit()
      * and returns 0.  Otherwise it becomes the committer: it sets in_commit
      * and blocked, waits for an unfinished previous transaction, waits until
      * it is the only writer left, creates the pending snapshots, commits the
      * fs roots and the cowonly roots, writes the transaction and the super
      * block, and finally sets commit_done and wakes commit_wait.
      *
      * Every failing step is a BUG_ON, so both exits return 0 (assuming
      * BUG_ON is not compiled out).  @trans is ended or freed on every path;
      * the caller must not use it afterwards.
1232  */
1233 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1234                              struct btrfs_root *root)
1235 {
1236         unsigned long joined = 0;
1237         struct btrfs_transaction *cur_trans;
1238         struct btrfs_transaction *prev_trans = NULL;
1239         DEFINE_WAIT(wait);
1240         int ret;
1241         int should_grow = 0;
1242         unsigned long now = get_seconds();
1243         int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
1244
1245         btrfs_run_ordered_operations(root, 0);
1246
1247         /* make a pass through all the delayed refs we have so far
1248          * any running procs may add more while we are here
1249          */
1250         ret = btrfs_run_delayed_refs(trans, root, 0);
1251         BUG_ON(ret);
1252
1253         btrfs_trans_release_metadata(trans, root);
1254
1255         cur_trans = trans->transaction;
1256         /*
1257          * set the flushing flag so procs in this transaction have to
1258          * start sending their work down.
1259          */
1260         cur_trans->delayed_refs.flushing = 1;
1261
1262         ret = btrfs_run_delayed_refs(trans, root, 0);
1263         BUG_ON(ret);
1264
             /*
              * in_commit is tested and set under commit_lock, so only one
              * task becomes the committer; everybody else waits for it
              */
1265         spin_lock(&cur_trans->commit_lock);
1266         if (cur_trans->in_commit) {
1267                 spin_unlock(&cur_trans->commit_lock);
                     /* take a ref before ending the handle, we still wait on it */
1268                 atomic_inc(&cur_trans->use_count);
1269                 btrfs_end_transaction(trans, root);
1270
1271                 ret = wait_for_commit(root, cur_trans);
1272                 BUG_ON(ret);
1273
1274                 put_transaction(cur_trans);
1275
1276                 return 0;
1277         }
1278
1279         trans->transaction->in_commit = 1;
1280         trans->transaction->blocked = 1;
1281         spin_unlock(&cur_trans->commit_lock);
             /* wait_current_trans_commit_start() sleeps on this queue */
1282         wake_up(&root->fs_info->transaction_blocked_wait);
1283
             /*
              * if another transaction sits before ours on trans_list and has
              * not finished its commit, wait for it first
              */
1284         spin_lock(&root->fs_info->trans_lock);
1285         if (cur_trans->list.prev != &root->fs_info->trans_list) {
1286                 prev_trans = list_entry(cur_trans->list.prev,
1287                                         struct btrfs_transaction, list);
1288                 if (!prev_trans->commit_done) {
1289                         atomic_inc(&prev_trans->use_count);
1290                         spin_unlock(&root->fs_info->trans_lock);
1291
1292                         wait_for_commit(root, prev_trans);
1293
1294                         put_transaction(prev_trans);
1295                 } else {
1296                         spin_unlock(&root->fs_info->trans_lock);
1297                 }
1298         } else {
1299                 spin_unlock(&root->fs_info->trans_lock);
1300         }
1301
             /*
              * a transaction that started less than a second ago (or whose
              * start time lies in the future) is given a chance to pick up
              * more joiners in the loop below
              */
1302         if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
1303                 should_grow = 1;
1304
             /*
              * loop until we are the only writer left and, when should_grow
              * is set, until no new handle joined during the last pass
              */
1305         do {
1306                 int snap_pending = 0;
1307
1308                 joined = cur_trans->num_joined;
1309                 if (!list_empty(&trans->transaction->pending_snapshots))
1310                         snap_pending = 1;
1311
1312                 WARN_ON(cur_trans != trans->transaction);
1313
1314                 if (flush_on_commit || snap_pending) {
1315                         btrfs_start_delalloc_inodes(root, 1);
1316                         ret = btrfs_wait_ordered_extents(root, 0, 1);
1317                         BUG_ON(ret);
1318                 }
1319
1320                 /*
1321                  * rename doesn't use btrfs_join_transaction, so, once we
1322                  * set the transaction to blocked above, we aren't going
1323                  * to get any new ordered operations.  We can safely run
1324                  * it here and know for sure that nothing new will be added
1325                  * to the list
1326                  */
1327                 btrfs_run_ordered_operations(root, 1);
1328
1329                 prepare_to_wait(&cur_trans->writer_wait, &wait,
1330                                 TASK_UNINTERRUPTIBLE);
1331
                     /*
                      * other writers: sleep until woken through writer_wait;
                      * growing only: sleep for a single jiffy
                      */
1332                 if (atomic_read(&cur_trans->num_writers) > 1)
1333                         schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1334                 else if (should_grow)
1335                         schedule_timeout(1);
1336
1337                 finish_wait(&cur_trans->writer_wait, &wait);
1338                 spin_lock(&root->fs_info->trans_lock);
1339                 root->fs_info->trans_no_join = 1;
1340                 spin_unlock(&root->fs_info->trans_lock);
1341         } while (atomic_read(&cur_trans->num_writers) > 1 ||
1342                  (should_grow && cur_trans->num_joined != joined));
1343
1344         ret = create_pending_snapshots(trans, root->fs_info);
1345         BUG_ON(ret);
1346
             /* (unsigned long)-1: presumably "run every delayed ref" -- confirm */
1347         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1348         BUG_ON(ret);
1349
1350         WARN_ON(cur_trans != trans->transaction);
1351
1352         /* btrfs_commit_tree_roots is responsible for getting the
1353          * various roots consistent with each other.  Every pointer
1354          * in the tree of tree roots has to point to the most up to date
1355          * root for every subvolume and other tree.  So, we have to keep
1356          * the tree logging code from jumping in and changing any
1357          * of the trees.
1358          *
1359          * At this point in the commit, there can't be any tree-log
1360          * writers, but a little lower down we drop the trans mutex
1361          * and let new people in.  By holding the tree_log_mutex
1362          * from now until after the super is written, we avoid races
1363          * with the tree-log code.
1364          */
1365         mutex_lock(&root->fs_info->tree_log_mutex);
1366
1367         ret = commit_fs_roots(trans, root);
1368         BUG_ON(ret);
1369
1370         /* commit_fs_roots gets rid of all the tree log roots, it is now
1371          * safe to free the root of tree log roots
1372          */
1373         btrfs_free_log_root_tree(trans, root->fs_info);
1374
1375         ret = commit_cowonly_roots(trans, root);
1376         BUG_ON(ret);
1377
1378         btrfs_prepare_extent_commit(trans, root);
1379
             /* re-read the pointer; from here on cur_trans is used for cleanup */
1380         cur_trans = root->fs_info->running_transaction;
1381
1382         btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1383                             root->fs_info->tree_root->node);
1384         switch_commit_root(root->fs_info->tree_root);
1385
1386         btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1387                             root->fs_info->chunk_root->node);
1388         switch_commit_root(root->fs_info->chunk_root);
1389
1390         update_super_roots(root);
1391
1392         if (!root->fs_info->log_root_recovering) {
1393                 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
1394                 btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
1395         }
1396
             /* snapshot of the super block that is taken for this commit */
1397         memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
1398                sizeof(root->fs_info->super_copy));
1399
             /*
              * unblock: there is no running transaction any more and joins
              * are allowed again, then wake the sleepers on transaction_wait
              */
1400         trans->transaction->blocked = 0;
1401         spin_lock(&root->fs_info->trans_lock);
1402         root->fs_info->running_transaction = NULL;
1403         root->fs_info->trans_no_join = 0;
1404         spin_unlock(&root->fs_info->trans_lock);
1405
1406         wake_up(&root->fs_info->transaction_wait);
1407
1408         ret = btrfs_write_and_wait_transaction(trans, root);
1409         BUG_ON(ret);
             /* NOTE(review): the result of write_ctree_super() is not checked */
1410         write_ctree_super(trans, root, 0);
1411
1412         /*
1413          * the super is written, we can safely allow the tree-loggers
1414          * to go about their business
1415          */
1416         mutex_unlock(&root->fs_info->tree_log_mutex);
1417
1418         btrfs_finish_extent_commit(trans, root);
1419
1420         cur_trans->commit_done = 1;
1421
1422         root->fs_info->last_trans_committed = cur_trans->transid;
1423
1424         wake_up(&cur_trans->commit_wait);
1425
1426         spin_lock(&root->fs_info->trans_lock);
1427         list_del_init(&cur_trans->list);
1428         spin_unlock(&root->fs_info->trans_lock);
1429
             /*
              * two references are dropped here -- presumably the one held by
              * this handle and the one held for trans_list; confirm against
              * join_transaction()
              */
1430         put_transaction(cur_trans);
1431         put_transaction(cur_trans);
1432
1433         trace_btrfs_transaction_commit(root);
1434
1435         if (current->journal_info == trans)
1436                 current->journal_info = NULL;
1437
1438         kmem_cache_free(btrfs_trans_handle_cachep, trans);
1439
             /* delayed iputs are skipped when running as the transaction kthread */
1440         if (current != root->fs_info->transaction_kthread)
1441                 btrfs_run_delayed_iputs(root);
1442
             /* ret is the checked result of btrfs_write_and_wait_transaction() */
1443         return ret;
1444 }
1445
1446 /*
1447  * interface function to delete all the snapshots we have scheduled for deletion
1448  */
1449 int btrfs_clean_old_snapshots(struct btrfs_root *root)
1450 {
1451         LIST_HEAD(list);
1452         struct btrfs_fs_info *fs_info = root->fs_info;
1453
1454         spin_lock(&fs_info->trans_lock);
1455         list_splice_init(&fs_info->dead_roots, &list);
1456         spin_unlock(&fs_info->trans_lock);
1457
1458         while (!list_empty(&list)) {
1459                 root = list_entry(list.next, struct btrfs_root, root_list);
1460                 list_del(&root->root_list);
1461
1462                 if (btrfs_header_backref_rev(root->node) <
1463                     BTRFS_MIXED_BACKREF_REV)
1464                         btrfs_drop_snapshot(root, NULL, 0);
1465                 else
1466                         btrfs_drop_snapshot(root, NULL, 1);
1467         }
1468         return 0;
1469 }