fs/btrfs/inode.c

   1 /*
   2  * Copyright (C) 2007 Oracle.  All rights reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License v2 as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public
  14  * License along with this program; if not, write to the
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16  * Boston, MA 021110-1307, USA.
  17  */
  18
  19 #include <linux/kernel.h>
  20 #include <linux/bio.h>
  21 #include <linux/buffer_head.h>
  22 #include <linux/file.h>
  23 #include <linux/fs.h>
  24 #include <linux/pagemap.h>
  25 #include <linux/highmem.h>
  26 #include <linux/time.h>
  27 #include <linux/init.h>
  28 #include <linux/string.h>
  29 #include <linux/smp_lock.h>
  30 #include <linux/backing-dev.h>
  31 #include <linux/mpage.h>
  32 #include <linux/swap.h>
  33 #include <linux/writeback.h>
  34 #include <linux/statfs.h>
  35 #include <linux/compat.h>
  36 #include <linux/bit_spinlock.h>
  37 #include <linux/version.h>
  38 #include <linux/xattr.h>
  39 #include <linux/posix_acl.h>
  40 #include "ctree.h"
  41 #include "disk-io.h"
  42 #include "transaction.h"
  43 #include "btrfs_inode.h"
  44 #include "ioctl.h"
  45 #include "print-tree.h"
  46 #include "volumes.h"
  47 #include "ordered-data.h"
  48 #include "xattr.h"
  49 #include "compat.h"
  50 #include "tree-log.h"
  51 #include "ref-cache.h"
  52
  53 struct btrfs_iget_args {
  54         u64 ino;
  55         struct btrfs_root *root;
  56 };
  57
  58 static struct inode_operations btrfs_dir_inode_operations;
  59 static struct inode_operations btrfs_symlink_inode_operations;
  60 static struct inode_operations btrfs_dir_ro_inode_operations;
  61 static struct inode_operations btrfs_special_inode_operations;
  62 static struct inode_operations btrfs_file_inode_operations;
  63 static struct address_space_operations btrfs_aops;
  64 static struct address_space_operations btrfs_symlink_aops;
  65 static struct file_operations btrfs_dir_file_operations;
  66 static struct extent_io_ops btrfs_extent_io_ops;
  67
  68 static struct kmem_cache *btrfs_inode_cachep;
  69 struct kmem_cache *btrfs_trans_handle_cachep;
  70 struct kmem_cache *btrfs_transaction_cachep;
  71 struct kmem_cache *btrfs_bit_radix_cachep;
  72 struct kmem_cache *btrfs_path_cachep;
  73
  74 #define S_SHIFT 12
  75 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
  76         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
  77         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
  78         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
  79         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
  80         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
  81         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
  82         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
  83 };
  84
  85 static void btrfs_truncate(struct inode *inode);
  86
  87 /*
  88  * a very lame attempt at stopping writes when the FS is 85% full.  There
  89  * are countless ways this is incorrect, but it is better than nothing.
  90  */
  91 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
  92                            int for_del)
  93 {
  94         u64 total;
  95         u64 used;
  96         u64 thresh;
  97         unsigned long flags;
  98         int ret = 0;
  99
 100         spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
 101         total = btrfs_super_total_bytes(&root->fs_info->super_copy);
 102         used = btrfs_super_bytes_used(&root->fs_info->super_copy);
 103         if (for_del)
 104                 thresh = total * 90;
 105         else
 106                 thresh = total * 85;
 107
 108         do_div(thresh, 100);
 109
 110         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
 111                 ret = -ENOSPC;
 112         spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
 113         return ret;
 114 }
 115
 116 /*
 117  * when extent_io.c finds a delayed allocation range in the file,
 118  * the call backs end up in this code.  The basic idea is to
 119  * allocate extents on disk for the range, and create ordered data structs
 120  * in ram to track those extents.
 121  */
 122 static int cow_file_range(struct inode *inode, u64 start, u64 end)
 123 {
 124         struct btrfs_root *root = BTRFS_I(inode)->root;
 125         struct btrfs_trans_handle *trans;
 126         u64 alloc_hint = 0;
 127         u64 num_bytes;
 128         u64 cur_alloc_size;
 129         u64 blocksize = root->sectorsize;
 130         u64 orig_num_bytes;
 131         struct btrfs_key ins;
 132         struct extent_map *em;
 133         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 134         int ret = 0;
 135
 136         trans = btrfs_join_transaction(root, 1);
 137         BUG_ON(!trans);
 138         btrfs_set_trans_block_group(trans, inode);
 139
 140         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
 141         num_bytes = max(blocksize,  num_bytes);
 142         orig_num_bytes = num_bytes;
 143
 144         if (alloc_hint == EXTENT_MAP_INLINE)
 145                 goto out;
 146
 147         BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
 148         mutex_lock(&BTRFS_I(inode)->extent_mutex);
 149         btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 150         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 151
 152         while(num_bytes > 0) {
 153                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
 154                 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
 155                                            root->sectorsize, 0, alloc_hint,
 156                                            (u64)-1, &ins, 1);
 157                 if (ret) {
 158                         WARN_ON(1);
 159                         goto out;
 160                 }
 161                 em = alloc_extent_map(GFP_NOFS);
 162                 em->start = start;
 163                 em->len = ins.offset;
 164                 em->block_start = ins.objectid;
 165                 em->bdev = root->fs_info->fs_devices->latest_bdev;
 166                 mutex_lock(&BTRFS_I(inode)->extent_mutex);
 167                 set_bit(EXTENT_FLAG_PINNED, &em->flags);
 168                 while(1) {
 169                         spin_lock(&em_tree->lock);
 170                         ret = add_extent_mapping(em_tree, em);
 171                         spin_unlock(&em_tree->lock);
 172                         if (ret != -EEXIST) {
 173                                 free_extent_map(em);
 174                                 break;
 175                         }
 176                         btrfs_drop_extent_cache(inode, start,
 177                                                 start + ins.offset - 1, 0);
 178                 }
 179                 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 180
 181                 cur_alloc_size = ins.offset;
 182                 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
 183                                                ins.offset, 0);
 184                 BUG_ON(ret);
 185                 if (num_bytes < cur_alloc_size) {
 186                         printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
 187                                cur_alloc_size);
 188                         break;
 189                 }
 190                 num_bytes -= cur_alloc_size;
 191                 alloc_hint = ins.objectid + ins.offset;
 192                 start += cur_alloc_size;
 193         }
 194 out:
 195         btrfs_end_transaction(trans, root);
 196         return ret;
 197 }
 198
 199 /*
 200  * when nowcow writeback call back.  This checks for snapshots or COW copies
 201  * of the extents that exist in the file, and COWs the file as required.
 202  *
 203  * If no cow copies or snapshots exist, we write directly to the existing
 204  * blocks on disk
 205  */
 206 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
 207 {
 208         u64 extent_start;
 209         u64 extent_end;
 210         u64 bytenr;
 211         u64 loops = 0;
 212         u64 total_fs_bytes;
 213         struct btrfs_root *root = BTRFS_I(inode)->root;
 214         struct btrfs_block_group_cache *block_group;
 215         struct btrfs_trans_handle *trans;
 216         struct extent_buffer *leaf;
 217         int found_type;
 218         struct btrfs_path *path;
 219         struct btrfs_file_extent_item *item;
 220         int ret;
 221         int err = 0;
 222         struct btrfs_key found_key;
 223
 224         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 225         path = btrfs_alloc_path();
 226         BUG_ON(!path);
 227         trans = btrfs_join_transaction(root, 1);
 228         BUG_ON(!trans);
 229 again:
 230         ret = btrfs_lookup_file_extent(NULL, root, path,
 231                                        inode->i_ino, start, 0);
 232         if (ret < 0) {
 233                 err = ret;
 234                 goto out;
 235         }
 236
 237         if (ret != 0) {
 238                 if (path->slots[0] == 0)
 239                         goto not_found;
 240                 path->slots[0]--;
 241         }
 242
 243         leaf = path->nodes[0];
 244         item = btrfs_item_ptr(leaf, path->slots[0],
 245                               struct btrfs_file_extent_item);
 246
 247         /* are we inside the extent that was found? */
 248         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 249         found_type = btrfs_key_type(&found_key);
 250         if (found_key.objectid != inode->i_ino ||
 251             found_type != BTRFS_EXTENT_DATA_KEY)
 252                 goto not_found;
 253
 254         found_type = btrfs_file_extent_type(leaf, item);
 255         extent_start = found_key.offset;
 256         if (found_type == BTRFS_FILE_EXTENT_REG) {
 257                 u64 extent_num_bytes;
 258
 259                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
 260                 extent_end = extent_start + extent_num_bytes;
 261                 err = 0;
 262
 263                 if (loops && start != extent_start)
 264                         goto not_found;
 265
 266                 if (start < extent_start || start >= extent_end)
 267                         goto not_found;
 268
 269                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
 270                 if (bytenr == 0)
 271                         goto not_found;
 272
 273                 if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr))
 274                         goto not_found;
 275                 /*
 276                  * we may be called by the resizer, make sure we're inside
 277                  * the limits of the FS
 278                  */
 279                 block_group = btrfs_lookup_block_group(root->fs_info,
 280                                                        bytenr);
 281                 if (!block_group || block_group->ro)
 282                         goto not_found;
 283
 284                 bytenr += btrfs_file_extent_offset(leaf, item);
 285                 extent_num_bytes = min(end + 1, extent_end) - start;
 286                 ret = btrfs_add_ordered_extent(inode, start, bytenr,
 287                                                 extent_num_bytes, 1);
 288                 if (ret) {
 289                         err = ret;
 290                         goto out;
 291                 }
 292
 293                 btrfs_release_path(root, path);
 294                 start = extent_end;
 295                 if (start <= end) {
 296                         loops++;
 297                         goto again;
 298                 }
 299         } else {
 300 not_found:
 301                 btrfs_end_transaction(trans, root);
 302                 btrfs_free_path(path);
 303                 return cow_file_range(inode, start, end);
 304         }
 305 out:
 306         WARN_ON(err);
 307         btrfs_end_transaction(trans, root);
 308         btrfs_free_path(path);
 309         return err;
 310 }
 311
 312 /*
 313  * extent_io.c call back to do delayed allocation processing
 314  */
 315 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
 316 {
 317         struct btrfs_root *root = BTRFS_I(inode)->root;
 318         int ret;
 319
 320         if (btrfs_test_opt(root, NODATACOW) ||
 321             btrfs_test_flag(inode, NODATACOW))
 322                 ret = run_delalloc_nocow(inode, start, end);
 323         else
 324                 ret = cow_file_range(inode, start, end);
 325
 326         return ret;
 327 }
 328
 329 /*
 330  * extent_io.c set_bit_hook, used to track delayed allocation
 331  * bytes in this file, and to maintain the list of inodes that
 332  * have pending delalloc work to be done.
 333  */
 334 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
 335                        unsigned long old, unsigned long bits)
 336 {
 337         unsigned long flags;
 338         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
 339                 struct btrfs_root *root = BTRFS_I(inode)->root;
 340                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
 341                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
 342                 root->fs_info->delalloc_bytes += end - start + 1;
 343                 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 344                         list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
 345                                       &root->fs_info->delalloc_inodes);
 346                 }
 347                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
 348         }
 349         return 0;
 350 }
 351
 352 /*
 353  * extent_io.c clear_bit_hook, see set_bit_hook for why
 354  */
 355 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
 356                          unsigned long old, unsigned long bits)
 357 {
 358         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
 359                 struct btrfs_root *root = BTRFS_I(inode)->root;
 360                 unsigned long flags;
 361
 362                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
 363                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
 364                         printk("warning: delalloc account %Lu %Lu\n",
 365                                end - start + 1, root->fs_info->delalloc_bytes);
 366                         root->fs_info->delalloc_bytes = 0;
 367                         BTRFS_I(inode)->delalloc_bytes = 0;
 368                 } else {
 369                         root->fs_info->delalloc_bytes -= end - start + 1;
 370                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
 371                 }
 372                 if (BTRFS_I(inode)->delalloc_bytes == 0 &&
 373                     !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 374                         list_del_init(&BTRFS_I(inode)->delalloc_inodes);
 375                 }
 376                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
 377         }
 378         return 0;
 379 }
 380
 381 /*
 382  * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
 383  * we don't create bios that span stripes or chunks
 384  */
 385 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 386                          size_t size, struct bio *bio)
 387 {
 388         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
 389         struct btrfs_mapping_tree *map_tree;
 390         u64 logical = (u64)bio->bi_sector << 9;
 391         u64 length = 0;
 392         u64 map_length;
 393         int ret;
 394
 395         length = bio->bi_size;
 396         map_tree = &root->fs_info->mapping_tree;
 397         map_length = length;
 398         ret = btrfs_map_block(map_tree, READ, logical,
 399                               &map_length, NULL, 0);
 400
 401         if (map_length < length + size) {
 402                 return 1;
 403         }
 404         return 0;
 405 }
 406
 407 /*
 408  * in order to insert checksums into the metadata in large chunks,
 409  * we wait until bio submission time.   All the pages in the bio are
 410  * checksummed and sums are attached onto the ordered extent record.
 411  *
 412  * At IO completion time the cums attached on the ordered extent record
 413  * are inserted into the btree
 414  */
 415 int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 416                           int mirror_num)
 417 {
 418         struct btrfs_root *root = BTRFS_I(inode)->root;
 419         int ret = 0;
 420
 421         ret = btrfs_csum_one_bio(root, inode, bio);
 422         BUG_ON(ret);
 423
 424         return btrfs_map_bio(root, rw, bio, mirror_num, 1);
 425 }
 426
 427 /*
 428  * extent_io.c submission hook. This does the right thing for csum calculation on write,
 429  * or reading the csums from the tree before a read
 430  */
 431 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 432                           int mirror_num)
 433 {
 434         struct btrfs_root *root = BTRFS_I(inode)->root;
 435         int ret = 0;
 436
 437         ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 438         BUG_ON(ret);
 439
 440         if (btrfs_test_opt(root, NODATASUM) ||
 441             btrfs_test_flag(inode, NODATASUM)) {
 442                 goto mapit;
 443         }
 444
 445         if (!(rw & (1 << BIO_RW))) {
 446                 btrfs_lookup_bio_sums(root, inode, bio);
 447                 goto mapit;
 448         }
 449         return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 450                                    inode, rw, bio, mirror_num,
 451                                    __btrfs_submit_bio_hook);
 452 mapit:
 453         return btrfs_map_bio(root, rw, bio, mirror_num, 0);
 454 }
 455
 456 /*
 457  * given a list of ordered sums record them in the inode.  This happens
 458  * at IO completion time based on sums calculated at bio submission time.
 459  */
 460 static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
 461                              struct inode *inode, u64 file_offset,
 462                              struct list_head *list)
 463 {
 464         struct list_head *cur;
 465         struct btrfs_ordered_sum *sum;
 466
 467         btrfs_set_trans_block_group(trans, inode);
 468         list_for_each(cur, list) {
 469                 sum = list_entry(cur, struct btrfs_ordered_sum, list);
 470                 btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
 471                                        inode, sum);
 472         }
 473         return 0;
 474 }
 475
 476 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end)
 477 {
 478         return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
 479                                    GFP_NOFS);
 480 }
 481
 482 /* see btrfs_writepage_start_hook for details on why this is required */
 483 struct btrfs_writepage_fixup {
 484         struct page *page;
 485         struct btrfs_work work;
 486 };
 487
 488 void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 489 {
 490         struct btrfs_writepage_fixup *fixup;
 491         struct btrfs_ordered_extent *ordered;
 492         struct page *page;
 493         struct inode *inode;
 494         u64 page_start;
 495         u64 page_end;
 496
 497         fixup = container_of(work, struct btrfs_writepage_fixup, work);
 498         page = fixup->page;
 499 again:
 500         lock_page(page);
 501         if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
 502                 ClearPageChecked(page);
 503                 goto out_page;
 504         }
 505
 506         inode = page->mapping->host;
 507         page_start = page_offset(page);
 508         page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
 509
 510         lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 511
 512         /* already ordered? We're done */
 513         if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
 514                              EXTENT_ORDERED, 0)) {
 515                 goto out;
 516         }
 517
 518         ordered = btrfs_lookup_ordered_extent(inode, page_start);
 519         if (ordered) {
 520                 unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
 521                               page_end, GFP_NOFS);
 522                 unlock_page(page);
 523                 btrfs_start_ordered_extent(inode, ordered, 1);
 524                 goto again;
 525         }
 526
 527         btrfs_set_extent_delalloc(inode, page_start, page_end);
 528         ClearPageChecked(page);
 529 out:
 530         unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 531 out_page:
 532         unlock_page(page);
 533         page_cache_release(page);
 534 }
 535
 536 /*
 537  * There are a few paths in the higher layers of the kernel that directly
 538  * set the page dirty bit without asking the filesystem if it is a
 539  * good idea.  This causes problems because we want to make sure COW
 540  * properly happens and the data=ordered rules are followed.
 541  *
 542  * In our case any range that doesn't have the EXTENT_ORDERED bit set
 543  * hasn't been properly setup for IO.  We kick off an async process
 544  * to fix it up.  The async helper will wait for ordered extents, set
 545  * the delalloc bit and make it safe to write the page.
 546  */
 547 int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
 548 {
 549         struct inode *inode = page->mapping->host;
 550         struct btrfs_writepage_fixup *fixup;
 551         struct btrfs_root *root = BTRFS_I(inode)->root;
 552         int ret;
 553
 554         ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
 555                              EXTENT_ORDERED, 0);
 556         if (ret)
 557                 return 0;
 558
 559         if (PageChecked(page))
 560                 return -EAGAIN;
 561
 562         fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
 563         if (!fixup)
 564                 return -EAGAIN;
 565
 566         SetPageChecked(page);
 567         page_cache_get(page);
 568         fixup->work.func = btrfs_writepage_fixup_worker;
 569         fixup->page = page;
 570         btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
 571         return -EAGAIN;
 572 }
 573
 574 /* as ordered data IO finishes, this gets called so we can finish
 575  * an ordered extent if the range of bytes in the file it covers are
 576  * fully written.
 577  */
 578 static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 579 {
 580         struct btrfs_root *root = BTRFS_I(inode)->root;
 581         struct btrfs_trans_handle *trans;
 582         struct btrfs_ordered_extent *ordered_extent;
 583         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 584         struct btrfs_file_extent_item *extent_item;
 585         struct btrfs_path *path = NULL;
 586         struct extent_buffer *leaf;
 587         u64 alloc_hint = 0;
 588         struct list_head list;
 589         struct btrfs_key ins;
 590         int ret;
 591
 592         ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
 593         if (!ret)
 594                 return 0;
 595
 596         trans = btrfs_join_transaction(root, 1);
 597
 598         ordered_extent = btrfs_lookup_ordered_extent(inode, start);
 599         BUG_ON(!ordered_extent);
 600         if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
 601                 goto nocow;
 602
 603         path = btrfs_alloc_path();
 604         BUG_ON(!path);
 605
 606         lock_extent(io_tree, ordered_extent->file_offset,
 607                     ordered_extent->file_offset + ordered_extent->len - 1,
 608                     GFP_NOFS);
 609
 610         INIT_LIST_HEAD(&list);
 611
 612         mutex_lock(&BTRFS_I(inode)->extent_mutex);
 613
 614         ret = btrfs_drop_extents(trans, root, inode,
 615                                  ordered_extent->file_offset,
 616                                  ordered_extent->file_offset +
 617                                  ordered_extent->len,
 618                                  ordered_extent->file_offset, &alloc_hint);
 619         BUG_ON(ret);
 620
 621         ins.objectid = inode->i_ino;
 622         ins.offset = ordered_extent->file_offset;
 623         ins.type = BTRFS_EXTENT_DATA_KEY;
 624         ret = btrfs_insert_empty_item(trans, root, path, &ins,
 625                                       sizeof(*extent_item));
 626         BUG_ON(ret);
 627         leaf = path->nodes[0];
 628         extent_item = btrfs_item_ptr(leaf, path->slots[0],
 629                                      struct btrfs_file_extent_item);
 630         btrfs_set_file_extent_generation(leaf, extent_item, trans->transid);
 631         btrfs_set_file_extent_type(leaf, extent_item, BTRFS_FILE_EXTENT_REG);
 632         btrfs_set_file_extent_disk_bytenr(leaf, extent_item,
 633                                           ordered_extent->start);
 634         btrfs_set_file_extent_disk_num_bytes(leaf, extent_item,
 635                                              ordered_extent->len);
 636         btrfs_set_file_extent_offset(leaf, extent_item, 0);
 637         btrfs_set_file_extent_num_bytes(leaf, extent_item,
 638                                         ordered_extent->len);
 639         btrfs_mark_buffer_dirty(leaf);
 640
 641         btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
 642                                 ordered_extent->file_offset +
 643                                 ordered_extent->len - 1, 0);
 644         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 645
 646         ins.objectid = ordered_extent->start;
 647         ins.offset = ordered_extent->len;
 648         ins.type = BTRFS_EXTENT_ITEM_KEY;
 649         ret = btrfs_alloc_reserved_extent(trans, root, leaf->start,
 650                                           root->root_key.objectid,
 651                                           trans->transid, inode->i_ino, &ins);
 652         BUG_ON(ret);
 653         btrfs_release_path(root, path);
 654
 655         inode_add_bytes(inode, ordered_extent->len);
 656         unlock_extent(io_tree, ordered_extent->file_offset,
 657                     ordered_extent->file_offset + ordered_extent->len - 1,
 658                     GFP_NOFS);
 659 nocow:
 660         add_pending_csums(trans, inode, ordered_extent->file_offset,
 661                           &ordered_extent->list);
 662
 663         mutex_lock(&BTRFS_I(inode)->extent_mutex);
 664         btrfs_ordered_update_i_size(inode, ordered_extent);
 665         btrfs_update_inode(trans, root, inode);
 666         btrfs_remove_ordered_extent(inode, ordered_extent);
 667         mutex_unlock(&BTRFS_I(inode)->extent_mutex);
 668
 669         /* once for us */
 670         btrfs_put_ordered_extent(ordered_extent);
 671         /* once for the tree */
 672         btrfs_put_ordered_extent(ordered_extent);
 673
 674         btrfs_end_transaction(trans, root);
 675         if (path)
 676                 btrfs_free_path(path);
 677         return 0;
 678 }
 679
 680 int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
 681                                 struct extent_state *state, int uptodate)
 682 {
 683         return btrfs_finish_ordered_io(page->mapping->host, start, end);
 684 }
 685
 686 /*
 687  * When IO fails, either with EIO or csum verification fails, we
 688  * try other mirrors that might have a good copy of the data.  This
 689  * io_failure_record is used to record state as we go through all the
 690  * mirrors.  If another mirror has good data, the page is set up to date
 691  * and things continue.  If a good mirror can't be found, the original
 692  * bio end_io callback is called to indicate things have failed.
 693  */
 694 struct io_failure_record {
 695         struct page *page;
 696         u64 start;
 697         u64 len;
 698         u64 logical;
 699         int last_mirror;
 700 };
 701
 702 int btrfs_io_failed_hook(struct bio *failed_bio,
 703                          struct page *page, u64 start, u64 end,
 704                          struct extent_state *state)
 705 {
 706         struct io_failure_record *failrec = NULL;
 707         u64 private;
 708         struct extent_map *em;
 709         struct inode *inode = page->mapping->host;
 710         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 711         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 712         struct bio *bio;
 713         int num_copies;
 714         int ret;
 715         int rw;
 716         u64 logical;
 717
 718         ret = get_state_private(failure_tree, start, &private);
 719         if (ret) {
 720                 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
 721                 if (!failrec)
 722                         return -ENOMEM;
 723                 failrec->start = start;
 724                 failrec->len = end - start + 1;
 725                 failrec->last_mirror = 0;
 726
 727                 spin_lock(&em_tree->lock);
 728                 em = lookup_extent_mapping(em_tree, start, failrec->len);
 729                 if (em->start > start || em->start + em->len < start) {
 730                         free_extent_map(em);
 731                         em = NULL;
 732                 }
 733                 spin_unlock(&em_tree->lock);
 734
 735                 if (!em || IS_ERR(em)) {
 736                         kfree(failrec);
 737                         return -EIO;
 738                 }
 739                 logical = start - em->start;
 740                 logical = em->block_start + logical;
 741                 failrec->logical = logical;
 742                 free_extent_map(em);
 743                 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
 744                                 EXTENT_DIRTY, GFP_NOFS);
 745                 set_state_private(failure_tree, start,
 746                                  (u64)(unsigned long)failrec);
 747         } else {
 748                 failrec = (struct io_failure_record *)(unsigned long)private;
 749         }
 750         num_copies = btrfs_num_copies(
 751                               &BTRFS_I(inode)->root->fs_info->mapping_tree,
 752                               failrec->logical, failrec->len);
 753         failrec->last_mirror++;
 754         if (!state) {
 755                 spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
 756                 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
 757                                                     failrec->start,
 758                                                     EXTENT_LOCKED);
 759                 if (state && state->start != failrec->start)
 760                         state = NULL;
 761                 spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
 762         }
 763         if (!state || failrec->last_mirror > num_copies) {
 764                 set_state_private(failure_tree, failrec->start, 0);
 765                 clear_extent_bits(failure_tree, failrec->start,
 766                                   failrec->start + failrec->len - 1,
 767                                   EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
 768                 kfree(failrec);
 769                 return -EIO;
 770         }
 771         bio = bio_alloc(GFP_NOFS, 1);
 772         bio->bi_private = state;
 773         bio->bi_end_io = failed_bio->bi_end_io;
 774         bio->bi_sector = failrec->logical >> 9;
 775         bio->bi_bdev = failed_bio->bi_bdev;
 776         bio->bi_size = 0;
 777         bio_add_page(bio, page, failrec->len, start - page_offset(page));
 778         if (failed_bio->bi_rw & (1 << BIO_RW))
 779                 rw = WRITE;
 780         else
 781                 rw = READ;
 782
 783         BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
 784                                                       failrec->last_mirror);
 785         return 0;
 786 }
 787
 788 /*
 789  * each time an IO finishes, we do a fast check in the IO failure tree
 790  * to see if we need to process or clean up an io_failure_record
 791  */
 792 int btrfs_clean_io_failures(struct inode *inode, u64 start)
 793 {
 794         u64 private;
 795         u64 private_failure;
 796         struct io_failure_record *failure;
 797         int ret;
 798
 799         private = 0;
 800         if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
 801                              (u64)-1, 1, EXTENT_DIRTY)) {
 802                 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
 803                                         start, &private_failure);
 804                 if (ret == 0) {
 805                         failure = (struct io_failure_record *)(unsigned long)
 806                                    private_failure;
 807                         set_state_private(&BTRFS_I(inode)->io_failure_tree,
 808                                           failure->start, 0);
 809                         clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
 810                                           failure->start,
 811                                           failure->start + failure->len - 1,
 812                                           EXTENT_DIRTY | EXTENT_LOCKED,
 813                                           GFP_NOFS);
 814                         kfree(failure);
 815                 }
 816         }
 817         return 0;
 818 }
 819
 820 /*
 821  * when reads are done, we need to check csums to verify the data is correct
 822  * if there's a match, we allow the bio to finish.  If not, we go through
 823  * the io_failure_record routines to find good copies
 824  */
 825 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 826                                struct extent_state *state)
 827 {
 828         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
 829         struct inode *inode = page->mapping->host;
 830         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 831         char *kaddr;
 832         u64 private = ~(u32)0;
 833         int ret;
 834         struct btrfs_root *root = BTRFS_I(inode)->root;
 835         u32 csum = ~(u32)0;
 836         unsigned long flags;
 837
 838         if (btrfs_test_opt(root, NODATASUM) ||
 839             btrfs_test_flag(inode, NODATASUM))
 840                 return 0;
 841         if (state && state->start == start) {
 842                 private = state->private;
 843                 ret = 0;
 844         } else {
 845                 ret = get_state_private(io_tree, start, &private);
 846         }
 847         local_irq_save(flags);
 848         kaddr = kmap_atomic(page, KM_IRQ0);
 849         if (ret) {
 850                 goto zeroit;
 851         }
 852         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
 853         btrfs_csum_final(csum, (char *)&csum);
 854         if (csum != private) {
 855                 goto zeroit;
 856         }
 857         kunmap_atomic(kaddr, KM_IRQ0);
 858         local_irq_restore(flags);
 859
 860         /* if the io failure tree for this inode is non-empty,
 861          * check to see if we've recovered from a failed IO
 862          */
 863         btrfs_clean_io_failures(inode, start);
 864         return 0;
 865
 866 zeroit:
 867         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
 868                page->mapping->host->i_ino, (unsigned long long)start, csum,
 869                private);
 870         memset(kaddr + offset, 1, end - start + 1);
 871         flush_dcache_page(page);
 872         kunmap_atomic(kaddr, KM_IRQ0);
 873         local_irq_restore(flags);
 874         if (private == 0)
 875                 return 0;
 876         return -EIO;
 877 }
 878
 879 /*
 880  * This creates an orphan entry for the given inode in case something goes
 881  * wrong in the middle of an unlink/truncate.
 882  */
 883 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 884 {
 885         struct btrfs_root *root = BTRFS_I(inode)->root;
 886         int ret = 0;
 887
 888         spin_lock(&root->list_lock);
 889
 890         /* already on the orphan list, we're good */
 891         if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
 892                 spin_unlock(&root->list_lock);
 893                 return 0;
 894         }
 895
 896         list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
 897
 898         spin_unlock(&root->list_lock);
 899
 900         /*
 901          * insert an orphan item to track this unlinked/truncated file
 902          */
 903         ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
 904
 905         return ret;
 906 }
 907
 908 /*
 909  * We have done the truncate/delete so we can go ahead and remove the orphan
 910  * item for this particular inode.
 911  */
 912 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
 913 {
 914         struct btrfs_root *root = BTRFS_I(inode)->root;
 915         int ret = 0;
 916
 917         spin_lock(&root->list_lock);
 918
 919         if (list_empty(&BTRFS_I(inode)->i_orphan)) {
 920                 spin_unlock(&root->list_lock);
 921                 return 0;
 922         }
 923
 924         list_del_init(&BTRFS_I(inode)->i_orphan);
 925         if (!trans) {
 926                 spin_unlock(&root->list_lock);
 927                 return 0;
 928         }
 929
 930         spin_unlock(&root->list_lock);
 931
 932         ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
 933
 934         return ret;
 935 }
 936
 937 /*
 938  * this cleans up any orphans that may be left on the list from the last use
 939  * of this root.
 940  */
 941 void btrfs_orphan_cleanup(struct btrfs_root *root)
 942 {
 943         struct btrfs_path *path;
 944         struct extent_buffer *leaf;
 945         struct btrfs_item *item;
 946         struct btrfs_key key, found_key;
 947         struct btrfs_trans_handle *trans;
 948         struct inode *inode;
 949         int ret = 0, nr_unlink = 0, nr_truncate = 0;
 950
 951         /* don't do orphan cleanup if the fs is readonly. */
 952         if (root->fs_info->sb->s_flags & MS_RDONLY)
 953                 return;
 954
 955         path = btrfs_alloc_path();
 956         if (!path)
 957                 return;
 958         path->reada = -1;
 959
 960         key.objectid = BTRFS_ORPHAN_OBJECTID;
 961         btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
 962         key.offset = (u64)-1;
 963
 964
 965         while (1) {
 966                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 967                 if (ret < 0) {
 968                         printk(KERN_ERR "Error searching slot for orphan: %d"
 969                                "\n", ret);
 970                         break;
 971                 }
 972
 973                 /*
 974                  * if ret == 0 means we found what we were searching for, which
 975                  * is weird, but possible, so only screw with path if we didnt
 976                  * find the key and see if we have stuff that matches
 977                  */
 978                 if (ret > 0) {
 979                         if (path->slots[0] == 0)
 980                                 break;
 981                         path->slots[0]--;
 982                 }
 983
 984                 /* pull out the item */
 985                 leaf = path->nodes[0];
 986                 item = btrfs_item_nr(leaf, path->slots[0]);
 987                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 988
 989                 /* make sure the item matches what we want */
 990                 if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
 991                         break;
 992                 if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY)
 993                         break;
 994
 995                 /* release the path since we're done with it */
 996                 btrfs_release_path(root, path);
 997
 998                 /*
 999                  * this is where we are basically btrfs_lookup, without the
1000                  * crossing root thing.  we store the inode number in the
1001                  * offset of the orphan item.
1002                  */
1003                 inode = btrfs_iget_locked(root->fs_info->sb,
1004                                           found_key.offset, root);
1005                 if (!inode)
1006                         break;
1007
1008                 if (inode->i_state & I_NEW) {
1009                         BTRFS_I(inode)->root = root;
1010
1011                         /* have to set the location manually */
1012                         BTRFS_I(inode)->location.objectid = inode->i_ino;
1013                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
1014                         BTRFS_I(inode)->location.offset = 0;
1015
1016                         btrfs_read_locked_inode(inode);
1017                         unlock_new_inode(inode);
1018                 }
1019
1020                 /*
1021                  * add this inode to the orphan list so btrfs_orphan_del does
1022                  * the proper thing when we hit it
1023                  */
1024                 spin_lock(&root->list_lock);
1025                 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
1026                 spin_unlock(&root->list_lock);
1027
1028                 /*
1029                  * if this is a bad inode, means we actually succeeded in
1030                  * removing the inode, but not the orphan record, which means
1031                  * we need to manually delete the orphan since iput will just
1032                  * do a destroy_inode
1033                  */
1034                 if (is_bad_inode(inode)) {
1035                         trans = btrfs_start_transaction(root, 1);
1036                         btrfs_orphan_del(trans, inode);
1037                         btrfs_end_transaction(trans, root);
1038                         iput(inode);
1039                         continue;
1040                 }
1041
1042                 /* if we have links, this was a truncate, lets do that */
1043                 if (inode->i_nlink) {
1044                         nr_truncate++;
1045                         btrfs_truncate(inode);
1046                 } else {
1047                         nr_unlink++;
1048                 }
1049
1050                 /* this will do delete_inode and everything for us */
1051                 iput(inode);
1052         }
1053
1054         if (nr_unlink)
1055                 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
1056         if (nr_truncate)
1057                 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
1058
1059         btrfs_free_path(path);
1060 }
1061
1062 /*
1063  * read an inode from the btree into the in-memory inode
1064  */
1065 void btrfs_read_locked_inode(struct inode *inode)
1066 {
1067         struct btrfs_path *path;
1068         struct extent_buffer *leaf;
1069         struct btrfs_inode_item *inode_item;
1070         struct btrfs_timespec *tspec;
1071         struct btrfs_root *root = BTRFS_I(inode)->root;
1072         struct btrfs_key location;
1073         u64 alloc_group_block;
1074         u32 rdev;
1075         int ret;
1076
1077         path = btrfs_alloc_path();
1078         BUG_ON(!path);
1079         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
1080
1081         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
1082         if (ret)
1083                 goto make_bad;
1084
1085         leaf = path->nodes[0];
1086         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1087                                     struct btrfs_inode_item);
1088
1089         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
1090         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
1091         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
1092         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
1093         btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
1094
1095         tspec = btrfs_inode_atime(inode_item);
1096         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
1097         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
1098
1099         tspec = btrfs_inode_mtime(inode_item);
1100         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
1101         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
1102
1103         tspec = btrfs_inode_ctime(inode_item);
1104         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
1105         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
1106
1107         inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
1108         BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
1109         inode->i_generation = BTRFS_I(inode)->generation;
1110         inode->i_rdev = 0;
1111         rdev = btrfs_inode_rdev(leaf, inode_item);
1112
1113         BTRFS_I(inode)->index_cnt = (u64)-1;
1114
1115         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
1116         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
1117                                                        alloc_group_block);
1118         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
1119         if (!BTRFS_I(inode)->block_group) {
1120                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
1121                                                  NULL, 0,
1122                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
1123         }
1124         btrfs_free_path(path);
1125         inode_item = NULL;
1126
1127         switch (inode->i_mode & S_IFMT) {
1128         case S_IFREG:
1129                 inode->i_mapping->a_ops = &btrfs_aops;
1130                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1131                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1132                 inode->i_fop = &btrfs_file_operations;
1133                 inode->i_op = &btrfs_file_inode_operations;
1134                 break;
1135         case S_IFDIR:
1136                 inode->i_fop = &btrfs_dir_file_operations;
1137                 if (root == root->fs_info->tree_root)
1138                         inode->i_op = &btrfs_dir_ro_inode_operations;
1139                 else
1140                         inode->i_op = &btrfs_dir_inode_operations;
1141                 break;
1142         case S_IFLNK:
1143                 inode->i_op = &btrfs_symlink_inode_operations;
1144                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
1145                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1146                 break;
1147         default:
1148                 init_special_inode(inode, inode->i_mode, rdev);
1149                 break;
1150         }
1151         return;
1152
1153 make_bad:
1154         btrfs_free_path(path);
1155         make_bad_inode(inode);
1156 }
1157
1158 /*
1159  * given a leaf and an inode, copy the inode fields into the leaf
1160  */
1161 static void fill_inode_item(struct btrfs_trans_handle *trans,
1162                             struct extent_buffer *leaf,
1163                             struct btrfs_inode_item *item,
1164                             struct inode *inode)
1165 {
1166         btrfs_set_inode_uid(leaf, item, inode->i_uid);
1167         btrfs_set_inode_gid(leaf, item, inode->i_gid);
1168         btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
1169         btrfs_set_inode_mode(leaf, item, inode->i_mode);
1170         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
1171
1172         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
1173                                inode->i_atime.tv_sec);
1174         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
1175                                 inode->i_atime.tv_nsec);
1176
1177         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
1178                                inode->i_mtime.tv_sec);
1179         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
1180                                 inode->i_mtime.tv_nsec);
1181
1182         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
1183                                inode->i_ctime.tv_sec);
1184         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
1185                                 inode->i_ctime.tv_nsec);
1186
1187         btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
1188         btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
1189         btrfs_set_inode_transid(leaf, item, trans->transid);
1190         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
1191         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
1192         btrfs_set_inode_block_group(leaf, item,
1193                                     BTRFS_I(inode)->block_group->key.objectid);
1194 }
1195
1196 /*
1197  * copy everything in the in-memory inode into the btree.
1198  */
1199 int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
1200                               struct btrfs_root *root,
1201                               struct inode *inode)
1202 {
1203         struct btrfs_inode_item *inode_item;
1204         struct btrfs_path *path;
1205         struct extent_buffer *leaf;
1206         int ret;
1207
1208         path = btrfs_alloc_path();
1209         BUG_ON(!path);
1210         ret = btrfs_lookup_inode(trans, root, path,
1211                                  &BTRFS_I(inode)->location, 1);
1212         if (ret) {
1213                 if (ret > 0)
1214                         ret = -ENOENT;
1215                 goto failed;
1216         }
1217
1218         leaf = path->nodes[0];
1219         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1220                                   struct btrfs_inode_item);
1221
1222         fill_inode_item(trans, leaf, inode_item, inode);
1223         btrfs_mark_buffer_dirty(leaf);
1224         btrfs_set_inode_last_trans(trans, inode);
1225         ret = 0;
1226 failed:
1227         btrfs_free_path(path);
1228         return ret;
1229 }
1230
1231
1232 /*
1233  * unlink helper that gets used here in inode.c and in the tree logging
1234  * recovery code.  It remove a link in a directory with a given name, and
1235  * also drops the back refs in the inode to the directory
1236  */
1237 int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
1238                        struct btrfs_root *root,
1239                        struct inode *dir, struct inode *inode,
1240                        const char *name, int name_len)
1241 {
1242         struct btrfs_path *path;
1243         int ret = 0;
1244         struct extent_buffer *leaf;
1245         struct btrfs_dir_item *di;
1246         struct btrfs_key key;
1247         u64 index;
1248
1249         path = btrfs_alloc_path();
1250         if (!path) {
1251                 ret = -ENOMEM;
1252                 goto err;
1253         }
1254
1255         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
1256                                     name, name_len, -1);
1257         if (IS_ERR(di)) {
1258                 ret = PTR_ERR(di);
1259                 goto err;
1260         }
1261         if (!di) {
1262                 ret = -ENOENT;
1263                 goto err;
1264         }
1265         leaf = path->nodes[0];
1266         btrfs_dir_item_key_to_cpu(leaf, di, &key);
1267         ret = btrfs_delete_one_dir_name(trans, root, path, di);
1268         if (ret)
1269                 goto err;
1270         btrfs_release_path(root, path);
1271
1272         ret = btrfs_del_inode_ref(trans, root, name, name_len,
1273                                   inode->i_ino,
1274                                   dir->i_ino, &index);
1275         if (ret) {
1276                 printk("failed to delete reference to %.*s, "
1277                        "inode %lu parent %lu\n", name_len, name,
1278                        inode->i_ino, dir->i_ino);
1279                 goto err;
1280         }
1281
1282         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
1283                                          index, name, name_len, -1);
1284         if (IS_ERR(di)) {
1285                 ret = PTR_ERR(di);
1286                 goto err;
1287         }
1288         if (!di) {
1289                 ret = -ENOENT;
1290                 goto err;
1291         }
1292         ret = btrfs_delete_one_dir_name(trans, root, path, di);
1293         btrfs_release_path(root, path);
1294
1295         ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
1296                                          inode, dir->i_ino);
1297         BUG_ON(ret != 0 && ret != -ENOENT);
1298         if (ret != -ENOENT)
1299                 BTRFS_I(dir)->log_dirty_trans = trans->transid;
1300
1301         ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
1302                                            dir, index);
1303         BUG_ON(ret);
1304 err:
1305         btrfs_free_path(path);
1306         if (ret)
1307                 goto out;
1308
1309         btrfs_i_size_write(dir, dir->i_size - name_len * 2);
1310         inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1311         btrfs_update_inode(trans, root, dir);
1312         btrfs_drop_nlink(inode);
1313         ret = btrfs_update_inode(trans, root, inode);
1314         dir->i_sb->s_dirt = 1;
1315 out:
1316         return ret;
1317 }
1318
1319 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
1320 {
1321         struct btrfs_root *root;
1322         struct btrfs_trans_handle *trans;
1323         struct inode *inode = dentry->d_inode;
1324         int ret;
1325         unsigned long nr = 0;
1326
1327         root = BTRFS_I(dir)->root;
1328
1329         ret = btrfs_check_free_space(root, 1, 1);
1330         if (ret)
1331                 goto fail;
1332
1333         trans = btrfs_start_transaction(root, 1);
1334
1335         btrfs_set_trans_block_group(trans, dir);
1336         ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
1337                                  dentry->d_name.name, dentry->d_name.len);
1338
1339         if (inode->i_nlink == 0)
1340                 ret = btrfs_orphan_add(trans, inode);
1341
1342         nr = trans->blocks_used;
1343
1344         btrfs_end_transaction_throttle(trans, root);
1345 fail:
1346         btrfs_btree_balance_dirty(root, nr);
1347         return ret;
1348 }
1349
1350 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
1351 {
1352         struct inode *inode = dentry->d_inode;
1353         int err = 0;
1354         int ret;
1355         struct btrfs_root *root = BTRFS_I(dir)->root;
1356         struct btrfs_trans_handle *trans;
1357         unsigned long nr = 0;
1358
1359         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
1360                 return -ENOTEMPTY;
1361         }
1362
1363         ret = btrfs_check_free_space(root, 1, 1);
1364         if (ret)
1365                 goto fail;
1366
1367         trans = btrfs_start_transaction(root, 1);
1368         btrfs_set_trans_block_group(trans, dir);
1369
1370         err = btrfs_orphan_add(trans, inode);
1371         if (err)
1372                 goto fail_trans;
1373
1374         /* now the directory is empty */
1375         err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
1376                                  dentry->d_name.name, dentry->d_name.len);
1377         if (!err) {
1378                 btrfs_i_size_write(inode, 0);
1379         }
1380
1381 fail_trans:
1382         nr = trans->blocks_used;
1383         ret = btrfs_end_transaction_throttle(trans, root);
1384 fail:
1385         btrfs_btree_balance_dirty(root, nr);
1386
1387         if (ret && !err)
1388                 err = ret;
1389         return err;
1390 }
1391
1392 /*
1393  * when truncating bytes in a file, it is possible to avoid reading
1394  * the leaves that contain only checksum items.  This can be the
1395  * majority of the IO required to delete a large file, but it must
1396  * be done carefully.
1397  *
1398  * The keys in the level just above the leaves are checked to make sure
1399  * the lowest key in a given leaf is a csum key, and starts at an offset
1400  * after the new  size.
1401  *
1402  * Then the key for the next leaf is checked to make sure it also has
1403  * a checksum item for the same file.  If it does, we know our target leaf
1404  * contains only checksum items, and it can be safely freed without reading
1405  * it.
1406  *
1407  * This is just an optimization targeted at large files.  It may do
1408  * nothing.  It will return 0 unless things went badly.
1409  */
1410 static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
1411                                      struct btrfs_root *root,
1412                                      struct btrfs_path *path,
1413                                      struct inode *inode, u64 new_size)
1414 {
1415         struct btrfs_key key;
1416         int ret;
1417         int nritems;
1418         struct btrfs_key found_key;
1419         struct btrfs_key other_key;
1420         struct btrfs_leaf_ref *ref;
1421         u64 leaf_gen;
1422         u64 leaf_start;
1423
1424         path->lowest_level = 1;
1425         key.objectid = inode->i_ino;
1426         key.type = BTRFS_CSUM_ITEM_KEY;
1427         key.offset = new_size;
1428 again:
1429         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1430         if (ret < 0)
1431                 goto out;
1432
1433         if (path->nodes[1] == NULL) {
1434                 ret = 0;
1435                 goto out;
1436         }
1437         ret = 0;
1438         btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
1439         nritems = btrfs_header_nritems(path->nodes[1]);
1440
1441         if (!nritems)
1442                 goto out;
1443
1444         if (path->slots[1] >= nritems)
1445                 goto next_node;
1446
1447         /* did we find a key greater than anything we want to delete? */
1448         if (found_key.objectid > inode->i_ino ||
1449            (found_key.objectid == inode->i_ino && found_key.type > key.type))
1450                 goto out;
1451
1452         /* we check the next key in the node to make sure the leave contains
1453          * only checksum items.  This comparison doesn't work if our
1454          * leaf is the last one in the node
1455          */
1456         if (path->slots[1] + 1 >= nritems) {
1457 next_node:
1458                 /* search forward from the last key in the node, this
1459                  * will bring us into the next node in the tree
1460                  */
1461                 btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
1462
1463                 /* unlikely, but we inc below, so check to be safe */
1464                 if (found_key.offset == (u64)-1)
1465                         goto out;
1466
1467                 /* search_forward needs a path with locks held, do the
1468                  * search again for the original key.  It is possible
1469                  * this will race with a balance and return a path that
1470                  * we could modify, but this drop is just an optimization
1471                  * and is allowed to miss some leaves.
1472                  */
1473                 btrfs_release_path(root, path);
1474                 found_key.offset++;
1475
1476                 /* setup a max key for search_forward */
1477                 other_key.offset = (u64)-1;
1478                 other_key.type = key.type;
1479                 other_key.objectid = key.objectid;
1480
1481                 path->keep_locks = 1;
1482                 ret = btrfs_search_forward(root, &found_key, &other_key,
1483                                            path, 0, 0);
1484                 path->keep_locks = 0;
1485                 if (ret || found_key.objectid != key.objectid ||
1486                     found_key.type != key.type) {
1487                         ret = 0;
1488                         goto out;
1489                 }
1490
1491                 key.offset = found_key.offset;
1492                 btrfs_release_path(root, path);
1493                 cond_resched();
1494                 goto again;
1495         }
1496
1497         /* we know there's one more slot after us in the tree,
1498          * read that key so we can verify it is also a checksum item
1499          */
1500         btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
1501
1502         if (found_key.objectid < inode->i_ino)
1503                 goto next_key;
1504
1505         if (found_key.type != key.type || found_key.offset < new_size)
1506                 goto next_key;
1507
1508         /*
1509          * if the key for the next leaf isn't a csum key from this objectid,
1510          * we can't be sure there aren't good items inside this leaf.
1511          * Bail out
1512          */
1513         if (other_key.objectid != inode->i_ino || other_key.type != key.type)
1514                 goto out;
1515
1516         leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]);
1517         leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]);
1518         /*
1519          * it is safe to delete this leaf, it contains only
1520          * csum items from this inode at an offset >= new_size
1521          */
1522         ret = btrfs_del_leaf(trans, root, path, leaf_start);
1523         BUG_ON(ret);
1524
1525         if (root->ref_cows && leaf_gen < trans->transid) {
1526                 ref = btrfs_alloc_leaf_ref(root, 0);
1527                 if (ref) {
1528                         ref->root_gen = root->root_key.offset;
1529                         ref->bytenr = leaf_start;
1530                         ref->owner = 0;
1531                         ref->generation = leaf_gen;
1532                         ref->nritems = 0;
1533
1534                         ret = btrfs_add_leaf_ref(root, ref, 0);
1535                         WARN_ON(ret);
1536                         btrfs_free_leaf_ref(root, ref);
1537                 } else {
1538                         WARN_ON(1);
1539                 }
1540         }
1541 next_key:
1542         btrfs_release_path(root, path);
1543
1544         if (other_key.objectid == inode->i_ino &&
1545             other_key.type == key.type && other_key.offset > key.offset) {
1546                 key.offset = other_key.offset;
1547                 cond_resched();
1548                 goto again;
1549         }
1550         ret = 0;
1551 out:
1552         /* fixup any changes we've made to the path */
1553         path->lowest_level = 0;
1554         path->keep_locks = 0;
1555         btrfs_release_path(root, path);
1556         return ret;
1557 }
1558
1559 /*
1560  * this can truncate away extent items, csum items and directory items.
1561  * It starts at a high offset and removes keys until it can't find
1562  * any higher than new_size
1563  *
1564  * csum items that cross the new i_size are truncated to the new size
1565  * as well.
1566  *
1567  * min_type is the minimum key type to truncate down to.  If set to 0, this
1568  * will kill all the items on this inode, including the INODE_ITEM_KEY.
1569  */
1570 noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
1571                                         struct btrfs_root *root,
1572                                         struct inode *inode,
1573                                         u64 new_size, u32 min_type)
1574 {
1575         int ret;
1576         struct btrfs_path *path;
1577         struct btrfs_key key;
1578         struct btrfs_key found_key;
1579         u32 found_type;
1580         struct extent_buffer *leaf;
1581         struct btrfs_file_extent_item *fi;
1582         u64 extent_start = 0;
1583         u64 extent_num_bytes = 0;
1584         u64 item_end = 0;
1585         u64 root_gen = 0;
1586         u64 root_owner = 0;
1587         int found_extent;
1588         int del_item;
1589         int pending_del_nr = 0;
1590         int pending_del_slot = 0;
1591         int extent_type = -1;
1592         u64 mask = root->sectorsize - 1;
1593
1594         if (root->ref_cows)
1595                 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
1596         path = btrfs_alloc_path();
1597         path->reada = -1;
1598         BUG_ON(!path);
1599
1600         /* FIXME, add redo link to tree so we don't leak on crash */
1601         key.objectid = inode->i_ino;
1602         key.offset = (u64)-1;
1603         key.type = (u8)-1;
1604
1605         btrfs_init_path(path);
1606
1607         ret = drop_csum_leaves(trans, root, path, inode, new_size);
1608         BUG_ON(ret);
1609
1610 search_again:
1611         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1612         if (ret < 0) {
1613                 goto error;
1614         }
1615         if (ret > 0) {
1616                 /* there are no items in the tree for us to truncate, we're
1617                  * done
1618                  */
1619                 if (path->slots[0] == 0) {
1620                         ret = 0;
1621                         goto error;
1622                 }
1623                 path->slots[0]--;
1624         }
1625
1626         while(1) {
1627                 fi = NULL;
1628                 leaf = path->nodes[0];
1629                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1630                 found_type = btrfs_key_type(&found_key);
1631
1632                 if (found_key.objectid != inode->i_ino)
1633                         break;
1634
1635                 if (found_type < min_type)
1636                         break;
1637
1638                 item_end = found_key.offset;
1639                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
1640                         fi = btrfs_item_ptr(leaf, path->slots[0],
1641                                             struct btrfs_file_extent_item);
1642                         extent_type = btrfs_file_extent_type(leaf, fi);
1643                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
1644                                 item_end +=
1645                                     btrfs_file_extent_num_bytes(leaf, fi);
1646                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1647                                 struct btrfs_item *item = btrfs_item_nr(leaf,
1648                                                                 path->slots[0]);
1649                                 item_end += btrfs_file_extent_inline_len(leaf,
1650                                                                          item);
1651                         }
1652                         item_end--;
1653                 }
1654                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
1655                         ret = btrfs_csum_truncate(trans, root, path,
1656                                                   new_size);
1657                         BUG_ON(ret);
1658                 }
1659                 if (item_end < new_size) {
1660                         if (found_type == BTRFS_DIR_ITEM_KEY) {
1661                                 found_type = BTRFS_INODE_ITEM_KEY;
1662                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
1663                                 found_type = BTRFS_CSUM_ITEM_KEY;
1664                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
1665                                 found_type = BTRFS_XATTR_ITEM_KEY;
1666                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
1667                                 found_type = BTRFS_INODE_REF_KEY;
1668                         } else if (found_type) {
1669                                 found_type--;
1670                         } else {
1671                                 break;
1672                         }
1673                         btrfs_set_key_type(&key, found_type);
1674                         goto next;
1675                 }
1676                 if (found_key.offset >= new_size)
1677                         del_item = 1;
1678                 else
1679                         del_item = 0;
1680                 found_extent = 0;
1681
1682                 /* FIXME, shrink the extent if the ref count is only 1 */
1683                 if (found_type != BTRFS_EXTENT_DATA_KEY)
1684                         goto delete;
1685
1686                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
1687                         u64 num_dec;
1688                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
1689                         if (!del_item) {
1690                                 u64 orig_num_bytes =
1691                                         btrfs_file_extent_num_bytes(leaf, fi);
1692                                 extent_num_bytes = new_size -
1693                                         found_key.offset + root->sectorsize - 1;
1694                                 extent_num_bytes = extent_num_bytes &
1695                                         ~((u64)root->sectorsize - 1);
1696                                 btrfs_set_file_extent_num_bytes(leaf, fi,
1697                                                          extent_num_bytes);
1698                                 num_dec = (orig_num_bytes -
1699                                            extent_num_bytes);
1700                                 if (root->ref_cows && extent_start != 0)
1701                                         inode_sub_bytes(inode, num_dec);
1702                                 btrfs_mark_buffer_dirty(leaf);
1703                         } else {
1704                                 extent_num_bytes =
1705                                         btrfs_file_extent_disk_num_bytes(leaf,
1706                                                                          fi);
1707                                 /* FIXME blocksize != 4096 */
1708                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
1709                                 if (extent_start != 0) {
1710                                         found_extent = 1;
1711                                         if (root->ref_cows)
1712                                                 inode_sub_bytes(inode, num_dec);
1713                                 }
1714                                 root_gen = btrfs_header_generation(leaf);
1715                                 root_owner = btrfs_header_owner(leaf);
1716                         }
1717                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1718                         if (!del_item) {
1719                                 u32 size = new_size - found_key.offset;
1720
1721                                 if (root->ref_cows) {
1722                                         inode_sub_bytes(inode, item_end + 1 -
1723                                                         new_size);
1724                                 }
1725                                 size =
1726                                     btrfs_file_extent_calc_inline_size(size);
1727                                 ret = btrfs_truncate_item(trans, root, path,
1728                                                           size, 1);
1729                                 BUG_ON(ret);
1730                         } else if (root->ref_cows) {
1731                                 inode_sub_bytes(inode, item_end + 1 -
1732                                                 found_key.offset);
1733                         }
1734                 }
1735 delete:
1736                 if (del_item) {
1737                         if (!pending_del_nr) {
1738                                 /* no pending yet, add ourselves */
1739                                 pending_del_slot = path->slots[0];
1740                                 pending_del_nr = 1;
1741                         } else if (pending_del_nr &&
1742                                    path->slots[0] + 1 == pending_del_slot) {
1743                                 /* hop on the pending chunk */
1744                                 pending_del_nr++;
1745                                 pending_del_slot = path->slots[0];
1746                         } else {
1747                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
1748                         }
1749                 } else {
1750                         break;
1751                 }
1752                 if (found_extent) {
1753                         ret = btrfs_free_extent(trans, root, extent_start,
1754                                                 extent_num_bytes,
1755                                                 leaf->start, root_owner,
1756                                                 root_gen, inode->i_ino, 0);
1757                         BUG_ON(ret);
1758                 }
1759 next:
1760                 if (path->slots[0] == 0) {
1761                         if (pending_del_nr)
1762                                 goto del_pending;
1763                         btrfs_release_path(root, path);
1764                         goto search_again;
1765                 }
1766
1767                 path->slots[0]--;
1768                 if (pending_del_nr &&
1769                     path->slots[0] + 1 != pending_del_slot) {
1770                         struct btrfs_key debug;
1771 del_pending:
1772                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
1773                                               pending_del_slot);
1774                         ret = btrfs_del_items(trans, root, path,
1775                                               pending_del_slot,
1776                                               pending_del_nr);
1777                         BUG_ON(ret);
1778                         pending_del_nr = 0;
1779                         btrfs_release_path(root, path);
1780                         goto search_again;
1781                 }
1782         }
1783         ret = 0;
1784 error:
1785         if (pending_del_nr) {
1786                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
1787                                       pending_del_nr);
1788         }
1789         btrfs_free_path(path);
1790         inode->i_sb->s_dirt = 1;
1791         return ret;
1792 }
1793
1794 /*
1795  * taken from block_truncate_page, but does cow as it zeros out
1796  * any bytes left in the last page in the file.
1797  */
1798 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1799 {
1800         struct inode *inode = mapping->host;
1801         struct btrfs_root *root = BTRFS_I(inode)->root;
1802         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1803         struct btrfs_ordered_extent *ordered;
1804         char *kaddr;
1805         u32 blocksize = root->sectorsize;
1806         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1807         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1808         struct page *page;
1809         int ret = 0;
1810         u64 page_start;
1811         u64 page_end;
1812
1813         if ((offset & (blocksize - 1)) == 0)
1814                 goto out;
1815
1816         ret = -ENOMEM;
1817 again:
1818         page = grab_cache_page(mapping, index);
1819         if (!page)
1820                 goto out;
1821
1822         page_start = page_offset(page);
1823         page_end = page_start + PAGE_CACHE_SIZE - 1;
1824
1825         if (!PageUptodate(page)) {
1826                 ret = btrfs_readpage(NULL, page);
1827                 lock_page(page);
1828                 if (page->mapping != mapping) {
1829                         unlock_page(page);
1830                         page_cache_release(page);
1831                         goto again;
1832                 }
1833                 if (!PageUptodate(page)) {
1834                         ret = -EIO;
1835                         goto out_unlock;
1836                 }
1837         }
1838         wait_on_page_writeback(page);
1839
1840         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1841         set_page_extent_mapped(page);
1842
1843         ordered = btrfs_lookup_ordered_extent(inode, page_start);
1844         if (ordered) {
1845                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1846                 unlock_page(page);
1847                 page_cache_release(page);
1848                 btrfs_start_ordered_extent(inode, ordered, 1);
1849                 btrfs_put_ordered_extent(ordered);
1850                 goto again;
1851         }
1852
1853         btrfs_set_extent_delalloc(inode, page_start, page_end);
1854         ret = 0;
1855         if (offset != PAGE_CACHE_SIZE) {
1856                 kaddr = kmap(page);
1857                 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1858                 flush_dcache_page(page);
1859                 kunmap(page);
1860         }
1861         ClearPageChecked(page);
1862         set_page_dirty(page);
1863         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1864
1865 out_unlock:
1866         unlock_page(page);
1867         page_cache_release(page);
1868 out:
1869         return ret;
1870 }
1871
1872 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1873 {
1874         struct inode *inode = dentry->d_inode;
1875         int err;
1876
1877         err = inode_change_ok(inode, attr);
1878         if (err)
1879                 return err;
1880
1881         if (S_ISREG(inode->i_mode) &&
1882             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1883                 struct btrfs_trans_handle *trans;
1884                 struct btrfs_root *root = BTRFS_I(inode)->root;
1885                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1886
1887                 u64 mask = root->sectorsize - 1;
1888                 u64 hole_start = (inode->i_size + mask) & ~mask;
1889                 u64 block_end = (attr->ia_size + mask) & ~mask;
1890                 u64 hole_size;
1891                 u64 alloc_hint = 0;
1892
1893                 if (attr->ia_size <= hole_start)
1894                         goto out;
1895
1896                 err = btrfs_check_free_space(root, 1, 0);
1897                 if (err)
1898                         goto fail;
1899
1900                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1901
1902                 hole_size = block_end - hole_start;
1903                 while(1) {
1904                         struct btrfs_ordered_extent *ordered;
1905                         btrfs_wait_ordered_range(inode, hole_start, hole_size);
1906
1907                         lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1908                         ordered = btrfs_lookup_ordered_extent(inode, hole_start);
1909                         if (ordered) {
1910                                 unlock_extent(io_tree, hole_start,
1911                                               block_end - 1, GFP_NOFS);
1912                                 btrfs_put_ordered_extent(ordered);
1913                         } else {
1914                                 break;
1915                         }
1916                 }
1917
1918                 trans = btrfs_start_transaction(root, 1);
1919                 btrfs_set_trans_block_group(trans, inode);
1920                 mutex_lock(&BTRFS_I(inode)->extent_mutex);
1921                 err = btrfs_drop_extents(trans, root, inode,
1922                                          hole_start, block_end, hole_start,
1923                                          &alloc_hint);
1924
1925                 if (alloc_hint != EXTENT_MAP_INLINE) {
1926                         err = btrfs_insert_file_extent(trans, root,
1927                                                        inode->i_ino,
1928                                                        hole_start, 0, 0,
1929                                                        hole_size, 0);
1930                         btrfs_drop_extent_cache(inode, hole_start,
1931                                                 (u64)-1, 0);
1932                         btrfs_check_file(root, inode);
1933                 }
1934                 mutex_unlock(&BTRFS_I(inode)->extent_mutex);
1935                 btrfs_end_transaction(trans, root);
1936                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1937                 if (err)
1938                         return err;
1939         }
1940 out:
1941         err = inode_setattr(inode, attr);
1942
1943         if (!err && ((attr->ia_valid & ATTR_MODE)))
1944                 err = btrfs_acl_chmod(inode);
1945 fail:
1946         return err;
1947 }
1948
1949 void btrfs_delete_inode(struct inode *inode)
1950 {
1951         struct btrfs_trans_handle *trans;
1952         struct btrfs_root *root = BTRFS_I(inode)->root;
1953         unsigned long nr;
1954         int ret;
1955
1956         truncate_inode_pages(&inode->i_data, 0);
1957         if (is_bad_inode(inode)) {
1958                 btrfs_orphan_del(NULL, inode);
1959                 goto no_delete;
1960         }
1961         btrfs_wait_ordered_range(inode, 0, (u64)-1);
1962
1963         btrfs_i_size_write(inode, 0);
1964         trans = btrfs_start_transaction(root, 1);
1965
1966         btrfs_set_trans_block_group(trans, inode);
1967         ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0);
1968         if (ret) {
1969                 btrfs_orphan_del(NULL, inode);
1970                 goto no_delete_lock;
1971         }
1972
1973         btrfs_orphan_del(trans, inode);
1974
1975         nr = trans->blocks_used;
1976         clear_inode(inode);
1977
1978         btrfs_end_transaction(trans, root);
1979         btrfs_btree_balance_dirty(root, nr);
1980         return;
1981
1982 no_delete_lock:
1983         nr = trans->blocks_used;
1984         btrfs_end_transaction(trans, root);
1985         btrfs_btree_balance_dirty(root, nr);
1986 no_delete:
1987         clear_inode(inode);
1988 }
1989
1990 /*
1991  * this returns the key found in the dir entry in the location pointer.
1992  * If no dir entries were found, location->objectid is 0.
1993  */
1994 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1995                                struct btrfs_key *location)
1996 {
1997         const char *name = dentry->d_name.name;
1998         int namelen = dentry->d_name.len;
1999         struct btrfs_dir_item *di;
2000         struct btrfs_path *path;
2001         struct btrfs_root *root = BTRFS_I(dir)->root;
2002         int ret = 0;
2003
2004         path = btrfs_alloc_path();
2005         BUG_ON(!path);
2006
2007         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
2008                                     namelen, 0);
2009         if (IS_ERR(di))
2010                 ret = PTR_ERR(di);
2011         if (!di || IS_ERR(di)) {
2012                 goto out_err;
2013         }
2014         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
2015 out:
2016         btrfs_free_path(path);
2017         return ret;
2018 out_err:
2019         location->objectid = 0;
2020         goto out;
2021 }
2022
2023 /*
2024  * when we hit a tree root in a directory, the btrfs part of the inode
2025  * needs to be changed to reflect the root directory of the tree root.  This
2026  * is kind of like crossing a mount point.
2027  */
2028 static int fixup_tree_root_location(struct btrfs_root *root,
2029                              struct btrfs_key *location,
2030                              struct btrfs_root **sub_root,
2031                              struct dentry *dentry)
2032 {
2033         struct btrfs_root_item *ri;
2034
2035         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
2036                 return 0;
2037         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
2038                 return 0;
2039
2040         *sub_root = btrfs_read_fs_root(root->fs_info, location,
2041                                         dentry->d_name.name,
2042                                         dentry->d_name.len);
2043         if (IS_ERR(*sub_root))
2044                 return PTR_ERR(*sub_root);
2045
2046         ri = &(*sub_root)->root_item;
2047         location->objectid = btrfs_root_dirid(ri);
2048         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
2049         location->offset = 0;
2050
2051         return 0;
2052 }
2053
2054 static noinline void init_btrfs_i(struct inode *inode)
2055 {
2056         struct btrfs_inode *bi = BTRFS_I(inode);
2057
2058         bi->i_acl = NULL;
2059         bi->i_default_acl = NULL;
2060
2061         bi->generation = 0;
2062         bi->last_trans = 0;
2063         bi->logged_trans = 0;
2064         bi->delalloc_bytes = 0;
2065         bi->disk_i_size = 0;
2066         bi->flags = 0;
2067         bi->index_cnt = (u64)-1;
2068         bi->log_dirty_trans = 0;
2069         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
2070         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
2071                              inode->i_mapping, GFP_NOFS);
2072         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
2073                              inode->i_mapping, GFP_NOFS);
2074         INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
2075         btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
2076         mutex_init(&BTRFS_I(inode)->csum_mutex);
2077         mutex_init(&BTRFS_I(inode)->extent_mutex);
2078         mutex_init(&BTRFS_I(inode)->log_mutex);
2079 }
2080
2081 static int btrfs_init_locked_inode(struct inode *inode, void *p)
2082 {
2083         struct btrfs_iget_args *args = p;
2084         inode->i_ino = args->ino;
2085         init_btrfs_i(inode);
2086         BTRFS_I(inode)->root = args->root;
2087         return 0;
2088 }
2089
2090 static int btrfs_find_actor(struct inode *inode, void *opaque)
2091 {
2092         struct btrfs_iget_args *args = opaque;
2093         return (args->ino == inode->i_ino &&
2094                 args->root == BTRFS_I(inode)->root);
2095 }
2096
2097 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
2098                             struct btrfs_root *root, int wait)
2099 {
2100         struct inode *inode;
2101         struct btrfs_iget_args args;
2102         args.ino = objectid;
2103         args.root = root;
2104
2105         if (wait) {
2106                 inode = ilookup5(s, objectid, btrfs_find_actor,
2107                                  (void *)&args);
2108         } else {
2109                 inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
2110                                         (void *)&args);
2111         }
2112         return inode;
2113 }
2114
2115 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
2116                                 struct btrfs_root *root)
2117 {
2118         struct inode *inode;
2119         struct btrfs_iget_args args;
2120         args.ino = objectid;
2121         args.root = root;
2122
2123         inode = iget5_locked(s, objectid, btrfs_find_actor,
2124                              btrfs_init_locked_inode,
2125                              (void *)&args);
2126         return inode;
2127 }
2128
2129 /* Get an inode object given its location and corresponding root.
2130  * Returns in *is_new if the inode was read from disk
2131  */
2132 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2133                          struct btrfs_root *root, int *is_new)
2134 {
2135         struct inode *inode;
2136
2137         inode = btrfs_iget_locked(s, location->objectid, root);
2138         if (!inode)
2139                 return ERR_PTR(-EACCES);
2140
2141         if (inode->i_state & I_NEW) {
2142                 BTRFS_I(inode)->root = root;
2143                 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
2144                 btrfs_read_locked_inode(inode);
2145                 unlock_new_inode(inode);
2146                 if (is_new)
2147                         *is_new = 1;
2148         } else {
2149                 if (is_new)
2150                         *is_new = 0;
2151         }
2152
2153         return inode;
2154 }
2155
2156 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
2157                                    struct nameidata *nd)
2158 {
2159         struct inode * inode;
2160         struct btrfs_inode *bi = BTRFS_I(dir);
2161         struct btrfs_root *root = bi->root;
2162         struct btrfs_root *sub_root = root;
2163         struct btrfs_key location;
2164         int ret, new, do_orphan = 0;
2165
2166         if (dentry->d_name.len > BTRFS_NAME_LEN)
2167                 return ERR_PTR(-ENAMETOOLONG);
2168
2169         ret = btrfs_inode_by_name(dir, dentry, &location);
2170
2171         if (ret < 0)
2172                 return ERR_PTR(ret);
2173
2174         inode = NULL;
2175         if (location.objectid) {
2176                 ret = fixup_tree_root_location(root, &location, &sub_root,
2177                                                 dentry);
2178                 if (ret < 0)
2179                         return ERR_PTR(ret);
2180                 if (ret > 0)
2181                         return ERR_PTR(-ENOENT);
2182                 inode = btrfs_iget(dir->i_sb, &location, sub_root, &new);
2183                 if (IS_ERR(inode))
2184                         return ERR_CAST(inode);
2185
2186                 /* the inode and parent dir are two different roots */
2187                 if (new && root != sub_root) {
2188                         igrab(inode);
2189                         sub_root->inode = inode;
2190                         do_orphan = 1;
2191                 }
2192         }
2193
2194         if (unlikely(do_orphan))
2195                 btrfs_orphan_cleanup(sub_root);
2196
2197         return d_splice_alias(inode, dentry);
2198 }
2199
2200 static unsigned char btrfs_filetype_table[] = {
2201         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
2202 };
2203
2204 static int btrfs_real_readdir(struct file *filp, void *dirent,
2205                               filldir_t filldir)
2206 {
2207         struct inode *inode = filp->f_dentry->d_inode;
2208         struct btrfs_root *root = BTRFS_I(inode)->root;
2209         struct btrfs_item *item;
2210         struct btrfs_dir_item *di;
2211         struct btrfs_key key;
2212         struct btrfs_key found_key;
2213         struct btrfs_path *path;
2214         int ret;
2215         u32 nritems;
2216         struct extent_buffer *leaf;
2217         int slot;
2218         int advance;
2219         unsigned char d_type;
2220         int over = 0;
2221         u32 di_cur;
2222         u32 di_total;
2223         u32 di_len;
2224         int key_type = BTRFS_DIR_INDEX_KEY;
2225         char tmp_name[32];
2226         char *name_ptr;
2227         int name_len;
2228
2229         /* FIXME, use a real flag for deciding about the key type */
2230         if (root->fs_info->tree_root == root)
2231                 key_type = BTRFS_DIR_ITEM_KEY;
2232
2233         /* special case for "." */
2234         if (filp->f_pos == 0) {
2235                 over = filldir(dirent, ".", 1,
2236                                1, inode->i_ino,
2237                                DT_DIR);
2238                 if (over)
2239                         return 0;
2240                 filp->f_pos = 1;
2241         }
2242         /* special case for .., just use the back ref */
2243         if (filp->f_pos == 1) {
2244                 u64 pino = parent_ino(filp->f_path.dentry);
2245                 over = filldir(dirent, "..", 2,
2246                                2, pino, DT_DIR);
2247                 if (over)
2248                         return 0;
2249                 filp->f_pos = 2;
2250         }
2251
2252         path = btrfs_alloc_path();
2253         path->reada = 2;
2254
2255         btrfs_set_key_type(&key, key_type);
2256         key.offset = filp->f_pos;
2257         key.objectid = inode->i_ino;
2258
2259         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2260         if (ret < 0)
2261                 goto err;
2262         advance = 0;
2263
2264         while (1) {
2265                 leaf = path->nodes[0];
2266                 nritems = btrfs_header_nritems(leaf);
2267                 slot = path->slots[0];
2268                 if (advance || slot >= nritems) {
2269                         if (slot >= nritems - 1) {
2270                                 ret = btrfs_next_leaf(root, path);
2271                                 if (ret)
2272                                         break;
2273                                 leaf = path->nodes[0];
2274                                 nritems = btrfs_header_nritems(leaf);
2275                                 slot = path->slots[0];
2276                         } else {
2277                                 slot++;
2278                                 path->slots[0]++;
2279                         }
2280                 }
2281                 advance = 1;
2282                 item = btrfs_item_nr(leaf, slot);
2283                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2284
2285                 if (found_key.objectid != key.objectid)
2286                         break;
2287                 if (btrfs_key_type(&found_key) != key_type)
2288                         break;
2289                 if (found_key.offset < filp->f_pos)
2290                         continue;
2291
2292                 filp->f_pos = found_key.offset;
2293
2294                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
2295                 di_cur = 0;
2296                 di_total = btrfs_item_size(leaf, item);
2297
2298                 while (di_cur < di_total) {
2299                         struct btrfs_key location;
2300
2301                         name_len = btrfs_dir_name_len(leaf, di);
2302                         if (name_len <= sizeof(tmp_name)) {
2303                                 name_ptr = tmp_name;
2304                         } else {
2305                                 name_ptr = kmalloc(name_len, GFP_NOFS);
2306                                 if (!name_ptr) {
2307                                         ret = -ENOMEM;
2308                                         goto err;
2309                                 }
2310                         }
2311                         read_extent_buffer(leaf, name_ptr,
2312                                            (unsigned long)(di + 1), name_len);
2313
2314                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
2315                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
2316                         over = filldir(dirent, name_ptr, name_len,
2317                                        found_key.offset, location.objectid,
2318                                        d_type);
2319
2320                         if (name_ptr != tmp_name)
2321                                 kfree(name_ptr);
2322
2323                         if (over)
2324                                 goto nopos;
2325
2326                         di_len = btrfs_dir_name_len(leaf, di) +
2327                                  btrfs_dir_data_len(leaf, di) + sizeof(*di);
2328                         di_cur += di_len;
2329                         di = (struct btrfs_dir_item *)((char *)di + di_len);
2330                 }
2331         }
2332
2333         /* Reached end of directory/root. Bump pos past the last item. */
2334         if (key_type == BTRFS_DIR_INDEX_KEY)
2335                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
2336         else
2337                 filp->f_pos++;
2338 nopos:
2339         ret = 0;
2340 err:
2341         btrfs_free_path(path);
2342         return ret;
2343 }
2344
2345 int btrfs_write_inode(struct inode *inode, int wait)
2346 {
2347         struct btrfs_root *root = BTRFS_I(inode)->root;
2348         struct btrfs_trans_handle *trans;
2349         int ret = 0;
2350
2351         if (root->fs_info->closing > 1)
2352                 return 0;
2353
2354         if (wait) {
2355                 trans = btrfs_join_transaction(root, 1);
2356                 btrfs_set_trans_block_group(trans, inode);
2357                 ret = btrfs_commit_transaction(trans, root);
2358         }
2359         return ret;
2360 }
2361
2362 /*
2363  * This is somewhat expensive, updating the tree every time the
2364  * inode changes.  But, it is most likely to find the inode in cache.
2365  * FIXME, needs more benchmarking...there are no reasons other than performance
2366  * to keep or drop this code.
2367  */
2368 void btrfs_dirty_inode(struct inode *inode)
2369 {
2370         struct btrfs_root *root = BTRFS_I(inode)->root;
2371         struct btrfs_trans_handle *trans;
2372
2373         trans = btrfs_join_transaction(root, 1);
2374         btrfs_set_trans_block_group(trans, inode);
2375         btrfs_update_inode(trans, root, inode);
2376         btrfs_end_transaction(trans, root);
2377 }
2378
2379 /*
2380  * find the highest existing sequence number in a directory
2381  * and then set the in-memory index_cnt variable to reflect
2382  * free sequence numbers
2383  */
2384 static int btrfs_set_inode_index_count(struct inode *inode)
2385 {
2386         struct btrfs_root *root = BTRFS_I(inode)->root;
2387         struct btrfs_key key, found_key;
2388         struct btrfs_path *path;
2389         struct extent_buffer *leaf;
2390         int ret;
2391
2392         key.objectid = inode->i_ino;
2393         btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
2394         key.offset = (u64)-1;
2395
2396         path = btrfs_alloc_path();
2397         if (!path)
2398                 return -ENOMEM;
2399
2400         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2401         if (ret < 0)
2402                 goto out;
2403         /* FIXME: we should be able to handle this */
2404         if (ret == 0)
2405                 goto out;
2406         ret = 0;
2407
2408         /*
2409          * MAGIC NUMBER EXPLANATION:
2410          * since we search a directory based on f_pos we have to start at 2
2411          * since '.' and '..' have f_pos of 0 and 1 respectively, so everybody
2412          * else has to start at 2
2413          */
2414         if (path->slots[0] == 0) {
2415                 BTRFS_I(inode)->index_cnt = 2;
2416                 goto out;
2417         }
2418
2419         path->slots[0]--;
2420
2421         leaf = path->nodes[0];
2422         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2423
2424         if (found_key.objectid != inode->i_ino ||
2425             btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
2426                 BTRFS_I(inode)->index_cnt = 2;
2427                 goto out;
2428         }
2429
2430         BTRFS_I(inode)->index_cnt = found_key.offset + 1;
2431 out:
2432         btrfs_free_path(path);
2433         return ret;
2434 }
2435
2436 /*
2437  * helper to find a free sequence number in a given directory.  This current
2438  * code is very simple, later versions will do smarter things in the btree
2439  */
2440 static int btrfs_set_inode_index(struct inode *dir, struct inode *inode,
2441                                  u64 *index)
2442 {
2443         int ret = 0;
2444
2445         if (BTRFS_I(dir)->index_cnt == (u64)-1) {
2446                 ret = btrfs_set_inode_index_count(dir);
2447                 if (ret) {
2448                         return ret;
2449                 }
2450         }
2451
2452         *index = BTRFS_I(dir)->index_cnt;
2453         BTRFS_I(dir)->index_cnt++;
2454
2455         return ret;
2456 }
2457
2458 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
2459                                      struct btrfs_root *root,
2460                                      struct inode *dir,
2461                                      const char *name, int name_len,
2462                                      u64 ref_objectid,
2463                                      u64 objectid,
2464                                      struct btrfs_block_group_cache *group,
2465                                      int mode, u64 *index)
2466 {
2467         struct inode *inode;
2468         struct btrfs_inode_item *inode_item;
2469         struct btrfs_block_group_cache *new_inode_group;
2470         struct btrfs_key *location;
2471         struct btrfs_path *path;
2472         struct btrfs_inode_ref *ref;
2473         struct btrfs_key key[2];
2474         u32 sizes[2];
2475         unsigned long ptr;
2476         int ret;
2477         int owner;
2478
2479         path = btrfs_alloc_path();
2480         BUG_ON(!path);
2481
2482         inode = new_inode(root->fs_info->sb);
2483         if (!inode)
2484                 return ERR_PTR(-ENOMEM);
2485
2486         if (dir) {
2487                 ret = btrfs_set_inode_index(dir, inode, index);
2488                 if (ret)
2489                         return ERR_PTR(ret);
2490         }
2491         /*
2492          * index_cnt is ignored for everything but a dir,
2493          * btrfs_get_inode_index_count has an explanation for the magic
2494          * number
2495          */
2496         init_btrfs_i(inode);
2497         BTRFS_I(inode)->index_cnt = 2;
2498         BTRFS_I(inode)->root = root;
2499         BTRFS_I(inode)->generation = trans->transid;
2500
2501         if (mode & S_IFDIR)
2502                 owner = 0;
2503         else
2504                 owner = 1;
2505         new_inode_group = btrfs_find_block_group(root, group, 0,
2506                                        BTRFS_BLOCK_GROUP_METADATA, owner);
2507         if (!new_inode_group) {
2508                 printk("find_block group failed\n");
2509                 new_inode_group = group;
2510         }
2511         BTRFS_I(inode)->block_group = new_inode_group;
2512
2513         key[0].objectid = objectid;
2514         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
2515         key[0].offset = 0;
2516
2517         key[1].objectid = objectid;
2518         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
2519         key[1].offset = ref_objectid;
2520
2521         sizes[0] = sizeof(struct btrfs_inode_item);
2522         sizes[1] = name_len + sizeof(*ref);
2523
2524         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
2525         if (ret != 0)
2526                 goto fail;
2527
2528         if (objectid > root->highest_inode)
2529                 root->highest_inode = objectid;
2530
2531         inode->i_uid = current->fsuid;
2532         inode->i_gid = current->fsgid;
2533         inode->i_mode = mode;
2534         inode->i_ino = objectid;
2535         inode_set_bytes(inode, 0);
2536         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2537         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2538                                   struct btrfs_inode_item);
2539         fill_inode_item(trans, path->nodes[0], inode_item, inode);
2540
2541         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2542                              struct btrfs_inode_ref);
2543         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
2544         btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
2545         ptr = (unsigned long)(ref + 1);
2546         write_extent_buffer(path->nodes[0], name, ptr, name_len);
2547
2548         btrfs_mark_buffer_dirty(path->nodes[0]);
2549         btrfs_free_path(path);
2550
2551         location = &BTRFS_I(inode)->location;
2552         location->objectid = objectid;
2553         location->offset = 0;
2554         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
2555
2556         insert_inode_hash(inode);
2557         return inode;
2558 fail:
2559         if (dir)
2560                 BTRFS_I(dir)->index_cnt--;
2561         btrfs_free_path(path);
2562         return ERR_PTR(ret);
2563 }
2564
2565 static inline u8 btrfs_inode_type(struct inode *inode)
2566 {
2567         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
2568 }
2569
2570 /*
2571  * utility function to add 'inode' into 'parent_inode' with
2572  * a give name and a given sequence number.
2573  * if 'add_backref' is true, also insert a backref from the
2574  * inode to the parent directory.
2575  */
2576 int btrfs_add_link(struct btrfs_trans_handle *trans,
2577                    struct inode *parent_inode, struct inode *inode,
2578                    const char *name, int name_len, int add_backref, u64 index)
2579 {
2580         int ret;
2581         struct btrfs_key key;
2582         struct btrfs_root *root = BTRFS_I(parent_inode)->root;
2583
2584         key.objectid = inode->i_ino;
2585         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
2586         key.offset = 0;
2587
2588         ret = btrfs_insert_dir_item(trans, root, name, name_len,
2589                                     parent_inode->i_ino,
2590                                     &key, btrfs_inode_type(inode),
2591                                     index);
2592         if (ret == 0) {
2593                 if (add_backref) {
2594                         ret = btrfs_insert_inode_ref(trans, root,
2595                                                      name, name_len,
2596                                                      inode->i_ino,
2597                                                      parent_inode->i_ino,
2598                                                      index);
2599                 }
2600                 btrfs_i_size_write(parent_inode, parent_inode->i_size +
2601                                    name_len * 2);
2602                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
2603                 ret = btrfs_update_inode(trans, root, parent_inode);
2604         }
2605         return ret;
2606 }
2607
2608 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
2609                             struct dentry *dentry, struct inode *inode,
2610                             int backref, u64 index)
2611 {
2612         int err = btrfs_add_link(trans, dentry->d_parent->d_inode,
2613                                  inode, dentry->d_name.name,
2614                                  dentry->d_name.len, backref, index);
2615         if (!err) {
2616                 d_instantiate(dentry, inode);
2617                 return 0;
2618         }
2619         if (err > 0)
2620                 err = -EEXIST;
2621         return err;
2622 }
2623
2624 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
2625                         int mode, dev_t rdev)
2626 {
2627         struct btrfs_trans_handle *trans;
2628         struct btrfs_root *root = BTRFS_I(dir)->root;
2629         struct inode *inode = NULL;
2630         int err;
2631         int drop_inode = 0;
2632         u64 objectid;
2633         unsigned long nr = 0;
2634         u64 index = 0;
2635
2636         if (!new_valid_dev(rdev))
2637                 return -EINVAL;
2638
2639         err = btrfs_check_free_space(root, 1, 0);
2640         if (err)
2641                 goto fail;
2642
2643         trans = btrfs_start_transaction(root, 1);
2644         btrfs_set_trans_block_group(trans, dir);
2645
2646         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2647         if (err) {
2648                 err = -ENOSPC;
2649                 goto out_unlock;
2650         }
2651
2652         inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
2653                                 dentry->d_name.len,
2654                                 dentry->d_parent->d_inode->i_ino, objectid,
2655                                 BTRFS_I(dir)->block_group, mode, &index);
2656         err = PTR_ERR(inode);
2657         if (IS_ERR(inode))
2658                 goto out_unlock;
2659
2660         err = btrfs_init_acl(inode, dir);
2661         if (err) {
2662                 drop_inode = 1;
2663                 goto out_unlock;
2664         }
2665
2666         btrfs_set_trans_block_group(trans, inode);
2667         err = btrfs_add_nondir(trans, dentry, inode, 0, index);
2668         if (err)
2669                 drop_inode = 1;
2670         else {
2671                 inode->i_op = &btrfs_special_inode_operations;
2672                 init_special_inode(inode, inode->i_mode, rdev);
2673                 btrfs_update_inode(trans, root, inode);
2674         }
2675         dir->i_sb->s_dirt = 1;
2676         btrfs_update_inode_block_group(trans, inode);
2677         btrfs_update_inode_block_group(trans, dir);
2678 out_unlock:
2679         nr = trans->blocks_used;
2680         btrfs_end_transaction_throttle(trans, root);
2681 fail:
2682         if (drop_inode) {
2683                 inode_dec_link_count(inode);
2684                 iput(inode);
2685         }
2686         btrfs_btree_balance_dirty(root, nr);
2687         return err;
2688 }
2689
2690 static int btrfs_create(struct inode *dir, struct dentry *dentry,
2691                         int mode, struct nameidata *nd)
2692 {
2693         struct btrfs_trans_handle *trans;
2694         struct btrfs_root *root = BTRFS_I(dir)->root;
2695         struct inode *inode = NULL;
2696         int err;
2697         int drop_inode = 0;
2698         unsigned long nr = 0;
2699         u64 objectid;
2700         u64 index = 0;
2701
2702         err = btrfs_check_free_space(root, 1, 0);
2703         if (err)
2704                 goto fail;
2705         trans = btrfs_start_transaction(root, 1);
2706         btrfs_set_trans_block_group(trans, dir);
2707
2708         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2709         if (err) {
2710                 err = -ENOSPC;
2711                 goto out_unlock;
2712         }
2713
2714         inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
2715                                 dentry->d_name.len,
2716                                 dentry->d_parent->d_inode->i_ino,
2717                                 objectid, BTRFS_I(dir)->block_group, mode,
2718                                 &index);
2719         err = PTR_ERR(inode);
2720         if (IS_ERR(inode))
2721                 goto out_unlock;
2722
2723         err = btrfs_init_acl(inode, dir);
2724         if (err) {
2725                 drop_inode = 1;
2726                 goto out_unlock;
2727         }
2728
2729         btrfs_set_trans_block_group(trans, inode);
2730         err = btrfs_add_nondir(trans, dentry, inode, 0, index);
2731         if (err)
2732                 drop_inode = 1;
2733         else {
2734                 inode->i_mapping->a_ops = &btrfs_aops;
2735                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
2736                 inode->i_fop = &btrfs_file_operations;
2737                 inode->i_op = &btrfs_file_inode_operations;
2738                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2739         }
2740         dir->i_sb->s_dirt = 1;
2741         btrfs_update_inode_block_group(trans, inode);
2742         btrfs_update_inode_block_group(trans, dir);
2743 out_unlock:
2744         nr = trans->blocks_used;
2745         btrfs_end_transaction_throttle(trans, root);
2746 fail:
2747         if (drop_inode) {
2748                 inode_dec_link_count(inode);
2749                 iput(inode);
2750         }
2751         btrfs_btree_balance_dirty(root, nr);
2752         return err;
2753 }
2754
2755 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
2756                       struct dentry *dentry)
2757 {
2758         struct btrfs_trans_handle *trans;
2759         struct btrfs_root *root = BTRFS_I(dir)->root;
2760         struct inode *inode = old_dentry->d_inode;
2761         u64 index;
2762         unsigned long nr = 0;
2763         int err;
2764         int drop_inode = 0;
2765
2766         if (inode->i_nlink == 0)
2767                 return -ENOENT;
2768
2769         btrfs_inc_nlink(inode);
2770         err = btrfs_check_free_space(root, 1, 0);
2771         if (err)
2772                 goto fail;
2773         err = btrfs_set_inode_index(dir, inode, &index);
2774         if (err)
2775                 goto fail;
2776
2777         trans = btrfs_start_transaction(root, 1);
2778
2779         btrfs_set_trans_block_group(trans, dir);
2780         atomic_inc(&inode->i_count);
2781
2782         err = btrfs_add_nondir(trans, dentry, inode, 1, index);
2783
2784         if (err)
2785                 drop_inode = 1;
2786
2787         dir->i_sb->s_dirt = 1;
2788         btrfs_update_inode_block_group(trans, dir);
2789         err = btrfs_update_inode(trans, root, inode);
2790
2791         if (err)
2792                 drop_inode = 1;
2793
2794         nr = trans->blocks_used;
2795         btrfs_end_transaction_throttle(trans, root);
2796 fail:
2797         if (drop_inode) {
2798                 inode_dec_link_count(inode);
2799                 iput(inode);
2800         }
2801         btrfs_btree_balance_dirty(root, nr);
2802         return err;
2803 }
2804
2805 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2806 {
2807         struct inode *inode = NULL;
2808         struct btrfs_trans_handle *trans;
2809         struct btrfs_root *root = BTRFS_I(dir)->root;
2810         int err = 0;
2811         int drop_on_err = 0;
2812         u64 objectid = 0;
2813         u64 index = 0;
2814         unsigned long nr = 1;
2815
2816         err = btrfs_check_free_space(root, 1, 0);
2817         if (err)
2818                 goto out_unlock;
2819
2820         trans = btrfs_start_transaction(root, 1);
2821         btrfs_set_trans_block_group(trans, dir);
2822
2823         if (IS_ERR(trans)) {
2824                 err = PTR_ERR(trans);
2825                 goto out_unlock;
2826         }
2827
2828         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2829         if (err) {
2830                 err = -ENOSPC;
2831                 goto out_unlock;
2832         }
2833
2834         inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
2835                                 dentry->d_name.len,
2836                                 dentry->d_parent->d_inode->i_ino, objectid,
2837                                 BTRFS_I(dir)->block_group, S_IFDIR | mode,
2838                                 &index);
2839         if (IS_ERR(inode)) {
2840                 err = PTR_ERR(inode);
2841                 goto out_fail;
2842         }
2843
2844         drop_on_err = 1;
2845
2846         err = btrfs_init_acl(inode, dir);
2847         if (err)
2848                 goto out_fail;
2849
2850         inode->i_op = &btrfs_dir_inode_operations;
2851         inode->i_fop = &btrfs_dir_file_operations;
2852         btrfs_set_trans_block_group(trans, inode);
2853
2854         btrfs_i_size_write(inode, 0);
2855         err = btrfs_update_inode(trans, root, inode);
2856         if (err)
2857                 goto out_fail;
2858
2859         err = btrfs_add_link(trans, dentry->d_parent->d_inode,
2860                                  inode, dentry->d_name.name,
2861                                  dentry->d_name.len, 0, index);
2862         if (err)
2863                 goto out_fail;
2864
2865         d_instantiate(dentry, inode);
2866         drop_on_err = 0;
2867         dir->i_sb->s_dirt = 1;
2868         btrfs_update_inode_block_group(trans, inode);
2869         btrfs_update_inode_block_group(trans, dir);
2870
2871 out_fail:
2872         nr = trans->blocks_used;
2873         btrfs_end_transaction_throttle(trans, root);
2874
2875 out_unlock:
2876         if (drop_on_err)
2877                 iput(inode);
2878         btrfs_btree_balance_dirty(root, nr);
2879         return err;
2880 }
2881
2882 /* helper for btfs_get_extent.  Given an existing extent in the tree,
2883  * and an extent that you want to insert, deal with overlap and insert
2884  * the new extent into the tree.
2885  */
2886 static int merge_extent_mapping(struct extent_map_tree *em_tree,
2887                                 struct extent_map *existing,
2888                                 struct extent_map *em,
2889                                 u64 map_start, u64 map_len)
2890 {
2891         u64 start_diff;
2892
2893         BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
2894         start_diff = map_start - em->start;
2895         em->start = map_start;
2896         em->len = map_len;
2897         if (em->block_start < EXTENT_MAP_LAST_BYTE)
2898                 em->block_start += start_diff;
2899         return add_extent_mapping(em_tree, em);
2900 }
2901
2902 /*
2903  * a bit scary, this does extent mapping from logical file offset to the disk.
2904  * the ugly parts come from merging extents from the disk with the
2905  * in-ram representation.  This gets more complex because of the data=ordered code,
2906  * where the in-ram extents might be locked pending data=ordered completion.
2907  *
2908  * This also copies inline extents directly into the page.
2909  */
2910 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2911                                     size_t pg_offset, u64 start, u64 len,
2912                                     int create)
2913 {
2914         int ret;
2915         int err = 0;
2916         u64 bytenr;
2917         u64 extent_start = 0;
2918         u64 extent_end = 0;
2919         u64 objectid = inode->i_ino;
2920         u32 found_type;
2921         struct btrfs_path *path = NULL;
2922         struct btrfs_root *root = BTRFS_I(inode)->root;
2923         struct btrfs_file_extent_item *item;
2924         struct extent_buffer *leaf;
2925         struct btrfs_key found_key;
2926         struct extent_map *em = NULL;
2927         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2928         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2929         struct btrfs_trans_handle *trans = NULL;
2930
2931 again:
2932         spin_lock(&em_tree->lock);
2933         em = lookup_extent_mapping(em_tree, start, len);
2934         if (em)
2935                 em->bdev = root->fs_info->fs_devices->latest_bdev;
2936         spin_unlock(&em_tree->lock);
2937
2938         if (em) {
2939                 if (em->start > start || em->start + em->len <= start)
2940                         free_extent_map(em);
2941                 else if (em->block_start == EXTENT_MAP_INLINE && page)
2942                         free_extent_map(em);
2943                 else
2944                         goto out;
2945         }
2946         em = alloc_extent_map(GFP_NOFS);
2947         if (!em) {
2948                 err = -ENOMEM;
2949                 goto out;
2950         }
2951         em->bdev = root->fs_info->fs_devices->latest_bdev;
2952         em->start = EXTENT_MAP_HOLE;
2953         em->len = (u64)-1;
2954
2955         if (!path) {
2956                 path = btrfs_alloc_path();
2957                 BUG_ON(!path);
2958         }
2959
2960         ret = btrfs_lookup_file_extent(trans, root, path,
2961                                        objectid, start, trans != NULL);
2962         if (ret < 0) {
2963                 err = ret;
2964                 goto out;
2965         }
2966
2967         if (ret != 0) {
2968                 if (path->slots[0] == 0)
2969                         goto not_found;
2970                 path->slots[0]--;
2971         }
2972
2973         leaf = path->nodes[0];
2974         item = btrfs_item_ptr(leaf, path->slots[0],
2975                               struct btrfs_file_extent_item);
2976         /* are we inside the extent that was found? */
2977         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2978         found_type = btrfs_key_type(&found_key);
2979         if (found_key.objectid != objectid ||
2980             found_type != BTRFS_EXTENT_DATA_KEY) {
2981                 goto not_found;
2982         }
2983
2984         found_type = btrfs_file_extent_type(leaf, item);
2985         extent_start = found_key.offset;
2986         if (found_type == BTRFS_FILE_EXTENT_REG) {
2987                 extent_end = extent_start +
2988                        btrfs_file_extent_num_bytes(leaf, item);
2989                 err = 0;
2990                 if (start < extent_start || start >= extent_end) {
2991                         em->start = start;
2992                         if (start < extent_start) {
2993                                 if (start + len <= extent_start)
2994                                         goto not_found;
2995                                 em->len = extent_end - extent_start;
2996                         } else {
2997                                 em->len = len;
2998                         }
2999                         goto not_found_em;
3000                 }
3001                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
3002                 if (bytenr == 0) {
3003                         em->start = extent_start;
3004                         em->len = extent_end - extent_start;
3005                         em->block_start = EXTENT_MAP_HOLE;
3006                         goto insert;
3007                 }
3008                 bytenr += btrfs_file_extent_offset(leaf, item);
3009                 em->block_start = bytenr;
3010                 em->start = extent_start;
3011                 em->len = extent_end - extent_start;
3012                 goto insert;
3013         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
3014                 u64 page_start;
3015                 unsigned long ptr;
3016                 char *map;
3017                 size_t size;
3018                 size_t extent_offset;
3019                 size_t copy_size;
3020
3021                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
3022                                                     path->slots[0]));
3023                 extent_end = (extent_start + size + root->sectorsize - 1) &
3024                         ~((u64)root->sectorsize - 1);
3025                 if (start < extent_start || start >= extent_end) {
3026                         em->start = start;
3027                         if (start < extent_start) {
3028                                 if (start + len <= extent_start)
3029                                         goto not_found;
3030                                 em->len = extent_end - extent_start;
3031                         } else {
3032                                 em->len = len;
3033                         }
3034                         goto not_found_em;
3035                 }
3036                 em->block_start = EXTENT_MAP_INLINE;
3037
3038                 if (!page) {
3039                         em->start = extent_start;
3040                         em->len = size;
3041                         goto out;
3042                 }
3043
3044                 page_start = page_offset(page) + pg_offset;
3045                 extent_offset = page_start - extent_start;
3046                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
3047                                 size - extent_offset);
3048                 em->start = extent_start + extent_offset;
3049                 em->len = (copy_size + root->sectorsize - 1) &
3050                         ~((u64)root->sectorsize - 1);
3051                 map = kmap(page);
3052                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
3053                 if (create == 0 && !PageUptodate(page)) {
3054                         read_extent_buffer(leaf, map + pg_offset, ptr,
3055                                            copy_size);
3056                         flush_dcache_page(page);
3057                 } else if (create && PageUptodate(page)) {
3058                         if (!trans) {
3059                                 kunmap(page);
3060                                 free_extent_map(em);
3061                                 em = NULL;
3062                                 btrfs_release_path(root, path);
3063                                 trans = btrfs_join_transaction(root, 1);
3064                                 goto again;
3065                         }
3066                         write_extent_buffer(leaf, map + pg_offset, ptr,
3067                                             copy_size);
3068                         btrfs_mark_buffer_dirty(leaf);
3069                 }
3070                 kunmap(page);
3071                 set_extent_uptodate(io_tree, em->start,
3072                                     extent_map_end(em) - 1, GFP_NOFS);
3073                 goto insert;
3074         } else {
3075                 printk("unkknown found_type %d\n", found_type);
3076                 WARN_ON(1);
3077         }
3078 not_found:
3079         em->start = start;
3080         em->len = len;
3081 not_found_em:
3082         em->block_start = EXTENT_MAP_HOLE;
3083 insert:
3084         btrfs_release_path(root, path);
3085         if (em->start > start || extent_map_end(em) <= start) {
3086                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
3087                 err = -EIO;
3088                 goto out;
3089         }
3090
3091         err = 0;
3092         spin_lock(&em_tree->lock);
3093         ret = add_extent_mapping(em_tree, em);
3094         /* it is possible that someone inserted the extent into the tree
3095          * while we had the lock dropped.  It is also possible that
3096          * an overlapping map exists in the tree
3097          */
3098         if (ret == -EEXIST) {
3099                 struct extent_map *existing;
3100
3101                 ret = 0;
3102
3103                 existing = lookup_extent_mapping(em_tree, start, len);
3104                 if (existing && (existing->start > start ||
3105                     existing->start + existing->len <= start)) {
3106                         free_extent_map(existing);
3107                         existing = NULL;
3108                 }
3109                 if (!existing) {
3110                         existing = lookup_extent_mapping(em_tree, em->start,
3111                                                          em->len);
3112                         if (existing) {
3113                                 err = merge_extent_mapping(em_tree, existing,
3114                                                            em, start,
3115                                                            root->sectorsize);
3116                                 free_extent_map(existing);
3117                                 if (err) {
3118                                         free_extent_map(em);
3119                                         em = NULL;
3120                                 }
3121                         } else {
3122                                 err = -EIO;
3123                                 printk("failing to insert %Lu %Lu\n",
3124                                        start, len);
3125                                 free_extent_map(em);
3126                                 em = NULL;
3127                         }
3128                 } else {
3129                         free_extent_map(em);
3130                         em = existing;
3131                         err = 0;
3132                 }
3133         }
3134         spin_unlock(&em_tree->lock);
3135 out:
3136         if (path)
3137                 btrfs_free_path(path);
3138         if (trans) {
3139                 ret = btrfs_end_transaction(trans, root);
3140                 if (!err) {
3141                         err = ret;
3142                 }
3143         }
3144         if (err) {
3145                 free_extent_map(em);
3146                 WARN_ON(1);
3147                 return ERR_PTR(err);
3148         }
3149         return em;
3150 }
3151
3152 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
3153                         const struct iovec *iov, loff_t offset,
3154                         unsigned long nr_segs)
3155 {
3156         return -EINVAL;
3157 }
3158
3159 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
3160 {
3161         return extent_bmap(mapping, iblock, btrfs_get_extent);
3162 }
3163
3164 int btrfs_readpage(struct file *file, struct page *page)
3165 {
3166         struct extent_io_tree *tree;
3167         tree = &BTRFS_I(page->mapping->host)->io_tree;
3168         return extent_read_full_page(tree, page, btrfs_get_extent);
3169 }
3170
3171 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
3172 {
3173         struct extent_io_tree *tree;
3174
3175
3176         if (current->flags & PF_MEMALLOC) {
3177                 redirty_page_for_writepage(wbc, page);
3178                 unlock_page(page);
3179                 return 0;
3180         }
3181         tree = &BTRFS_I(page->mapping->host)->io_tree;
3182         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
3183 }
3184
3185 int btrfs_writepages(struct address_space *mapping,
3186                      struct writeback_control *wbc)
3187 {
3188         struct extent_io_tree *tree;
3189         tree = &BTRFS_I(mapping->host)->io_tree;
3190         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
3191 }
3192
3193 static int
3194 btrfs_readpages(struct file *file, struct address_space *mapping,
3195                 struct list_head *pages, unsigned nr_pages)
3196 {
3197         struct extent_io_tree *tree;
3198         tree = &BTRFS_I(mapping->host)->io_tree;
3199         return extent_readpages(tree, mapping, pages, nr_pages,
3200                                 btrfs_get_extent);
3201 }
3202 static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
3203 {
3204         struct extent_io_tree *tree;
3205         struct extent_map_tree *map;
3206         int ret;
3207
3208         tree = &BTRFS_I(page->mapping->host)->io_tree;
3209         map = &BTRFS_I(page->mapping->host)->extent_tree;
3210         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
3211         if (ret == 1) {
3212                 ClearPagePrivate(page);
3213                 set_page_private(page, 0);
3214                 page_cache_release(page);
3215         }
3216         return ret;
3217 }
3218
3219 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
3220 {
3221         if (PageWriteback(page) || PageDirty(page))
3222                 return 0;
3223         return __btrfs_releasepage(page, gfp_flags);
3224 }
3225
3226 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
3227 {
3228         struct extent_io_tree *tree;
3229         struct btrfs_ordered_extent *ordered;
3230         u64 page_start = page_offset(page);
3231         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
3232
3233         wait_on_page_writeback(page);
3234         tree = &BTRFS_I(page->mapping->host)->io_tree;
3235         if (offset) {
3236                 btrfs_releasepage(page, GFP_NOFS);
3237                 return;
3238         }
3239
3240         lock_extent(tree, page_start, page_end, GFP_NOFS);
3241         ordered = btrfs_lookup_ordered_extent(page->mapping->host,
3242                                            page_offset(page));
3243         if (ordered) {
3244                 /*
3245                  * IO on this page will never be started, so we need
3246                  * to account for any ordered extents now
3247                  */
3248                 clear_extent_bit(tree, page_start, page_end,
3249                                  EXTENT_DIRTY | EXTENT_DELALLOC |
3250                                  EXTENT_LOCKED, 1, 0, GFP_NOFS);
3251                 btrfs_finish_ordered_io(page->mapping->host,
3252                                         page_start, page_end);
3253                 btrfs_put_ordered_extent(ordered);
3254                 lock_extent(tree, page_start, page_end, GFP_NOFS);
3255         }
3256         clear_extent_bit(tree, page_start, page_end,
3257                  EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
3258                  EXTENT_ORDERED,
3259                  1, 1, GFP_NOFS);
3260         __btrfs_releasepage(page, GFP_NOFS);
3261
3262         ClearPageChecked(page);
3263         if (PagePrivate(page)) {
3264                 ClearPagePrivate(page);
3265                 set_page_private(page, 0);
3266                 page_cache_release(page);
3267         }
3268 }
3269
3270 /*
3271  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
3272  * called from a page fault handler when a page is first dirtied. Hence we must
3273  * be careful to check for EOF conditions here. We set the page up correctly
3274  * for a written page which means we get ENOSPC checking when writing into
3275  * holes and correct delalloc and unwritten extent mapping on filesystems that
3276  * support these features.
3277  *
3278  * We are not allowed to take the i_mutex here so we have to play games to
3279  * protect against truncate races as the page could now be beyond EOF.  Because
3280  * vmtruncate() writes the inode size before removing pages, once we have the
3281  * page lock we can determine safely if the page is beyond EOF. If it is not
3282  * beyond EOF, then the page is guaranteed safe against truncation until we
3283  * unlock the page.
3284  */
3285 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
3286 {
3287         struct inode *inode = fdentry(vma->vm_file)->d_inode;
3288         struct btrfs_root *root = BTRFS_I(inode)->root;
3289         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3290         struct btrfs_ordered_extent *ordered;
3291         char *kaddr;
3292         unsigned long zero_start;
3293         loff_t size;
3294         int ret;
3295         u64 page_start;
3296         u64 page_end;
3297
3298         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
3299         if (ret)
3300                 goto out;
3301
3302         ret = -EINVAL;
3303 again:
3304         lock_page(page);
3305         size = i_size_read(inode);
3306         page_start = page_offset(page);
3307         page_end = page_start + PAGE_CACHE_SIZE - 1;
3308
3309         if ((page->mapping != inode->i_mapping) ||
3310             (page_start >= size)) {
3311                 /* page got truncated out from underneath us */
3312                 goto out_unlock;
3313         }
3314         wait_on_page_writeback(page);
3315
3316         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3317         set_page_extent_mapped(page);
3318
3319         /*
3320          * we can't set the delalloc bits if there are pending ordered
3321          * extents.  Drop our locks and wait for them to finish
3322          */
3323         ordered = btrfs_lookup_ordered_extent(inode, page_start);
3324         if (ordered) {
3325                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3326                 unlock_page(page);
3327                 btrfs_start_ordered_extent(inode, ordered, 1);
3328                 btrfs_put_ordered_extent(ordered);
3329                 goto again;
3330         }
3331
3332         btrfs_set_extent_delalloc(inode, page_start, page_end);
3333         ret = 0;
3334
3335         /* page is wholly or partially inside EOF */
3336         if (page_start + PAGE_CACHE_SIZE > size)
3337                 zero_start = size & ~PAGE_CACHE_MASK;
3338         else
3339                 zero_start = PAGE_CACHE_SIZE;
3340
3341         if (zero_start != PAGE_CACHE_SIZE) {
3342                 kaddr = kmap(page);
3343                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
3344                 flush_dcache_page(page);
3345                 kunmap(page);
3346         }
3347         ClearPageChecked(page);
3348         set_page_dirty(page);
3349         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3350
3351 out_unlock:
3352         unlock_page(page);
3353 out:
3354         return ret;
3355 }
3356
3357 static void btrfs_truncate(struct inode *inode)
3358 {
3359         struct btrfs_root *root = BTRFS_I(inode)->root;
3360         int ret;
3361         struct btrfs_trans_handle *trans;
3362         unsigned long nr;
3363         u64 mask = root->sectorsize - 1;
3364
3365         if (!S_ISREG(inode->i_mode))
3366                 return;
3367         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
3368                 return;
3369
3370         btrfs_truncate_page(inode->i_mapping, inode->i_size);
3371         btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
3372
3373         trans = btrfs_start_transaction(root, 1);
3374         btrfs_set_trans_block_group(trans, inode);
3375         btrfs_i_size_write(inode, inode->i_size);
3376
3377         ret = btrfs_orphan_add(trans, inode);
3378         if (ret)
3379                 goto out;
3380         /* FIXME, add redo link to tree so we don't leak on crash */
3381         ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
3382                                       BTRFS_EXTENT_DATA_KEY);
3383         btrfs_update_inode(trans, root, inode);
3384
3385         ret = btrfs_orphan_del(trans, inode);
3386         BUG_ON(ret);
3387
3388 out:
3389         nr = trans->blocks_used;
3390         ret = btrfs_end_transaction_throttle(trans, root);
3391         BUG_ON(ret);
3392         btrfs_btree_balance_dirty(root, nr);
3393 }
3394
3395 /*
3396  * Invalidate a single dcache entry at the root of the filesystem.
3397  * Needed after creation of snapshot or subvolume.
3398  */
3399 void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name,
3400                                   int namelen)
3401 {
3402         struct dentry *alias, *entry;
3403         struct qstr qstr;
3404
3405         alias = d_find_alias(root->fs_info->sb->s_root->d_inode);
3406         if (alias) {
3407                 qstr.name = name;
3408                 qstr.len = namelen;
3409                 /* change me if btrfs ever gets a d_hash operation */
3410                 qstr.hash = full_name_hash(qstr.name, qstr.len);
3411                 entry = d_lookup(alias, &qstr);
3412                 dput(alias);
3413                 if (entry) {
3414                         d_invalidate(entry);
3415                         dput(entry);
3416                 }
3417         }
3418 }
3419
3420 /*
3421  * create a new subvolume directory/inode (helper for the ioctl).
3422  */
3423 int btrfs_create_subvol_root(struct btrfs_root *new_root, struct dentry *dentry,
3424                 struct btrfs_trans_handle *trans, u64 new_dirid,
3425                 struct btrfs_block_group_cache *block_group)
3426 {
3427         struct inode *inode;
3428         int error;
3429         u64 index = 0;
3430
3431         inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
3432                                 new_dirid, block_group, S_IFDIR | 0700, &index);
3433         if (IS_ERR(inode))
3434                 return PTR_ERR(inode);
3435         inode->i_op = &btrfs_dir_inode_operations;
3436         inode->i_fop = &btrfs_dir_file_operations;
3437         new_root->inode = inode;
3438
3439         inode->i_nlink = 1;
3440         btrfs_i_size_write(inode, 0);
3441
3442         error = btrfs_update_inode(trans, new_root, inode);
3443         if (error)
3444                 return error;
3445
3446         d_instantiate(dentry, inode);
3447         return 0;
3448 }
3449
3450 /* helper function for file defrag and space balancing.  This
3451  * forces readahead on a given range of bytes in an inode
3452  */
3453 unsigned long btrfs_force_ra(struct address_space *mapping,
3454                               struct file_ra_state *ra, struct file *file,
3455                               pgoff_t offset, pgoff_t last_index)
3456 {
3457         pgoff_t req_size = last_index - offset + 1;
3458
3459         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
3460         return offset + req_size;
3461 }
3462
3463 struct inode *btrfs_alloc_inode(struct super_block *sb)
3464 {
3465         struct btrfs_inode *ei;
3466
3467         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
3468         if (!ei)
3469                 return NULL;
3470         ei->last_trans = 0;
3471         ei->logged_trans = 0;
3472         btrfs_ordered_inode_tree_init(&ei->ordered_tree);
3473         ei->i_acl = BTRFS_ACL_NOT_CACHED;
3474         ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
3475         INIT_LIST_HEAD(&ei->i_orphan);
3476         return &ei->vfs_inode;
3477 }
3478
3479 void btrfs_destroy_inode(struct inode *inode)
3480 {
3481         struct btrfs_ordered_extent *ordered;
3482         WARN_ON(!list_empty(&inode->i_dentry));
3483         WARN_ON(inode->i_data.nrpages);
3484
3485         if (BTRFS_I(inode)->i_acl &&
3486             BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
3487                 posix_acl_release(BTRFS_I(inode)->i_acl);
3488         if (BTRFS_I(inode)->i_default_acl &&
3489             BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
3490                 posix_acl_release(BTRFS_I(inode)->i_default_acl);
3491
3492         spin_lock(&BTRFS_I(inode)->root->list_lock);
3493         if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
3494                 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
3495                        " list\n", inode->i_ino);
3496                 dump_stack();
3497         }
3498         spin_unlock(&BTRFS_I(inode)->root->list_lock);
3499
3500         while(1) {
3501                 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
3502                 if (!ordered)
3503                         break;
3504                 else {
3505                         printk("found ordered extent %Lu %Lu\n",
3506                                ordered->file_offset, ordered->len);
3507                         btrfs_remove_ordered_extent(inode, ordered);
3508                         btrfs_put_ordered_extent(ordered);
3509                         btrfs_put_ordered_extent(ordered);
3510                 }
3511         }
3512         btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
3513         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
3514 }
3515
3516 static void init_once(void *foo)
3517 {
3518         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
3519
3520         inode_init_once(&ei->vfs_inode);
3521 }
3522
3523 void btrfs_destroy_cachep(void)
3524 {
3525         if (btrfs_inode_cachep)
3526                 kmem_cache_destroy(btrfs_inode_cachep);
3527         if (btrfs_trans_handle_cachep)
3528                 kmem_cache_destroy(btrfs_trans_handle_cachep);
3529         if (btrfs_transaction_cachep)
3530                 kmem_cache_destroy(btrfs_transaction_cachep);
3531         if (btrfs_bit_radix_cachep)
3532                 kmem_cache_destroy(btrfs_bit_radix_cachep);
3533         if (btrfs_path_cachep)
3534                 kmem_cache_destroy(btrfs_path_cachep);
3535 }
3536
3537 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
3538                                        unsigned long extra_flags,
3539                                        void (*ctor)(void *))
3540 {
3541         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
3542                                  SLAB_MEM_SPREAD | extra_flags), ctor);
3543 }
3544
3545 int btrfs_init_cachep(void)
3546 {
3547         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
3548                                           sizeof(struct btrfs_inode),
3549                                           0, init_once);
3550         if (!btrfs_inode_cachep)
3551                 goto fail;
3552         btrfs_trans_handle_cachep =
3553                         btrfs_cache_create("btrfs_trans_handle_cache",
3554                                            sizeof(struct btrfs_trans_handle),
3555                                            0, NULL);
3556         if (!btrfs_trans_handle_cachep)
3557                 goto fail;
3558         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
3559                                              sizeof(struct btrfs_transaction),
3560                                              0, NULL);
3561         if (!btrfs_transaction_cachep)
3562                 goto fail;
3563         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
3564                                          sizeof(struct btrfs_path),
3565                                          0, NULL);
3566         if (!btrfs_path_cachep)
3567                 goto fail;
3568         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
3569                                               SLAB_DESTROY_BY_RCU, NULL);
3570         if (!btrfs_bit_radix_cachep)
3571                 goto fail;
3572         return 0;
3573 fail:
3574         btrfs_destroy_cachep();
3575         return -ENOMEM;
3576 }
3577
3578 static int btrfs_getattr(struct vfsmount *mnt,
3579                          struct dentry *dentry, struct kstat *stat)
3580 {
3581         struct inode *inode = dentry->d_inode;
3582         generic_fillattr(inode, stat);
3583         stat->blksize = PAGE_CACHE_SIZE;
3584         stat->blocks = (inode_get_bytes(inode) +
3585                         BTRFS_I(inode)->delalloc_bytes) >> 9;
3586         return 0;
3587 }
3588
3589 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
3590                            struct inode * new_dir,struct dentry *new_dentry)
3591 {
3592         struct btrfs_trans_handle *trans;
3593         struct btrfs_root *root = BTRFS_I(old_dir)->root;
3594         struct inode *new_inode = new_dentry->d_inode;
3595         struct inode *old_inode = old_dentry->d_inode;
3596         struct timespec ctime = CURRENT_TIME;
3597         u64 index = 0;
3598         int ret;
3599
3600         if (S_ISDIR(old_inode->i_mode) && new_inode &&
3601             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
3602                 return -ENOTEMPTY;
3603         }
3604
3605         ret = btrfs_check_free_space(root, 1, 0);
3606         if (ret)
3607                 goto out_unlock;
3608
3609         trans = btrfs_start_transaction(root, 1);
3610
3611         btrfs_set_trans_block_group(trans, new_dir);
3612
3613         btrfs_inc_nlink(old_dentry->d_inode);
3614         old_dir->i_ctime = old_dir->i_mtime = ctime;
3615         new_dir->i_ctime = new_dir->i_mtime = ctime;
3616         old_inode->i_ctime = ctime;
3617
3618         ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
3619                                  old_dentry->d_name.name,
3620                                  old_dentry->d_name.len);
3621         if (ret)
3622                 goto out_fail;
3623
3624         if (new_inode) {
3625                 new_inode->i_ctime = CURRENT_TIME;
3626                 ret = btrfs_unlink_inode(trans, root, new_dir,
3627                                          new_dentry->d_inode,
3628                                          new_dentry->d_name.name,
3629                                          new_dentry->d_name.len);
3630                 if (ret)
3631                         goto out_fail;
3632                 if (new_inode->i_nlink == 0) {
3633                         ret = btrfs_orphan_add(trans, new_dentry->d_inode);
3634                         if (ret)
3635                                 goto out_fail;
3636                 }
3637
3638         }
3639         ret = btrfs_set_inode_index(new_dir, old_inode, &index);
3640         if (ret)
3641                 goto out_fail;
3642
3643         ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode,
3644                              old_inode, new_dentry->d_name.name,
3645                              new_dentry->d_name.len, 1, index);
3646         if (ret)
3647                 goto out_fail;
3648
3649 out_fail:
3650         btrfs_end_transaction_throttle(trans, root);
3651 out_unlock:
3652         return ret;
3653 }
3654
3655 /*
3656  * some fairly slow code that needs optimization. This walks the list
3657  * of all the inodes with pending delalloc and forces them to disk.
3658  */
3659 int btrfs_start_delalloc_inodes(struct btrfs_root *root)
3660 {
3661         struct list_head *head = &root->fs_info->delalloc_inodes;
3662         struct btrfs_inode *binode;
3663         struct inode *inode;
3664         unsigned long flags;
3665
3666         spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
3667         while(!list_empty(head)) {
3668                 binode = list_entry(head->next, struct btrfs_inode,
3669                                     delalloc_inodes);
3670                 inode = igrab(&binode->vfs_inode);
3671                 if (!inode)
3672                         list_del_init(&binode->delalloc_inodes);
3673                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
3674                 if (inode) {
3675                         filemap_flush(inode->i_mapping);
3676                         iput(inode);
3677                 }
3678                 cond_resched();
3679                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
3680         }
3681         spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
3682
3683         /* the filemap_flush will queue IO into the worker threads, but
3684          * we have to make sure the IO is actually started and that
3685          * ordered extents get created before we return
3686          */
3687         atomic_inc(&root->fs_info->async_submit_draining);
3688         while(atomic_read(&root->fs_info->nr_async_submits)) {
3689                 wait_event(root->fs_info->async_submit_wait,
3690                    (atomic_read(&root->fs_info->nr_async_submits) == 0));
3691         }
3692         atomic_dec(&root->fs_info->async_submit_draining);
3693         return 0;
3694 }
3695
3696 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3697                          const char *symname)
3698 {
3699         struct btrfs_trans_handle *trans;
3700         struct btrfs_root *root = BTRFS_I(dir)->root;
3701         struct btrfs_path *path;
3702         struct btrfs_key key;
3703         struct inode *inode = NULL;
3704         int err;
3705         int drop_inode = 0;
3706         u64 objectid;
3707         u64 index = 0 ;
3708         int name_len;
3709         int datasize;
3710         unsigned long ptr;
3711         struct btrfs_file_extent_item *ei;
3712         struct extent_buffer *leaf;
3713         unsigned long nr = 0;
3714
3715         name_len = strlen(symname) + 1;
3716         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
3717                 return -ENAMETOOLONG;
3718
3719         err = btrfs_check_free_space(root, 1, 0);
3720         if (err)
3721                 goto out_fail;
3722
3723         trans = btrfs_start_transaction(root, 1);
3724         btrfs_set_trans_block_group(trans, dir);
3725
3726         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3727         if (err) {
3728                 err = -ENOSPC;
3729                 goto out_unlock;
3730         }
3731
3732         inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
3733                                 dentry->d_name.len,
3734                                 dentry->d_parent->d_inode->i_ino, objectid,
3735                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
3736                                 &index);
3737         err = PTR_ERR(inode);
3738         if (IS_ERR(inode))
3739                 goto out_unlock;
3740
3741         err = btrfs_init_acl(inode, dir);
3742         if (err) {
3743                 drop_inode = 1;
3744                 goto out_unlock;
3745         }
3746
3747         btrfs_set_trans_block_group(trans, inode);
3748         err = btrfs_add_nondir(trans, dentry, inode, 0, index);
3749         if (err)
3750                 drop_inode = 1;
3751         else {
3752                 inode->i_mapping->a_ops = &btrfs_aops;
3753                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3754                 inode->i_fop = &btrfs_file_operations;
3755                 inode->i_op = &btrfs_file_inode_operations;
3756                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3757         }
3758         dir->i_sb->s_dirt = 1;
3759         btrfs_update_inode_block_group(trans, inode);
3760         btrfs_update_inode_block_group(trans, dir);
3761         if (drop_inode)
3762                 goto out_unlock;
3763
3764         path = btrfs_alloc_path();
3765         BUG_ON(!path);
3766         key.objectid = inode->i_ino;
3767         key.offset = 0;
3768         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3769         datasize = btrfs_file_extent_calc_inline_size(name_len);
3770         err = btrfs_insert_empty_item(trans, root, path, &key,
3771                                       datasize);
3772         if (err) {
3773                 drop_inode = 1;
3774                 goto out_unlock;
3775         }
3776         leaf = path->nodes[0];
3777         ei = btrfs_item_ptr(leaf, path->slots[0],
3778                             struct btrfs_file_extent_item);
3779         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3780         btrfs_set_file_extent_type(leaf, ei,
3781                                    BTRFS_FILE_EXTENT_INLINE);
3782         ptr = btrfs_file_extent_inline_start(ei);
3783         write_extent_buffer(leaf, symname, ptr, name_len);
3784         btrfs_mark_buffer_dirty(leaf);
3785         btrfs_free_path(path);
3786
3787         inode->i_op = &btrfs_symlink_inode_operations;
3788         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3789         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3790         btrfs_i_size_write(inode, name_len - 1);
3791         err = btrfs_update_inode(trans, root, inode);
3792         if (err)
3793                 drop_inode = 1;
3794
3795 out_unlock:
3796         nr = trans->blocks_used;
3797         btrfs_end_transaction_throttle(trans, root);
3798 out_fail:
3799         if (drop_inode) {
3800                 inode_dec_link_count(inode);
3801                 iput(inode);
3802         }
3803         btrfs_btree_balance_dirty(root, nr);
3804         return err;
3805 }
3806
/*
 * set_page_dirty hook: defers entirely to __set_page_dirty_nobuffers()
 * and returns its result.
 */
static int btrfs_set_page_dirty(struct page *page)
{
	int dirtied = __set_page_dirty_nobuffers(page);

	return dirtied;
}
3811
3812 static int btrfs_permission(struct inode *inode, int mask)
3813 {
3814         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3815                 return -EACCES;
3816         return generic_permission(inode, mask, btrfs_check_acl);
3817 }
3818
/*
 * Inode operations for directories; btrfs_create_subvol_root() installs
 * this table as i_op on the directory inode it creates.
 */
3819 static struct inode_operations btrfs_dir_inode_operations = {
3820         .lookup         = btrfs_lookup,
3821         .create         = btrfs_create,
3822         .unlink         = btrfs_unlink,
3823         .link           = btrfs_link,
3824         .mkdir          = btrfs_mkdir,
3825         .rmdir          = btrfs_rmdir,
3826         .rename         = btrfs_rename,
3827         .symlink        = btrfs_symlink,
3828         .setattr        = btrfs_setattr,
3829         .mknod          = btrfs_mknod,
3830         .setxattr       = btrfs_setxattr,
3831         .getxattr       = btrfs_getxattr,
3832         .listxattr      = btrfs_listxattr,
3833         .removexattr    = btrfs_removexattr,
3834         .permission     = btrfs_permission,
3835 };
/*
 * Reduced directory inode operations: only lookup and permission are
 * provided.  NOTE(review): where this table gets installed is not visible
 * in this part of the file.
 */
3836 static struct inode_operations btrfs_dir_ro_inode_operations = {
3837         .lookup         = btrfs_lookup,
3838         .permission     = btrfs_permission,
3839 };
/*
 * File operations for directories; btrfs_create_subvol_root() installs
 * this table as i_fop.  The compat ioctl entry exists only when
 * CONFIG_COMPAT is defined and uses the same handler as unlocked_ioctl.
 */
3840 static struct file_operations btrfs_dir_file_operations = {
3841         .llseek         = generic_file_llseek,
3842         .read           = generic_read_dir,
3843         .readdir        = btrfs_real_readdir,
3844         .unlocked_ioctl = btrfs_ioctl,
3845 #ifdef CONFIG_COMPAT
3846         .compat_ioctl   = btrfs_ioctl,
3847 #endif
3848         .release        = btrfs_release_file,
3849         .fsync          = btrfs_sync_file,
3850 };
3851
/*
 * Hooks plugged into an inode's io_tree (btrfs_symlink() assigns this
 * table to io_tree.ops).
 */
3852 static struct extent_io_ops btrfs_extent_io_ops = {
3853         .fill_delalloc = run_delalloc_range,
3854         .submit_bio_hook = btrfs_submit_bio_hook,
3855         .merge_bio_hook = btrfs_merge_bio_hook,
3856         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3857         .writepage_end_io_hook = btrfs_writepage_end_io_hook,
3858         .writepage_start_hook = btrfs_writepage_start_hook,
3859         .readpage_io_failed_hook = btrfs_io_failed_hook,
3860         .set_bit_hook = btrfs_set_bit_hook,
3861         .clear_bit_hook = btrfs_clear_bit_hook,
3862 };
3863
/*
 * Full set of address space operations (btrfs_symlink() assigns it to
 * i_mapping->a_ops before switching to the symlink variant).
 */
3864 static struct address_space_operations btrfs_aops = {
3865         .readpage       = btrfs_readpage,
3866         .writepage      = btrfs_writepage,
3867         .writepages     = btrfs_writepages,
3868         .readpages      = btrfs_readpages,
3869         .sync_page      = block_sync_page,
3870         .bmap           = btrfs_bmap,
3871         .direct_IO      = btrfs_direct_IO,
3872         .invalidatepage = btrfs_invalidatepage,
3873         .releasepage    = btrfs_releasepage,
3874         .set_page_dirty = btrfs_set_page_dirty,
3875 };
3876
/*
 * Address space operations for symlinks, installed by btrfs_symlink():
 * only single-page read/write plus invalidate and release.
 */
3877 static struct address_space_operations btrfs_symlink_aops = {
3878         .readpage       = btrfs_readpage,
3879         .writepage      = btrfs_writepage,
3880         .invalidatepage = btrfs_invalidatepage,
3881         .releasepage    = btrfs_releasepage,
3882 };
3883
/*
 * Inode operations with truncate, attribute and xattr handlers
 * (btrfs_symlink() assigns it to i_op before switching to the symlink
 * table).
 */
3884 static struct inode_operations btrfs_file_inode_operations = {
3885         .truncate       = btrfs_truncate,
3886         .getattr        = btrfs_getattr,
3887         .setattr        = btrfs_setattr,
3888         .setxattr       = btrfs_setxattr,
3889         .getxattr       = btrfs_getxattr,
3890         .listxattr      = btrfs_listxattr,
3891         .removexattr    = btrfs_removexattr,
3892         .permission     = btrfs_permission,
3893 };
/*
 * Attribute, xattr and permission handlers without truncate.
 * NOTE(review): presumably for special (device/fifo/socket) inodes, going
 * by the name; the users are not visible in this part of the file.
 */
3894 static struct inode_operations btrfs_special_inode_operations = {
3895         .getattr        = btrfs_getattr,
3896         .setattr        = btrfs_setattr,
3897         .permission     = btrfs_permission,
3898         .setxattr       = btrfs_setxattr,
3899         .getxattr       = btrfs_getxattr,
3900         .listxattr      = btrfs_listxattr,
3901         .removexattr    = btrfs_removexattr,
3902 };
/*
 * Inode operations for symlinks, installed by btrfs_symlink() once the
 * link target has been stored; link reading uses the generic page-based
 * helpers.
 */
3903 static struct inode_operations btrfs_symlink_inode_operations = {
3904         .readlink       = generic_readlink,
3905         .follow_link    = page_follow_link_light,
3906         .put_link       = page_put_link,
3907         .permission     = btrfs_permission,
3908 };