Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux...

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 18:34:40 +0000 (11:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2011 18:34:40 +0000 (11:34 -0700)
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt

index 272f80d5f966741c567b3259ac6755a9055373c4..22f3a0eda1d22e430ebe350d7e952099b9a9e880 100644 (file)
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -147,15 +147,6 @@ grpjquota=<file>   during journal replay. They replace the above
                         package for more details
                         (http://sourceforge.net/projects/linuxquota).
  
-bh             (*)     ext3 associates buffer heads to data pages to
-nobh                   (a) cache disk block mapping information
-                       (b) link pages into transaction to provide
-                           ordering guarantees.
-                       "bh" option forces use of buffer heads.
-                       "nobh" option tries to avoid associating buffer
-                       heads (supported only for "writeback" mode).
-
-
  Specification
  =============
  Ext3 shares all disk implementation with the ext2 filesystem, and adds
@@ -227,5 +218,5 @@ kernel source:      <file:fs/ext3/>
  programs:      http://e2fsprogs.sourceforge.net/
                 http://ext2resize.sourceforge.net
  
-useful links:  http://www.ibm.com/developerworks/library/l-fs7.html
-               http://www.ibm.com/developerworks/library/l-fs8.html
+useful links:  http://www.ibm.com/developerworks/library/l-fs7/index.html
+        http://www.ibm.com/developerworks/library/l-fs8/index.html
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt

index 3ae9bc94352a660f2d3ed9feccc0b3aa8955ffcc..232a575a0c4857249edc5aa76a235ce4e258082f 100644 (file)
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -68,12 +68,12 @@ Note: More extensive information for getting started with ext4 can be
      '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems
      for a fair comparison.  When tuning ext3 for best benchmark numbers,
      it is often worthwhile to try changing the data journaling mode; '-o
-    data=writeback,nobh' can be faster for some workloads.  (Note
-    however that running mounted with data=writeback can potentially
-    leave stale data exposed in recently written files in case of an
-    unclean shutdown, which could be a security exposure in some
-    situations.)  Configuring the filesystem with a large journal can
-    also be helpful for metadata-intensive workloads.
+    data=writeback' can be faster for some workloads.  (Note however that
+    running mounted with data=writeback can potentially leave stale data
+    exposed in recently written files in case of an unclean shutdown,
+    which could be a security exposure in some situations.)  Configuring
+    the filesystem with a large journal can also be helpful for
+    metadata-intensive workloads.
  
  2. Features
  ===========
@@ -272,14 +272,6 @@ grpjquota=<file>   during journal replay. They replace the above
                         package for more details
                         (http://sourceforge.net/projects/linuxquota).
  
-bh             (*)     ext4 associates buffer heads to data pages to
-nobh                   (a) cache disk block mapping information
-                       (b) link pages into transaction to provide
-                           ordering guarantees.
-                       "bh" option forces use of buffer heads.
-                       "nobh" option tries to avoid associating buffer
-                       heads (supported only for "writeback" mode).
-
  stripe=n               Number of filesystem blocks that mballoc will try
                         to use for allocation size and alignment. For RAID5/6
                         systems this should be the number of data
@@ -393,8 +385,7 @@ dioread_nolock              locking. If the dioread_nolock option is specified
                         write and convert the extent to initialized after IO
                         completes. This approach allows ext4 code to avoid
                         using inode mutex, which improves scalability on high
-                       speed storages. However this does not work with nobh
-                       option and the mount will fail. Nor does it work with
+                       speed storages. However this does not work with
                         data journaling and dioread_nolock option will be
                         ignored with kernel warning. Note that dioread_nolock
                         code path is only used for extent-based files.
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c

index 529970617a21c5c45cd1cffa1b39c14f318d72aa..d27b71f1d1832da7aa2b2967a4b853c829dfb721 100644 (file)
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -161,6 +161,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
  
         if (name == NULL)
                 return -EINVAL;
+       name_len = strlen(name);
+       if (name_len > 255)
+               return -ERANGE;
+
         down_read(&EXT2_I(inode)->xattr_sem);
         error = -ENODATA;
         if (!EXT2_I(inode)->i_file_acl)
@@ -181,12 +185,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
                 error = -EIO;
                 goto cleanup;
         }
-       /* find named attribute */
-       name_len = strlen(name);
  
-       error = -ERANGE;
-       if (name_len > 255)
-               goto cleanup;
+       /* find named attribute */
         entry = FIRST_ENTRY(bh);
         while (!IS_LAST_ENTRY(entry)) {
                 struct ext2_xattr_entry *next =
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c

index fe52297e31ad751abc46f5c6695d0e0b029ab3ed..6386d76f44a7d1d9939508e1e8e8be66b394033b 100644 (file)
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -21,6 +21,7 @@
  #include <linux/quotaops.h>
  #include <linux/buffer_head.h>
  #include <linux/blkdev.h>
+#include <trace/events/ext3.h>
  
  /*
   * balloc.c contains the blocks allocation and deallocation routines
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
         desc = ext3_get_group_desc(sb, block_group, NULL);
         if (!desc)
                 return NULL;
+       trace_ext3_read_block_bitmap(sb, block_group);
         bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
         bh = sb_getblk(sb, bitmap_blk);
         if (unlikely(!bh)) {
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
         struct rb_node * parent = NULL;
         struct ext3_reserve_window_node *this;
  
+       trace_ext3_rsv_window_add(sb, rsv);
         while (*p)
         {
                 parent = *p;
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
         rsv = &block_i->rsv_window_node;
         if (!rsv_is_empty(&rsv->rsv_window)) {
                 spin_lock(rsv_lock);
-               if (!rsv_is_empty(&rsv->rsv_window))
+               if (!rsv_is_empty(&rsv->rsv_window)) {
+                       trace_ext3_discard_reservation(inode, rsv);
                         rsv_window_remove(inode->i_sb, rsv);
+               }
                 spin_unlock(rsv_lock);
         }
  }
@@ -683,14 +688,10 @@ error_return:
  void ext3_free_blocks(handle_t *handle, struct inode *inode,
                         ext3_fsblk_t block, unsigned long count)
  {
-       struct super_block * sb;
+       struct super_block *sb = inode->i_sb;
         unsigned long dquot_freed_blocks;
  
-       sb = inode->i_sb;
-       if (!sb) {
-               printk ("ext3_free_blocks: nonexistent device");
-               return;
-       }
+       trace_ext3_free_blocks(inode, block, count);
         ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
         if (dquot_freed_blocks)
                 dquot_free_block(inode, dquot_freed_blocks);
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
         else
                 start_block = grp_goal + group_first_block;
  
+       trace_ext3_alloc_new_reservation(sb, start_block);
         size = my_rsv->rsv_goal_size;
  
         if (!rsv_is_empty(&my_rsv->rsv_window)) {
@@ -1230,8 +1232,11 @@ retry:
          * check if the first free block is within the
          * free space we just reserved
          */
-       if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
+       if (start_block >= my_rsv->rsv_start &&
+           start_block <= my_rsv->rsv_end) {
+               trace_ext3_reserved(sb, start_block, my_rsv);
                 return 0;               /* success */
+       }
         /*
          * if the first free bit we found is out of the reservable space
          * continue search for next reservable space,
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
  
         *errp = -ENOSPC;
         sb = inode->i_sb;
-       if (!sb) {
-               printk("ext3_new_block: nonexistent device");
-               return 0;
-       }
  
         /*
          * Check quota for allocation of this block.
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
                 return 0;
         }
  
+       trace_ext3_request_blocks(inode, goal, num);
+
         sbi = EXT3_SB(sb);
-       es = EXT3_SB(sb)->s_es;
+       es = sbi->s_es;
         ext3_debug("goal=%lu.\n", goal);
         /*
          * Allocate a block from reservation only when
@@ -1742,6 +1745,10 @@ allocated:
         brelse(bitmap_bh);
         dquot_free_block(inode, *count-num);
         *count = num;
+
+       trace_ext3_allocate_blocks(inode, goal, num,
+                                  (unsigned long long)ret_block);
+
         return ret_block;
  
  io_error:
@@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
                 if ((next - start) < minblocks)
                         goto free_extent;
  
+               trace_ext3_discard_blocks(sb, discard_block, next - start);
                  /* Send the TRIM command down to the device */
                 err = sb_issue_discard(sb, discard_block, next - start,
                                        GFP_NOFS, 0);
@@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
         if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
                 return -EINVAL;
         if (start >= max_blks)
-               goto out;
+               return -EINVAL;
         if (start + len > max_blks)
                 len = max_blks - start;
  
@@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
  
         if (ret >= 0)
                 ret = 0;
-
-out:
         range->len = trimmed * sb->s_blocksize;
  
         return ret;
diff --git a/fs/ext3/file.c b/fs/ext3/file.c

index 2be5b99097f13ad44f2db41ca680bfc82a3fa741..724df69847dca1ef2b22ee4827fb6f5c5003ef14 100644 (file)
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
  };
  
  const struct inode_operations ext3_file_inode_operations = {
-       .truncate       = ext3_truncate,
         .setattr        = ext3_setattr,
  #ifdef CONFIG_EXT3_FS_XATTR
         .setxattr       = generic_setxattr,
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c

index 0bcf63adb80a9290866c42153bfe6af37777b885..d494c554c6e69e436e4dbccee6c9e5e3d6ab8060 100644 (file)
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -30,6 +30,7 @@
  #include <linux/jbd.h>
  #include <linux/ext3_fs.h>
  #include <linux/ext3_jbd.h>
+#include <trace/events/ext3.h>
  
  /*
   * akpm: A new design for ext3_sync_file().
@@ -51,12 +52,14 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         int ret, needs_barrier = 0;
         tid_t commit_tid;
  
+       trace_ext3_sync_file_enter(file, datasync);
+
         if (inode->i_sb->s_flags & MS_RDONLY)
                 return 0;
  
         ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
         if (ret)
-               return ret;
+               goto out;
  
         /*
          * Taking the mutex here just to keep consistent with how fsync was
@@ -83,7 +86,8 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
          */
         if (ext3_should_journal_data(inode)) {
                 mutex_unlock(&inode->i_mutex);
-               return ext3_force_commit(inode->i_sb);
+               ret = ext3_force_commit(inode->i_sb);
+               goto out;
         }
  
         if (datasync)
@@ -104,6 +108,9 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
          */
         if (needs_barrier)
                 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+
         mutex_unlock(&inode->i_mutex);
+out:
+       trace_ext3_sync_file_exit(inode, ret);
         return ret;
  }
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c

index bfc2dc43681d41c6b54fe405174cc480de1663e9..bf09cbf938cc155c3328b6cc6d6a8a6f1bed637c 100644 (file)
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -23,6 +23,7 @@
  #include <linux/buffer_head.h>
  #include <linux/random.h>
  #include <linux/bitops.h>
+#include <trace/events/ext3.h>
  
  #include <asm/byteorder.h>
  
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
  
         ino = inode->i_ino;
         ext3_debug ("freeing inode %lu\n", ino);
+       trace_ext3_free_inode(inode);
  
         is_directory = S_ISDIR(inode->i_mode);
  
@@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
                 return ERR_PTR(-EPERM);
  
         sb = dir->i_sb;
+       trace_ext3_request_inode(dir, mode);
         inode = new_inode(sb);
         if (!inode)
                 return ERR_PTR(-ENOMEM);
@@ -601,6 +604,7 @@ got:
         }
  
         ext3_debug("allocating inode %lu\n", inode->i_ino);
+       trace_ext3_allocate_inode(inode, dir, mode);
         goto really_out;
  fail:
         ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c

index 2978a2a17a59b8c0e34a381f3bba4ab1b059d396..04da6acde85dfbcfb50d89302c3f975baab3c76d 100644 (file)
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,10 +38,12 @@
  #include <linux/bio.h>
  #include <linux/fiemap.h>
  #include <linux/namei.h>
+#include <trace/events/ext3.h>
  #include "xattr.h"
  #include "acl.h"
  
  static int ext3_writepage_trans_blocks(struct inode *inode);
+static int ext3_block_truncate_page(struct inode *inode, loff_t from);
  
  /*
   * Test whether an inode is a fast symlink.
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
  
         might_sleep();
  
+       trace_ext3_forget(inode, is_metadata, blocknr);
         BUFFER_TRACE(bh, "enter");
  
         jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
   */
  void ext3_evict_inode (struct inode *inode)
  {
+       struct ext3_inode_info *ei = EXT3_I(inode);
         struct ext3_block_alloc_info *rsv;
         handle_t *handle;
         int want_delete = 0;
  
+       trace_ext3_evict_inode(inode);
         if (!inode->i_nlink && !is_bad_inode(inode)) {
                 dquot_initialize(inode);
                 want_delete = 1;
         }
  
+       /*
+        * When journalling data dirty buffers are tracked only in the journal.
+        * So although mm thinks everything is clean and ready for reaping the
+        * inode might still have some pages to write in the running
+        * transaction or waiting to be checkpointed. Thus calling
+        * journal_invalidatepage() (via truncate_inode_pages()) to discard
+        * these buffers can cause data loss. Also even if we did not discard
+        * these buffers, we would have no way to find them after the inode
+        * is reaped and thus user could see stale data if he tries to read
+        * them before the transaction is checkpointed. So be careful and
+        * force everything to disk here... We use ei->i_datasync_tid to
+        * store the newest transaction containing inode's data.
+        *
+        * Note that directories do not have this problem because they don't
+        * use page cache.
+        */
+       if (inode->i_nlink && ext3_should_journal_data(inode) &&
+           (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+               tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
+               journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
+
+               log_start_commit(journal, commit_tid);
+               log_wait_commit(journal, commit_tid);
+               filemap_write_and_wait(&inode->i_data);
+       }
         truncate_inode_pages(&inode->i_data, 0);
  
         ext3_discard_reservation(inode);
-       rsv = EXT3_I(inode)->i_block_alloc_info;
-       EXT3_I(inode)->i_block_alloc_info = NULL;
+       rsv = ei->i_block_alloc_info;
+       ei->i_block_alloc_info = NULL;
         if (unlikely(rsv))
                 kfree(rsv);
  
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
         if (inode->i_blocks)
                 ext3_truncate(inode);
         /*
-        * Kill off the orphan record which ext3_truncate created.
-        * AKPM: I think this can be inside the above `if'.
-        * Note that ext3_orphan_del() has to be able to cope with the
-        * deletion of a non-existent orphan - this is because we don't
-        * know if ext3_truncate() actually created an orphan record.
-        * (Well, we could do this if we need to, but heck - it works)
+        * Kill off the orphan record created when the inode lost the last
+        * link.  Note that ext3_orphan_del() has to be able to cope with the
+        * deletion of a non-existent orphan - ext3_truncate() could
+        * have removed the record.
          */
         ext3_orphan_del(handle, inode);
-       EXT3_I(inode)->i_dtime  = get_seconds();
+       ei->i_dtime = get_seconds();
  
         /*
          * One subtle ordering requirement: if anything has gone wrong
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
         ext3_fsblk_t first_block = 0;
  
  
+       trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
         J_ASSERT(handle != NULL || create == 0);
         depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
  
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
         if (!create || err == -EIO)
                 goto cleanup;
  
+       /*
+        * Block out ext3_truncate while we alter the tree
+        */
         mutex_lock(&ei->truncate_mutex);
  
         /*
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
          */
         count = ext3_blks_to_allocate(partial, indirect_blks,
                                         maxblocks, blocks_to_boundary);
-       /*
-        * Block out ext3_truncate while we alter the tree
-        */
         err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
                                 offsets + (partial - chain), partial);
  
@@ -970,6 +999,9 @@ cleanup:
         }
         BUFFER_TRACE(bh_result, "returned");
  out:
+       trace_ext3_get_blocks_exit(inode, iblock,
+                                  depth ? le32_to_cpu(chain[depth-1].key) : 0,
+                                  count, err);
         return err;
  }
  
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
         ext3_truncate(inode);
  }
  
+/*
+ * Truncate blocks that were not used by direct IO write. We have to zero out
+ * the last file block as well because direct IO might have written to it.
+ */
+static void ext3_truncate_failed_direct_write(struct inode *inode)
+{
+       ext3_block_truncate_page(inode, inode->i_size);
+       ext3_truncate(inode);
+}
+
  static int ext3_write_begin(struct file *file, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
                                 struct page **pagep, void **fsdata)
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
          * we allocate blocks but write fails for some reason */
         int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
  
+       trace_ext3_write_begin(inode, pos, len, flags);
+
         index = pos >> PAGE_CACHE_SHIFT;
         from = pos & (PAGE_CACHE_SIZE - 1);
         to = from + len;
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
         unsigned from, to;
         int ret = 0, ret2;
  
+       trace_ext3_ordered_write_end(inode, pos, len, copied);
         copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
  
         from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
         struct inode *inode = file->f_mapping->host;
         int ret;
  
+       trace_ext3_writeback_write_end(inode, pos, len, copied);
         copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
         update_file_sizes(inode, pos, copied);
         /*
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
  {
         handle_t *handle = ext3_journal_current_handle();
         struct inode *inode = mapping->host;
+       struct ext3_inode_info *ei = EXT3_I(inode);
         int ret = 0, ret2;
         int partial = 0;
         unsigned from, to;
  
+       trace_ext3_journalled_write_end(inode, pos, len, copied);
         from = pos & (PAGE_CACHE_SIZE - 1);
         to = from + len;
  
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
         if (pos + len > inode->i_size && ext3_can_truncate(inode))
                 ext3_orphan_add(handle, inode);
         ext3_set_inode_state(inode, EXT3_STATE_JDATA);
-       if (inode->i_size > EXT3_I(inode)->i_disksize) {
-               EXT3_I(inode)->i_disksize = inode->i_size;
+       atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
+       if (inode->i_size > ei->i_disksize) {
+               ei->i_disksize = inode->i_size;
                 ret2 = ext3_mark_inode_dirty(handle, inode);
                 if (!ret)
                         ret = ret2;
@@ -1577,6 +1626,7 @@ static int ext3_ordered_writepage(struct page *page,
         if (ext3_journal_current_handle())
                 goto out_fail;
  
+       trace_ext3_ordered_writepage(page);
         if (!page_has_buffers(page)) {
                 create_empty_buffers(page, inode->i_sb->s_blocksize,
                                 (1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1647,6 +1697,7 @@ static int ext3_writeback_writepage(struct page *page,
         if (ext3_journal_current_handle())
                 goto out_fail;
  
+       trace_ext3_writeback_writepage(page);
         if (page_has_buffers(page)) {
                 if (!walk_page_buffers(NULL, page_buffers(page), 0,
                                       PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1689,6 +1740,7 @@ static int ext3_journalled_writepage(struct page *page,
         if (ext3_journal_current_handle())
                 goto no_write;
  
+       trace_ext3_journalled_writepage(page);
         handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
         if (IS_ERR(handle)) {
                 ret = PTR_ERR(handle);
@@ -1715,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page,
                 if (ret == 0)
                         ret = err;
                 ext3_set_inode_state(inode, EXT3_STATE_JDATA);
+               atomic_set(&EXT3_I(inode)->i_datasync_tid,
+                          handle->h_transaction->t_tid);
                 unlock_page(page);
         } else {
                 /*
@@ -1739,6 +1793,7 @@ out_unlock:
  
  static int ext3_readpage(struct file *file, struct page *page)
  {
+       trace_ext3_readpage(page);
         return mpage_readpage(page, ext3_get_block);
  }
  
@@ -1753,6 +1808,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
  {
         journal_t *journal = EXT3_JOURNAL(page->mapping->host);
  
+       trace_ext3_invalidatepage(page, offset);
+
         /*
          * If it's a full truncate we just forget about the pending dirtying
          */
@@ -1766,6 +1823,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
  {
         journal_t *journal = EXT3_JOURNAL(page->mapping->host);
  
+       trace_ext3_releasepage(page);
         WARN_ON(PageChecked(page));
         if (!page_has_buffers(page))
                 return 0;
@@ -1794,6 +1852,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
         size_t count = iov_length(iov, nr_segs);
         int retries = 0;
  
+       trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+
         if (rw == WRITE) {
                 loff_t final_size = offset + count;
  
@@ -1827,7 +1887,7 @@ retry:
                 loff_t end = offset + iov_length(iov, nr_segs);
  
                 if (end > isize)
-                       vmtruncate(inode, isize);
+                       ext3_truncate_failed_direct_write(inode);
         }
         if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
                 goto retry;
@@ -1841,7 +1901,7 @@ retry:
                         /* This is really bad luck. We've written the data
                          * but cannot extend i_size. Truncate allocated blocks
                          * and pretend the write failed... */
-                       ext3_truncate(inode);
+                       ext3_truncate_failed_direct_write(inode);
                         ret = PTR_ERR(handle);
                         goto out;
                 }
@@ -1867,6 +1927,8 @@ retry:
                         ret = err;
         }
  out:
+       trace_ext3_direct_IO_exit(inode, offset,
+                               iov_length(iov, nr_segs), rw, ret);
         return ret;
  }
  
@@ -1949,17 +2011,24 @@ void ext3_set_aops(struct inode *inode)
   * This required during truncate. We need to physically zero the tail end
   * of that block so it doesn't yield old data if the file is later grown.
   */
-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
-               struct address_space *mapping, loff_t from)
+static int ext3_block_truncate_page(struct inode *inode, loff_t from)
  {
         ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
-       unsigned offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned offset = from & (PAGE_CACHE_SIZE - 1);
         unsigned blocksize, iblock, length, pos;
-       struct inode *inode = mapping->host;
+       struct page *page;
+       handle_t *handle = NULL;
         struct buffer_head *bh;
         int err = 0;
  
+       /* Truncated on block boundary - nothing to do */
         blocksize = inode->i_sb->s_blocksize;
+       if ((from & (blocksize - 1)) == 0)
+               return 0;
+
+       page = grab_cache_page(inode->i_mapping, index);
+       if (!page)
+               return -ENOMEM;
         length = blocksize - (offset & (blocksize - 1));
         iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
  
@@ -2004,11 +2073,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
                         goto unlock;
         }
  
+       /* data=writeback mode doesn't need transaction to zero-out data */
+       if (!ext3_should_writeback_data(inode)) {
+               /* We journal at most one block */
+               handle = ext3_journal_start(inode, 1);
+               if (IS_ERR(handle)) {
+                       clear_highpage(page);
+                       flush_dcache_page(page);
+                       err = PTR_ERR(handle);
+                       goto unlock;
+               }
+       }
+
         if (ext3_should_journal_data(inode)) {
                 BUFFER_TRACE(bh, "get write access");
                 err = ext3_journal_get_write_access(handle, bh);
                 if (err)
-                       goto unlock;
+                       goto stop;
         }
  
         zero_user(page, offset, length);
@@ -2022,6 +2103,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
                         err = ext3_journal_dirty_data(handle, bh);
                 mark_buffer_dirty(bh);
         }
+stop:
+       if (handle)
+               ext3_journal_stop(handle);
  
  unlock:
         unlock_page(page);
@@ -2390,8 +2474,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
  
  int ext3_can_truncate(struct inode *inode)
  {
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return 0;
         if (S_ISREG(inode->i_mode))
                 return 1;
         if (S_ISDIR(inode->i_mode))
@@ -2435,7 +2517,6 @@ void ext3_truncate(struct inode *inode)
         struct ext3_inode_info *ei = EXT3_I(inode);
         __le32 *i_data = ei->i_data;
         int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
-       struct address_space *mapping = inode->i_mapping;
         int offsets[4];
         Indirect chain[4];
         Indirect *partial;
@@ -2443,7 +2524,8 @@ void ext3_truncate(struct inode *inode)
         int n;
         long last_block;
         unsigned blocksize = inode->i_sb->s_blocksize;
-       struct page *page;
+
+       trace_ext3_truncate_enter(inode);
  
         if (!ext3_can_truncate(inode))
                 goto out_notrans;
@@ -2451,37 +2533,12 @@ void ext3_truncate(struct inode *inode)
         if (inode->i_size == 0 && ext3_should_writeback_data(inode))
                 ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
  
-       /*
-        * We have to lock the EOF page here, because lock_page() nests
-        * outside journal_start().
-        */
-       if ((inode->i_size & (blocksize - 1)) == 0) {
-               /* Block boundary? Nothing to do */
-               page = NULL;
-       } else {
-               page = grab_cache_page(mapping,
-                               inode->i_size >> PAGE_CACHE_SHIFT);
-               if (!page)
-                       goto out_notrans;
-       }
-
         handle = start_transaction(inode);
-       if (IS_ERR(handle)) {
-               if (page) {
-                       clear_highpage(page);
-                       flush_dcache_page(page);
-                       unlock_page(page);
-                       page_cache_release(page);
-               }
+       if (IS_ERR(handle))
                 goto out_notrans;
-       }
  
         last_block = (inode->i_size + blocksize-1)
                                         >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
-
-       if (page)
-               ext3_block_truncate_page(handle, page, mapping, inode->i_size);
-
         n = ext3_block_to_path(inode, last_block, offsets, NULL);
         if (n == 0)
                 goto out_stop;  /* error */
@@ -2596,6 +2653,7 @@ out_stop:
                 ext3_orphan_del(handle, inode);
  
         ext3_journal_stop(handle);
+       trace_ext3_truncate_exit(inode);
         return;
  out_notrans:
         /*
@@ -2604,6 +2662,7 @@ out_notrans:
          */
         if (inode->i_nlink)
                 ext3_orphan_del(NULL, inode);
+       trace_ext3_truncate_exit(inode);
  }
  
  static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2745,6 +2804,7 @@ make_io:
                  * has in-inode xattrs, or we don't have this inode in memory.
                  * Read the block from disk.
                  */
+               trace_ext3_load_inode(inode);
                 get_bh(bh);
                 bh->b_end_io = end_buffer_read_sync;
                 submit_bh(READ_META, bh);
@@ -3229,18 +3289,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
                 }
  
                 error = ext3_orphan_add(handle, inode);
+               if (error) {
+                       ext3_journal_stop(handle);
+                       goto err_out;
+               }
                 EXT3_I(inode)->i_disksize = attr->ia_size;
-               rc = ext3_mark_inode_dirty(handle, inode);
-               if (!error)
-                       error = rc;
+               error = ext3_mark_inode_dirty(handle, inode);
                 ext3_journal_stop(handle);
+               if (error) {
+                       /* Some hard fs error must have happened. Bail out. */
+                       ext3_orphan_del(NULL, inode);
+                       goto err_out;
+               }
+               rc = ext3_block_truncate_page(inode, attr->ia_size);
+               if (rc) {
+                       /* Cleanup orphan list and exit */
+                       handle = ext3_journal_start(inode, 3);
+                       if (IS_ERR(handle)) {
+                               ext3_orphan_del(NULL, inode);
+                               goto err_out;
+                       }
+                       ext3_orphan_del(handle, inode);
+                       ext3_journal_stop(handle);
+                       goto err_out;
+               }
         }
  
         if ((attr->ia_valid & ATTR_SIZE) &&
             attr->ia_size != i_size_read(inode)) {
-               rc = vmtruncate(inode, attr->ia_size);
-               if (rc)
-                       goto err_out;
+               truncate_setsize(inode, attr->ia_size);
+               ext3_truncate(inode);
         }
  
         setattr_copy(inode, attr);
@@ -3374,6 +3452,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
         int err;
  
         might_sleep();
+       trace_ext3_mark_inode_dirty(inode, _RET_IP_);
         err = ext3_reserve_inode_write(handle, inode, &iloc);
         if (!err)
                 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c

index f4090bd2f345218df867b618806569415c1de688..c7f43944f160e080973ce449a6c97a4bc2a4d2bc 100644 (file)
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -285,7 +285,7 @@ group_add_out:
                 if (!capable(CAP_SYS_ADMIN))
                         return -EPERM;
  
-               if (copy_from_user(&range, (struct fstrim_range *)arg,
+               if (copy_from_user(&range, (struct fstrim_range __user *)arg,
                                    sizeof(range)))
                         return -EFAULT;
  
@@ -293,7 +293,7 @@ group_add_out:
                 if (ret < 0)
                         return ret;
  
-               if (copy_to_user((struct fstrim_range *)arg, &range,
+               if (copy_to_user((struct fstrim_range __user *)arg, &range,
                                  sizeof(range)))
                         return -EFAULT;
  
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c

index 3b57230a17bbf9adad3748d790cc3376e7b3f9c2..6e18a0b7750db81d5900ad813614e8455bf4b82d 100644 (file)
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,7 @@
  #include <linux/quotaops.h>
  #include <linux/buffer_head.h>
  #include <linux/bio.h>
+#include <trace/events/ext3.h>
  
  #include "namei.h"
  #include "xattr.h"
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
                                 while (len--) printk("%c", *name++);
                                 ext3fs_dirhash(de->name, de->name_len, &h);
                                 printk(":%x.%u ", h.hash,
-                                      ((char *) de - base));
+                                      (unsigned) ((char *) de - base));
                         }
                         space += EXT3_DIR_REC_LEN(de->name_len);
                         names++;
@@ -1013,7 +1014,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
  
         *err = -ENOENT;
  errout:
-       dxtrace(printk("%s not found\n", name));
+       dxtrace(printk("%s not found\n", entry->name));
         dx_release (frames);
         return NULL;
  }
@@ -2140,6 +2141,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
         struct ext3_dir_entry_2 * de;
         handle_t *handle;
  
+       trace_ext3_unlink_enter(dir, dentry);
         /* Initialize quotas before so that eventual writes go
          * in separate transaction */
         dquot_initialize(dir);
@@ -2185,6 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
  end_unlink:
         ext3_journal_stop(handle);
         brelse (bh);
+       trace_ext3_unlink_exit(dentry, retval);
         return retval;
  }
  
diff --git a/fs/ext3/super.c b/fs/ext3/super.c

index b57ea2f912693e8f36ab0c1bd74e3e53205689e8..7beb69ae0015996140fe916712f4b1e13860faa0 100644 (file)
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,9 @@
  #include "acl.h"
  #include "namei.h"
  
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext3.h>
+
  #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
    #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
  #else
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
         return &ei->vfs_inode;
  }
  
+static int ext3_drop_inode(struct inode *inode)
+{
+       int drop = generic_drop_inode(inode);
+
+       trace_ext3_drop_inode(inode, drop);
+       return drop;
+}
+
  static void ext3_i_callback(struct rcu_head *head)
  {
         struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = {
         .destroy_inode  = ext3_destroy_inode,
         .write_inode    = ext3_write_inode,
         .dirty_inode    = ext3_dirty_inode,
+       .drop_inode     = ext3_drop_inode,
         .evict_inode    = ext3_evict_inode,
         .put_super      = ext3_put_super,
         .sync_fs        = ext3_sync_fs,
@@ -2509,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
  {
         tid_t target;
  
+       trace_ext3_sync_fs(sb, wait);
         if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
                 if (wait)
                         log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c

index 32e6cc23bd9ad69f1f280f5148f2a5c41ffd16ea..d565759d82eee0c06b10fa44e77abc51f29827d7 100644 (file)
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -803,8 +803,16 @@ inserted:
                         /* We need to allocate a new block */
                         ext3_fsblk_t goal = ext3_group_first_block_no(sb,
                                                 EXT3_I(inode)->i_block_group);
-                       ext3_fsblk_t block = ext3_new_block(handle, inode,
-                                                       goal, &error);
+                       ext3_fsblk_t block;
+
+                       /*
+                        * Protect us against concurrent allocations to the
+                        * same inode from ext3_..._writepage(). Reservation
+                        * code does not expect racing allocations.
+                        */
+                       mutex_lock(&EXT3_I(inode)->truncate_mutex);
+                       block = ext3_new_block(handle, inode, goal, &error);
+                       mutex_unlock(&EXT3_I(inode)->truncate_mutex);
                         if (error)
                                 goto cleanup;
                         ea_idebug(inode, "creating block %d", block);
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c

index e4b87bc1fa56e0dd2ff8c77ae853367f3a529a81..f94fc48ff3a0c2676156c4a3545f011f9e340a7e 100644 (file)
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -22,6 +22,8 @@
  #include <linux/jbd.h>
  #include <linux/errno.h>
  #include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <trace/events/jbd.h>
  
  /*
   * Unlink a buffer from a transaction checkpoint list.
@@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
  
         if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
             !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+               /*
+                * Get our reference so that bh cannot be freed before
+                * we unlock it
+                */
+               get_bh(bh);
                 JBUFFER_TRACE(jh, "remove from checkpoint list");
                 ret = __journal_remove_checkpoint(jh) + 1;
                 jbd_unlock_bh_state(bh);
-               journal_remove_journal_head(bh);
                 BUFFER_TRACE(bh, "release");
                 __brelse(bh);
         } else {
@@ -220,8 +226,8 @@ restart:
                         spin_lock(&journal->j_list_lock);
                         goto restart;
                 }
+               get_bh(bh);
                 if (buffer_locked(bh)) {
-                       get_bh(bh);
                         spin_unlock(&journal->j_list_lock);
                         jbd_unlock_bh_state(bh);
                         wait_on_buffer(bh);
@@ -240,7 +246,6 @@ restart:
                  */
                 released = __journal_remove_checkpoint(jh);
                 jbd_unlock_bh_state(bh);
-               journal_remove_journal_head(bh);
                 __brelse(bh);
         }
  
@@ -253,9 +258,12 @@ static void
  __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  {
         int i;
+       struct blk_plug plug;
  
+       blk_start_plug(&plug);
         for (i = 0; i < *batch_count; i++)
-               write_dirty_buffer(bhs[i], WRITE);
+               write_dirty_buffer(bhs[i], WRITE_SYNC);
+       blk_finish_plug(&plug);
  
         for (i = 0; i < *batch_count; i++) {
                 struct buffer_head *bh = bhs[i];
@@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
                 ret = 1;
                 if (unlikely(buffer_write_io_error(bh)))
                         ret = -EIO;
+               get_bh(bh);
                 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
                 BUFFER_TRACE(bh, "remove from checkpoint");
                 __journal_remove_checkpoint(jh);
                 spin_unlock(&journal->j_list_lock);
                 jbd_unlock_bh_state(bh);
-               journal_remove_journal_head(bh);
                 __brelse(bh);
         } else {
                 /*
@@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal)
          * journal straight away.
          */
         result = cleanup_journal_tail(journal);
+       trace_jbd_checkpoint(journal, result);
         jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
         if (result <= 0)
                 return result;
@@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal)
         if (blocknr < journal->j_tail)
                 freed = freed + journal->j_last - journal->j_first;
  
+       trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
         jbd_debug(1,
                   "Cleaning journal tail from %d to %d (offset %u), "
                   "freeing %u\n",
@@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal)
  /*
   * journal_clean_one_cp_list
   *
- * Find all the written-back checkpoint buffers in the given list and release them.
+ * Find all the written-back checkpoint buffers in the given list and release
+ * them.
   *
- * Called with the journal locked.
   * Called with j_list_lock held.
   * Returns number of bufers reaped (for debug)
   */
@@ -632,8 +642,8 @@ out:
   * checkpoint lists.
   *
   * The function returns 1 if it frees the transaction, 0 otherwise.
+ * The function can free jh and bh.
   *
- * This function is called with the journal locked.
   * This function is called with j_list_lock held.
   * This function is called with jbd_lock_bh_state(jh2bh(jh))
   */
@@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
         }
         journal = transaction->t_journal;
  
+       JBUFFER_TRACE(jh, "removing from transaction");
         __buffer_unlink(jh);
         jh->b_cp_transaction = NULL;
+       journal_put_journal_head(jh);
  
         if (transaction->t_checkpoint_list != NULL ||
             transaction->t_checkpoint_io_list != NULL)
                 goto out;
-       JBUFFER_TRACE(jh, "transaction has no more buffers");
  
         /*
          * There is one special case to worry about: if we have just pulled the
@@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
          * The locking here around t_state is a bit sleazy.
          * See the comment at the end of journal_commit_transaction().
          */
-       if (transaction->t_state != T_FINISHED) {
-               JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+       if (transaction->t_state != T_FINISHED)
                 goto out;
-       }
  
         /* OK, that was the last buffer for the transaction: we can now
            safely remove this transaction from the log */
@@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
         wake_up(&journal->j_wait_logspace);
         ret = 1;
  out:
-       JBUFFER_TRACE(jh, "exit");
         return ret;
  }
  
@@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
         J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
         J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
  
+       /* Get reference for checkpointing transaction */
+       journal_grab_journal_head(jh2bh(jh));
         jh->b_cp_transaction = transaction;
  
         if (!transaction->t_checkpoint_list) {
@@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
         J_ASSERT(journal->j_committing_transaction != transaction);
         J_ASSERT(journal->j_running_transaction != transaction);
  
+       trace_jbd_drop_transaction(journal, transaction);
         jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
         kfree(transaction);
  }
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c

index 72ffa974b0b8d52852e25d7e016f7b4f1fc50b4a..8799207df058bbe24fa2f852fcc78dbba14828ca 100644 (file)
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -21,6 +21,7 @@
  #include <linux/pagemap.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>
+#include <trace/events/jbd.h>
  
  /*
   * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -204,6 +205,8 @@ write_out_data:
                         if (!trylock_buffer(bh)) {
                                 BUFFER_TRACE(bh, "needs blocking lock");
                                 spin_unlock(&journal->j_list_lock);
+                               trace_jbd_do_submit_data(journal,
+                                                    commit_transaction);
                                 /* Write out all data to prevent deadlocks */
                                 journal_do_submit_data(wbuf, bufs, write_op);
                                 bufs = 0;
@@ -236,6 +239,8 @@ write_out_data:
                         jbd_unlock_bh_state(bh);
                         if (bufs == journal->j_wbufsize) {
                                 spin_unlock(&journal->j_list_lock);
+                               trace_jbd_do_submit_data(journal,
+                                                    commit_transaction);
                                 journal_do_submit_data(wbuf, bufs, write_op);
                                 bufs = 0;
                                 goto write_out_data;
@@ -253,10 +258,6 @@ write_out_data:
                         jbd_unlock_bh_state(bh);
                         if (locked)
                                 unlock_buffer(bh);
-                       journal_remove_journal_head(bh);
-                       /* One for our safety reference, other for
-                        * journal_remove_journal_head() */
-                       put_bh(bh);
                         release_data_buffer(bh);
                 }
  
@@ -266,6 +267,7 @@ write_out_data:
                 }
         }
         spin_unlock(&journal->j_list_lock);
+       trace_jbd_do_submit_data(journal, commit_transaction);
         journal_do_submit_data(wbuf, bufs, write_op);
  
         return err;
@@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal)
         commit_transaction = journal->j_running_transaction;
         J_ASSERT(commit_transaction->t_state == T_RUNNING);
  
+       trace_jbd_start_commit(journal, commit_transaction);
         jbd_debug(1, "JBD: starting commit of transaction %d\n",
                         commit_transaction->t_tid);
  
         spin_lock(&journal->j_state_lock);
         commit_transaction->t_state = T_LOCKED;
  
+       trace_jbd_commit_locking(journal, commit_transaction);
         spin_lock(&commit_transaction->t_handle_lock);
         while (commit_transaction->t_updates) {
                 DEFINE_WAIT(wait);
@@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal)
          */
         journal_switch_revoke_table(journal);
  
+       trace_jbd_commit_flushing(journal, commit_transaction);
         commit_transaction->t_state = T_FLUSH;
         journal->j_committing_transaction = commit_transaction;
         journal->j_running_transaction = NULL;
@@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
                 }
                 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
                     jh->b_transaction == commit_transaction &&
-                   jh->b_jlist == BJ_Locked) {
+                   jh->b_jlist == BJ_Locked)
                         __journal_unfile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
-                       journal_remove_journal_head(bh);
-                       put_bh(bh);
-               } else {
-                       jbd_unlock_bh_state(bh);
-               }
+               jbd_unlock_bh_state(bh);
                 release_data_buffer(bh);
                 cond_resched_lock(&journal->j_list_lock);
         }
@@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal)
         commit_transaction->t_state = T_COMMIT;
         spin_unlock(&journal->j_state_lock);
  
+       trace_jbd_commit_logging(journal, commit_transaction);
         J_ASSERT(commit_transaction->t_nr_buffers <=
                  commit_transaction->t_outstanding_credits);
  
@@ -797,10 +798,16 @@ restart_loop:
         while (commit_transaction->t_forget) {
                 transaction_t *cp_transaction;
                 struct buffer_head *bh;
+               int try_to_free = 0;
  
                 jh = commit_transaction->t_forget;
                 spin_unlock(&journal->j_list_lock);
                 bh = jh2bh(jh);
+               /*
+                * Get a reference so that bh cannot be freed before we are
+                * done with it.
+                */
+               get_bh(bh);
                 jbd_lock_bh_state(bh);
                 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
                         jh->b_transaction == journal->j_running_transaction);
@@ -858,28 +865,27 @@ restart_loop:
                         __journal_insert_checkpoint(jh, commit_transaction);
                         if (is_journal_aborted(journal))
                                 clear_buffer_jbddirty(bh);
-                       JBUFFER_TRACE(jh, "refile for checkpoint writeback");
-                       __journal_refile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
                 } else {
                         J_ASSERT_BH(bh, !buffer_dirty(bh));
-                       /* The buffer on BJ_Forget list and not jbddirty means
+                       /*
+                        * The buffer on BJ_Forget list and not jbddirty means
                          * it has been freed by this transaction and hence it
                          * could not have been reallocated until this
                          * transaction has committed. *BUT* it could be
                          * reallocated once we have written all the data to
                          * disk and before we process the buffer on BJ_Forget
-                        * list. */
-                       JBUFFER_TRACE(jh, "refile or unfile freed buffer");
-                       __journal_refile_buffer(jh);
-                       if (!jh->b_transaction) {
-                               jbd_unlock_bh_state(bh);
-                                /* needs a brelse */
-                               journal_remove_journal_head(bh);
-                               release_buffer_page(bh);
-                       } else
-                               jbd_unlock_bh_state(bh);
+                        * list.
+                        */
+                       if (!jh->b_next_transaction)
+                               try_to_free = 1;
                 }
+               JBUFFER_TRACE(jh, "refile or unfile freed buffer");
+               __journal_refile_buffer(jh);
+               jbd_unlock_bh_state(bh);
+               if (try_to_free)
+                       release_buffer_page(bh);
+               else
+                       __brelse(bh);
                 cond_resched_lock(&journal->j_list_lock);
         }
         spin_unlock(&journal->j_list_lock);
@@ -946,6 +952,7 @@ restart_loop:
         }
         spin_unlock(&journal->j_list_lock);
  
+       trace_jbd_end_commit(journal, commit_transaction);
         jbd_debug(1, "JBD: commit %d complete, head %d\n",
                   journal->j_commit_sequence, journal->j_tail_sequence);
  
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c

index e2d4285fbe90ebcc511a96574283418bc933634d..9fe061fb8779be389155a05672b267c8071623e7 100644 (file)
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -38,6 +38,9 @@
  #include <linux/debugfs.h>
  #include <linux/ratelimit.h>
  
+#define CREATE_TRACE_POINTS
+#include <trace/events/jbd.h>
+
  #include <asm/uaccess.h>
  #include <asm/page.h>
  
@@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait)
         } else
                 write_dirty_buffer(bh, WRITE);
  
+       trace_jbd_update_superblock_end(journal, wait);
  out:
         /* If we have just flushed the log (by marking s_start==0), then
          * any future commit will have to be careful to update the
@@ -1799,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
   * When a buffer has its BH_JBD bit set it is immune from being released by
   * core kernel code, mainly via ->b_count.
   *
- * A journal_head may be detached from its buffer_head when the journal_head's
- * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call journal_remove_journal_head() to indicate that the
- * journal_head can be dropped if needed.
+ * A journal_head is detached from its buffer_head when the journal_head's
+ * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
+ * transaction (b_cp_transaction) hold their references to b_jcount.
   *
   * Various places in the kernel want to attach a journal_head to a buffer_head
   * _before_ attaching the journal_head to a transaction.  To protect the
@@ -1815,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
   *     (Attach a journal_head if needed.  Increments b_jcount)
   *     struct journal_head *jh = journal_add_journal_head(bh);
   *     ...
- *     jh->b_transaction = xxx;
- *     journal_put_journal_head(jh);
- *
- * Now, the journal_head's b_jcount is zero, but it is safe from being released
- * because it has a non-zero b_transaction.
+ *      (Get another reference for transaction)
+ *      journal_grab_journal_head(bh);
+ *      jh->b_transaction = xxx;
+ *      (Put original reference)
+ *      journal_put_journal_head(jh);
   */
  
  /*
   * Give a buffer_head a journal_head.
   *
- * Doesn't need the journal lock.
   * May sleep.
   */
  struct journal_head *journal_add_journal_head(struct buffer_head *bh)
@@ -1889,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
         struct journal_head *jh = bh2jh(bh);
  
         J_ASSERT_JH(jh, jh->b_jcount >= 0);
-
-       get_bh(bh);
-       if (jh->b_jcount == 0) {
-               if (jh->b_transaction == NULL &&
-                               jh->b_next_transaction == NULL &&
-                               jh->b_cp_transaction == NULL) {
-                       J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
-                       J_ASSERT_BH(bh, buffer_jbd(bh));
-                       J_ASSERT_BH(bh, jh2bh(jh) == bh);
-                       BUFFER_TRACE(bh, "remove journal_head");
-                       if (jh->b_frozen_data) {
-                               printk(KERN_WARNING "%s: freeing "
-                                               "b_frozen_data\n",
-                                               __func__);
-                               jbd_free(jh->b_frozen_data, bh->b_size);
-                       }
-                       if (jh->b_committed_data) {
-                               printk(KERN_WARNING "%s: freeing "
-                                               "b_committed_data\n",
-                                               __func__);
-                               jbd_free(jh->b_committed_data, bh->b_size);
-                       }
-                       bh->b_private = NULL;
-                       jh->b_bh = NULL;        /* debug, really */
-                       clear_buffer_jbd(bh);
-                       __brelse(bh);
-                       journal_free_journal_head(jh);
-               } else {
-                       BUFFER_TRACE(bh, "journal_head was locked");
-               }
+       J_ASSERT_JH(jh, jh->b_transaction == NULL);
+       J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+       J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+       J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+       J_ASSERT_BH(bh, buffer_jbd(bh));
+       J_ASSERT_BH(bh, jh2bh(jh) == bh);
+       BUFFER_TRACE(bh, "remove journal_head");
+       if (jh->b_frozen_data) {
+               printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+               jbd_free(jh->b_frozen_data, bh->b_size);
         }
+       if (jh->b_committed_data) {
+               printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+               jbd_free(jh->b_committed_data, bh->b_size);
+       }
+       bh->b_private = NULL;
+       jh->b_bh = NULL;        /* debug, really */
+       clear_buffer_jbd(bh);
+       journal_free_journal_head(jh);
  }
  
  /*
- * journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head.   If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
- * time.  Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- */
-void journal_remove_journal_head(struct buffer_head *bh)
-{
-       jbd_lock_bh_journal_head(bh);
-       __journal_remove_journal_head(bh);
-       jbd_unlock_bh_journal_head(bh);
-}
-
-/*
- * Drop a reference on the passed journal_head.  If it fell to zero then try to
+ * Drop a reference on the passed journal_head.  If it fell to zero then
   * release the journal_head from the buffer_head.
   */
  void journal_put_journal_head(struct journal_head *jh)
@@ -1953,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh)
         jbd_lock_bh_journal_head(bh);
         J_ASSERT_JH(jh, jh->b_jcount > 0);
         --jh->b_jcount;
-       if (!jh->b_jcount && !jh->b_transaction) {
+       if (!jh->b_jcount) {
                 __journal_remove_journal_head(bh);
+               jbd_unlock_bh_journal_head(bh);
                 __brelse(bh);
-       }
-       jbd_unlock_bh_journal_head(bh);
+       } else
+               jbd_unlock_bh_journal_head(bh);
  }
  
  /*
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c

index f7ee81a065dabae13e29501603a8c1726132d05c..7e59c6e66f9b79a9fd57d3d450e51ee3de79b722 100644 (file)
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -26,6 +26,7 @@
  #include <linux/mm.h>
  #include <linux/highmem.h>
  #include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
  
  static void __journal_temp_unlink_buffer(struct journal_head *jh);
  
@@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
  
  alloc_transaction:
         if (!journal->j_running_transaction) {
-               new_transaction = kzalloc(sizeof(*new_transaction),
-                                               GFP_NOFS|__GFP_NOFAIL);
+               new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
                 if (!new_transaction) {
-                       ret = -ENOMEM;
-                       goto out;
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto alloc_transaction;
                 }
         }
  
@@ -696,7 +696,6 @@ repeat:
         if (!jh->b_transaction) {
                 JBUFFER_TRACE(jh, "no transaction");
                 J_ASSERT_JH(jh, !jh->b_next_transaction);
-               jh->b_transaction = transaction;
                 JBUFFER_TRACE(jh, "file as BJ_Reserved");
                 spin_lock(&journal->j_list_lock);
                 __journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
                  * committed and so it's safe to clear the dirty bit.
                  */
                 clear_buffer_dirty(jh2bh(jh));
-               jh->b_transaction = transaction;
  
                 /* first access by this transaction */
                 jh->b_modified = 0;
@@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
          */
         JBUFFER_TRACE(jh, "cancelling revoke");
         journal_cancel_revoke(handle, jh);
-       journal_put_journal_head(jh);
  out:
+       journal_put_journal_head(jh);
         return err;
  }
  
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                                 ret = -EIO;
                                 goto no_journal;
                         }
-
-                       if (jh->b_transaction != NULL) {
+                       /* We might have slept so buffer could be refiled now */
+                       if (jh->b_transaction != NULL &&
+                           jh->b_transaction != handle->h_transaction) {
                                 JBUFFER_TRACE(jh, "unfile from commit");
                                 __journal_temp_unlink_buffer(jh);
                                 /* It still points to the committing
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
                 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
                         JBUFFER_TRACE(jh, "not on correct data list: unfile");
                         J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
-                       __journal_temp_unlink_buffer(jh);
-                       jh->b_transaction = handle->h_transaction;
                         JBUFFER_TRACE(jh, "file as data");
                         __journal_file_buffer(jh, handle->h_transaction,
                                                 BJ_SyncData);
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
                         __journal_file_buffer(jh, transaction, BJ_Forget);
                 } else {
                         __journal_unfile_buffer(jh);
-                       journal_remove_journal_head(bh);
-                       __brelse(bh);
                         if (!buffer_jbd(bh)) {
                                 spin_unlock(&journal->j_list_lock);
                                 jbd_unlock_bh_state(bh);
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
                 mark_buffer_dirty(bh);  /* Expose it to the VM */
  }
  
+/*
+ * Remove buffer from all transactions.
+ *
+ * Called with bh_state lock and j_list_lock
+ *
+ * jh and bh may be already freed when this function returns.
+ */
  void __journal_unfile_buffer(struct journal_head *jh)
  {
         __journal_temp_unlink_buffer(jh);
         jh->b_transaction = NULL;
+       journal_put_journal_head(jh);
  }
  
  void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
  {
-       jbd_lock_bh_state(jh2bh(jh));
+       struct buffer_head *bh = jh2bh(jh);
+
+       /* Get reference so that buffer cannot be freed before we unlock it */
+       get_bh(bh);
+       jbd_lock_bh_state(bh);
         spin_lock(&journal->j_list_lock);
         __journal_unfile_buffer(jh);
         spin_unlock(&journal->j_list_lock);
-       jbd_unlock_bh_state(jh2bh(jh));
+       jbd_unlock_bh_state(bh);
+       __brelse(bh);
  }
  
  /*
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
                         /* A written-back ordered data buffer */
                         JBUFFER_TRACE(jh, "release data");
                         __journal_unfile_buffer(jh);
-                       journal_remove_journal_head(bh);
-                       __brelse(bh);
                 }
         } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
                 /* written-back checkpointed metadata buffer */
                 if (jh->b_jlist == BJ_None) {
                         JBUFFER_TRACE(jh, "remove from checkpoint list");
                         __journal_remove_checkpoint(jh);
-                       journal_remove_journal_head(bh);
-                       __brelse(bh);
                 }
         }
         spin_unlock(&journal->j_list_lock);
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
                 /*
                  * We take our own ref against the journal_head here to avoid
                  * having to add tons of locking around each instance of
-                * journal_remove_journal_head() and journal_put_journal_head().
+                * journal_put_journal_head().
                  */
                 jh = journal_grab_journal_head(bh);
                 if (!jh)
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
         int may_free = 1;
         struct buffer_head *bh = jh2bh(jh);
  
-       __journal_unfile_buffer(jh);
-
         if (jh->b_cp_transaction) {
                 JBUFFER_TRACE(jh, "on running+cp transaction");
+               __journal_temp_unlink_buffer(jh);
                 /*
                  * We don't want to write the buffer anymore, clear the
                  * bit so that we don't confuse checks in
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
                 may_free = 0;
         } else {
                 JBUFFER_TRACE(jh, "on running transaction");
-               journal_remove_journal_head(bh);
-               __brelse(bh);
+               __journal_unfile_buffer(jh);
         }
         return may_free;
  }
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
  
         if (jh->b_transaction)
                 __journal_temp_unlink_buffer(jh);
+       else
+               journal_grab_journal_head(bh);
         jh->b_transaction = transaction;
  
         switch (jlist) {
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
   * already started to be used by a subsequent transaction, refile the
   * buffer on that transaction's metadata list.
   *
- * Called under journal->j_list_lock
- *
+ * Called under j_list_lock
   * Called under jbd_lock_bh_state(jh2bh(jh))
+ *
+ * jh and bh may be already freed when this function returns
   */
  void __journal_refile_buffer(struct journal_head *jh)
  {
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
  
         was_dirty = test_clear_buffer_jbddirty(bh);
         __journal_temp_unlink_buffer(jh);
+       /*
+        * We set b_transaction here because b_next_transaction will inherit
+        * our jh reference and thus __journal_file_buffer() must not take a
+        * new one.
+        */
         jh->b_transaction = jh->b_next_transaction;
         jh->b_next_transaction = NULL;
         if (buffer_freed(bh))
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
  }
  
  /*
- * For the unlocked version of this call, also make sure that any
- * hanging journal_head is cleaned up if necessary.
- *
- * __journal_refile_buffer is usually called as part of a single locked
- * operation on a buffer_head, in which the caller is probably going to
- * be hooking the journal_head onto other lists.  In that case it is up
- * to the caller to remove the journal_head if necessary.  For the
- * unlocked journal_refile_buffer call, the caller isn't going to be
- * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
- *
- * *** The journal_head may be freed by this call! ***
+ * __journal_refile_buffer() with necessary locking added. We take our bh
+ * reference so that we can safely unlock bh.
+ *
+ * The jh and bh may be freed by this call.
   */
  void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
  {
         struct buffer_head *bh = jh2bh(jh);
  
+       /* Get reference so that buffer cannot be freed before we unlock it */
+       get_bh(bh);
         jbd_lock_bh_state(bh);
         spin_lock(&journal->j_list_lock);
-
         __journal_refile_buffer(jh);
         jbd_unlock_bh_state(bh);
-       journal_remove_journal_head(bh);
-
         spin_unlock(&journal->j_list_lock);
         __brelse(bh);
  }
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h

index 2dfa7076e8b601f5197420c6bdbab1780ca77d7f..53792bf36c715d4c7f16c08a4a71a02ebda0eefd 100644 (file)
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -18,6 +18,7 @@
  
  #include <linux/types.h>
  #include <linux/magic.h>
+#include <linux/fs.h>
  
  /*
   * The second extended filesystem constants/structures
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h

index 0c473fd79acb4f0ae9ca4b41637fc1e8133b21de..67a803aee619c0b75593e4c740abc35087f28d49 100644 (file)
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -418,12 +418,11 @@ struct ext3_inode {
  #define EXT2_MOUNT_DATA_FLAGS          EXT3_MOUNT_DATA_FLAGS
  #endif
  
-#define ext3_set_bit                   __test_and_set_bit_le
+#define ext3_set_bit                   __set_bit_le
  #define ext3_set_bit_atomic            ext2_set_bit_atomic
-#define ext3_clear_bit                 __test_and_clear_bit_le
+#define ext3_clear_bit                 __clear_bit_le
  #define ext3_clear_bit_atomic          ext2_clear_bit_atomic
  #define ext3_test_bit                  test_bit_le
-#define ext3_find_first_zero_bit       find_first_zero_bit_le
  #define ext3_find_next_zero_bit                find_next_zero_bit_le
  
  /*
@@ -913,7 +912,7 @@ extern void ext3_dirty_inode(struct inode *, int);
  extern int ext3_change_inode_journal_flag(struct inode *, int);
  extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
  extern int ext3_can_truncate(struct inode *inode);
-extern void ext3_truncate (struct inode *);
+extern void ext3_truncate(struct inode *inode);
  extern void ext3_set_inode_flags(struct inode *);
  extern void ext3_get_inode_flags(struct ext3_inode_info *);
  extern void ext3_set_aops(struct inode *inode);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h

index e06965081ba5548f74db935543af84334f58259e..e6a5e34bed4fe64df66592371848e9c66270c569 100644 (file)
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -940,7 +940,6 @@ extern int     journal_force_commit(journal_t *);
   */
  struct journal_head *journal_add_journal_head(struct buffer_head *bh);
  struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
-void journal_remove_journal_head(struct buffer_head *bh);
  void journal_put_journal_head(struct journal_head *jh);
  
  /*
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h

index 44e95d0a721f1eb50db7e41df52a15403ddf814d..423cb6d78ee0bc9958d63a592f53c7d5a17fa874 100644 (file)
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -45,7 +45,7 @@ struct journal_head {
          * has been cowed
          * [jbd_lock_bh_state()]
          */
-       unsigned b_cow_tid;
+       tid_t b_cow_tid;
  
         /*
          * Copy of the buffer data frozen for writing to the log.
diff --git a/include/linux/quota.h b/include/linux/quota.h

index 9a85412e0db6a4015388bc4ebb2c5466e6b55db0..313b7defc08861e17075b56bdc686f287e67307f 100644 (file)
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -415,13 +415,5 @@ struct quota_module_name {
         {QFMT_VFS_V0, "quota_v2"},\
         {0, NULL}}
  
-#else
-
-# /* nodep */ include <sys/cdefs.h>
-
-__BEGIN_DECLS
-long quotactl __P ((unsigned int, const char *, int, caddr_t));
-__END_DECLS
-
  #endif /* __KERNEL__ */
  #endif /* _QUOTA_ */
diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h

new file mode 100644 (file)

index 0000000..7b53c05
--- /dev/null
+++ b/include/trace/events/ext3.h
@@ -0,0 +1,864 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ext3
+
+#if !defined(_TRACE_EXT3_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EXT3_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(ext3_free_inode,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        umode_t, mode                   )
+               __field(        uid_t,  uid                     )
+               __field(        gid_t,  gid                     )
+               __field(        blkcnt_t, blocks                )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->mode   = inode->i_mode;
+               __entry->uid    = inode->i_uid;
+               __entry->gid    = inode->i_gid;
+               __entry->blocks = inode->i_blocks;
+       ),
+
+       TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, __entry->uid, __entry->gid,
+                 (unsigned long) __entry->blocks)
+);
+
+TRACE_EVENT(ext3_request_inode,
+       TP_PROTO(struct inode *dir, int mode),
+
+       TP_ARGS(dir, mode),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  dir                     )
+               __field(        umode_t, mode                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = dir->i_sb->s_dev;
+               __entry->dir    = dir->i_ino;
+               __entry->mode   = mode;
+       ),
+
+       TP_printk("dev %d,%d dir %lu mode 0%o",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->dir, __entry->mode)
+);
+
+TRACE_EVENT(ext3_allocate_inode,
+       TP_PROTO(struct inode *inode, struct inode *dir, int mode),
+
+       TP_ARGS(inode, dir, mode),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        ino_t,  dir                     )
+               __field(        umode_t, mode                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->dir    = dir->i_ino;
+               __entry->mode   = mode;
+       ),
+
+       TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long) __entry->dir, __entry->mode)
+);
+
+TRACE_EVENT(ext3_evict_inode,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        int,    nlink                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->nlink  = inode->i_nlink;
+       ),
+
+       TP_printk("dev %d,%d ino %lu nlink %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->nlink)
+);
+
+TRACE_EVENT(ext3_drop_inode,
+       TP_PROTO(struct inode *inode, int drop),
+
+       TP_ARGS(inode, drop),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        int,    drop                    )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->drop   = drop;
+       ),
+
+       TP_printk("dev %d,%d ino %lu drop %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->drop)
+);
+
+TRACE_EVENT(ext3_mark_inode_dirty,
+       TP_PROTO(struct inode *inode, unsigned long IP),
+
+       TP_ARGS(inode, IP),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(unsigned long,  ip                      )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->ip     = IP;
+       ),
+
+       TP_printk("dev %d,%d ino %lu caller %pF",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, (void *)__entry->ip)
+);
+
+TRACE_EVENT(ext3_write_begin,
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int flags),
+
+       TP_ARGS(inode, pos, len, flags),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned int, len               )
+               __field(        unsigned int, flags             )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->pos    = pos;
+               __entry->len    = len;
+               __entry->flags  = flags;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->flags)
+);
+
+DECLARE_EVENT_CLASS(ext3__write_end,
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                       unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned int, len               )
+               __field(        unsigned int, copied            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->pos    = pos;
+               __entry->len    = len;
+               __entry->copied = copied;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_ordered_write_end,
+
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_writeback_write_end,
+
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_journalled_write_end,
+
+       TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+                unsigned int copied),
+
+       TP_ARGS(inode, pos, len, copied)
+);
+
+DECLARE_EVENT_CLASS(ext3__page_op,
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        pgoff_t, index                  )
+
+       ),
+
+       TP_fast_assign(
+               __entry->index  = page->index;
+               __entry->ino    = page->mapping->host->i_ino;
+               __entry->dev    = page->mapping->host->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu page_index %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, __entry->index)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_ordered_writepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_writeback_writepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_journalled_writepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_readpage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_releasepage,
+
+       TP_PROTO(struct page *page),
+
+       TP_ARGS(page)
+);
+
+TRACE_EVENT(ext3_invalidatepage,
+       TP_PROTO(struct page *page, unsigned long offset),
+
+       TP_ARGS(page, offset),
+
+       TP_STRUCT__entry(
+               __field(        pgoff_t, index                  )
+               __field(        unsigned long, offset           )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+
+       ),
+
+       TP_fast_assign(
+               __entry->index  = page->index;
+               __entry->offset = offset;
+               __entry->ino    = page->mapping->host->i_ino;
+               __entry->dev    = page->mapping->host->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->index, __entry->offset)
+);
+
+TRACE_EVENT(ext3_discard_blocks,
+       TP_PROTO(struct super_block *sb, unsigned long blk,
+                       unsigned long count),
+
+       TP_ARGS(sb, blk, count),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,          dev             )
+               __field(        unsigned long,  blk             )
+               __field(        unsigned long,  count           )
+
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->blk    = blk;
+               __entry->count  = count;
+       ),
+
+       TP_printk("dev %d,%d blk %lu count %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->blk, __entry->count)
+);
+
+TRACE_EVENT(ext3_request_blocks,
+       TP_PROTO(struct inode *inode, unsigned long goal,
+                unsigned long count),
+
+       TP_ARGS(inode, goal, count),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        unsigned long, count            )
+               __field(        unsigned long,  goal            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->count  = count;
+               __entry->goal   = goal;
+       ),
+
+       TP_printk("dev %d,%d ino %lu count %lu goal %lu ",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->count, __entry->goal)
+);
+
+TRACE_EVENT(ext3_allocate_blocks,
+       TP_PROTO(struct inode *inode, unsigned long goal,
+                unsigned long count, unsigned long block),
+
+       TP_ARGS(inode, goal, count, block),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        unsigned long,  block           )
+               __field(        unsigned long, count            )
+               __field(        unsigned long,  goal            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->block  = block;
+               __entry->count  = count;
+               __entry->goal   = goal;
+       ),
+
+       TP_printk("dev %d,%d ino %lu count %lu block %lu goal %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                  __entry->count, __entry->block,
+                 __entry->goal)
+);
+
+TRACE_EVENT(ext3_free_blocks,
+       TP_PROTO(struct inode *inode, unsigned long block,
+                unsigned long count),
+
+       TP_ARGS(inode, block, count),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        umode_t, mode                   )
+               __field(        unsigned long,  block           )
+               __field(        unsigned long,  count           )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->ino            = inode->i_ino;
+               __entry->mode           = inode->i_mode;
+               __entry->block          = block;
+               __entry->count          = count;
+       ),
+
+       TP_printk("dev %d,%d ino %lu mode 0%o block %lu count %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, __entry->block, __entry->count)
+);
+
+TRACE_EVENT(ext3_sync_file_enter,
+       TP_PROTO(struct file *file, int datasync),
+
+       TP_ARGS(file, datasync),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        ino_t,  parent                  )
+               __field(        int,    datasync                )
+       ),
+
+       TP_fast_assign(
+               struct dentry *dentry = file->f_path.dentry;
+
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->datasync       = datasync;
+               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+       ),
+
+       TP_printk("dev %d,%d ino %lu parent %lu datasync %d ",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long) __entry->parent, __entry->datasync)
+);
+
+TRACE_EVENT(ext3_sync_file_exit,
+       TP_PROTO(struct inode *inode, int ret),
+
+       TP_ARGS(inode, ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    ret                     )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->ret            = ret;
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->ret)
+);
+
+TRACE_EVENT(ext3_sync_fs,
+       TP_PROTO(struct super_block *sb, int wait),
+
+       TP_ARGS(sb, wait),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        int,    wait                    )
+
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->wait   = wait;
+       ),
+
+       TP_printk("dev %d,%d wait %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->wait)
+);
+
+TRACE_EVENT(ext3_rsv_window_add,
+       TP_PROTO(struct super_block *sb,
+                struct ext3_reserve_window_node *rsv_node),
+
+       TP_ARGS(sb, rsv_node),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  start           )
+               __field(        unsigned long,  end             )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->start  = rsv_node->rsv_window._rsv_start;
+               __entry->end    = rsv_node->rsv_window._rsv_end;
+       ),
+
+       TP_printk("dev %d,%d start %lu end %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->start, __entry->end)
+);
+
+TRACE_EVENT(ext3_discard_reservation,
+       TP_PROTO(struct inode *inode,
+                struct ext3_reserve_window_node *rsv_node),
+
+       TP_ARGS(inode, rsv_node),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  start           )
+               __field(        unsigned long,  end             )
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->start  = rsv_node->rsv_window._rsv_start;
+               __entry->end    = rsv_node->rsv_window._rsv_end;
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu start %lu end %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long)__entry->ino, __entry->start,
+                 __entry->end)
+);
+
+TRACE_EVENT(ext3_alloc_new_reservation,
+       TP_PROTO(struct super_block *sb, unsigned long goal),
+
+       TP_ARGS(sb, goal),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        unsigned long,  goal            )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->goal   = goal;
+       ),
+
+       TP_printk("dev %d,%d goal %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->goal)
+);
+
+TRACE_EVENT(ext3_reserved,
+       TP_PROTO(struct super_block *sb, unsigned long block,
+                struct ext3_reserve_window_node *rsv_node),
+
+       TP_ARGS(sb, block, rsv_node),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  block           )
+               __field(        unsigned long,  start           )
+               __field(        unsigned long,  end             )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->block  = block;
+               __entry->start  = rsv_node->rsv_window._rsv_start;
+               __entry->end    = rsv_node->rsv_window._rsv_end;
+               __entry->dev    = sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d block %lu, start %lu end %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->block, __entry->start, __entry->end)
+);
+
+TRACE_EVENT(ext3_forget,
+       TP_PROTO(struct inode *inode, int is_metadata, unsigned long block),
+
+       TP_ARGS(inode, is_metadata, block),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        ino_t,  ino                     )
+               __field(        umode_t, mode                   )
+               __field(        int,    is_metadata             )
+               __field(        unsigned long,  block           )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->ino    = inode->i_ino;
+               __entry->mode   = inode->i_mode;
+               __entry->is_metadata = is_metadata;
+               __entry->block  = block;
+       ),
+
+       TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->mode, __entry->is_metadata, __entry->block)
+);
+
+TRACE_EVENT(ext3_read_block_bitmap,
+       TP_PROTO(struct super_block *sb, unsigned int group),
+
+       TP_ARGS(sb, group),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        __u32,  group                   )
+
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->group  = group;
+       ),
+
+       TP_printk("dev %d,%d group %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->group)
+);
+
+TRACE_EVENT(ext3_direct_IO_enter,
+       TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+
+       TP_ARGS(inode, offset, len, rw),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned long,  len             )
+               __field(        int,    rw                      )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pos    = offset;
+               __entry->len    = len;
+               __entry->rw     = rw;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->rw)
+);
+
+TRACE_EVENT(ext3_direct_IO_exit,
+       TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
+                int rw, int ret),
+
+       TP_ARGS(inode, offset, len, rw, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        loff_t, pos                     )
+               __field(        unsigned long,  len             )
+               __field(        int,    rw                      )
+               __field(        int,    ret                     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->pos    = offset;
+               __entry->len    = len;
+               __entry->rw     = rw;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (unsigned long long) __entry->pos, __entry->len,
+                 __entry->rw, __entry->ret)
+);
+
+TRACE_EVENT(ext3_unlink_enter,
+       TP_PROTO(struct inode *parent, struct dentry *dentry),
+
+       TP_ARGS(parent, dentry),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  parent                  )
+               __field(        ino_t,  ino                     )
+               __field(        loff_t, size                    )
+               __field(        dev_t,  dev                     )
+       ),
+
+       TP_fast_assign(
+               __entry->parent         = parent->i_ino;
+               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->size           = dentry->d_inode->i_size;
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu size %lld parent %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 (long long) __entry->size,    /* signed, matches %lld */
+                 (unsigned long) __entry->parent) /* unsigned, matches %lu */
+);
+
+TRACE_EVENT(ext3_unlink_exit,
+       TP_PROTO(struct dentry *dentry, int ret),
+
+       TP_ARGS(dentry, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        dev_t,  dev                     )
+               __field(        int,    ret                     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ret            = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %lu ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->ret)
+);
+
+DECLARE_EVENT_CLASS(ext3__truncate,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        blkcnt_t,       blocks          )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->blocks = inode->i_blocks; /* can exceed 32 bits */
+       ),
+
+       TP_printk("dev %d,%d ino %lu blocks %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino, (unsigned long long) __entry->blocks)
+);
+
+DEFINE_EVENT(ext3__truncate, ext3_truncate_enter,
+
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+DEFINE_EVENT(ext3__truncate, ext3_truncate_exit,
+
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+TRACE_EVENT(ext3_get_blocks_enter,
+       TP_PROTO(struct inode *inode, unsigned long lblk,
+                unsigned long len, int create),
+
+       TP_ARGS(inode, lblk, len, create),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        unsigned long,  lblk            )
+               __field(        unsigned long,  len             )
+               __field(        int,            create          )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->len    = len;
+               __entry->create = create;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %lu len %lu create %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->lblk, __entry->len, __entry->create) /* int: %d */
+);
+
+TRACE_EVENT(ext3_get_blocks_exit,
+       TP_PROTO(struct inode *inode, unsigned long lblk,
+                unsigned long pblk, unsigned long len, int ret),
+
+       TP_ARGS(inode, lblk, pblk, len, ret),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino             )
+               __field(        dev_t,          dev             )
+               __field(        unsigned long,  lblk            )
+               __field(        unsigned long,  pblk            )
+               __field(        unsigned long,  len             )
+               __field(        int,            ret             )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->dev    = inode->i_sb->s_dev;
+               __entry->lblk   = lblk;
+               __entry->pblk   = pblk;
+               __entry->len    = len;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("dev %d,%d ino %lu lblk %lu pblk %lu len %lu ret %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                  __entry->lblk, __entry->pblk,
+                 __entry->len, __entry->ret)
+);
+
+TRACE_EVENT(ext3_load_inode,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino             )
+               __field(        dev_t,  dev             )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+       ),
+
+       TP_printk("dev %d,%d ino %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino)
+);
+
+#endif /* _TRACE_EXT3_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h

new file mode 100644 (file)

index 0000000..aff64d8
--- /dev/null
+++ b/include/trace/events/jbd.h
@@ -0,0 +1,203 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM jbd
+
+#if !defined(_TRACE_JBD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_JBD_H
+
+#include <linux/jbd.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(jbd_checkpoint,
+
+       TP_PROTO(journal_t *journal, int result),
+
+       TP_ARGS(journal, result),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        int,    result                  )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->result         = result;
+       ),
+
+       TP_printk("dev %d,%d result %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->result)
+);
+
+DECLARE_EVENT_CLASS(jbd_commit,
+
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        char,   sync_commit             )
+               __field(        tid_t,  transaction             ) /* unsigned id */
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->sync_commit = commit_transaction->t_synchronous_commit;
+               __entry->transaction    = commit_transaction->t_tid;
+       ),
+
+       TP_printk("dev %d,%d transaction %u sync %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->transaction, __entry->sync_commit)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_start_commit,
+
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_locking,
+
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_flushing,
+
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_logging,
+
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction)
+);
+
+TRACE_EVENT(jbd_drop_transaction,
+
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        char,   sync_commit             )
+               __field(        tid_t,  transaction             ) /* unsigned id */
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->sync_commit = commit_transaction->t_synchronous_commit;
+               __entry->transaction    = commit_transaction->t_tid;
+       ),
+
+       TP_printk("dev %d,%d transaction %u sync %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->transaction, __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd_end_commit,
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        char,   sync_commit             )
+               __field(        tid_t,  transaction             ) /* unsigned id */
+               __field(        tid_t,  head                    ) /* unsigned id */
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->sync_commit = commit_transaction->t_synchronous_commit;
+               __entry->transaction    = commit_transaction->t_tid;
+               __entry->head           = journal->j_tail_sequence;
+       ),
+
+       TP_printk("dev %d,%d transaction %u sync %d head %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->transaction, __entry->sync_commit, __entry->head)
+);
+
+TRACE_EVENT(jbd_do_submit_data,
+       TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+       TP_ARGS(journal, commit_transaction),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        char,   sync_commit             )
+               __field(        tid_t,  transaction             ) /* unsigned id */
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->sync_commit = commit_transaction->t_synchronous_commit;
+               __entry->transaction    = commit_transaction->t_tid;
+       ),
+
+       TP_printk("dev %d,%d transaction %u sync %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->transaction, __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd_cleanup_journal_tail,
+
+       TP_PROTO(journal_t *journal, tid_t first_tid,
+                unsigned long block_nr, unsigned long freed),
+
+       TP_ARGS(journal, first_tid, block_nr, freed),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        tid_t,  tail_sequence           )
+               __field(        tid_t,  first_tid               )
+               __field(unsigned long,  block_nr                )
+               __field(unsigned long,  freed                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->tail_sequence  = journal->j_tail_sequence;
+               __entry->first_tid      = first_tid;
+               __entry->block_nr       = block_nr;
+               __entry->freed          = freed;
+       ),
+
+       TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tail_sequence, __entry->first_tid,
+                 __entry->block_nr, __entry->freed)
+);
+
+TRACE_EVENT(jbd_update_superblock_end,
+       TP_PROTO(journal_t *journal, int wait),
+
+       TP_ARGS(journal, wait),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(        int,    wait                    )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = journal->j_fs_dev->bd_dev;
+               __entry->wait           = wait;
+       ),
+
+       TP_printk("dev %d,%d wait %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->wait)
+);
+
+#endif /* _TRACE_JBD_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 26 Jul 2011 18:34:40 +0000 (11:34 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 26 Jul 2011 18:34:40 +0000 (11:34 -0700)
Documentation/filesystems/ext3.txt		patch \| blob \| blame \| history
Documentation/filesystems/ext4.txt		patch \| blob \| blame \| history
fs/ext2/xattr.c		patch \| blob \| blame \| history
fs/ext3/balloc.c		patch \| blob \| blame \| history
fs/ext3/file.c		patch \| blob \| blame \| history
fs/ext3/fsync.c		patch \| blob \| blame \| history
fs/ext3/ialloc.c		patch \| blob \| blame \| history
fs/ext3/inode.c		patch \| blob \| blame \| history
fs/ext3/ioctl.c		patch \| blob \| blame \| history
fs/ext3/namei.c		patch \| blob \| blame \| history
fs/ext3/super.c		patch \| blob \| blame \| history
fs/ext3/xattr.c		patch \| blob \| blame \| history
fs/jbd/checkpoint.c		patch \| blob \| blame \| history
fs/jbd/commit.c		patch \| blob \| blame \| history
fs/jbd/journal.c		patch \| blob \| blame \| history
fs/jbd/transaction.c		patch \| blob \| blame \| history
include/linux/ext2_fs.h		patch \| blob \| blame \| history
include/linux/ext3_fs.h		patch \| blob \| blame \| history
include/linux/jbd.h		patch \| blob \| blame \| history
include/linux/journal-head.h		patch \| blob \| blame \| history
include/linux/quota.h		patch \| blob \| blame \| history
include/trace/events/ext3.h	[new file with mode: 0644]	patch \| blob
include/trace/events/jbd.h	[new file with mode: 0644]	patch \| blob