* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
jbd: change the field "b_cow_tid" of struct journal_head from type unsigned to tid_t
ext3.txt: update the links in the section "useful links" to the latest ones
ext3: Fix data corruption in inodes with journalled data
ext2: check xattr name_len before acquiring xattr_sem in ext2_xattr_get
ext3: Fix compilation with -DDX_DEBUG
quota: Remove unused declaration
jbd: Use WRITE_SYNC in journal checkpoint.
jbd: Fix oops in journal_remove_journal_head()
ext3: Return -EINVAL when start is beyond the end of fs in ext3_trim_fs()
ext3/ioctl.c: silence sparse warnings about different address spaces
ext3/ext4 Documentation: remove bh/nobh since it has been deprecated
ext3: Improve truncate error handling
ext3: use proper little-endian bitops
ext2: include fs.h into ext2_fs.h
ext3: Fix oops in ext3_try_to_allocate_with_rsv()
jbd: fix a bug of leaking jh->b_jcount
jbd: remove dependency on __GFP_NOFAIL
ext3: Convert ext3 to new truncate calling convention
jbd: Add fixed tracepoints
ext3: Add fixed tracepoints
Resolve conflicts in fs/ext3/fsync.c due to fsync locking push-down and
new fixed tracepoints.
package for more details
(http://sourceforge.net/projects/linuxquota).
-bh (*) ext3 associates buffer heads to data pages to
-nobh (a) cache disk block mapping information
- (b) link pages into transaction to provide
- ordering guarantees.
- "bh" option forces use of buffer heads.
- "nobh" option tries to avoid associating buffer
- heads (supported only for "writeback" mode).
-
-
Specification
=============
Ext3 shares all disk implementation with the ext2 filesystem, and adds
programs: http://e2fsprogs.sourceforge.net/
http://ext2resize.sourceforge.net
-useful links: http://www.ibm.com/developerworks/library/l-fs7.html
- http://www.ibm.com/developerworks/library/l-fs8.html
+useful links: http://www.ibm.com/developerworks/library/l-fs7/index.html
+ http://www.ibm.com/developerworks/library/l-fs8/index.html
'-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems
for a fair comparison. When tuning ext3 for best benchmark numbers,
it is often worthwhile to try changing the data journaling mode; '-o
- data=writeback,nobh' can be faster for some workloads. (Note
- however that running mounted with data=writeback can potentially
- leave stale data exposed in recently written files in case of an
- unclean shutdown, which could be a security exposure in some
- situations.) Configuring the filesystem with a large journal can
- also be helpful for metadata-intensive workloads.
+ data=writeback' can be faster for some workloads. (Note however that
+ running mounted with data=writeback can potentially leave stale data
+ exposed in recently written files in case of an unclean shutdown,
+ which could be a security exposure in some situations.) Configuring
+ the filesystem with a large journal can also be helpful for
+ metadata-intensive workloads.
2. Features
===========
package for more details
(http://sourceforge.net/projects/linuxquota).
-bh (*) ext4 associates buffer heads to data pages to
-nobh (a) cache disk block mapping information
- (b) link pages into transaction to provide
- ordering guarantees.
- "bh" option forces use of buffer heads.
- "nobh" option tries to avoid associating buffer
- heads (supported only for "writeback" mode).
-
stripe=n Number of filesystem blocks that mballoc will try
to use for allocation size and alignment. For RAID5/6
systems this should be the number of data
write and convert the extent to initialized after IO
completes. This approach allows ext4 code to avoid
using inode mutex, which improves scalability on high
- speed storages. However this does not work with nobh
- option and the mount will fail. Nor does it work with
+ speed storages. However this does not work with
data journaling and dioread_nolock option will be
ignored with kernel warning. Note that dioread_nolock
code path is only used for extent-based files.
if (name == NULL)
return -EINVAL;
+ name_len = strlen(name);
+ if (name_len > 255)
+ return -ERANGE;
+
down_read(&EXT2_I(inode)->xattr_sem);
error = -ENODATA;
if (!EXT2_I(inode)->i_file_acl)
error = -EIO;
goto cleanup;
}
- /* find named attribute */
- name_len = strlen(name);
- error = -ERANGE;
- if (name_len > 255)
- goto cleanup;
+ /* find named attribute */
entry = FIRST_ENTRY(bh);
while (!IS_LAST_ENTRY(entry)) {
struct ext2_xattr_entry *next =
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
+#include <trace/events/ext3.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
desc = ext3_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
+ trace_ext3_read_block_bitmap(sb, block_group);
bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
struct rb_node * parent = NULL;
struct ext3_reserve_window_node *this;
+ trace_ext3_rsv_window_add(sb, rsv);
while (*p)
{
parent = *p;
rsv = &block_i->rsv_window_node;
if (!rsv_is_empty(&rsv->rsv_window)) {
spin_lock(rsv_lock);
- if (!rsv_is_empty(&rsv->rsv_window))
+ if (!rsv_is_empty(&rsv->rsv_window)) {
+ trace_ext3_discard_reservation(inode, rsv);
rsv_window_remove(inode->i_sb, rsv);
+ }
spin_unlock(rsv_lock);
}
}
void ext3_free_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count)
{
- struct super_block * sb;
+ struct super_block *sb = inode->i_sb;
unsigned long dquot_freed_blocks;
- sb = inode->i_sb;
- if (!sb) {
- printk ("ext3_free_blocks: nonexistent device");
- return;
- }
+ trace_ext3_free_blocks(inode, block, count);
ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
if (dquot_freed_blocks)
dquot_free_block(inode, dquot_freed_blocks);
else
start_block = grp_goal + group_first_block;
+ trace_ext3_alloc_new_reservation(sb, start_block);
size = my_rsv->rsv_goal_size;
if (!rsv_is_empty(&my_rsv->rsv_window)) {
* check if the first free block is within the
* free space we just reserved
*/
- if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
+ if (start_block >= my_rsv->rsv_start &&
+ start_block <= my_rsv->rsv_end) {
+ trace_ext3_reserved(sb, start_block, my_rsv);
return 0; /* success */
+ }
/*
* if the first free bit we found is out of the reservable space
* continue search for next reservable space,
*errp = -ENOSPC;
sb = inode->i_sb;
- if (!sb) {
- printk("ext3_new_block: nonexistent device");
- return 0;
- }
/*
* Check quota for allocation of this block.
return 0;
}
+ trace_ext3_request_blocks(inode, goal, num);
+
sbi = EXT3_SB(sb);
- es = EXT3_SB(sb)->s_es;
+ es = sbi->s_es;
ext3_debug("goal=%lu.\n", goal);
/*
* Allocate a block from reservation only when
brelse(bitmap_bh);
dquot_free_block(inode, *count-num);
*count = num;
+
+ trace_ext3_allocate_blocks(inode, goal, num,
+ (unsigned long long)ret_block);
+
return ret_block;
io_error:
if ((next - start) < minblocks)
goto free_extent;
+ trace_ext3_discard_blocks(sb, discard_block, next - start);
/* Send the TRIM command down to the device */
err = sb_issue_discard(sb, discard_block, next - start,
GFP_NOFS, 0);
if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
return -EINVAL;
if (start >= max_blks)
- goto out;
+ return -EINVAL;
if (start + len > max_blks)
len = max_blks - start;
if (ret >= 0)
ret = 0;
-
-out:
range->len = trimmed * sb->s_blocksize;
return ret;
};
const struct inode_operations ext3_file_inode_operations = {
- .truncate = ext3_truncate,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
+#include <trace/events/ext3.h>
/*
* akpm: A new design for ext3_sync_file().
int ret, needs_barrier = 0;
tid_t commit_tid;
+ trace_ext3_sync_file_enter(file, datasync);
+
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
- return ret;
+ goto out;
/*
* Taking the mutex here just to keep consistent with how fsync was
*/
if (ext3_should_journal_data(inode)) {
mutex_unlock(&inode->i_mutex);
- return ext3_force_commit(inode->i_sb);
+ ret = ext3_force_commit(inode->i_sb);
+ goto out;
}
if (datasync)
*/
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+
mutex_unlock(&inode->i_mutex);
+out:
+ trace_ext3_sync_file_exit(inode, ret);
return ret;
}
#include <linux/buffer_head.h>
#include <linux/random.h>
#include <linux/bitops.h>
+#include <trace/events/ext3.h>
#include <asm/byteorder.h>
ino = inode->i_ino;
ext3_debug ("freeing inode %lu\n", ino);
+ trace_ext3_free_inode(inode);
is_directory = S_ISDIR(inode->i_mode);
return ERR_PTR(-EPERM);
sb = dir->i_sb;
+ trace_ext3_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
}
ext3_debug("allocating inode %lu\n", inode->i_ino);
+ trace_ext3_allocate_inode(inode, dir, mode);
goto really_out;
fail:
ext3_std_error(sb, err);
#include <linux/bio.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#include <trace/events/ext3.h>
#include "xattr.h"
#include "acl.h"
static int ext3_writepage_trans_blocks(struct inode *inode);
+static int ext3_block_truncate_page(struct inode *inode, loff_t from);
/*
* Test whether an inode is a fast symlink.
might_sleep();
+ trace_ext3_forget(inode, is_metadata, blocknr);
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
*/
void ext3_evict_inode (struct inode *inode)
{
+ struct ext3_inode_info *ei = EXT3_I(inode);
struct ext3_block_alloc_info *rsv;
handle_t *handle;
int want_delete = 0;
+ trace_ext3_evict_inode(inode);
if (!inode->i_nlink && !is_bad_inode(inode)) {
dquot_initialize(inode);
want_delete = 1;
}
+ /*
+ * When journalling data dirty buffers are tracked only in the journal.
+ * So although mm thinks everything is clean and ready for reaping the
+ * inode might still have some pages to write in the running
+ * transaction or waiting to be checkpointed. Thus calling
+ * journal_invalidatepage() (via truncate_inode_pages()) to discard
+ * these buffers can cause data loss. Also even if we did not discard
+ * these buffers, we would have no way to find them after the inode
+ * is reaped and thus user could see stale data if he tries to read
+ * them before the transaction is checkpointed. So be careful and
+ * force everything to disk here... We use ei->i_datasync_tid to
+ * store the newest transaction containing inode's data.
+ *
+ * Note that directories do not have this problem because they don't
+ * use page cache.
+ */
+ if (inode->i_nlink && ext3_should_journal_data(inode) &&
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
+ journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
+
+ log_start_commit(journal, commit_tid);
+ log_wait_commit(journal, commit_tid);
+ filemap_write_and_wait(&inode->i_data);
+ }
truncate_inode_pages(&inode->i_data, 0);
ext3_discard_reservation(inode);
- rsv = EXT3_I(inode)->i_block_alloc_info;
- EXT3_I(inode)->i_block_alloc_info = NULL;
+ rsv = ei->i_block_alloc_info;
+ ei->i_block_alloc_info = NULL;
if (unlikely(rsv))
kfree(rsv);
if (inode->i_blocks)
ext3_truncate(inode);
/*
- * Kill off the orphan record which ext3_truncate created.
- * AKPM: I think this can be inside the above `if'.
- * Note that ext3_orphan_del() has to be able to cope with the
- * deletion of a non-existent orphan - this is because we don't
- * know if ext3_truncate() actually created an orphan record.
- * (Well, we could do this if we need to, but heck - it works)
+ * Kill off the orphan record created when the inode lost the last
+ * link. Note that ext3_orphan_del() has to be able to cope with the
+ * deletion of a non-existent orphan - ext3_truncate() could
+ * have removed the record.
*/
ext3_orphan_del(handle, inode);
- EXT3_I(inode)->i_dtime = get_seconds();
+ ei->i_dtime = get_seconds();
/*
* One subtle ordering requirement: if anything has gone wrong
ext3_fsblk_t first_block = 0;
+ trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
J_ASSERT(handle != NULL || create == 0);
depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
if (!create || err == -EIO)
goto cleanup;
+ /*
+ * Block out ext3_truncate while we alter the tree
+ */
mutex_lock(&ei->truncate_mutex);
/*
*/
count = ext3_blks_to_allocate(partial, indirect_blks,
maxblocks, blocks_to_boundary);
- /*
- * Block out ext3_truncate while we alter the tree
- */
err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
offsets + (partial - chain), partial);
}
BUFFER_TRACE(bh_result, "returned");
out:
+ trace_ext3_get_blocks_exit(inode, iblock,
+ depth ? le32_to_cpu(chain[depth-1].key) : 0,
+ count, err);
return err;
}
ext3_truncate(inode);
}
+/*
+ * Truncate blocks that were not used by direct IO write. We have to zero out
+ * the last file block as well because direct IO might have written to it.
+ */
+static void ext3_truncate_failed_direct_write(struct inode *inode)
+{
+ ext3_block_truncate_page(inode, inode->i_size);
+ ext3_truncate(inode);
+}
+
static int ext3_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
* we allocate blocks but write fails for some reason */
int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
+ trace_ext3_write_begin(inode, pos, len, flags);
+
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
unsigned from, to;
int ret = 0, ret2;
+ trace_ext3_ordered_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
from = pos & (PAGE_CACHE_SIZE - 1);
struct inode *inode = file->f_mapping->host;
int ret;
+ trace_ext3_writeback_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
update_file_sizes(inode, pos, copied);
/*
{
handle_t *handle = ext3_journal_current_handle();
struct inode *inode = mapping->host;
+ struct ext3_inode_info *ei = EXT3_I(inode);
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
+ trace_ext3_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
- if (inode->i_size > EXT3_I(inode)->i_disksize) {
- EXT3_I(inode)->i_disksize = inode->i_size;
+ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
+ if (inode->i_size > ei->i_disksize) {
+ ei->i_disksize = inode->i_size;
ret2 = ext3_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_ordered_writepage(page);
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_writeback_writepage(page);
if (page_has_buffers(page)) {
if (!walk_page_buffers(NULL, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
if (ext3_journal_current_handle())
goto no_write;
+ trace_ext3_journalled_writepage(page);
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
if (ret == 0)
ret = err;
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
+ atomic_set(&EXT3_I(inode)->i_datasync_tid,
+ handle->h_transaction->t_tid);
unlock_page(page);
} else {
/*
static int ext3_readpage(struct file *file, struct page *page)
{
+ trace_ext3_readpage(page);
return mpage_readpage(page, ext3_get_block);
}
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_invalidatepage(page, offset);
+
/*
* If it's a full truncate we just forget about the pending dirtying
*/
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_releasepage(page);
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
size_t count = iov_length(iov, nr_segs);
int retries = 0;
+ trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+
if (rw == WRITE) {
loff_t final_size = offset + count;
loff_t end = offset + iov_length(iov, nr_segs);
if (end > isize)
- vmtruncate(inode, isize);
+ ext3_truncate_failed_direct_write(inode);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
/* This is really bad luck. We've written the data
* but cannot extend i_size. Truncate allocated blocks
* and pretend the write failed... */
- ext3_truncate(inode);
+ ext3_truncate_failed_direct_write(inode);
ret = PTR_ERR(handle);
goto out;
}
ret = err;
}
out:
+ trace_ext3_direct_IO_exit(inode, offset,
+ iov_length(iov, nr_segs), rw, ret);
return ret;
}
* This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown.
*/
-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
- struct address_space *mapping, loff_t from)
+static int ext3_block_truncate_page(struct inode *inode, loff_t from)
{
ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned offset = from & (PAGE_CACHE_SIZE - 1);
unsigned blocksize, iblock, length, pos;
- struct inode *inode = mapping->host;
+ struct page *page;
+ handle_t *handle = NULL;
struct buffer_head *bh;
int err = 0;
+ /* Truncated on block boundary - nothing to do */
blocksize = inode->i_sb->s_blocksize;
+ if ((from & (blocksize - 1)) == 0)
+ return 0;
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page)
+ return -ENOMEM;
length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
goto unlock;
}
+ /* data=writeback mode doesn't need transaction to zero-out data */
+ if (!ext3_should_writeback_data(inode)) {
+ /* We journal at most one block */
+ handle = ext3_journal_start(inode, 1);
+ if (IS_ERR(handle)) {
+ clear_highpage(page);
+ flush_dcache_page(page);
+ err = PTR_ERR(handle);
+ goto unlock;
+ }
+ }
+
if (ext3_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
err = ext3_journal_get_write_access(handle, bh);
if (err)
- goto unlock;
+ goto stop;
}
zero_user(page, offset, length);
err = ext3_journal_dirty_data(handle, bh);
mark_buffer_dirty(bh);
}
+stop:
+ if (handle)
+ ext3_journal_stop(handle);
unlock:
unlock_page(page);
int ext3_can_truncate(struct inode *inode)
{
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return 0;
if (S_ISREG(inode->i_mode))
return 1;
if (S_ISDIR(inode->i_mode))
struct ext3_inode_info *ei = EXT3_I(inode);
__le32 *i_data = ei->i_data;
int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
- struct address_space *mapping = inode->i_mapping;
int offsets[4];
Indirect chain[4];
Indirect *partial;
int n;
long last_block;
unsigned blocksize = inode->i_sb->s_blocksize;
- struct page *page;
+
+ trace_ext3_truncate_enter(inode);
if (!ext3_can_truncate(inode))
goto out_notrans;
if (inode->i_size == 0 && ext3_should_writeback_data(inode))
ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
- /*
- * We have to lock the EOF page here, because lock_page() nests
- * outside journal_start().
- */
- if ((inode->i_size & (blocksize - 1)) == 0) {
- /* Block boundary? Nothing to do */
- page = NULL;
- } else {
- page = grab_cache_page(mapping,
- inode->i_size >> PAGE_CACHE_SHIFT);
- if (!page)
- goto out_notrans;
- }
-
handle = start_transaction(inode);
- if (IS_ERR(handle)) {
- if (page) {
- clear_highpage(page);
- flush_dcache_page(page);
- unlock_page(page);
- page_cache_release(page);
- }
+ if (IS_ERR(handle))
goto out_notrans;
- }
last_block = (inode->i_size + blocksize-1)
>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
-
- if (page)
- ext3_block_truncate_page(handle, page, mapping, inode->i_size);
-
n = ext3_block_to_path(inode, last_block, offsets, NULL);
if (n == 0)
goto out_stop; /* error */
ext3_orphan_del(handle, inode);
ext3_journal_stop(handle);
+ trace_ext3_truncate_exit(inode);
return;
out_notrans:
/*
*/
if (inode->i_nlink)
ext3_orphan_del(NULL, inode);
+ trace_ext3_truncate_exit(inode);
}
static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
*/
+ trace_ext3_load_inode(inode);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ_META, bh);
}
error = ext3_orphan_add(handle, inode);
+ if (error) {
+ ext3_journal_stop(handle);
+ goto err_out;
+ }
EXT3_I(inode)->i_disksize = attr->ia_size;
- rc = ext3_mark_inode_dirty(handle, inode);
- if (!error)
- error = rc;
+ error = ext3_mark_inode_dirty(handle, inode);
ext3_journal_stop(handle);
+ if (error) {
+ /* Some hard fs error must have happened. Bail out. */
+ ext3_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ rc = ext3_block_truncate_page(inode, attr->ia_size);
+ if (rc) {
+ /* Cleanup orphan list and exit */
+ handle = ext3_journal_start(inode, 3);
+ if (IS_ERR(handle)) {
+ ext3_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ ext3_orphan_del(handle, inode);
+ ext3_journal_stop(handle);
+ goto err_out;
+ }
}
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
- rc = vmtruncate(inode, attr->ia_size);
- if (rc)
- goto err_out;
+ truncate_setsize(inode, attr->ia_size);
+ ext3_truncate(inode);
}
setattr_copy(inode, attr);
int err;
might_sleep();
+ trace_ext3_mark_inode_dirty(inode, _RET_IP_);
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (!err)
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (copy_from_user(&range, (struct fstrim_range *)arg,
+ if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
if (ret < 0)
return ret;
- if (copy_to_user((struct fstrim_range *)arg, &range,
+ if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
+#include <trace/events/ext3.h>
#include "namei.h"
#include "xattr.h"
while (len--) printk("%c", *name++);
ext3fs_dirhash(de->name, de->name_len, &h);
printk(":%x.%u ", h.hash,
- ((char *) de - base));
+ (unsigned) ((char *) de - base));
}
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
*err = -ENOENT;
errout:
- dxtrace(printk("%s not found\n", name));
+ dxtrace(printk("%s not found\n", entry->name));
dx_release (frames);
return NULL;
}
struct ext3_dir_entry_2 * de;
handle_t *handle;
+ trace_ext3_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
dquot_initialize(dir);
end_unlink:
ext3_journal_stop(handle);
brelse (bh);
+ trace_ext3_unlink_exit(dentry, retval);
return retval;
}
#include "acl.h"
#include "namei.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext3.h>
+
#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
#else
return &ei->vfs_inode;
}
+static int ext3_drop_inode(struct inode *inode)
+{
+ int drop = generic_drop_inode(inode);
+
+ trace_ext3_drop_inode(inode, drop);
+ return drop;
+}
+
static void ext3_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
.destroy_inode = ext3_destroy_inode,
.write_inode = ext3_write_inode,
.dirty_inode = ext3_dirty_inode,
+ .drop_inode = ext3_drop_inode,
.evict_inode = ext3_evict_inode,
.put_super = ext3_put_super,
.sync_fs = ext3_sync_fs,
{
tid_t target;
+ trace_ext3_sync_fs(sb, wait);
if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
/* We need to allocate a new block */
ext3_fsblk_t goal = ext3_group_first_block_no(sb,
EXT3_I(inode)->i_block_group);
- ext3_fsblk_t block = ext3_new_block(handle, inode,
- goal, &error);
+ ext3_fsblk_t block;
+
+ /*
+ * Protect us agaist concurrent allocations to the
+ * same inode from ext3_..._writepage(). Reservation
+ * code does not expect racing allocations.
+ */
+ mutex_lock(&EXT3_I(inode)->truncate_mutex);
+ block = ext3_new_block(handle, inode, goal, &error);
+ mutex_unlock(&EXT3_I(inode)->truncate_mutex);
if (error)
goto cleanup;
ea_idebug(inode, "creating block %d", block);
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <trace/events/jbd.h>
/*
* Unlink a buffer from a transaction checkpoint list.
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+ /*
+ * Get our reference so that bh cannot be freed before
+ * we unlock it
+ */
+ get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
} else {
spin_lock(&journal->j_list_lock);
goto restart;
}
+ get_bh(bh);
if (buffer_locked(bh)) {
- get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
*/
released = __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
__brelse(bh);
}
__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
{
int i;
+ struct blk_plug plug;
+ blk_start_plug(&plug);
for (i = 0; i < *batch_count; i++)
- write_dirty_buffer(bhs[i], WRITE);
+ write_dirty_buffer(bhs[i], WRITE_SYNC);
+ blk_finish_plug(&plug);
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = bhs[i];
ret = 1;
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
+ get_bh(bh);
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
__brelse(bh);
} else {
/*
* journal straight away.
*/
result = cleanup_journal_tail(journal);
+ trace_jbd_checkpoint(journal, result);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
if (result <= 0)
return result;
if (blocknr < journal->j_tail)
freed = freed + journal->j_last - journal->j_first;
+ trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
jbd_debug(1,
"Cleaning journal tail from %d to %d (offset %u), "
"freeing %u\n",
/*
* journal_clean_one_cp_list
*
- * Find all the written-back checkpoint buffers in the given list and release them.
+ * Find all the written-back checkpoint buffers in the given list and release
+ * them.
*
- * Called with the journal locked.
* Called with j_list_lock held.
* Returns number of bufers reaped (for debug)
*/
* checkpoint lists.
*
* The function returns 1 if it frees the transaction, 0 otherwise.
+ * The function can free jh and bh.
*
- * This function is called with the journal locked.
* This function is called with j_list_lock held.
* This function is called with jbd_lock_bh_state(jh2bh(jh))
*/
}
journal = transaction->t_journal;
+ JBUFFER_TRACE(jh, "removing from transaction");
__buffer_unlink(jh);
jh->b_cp_transaction = NULL;
+ journal_put_journal_head(jh);
if (transaction->t_checkpoint_list != NULL ||
transaction->t_checkpoint_io_list != NULL)
goto out;
- JBUFFER_TRACE(jh, "transaction has no more buffers");
/*
* There is one special case to worry about: if we have just pulled the
* The locking here around t_state is a bit sleazy.
* See the comment at the end of journal_commit_transaction().
*/
- if (transaction->t_state != T_FINISHED) {
- JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+ if (transaction->t_state != T_FINISHED)
goto out;
- }
/* OK, that was the last buffer for the transaction: we can now
safely remove this transaction from the log */
wake_up(&journal->j_wait_logspace);
ret = 1;
out:
- JBUFFER_TRACE(jh, "exit");
return ret;
}
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+ /* Get reference for checkpointing transaction */
+ journal_grab_journal_head(jh2bh(jh));
jh->b_cp_transaction = transaction;
if (!transaction->t_checkpoint_list) {
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
+ trace_jbd_drop_transaction(journal, transaction);
jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
kfree(transaction);
}
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <trace/events/jbd.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
if (!trylock_buffer(bh)) {
BUFFER_TRACE(bh, "needs blocking lock");
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal,
+ commit_transaction);
/* Write out all data to prevent deadlocks */
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
jbd_unlock_bh_state(bh);
if (bufs == journal->j_wbufsize) {
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal,
+ commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
bufs = 0;
goto write_out_data;
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
- journal_remove_journal_head(bh);
- /* One for our safety reference, other for
- * journal_remove_journal_head() */
- put_bh(bh);
release_data_buffer(bh);
}
}
}
spin_unlock(&journal->j_list_lock);
+ trace_jbd_do_submit_data(journal, commit_transaction);
journal_do_submit_data(wbuf, bufs, write_op);
return err;
commit_transaction = journal->j_running_transaction;
J_ASSERT(commit_transaction->t_state == T_RUNNING);
+ trace_jbd_start_commit(journal, commit_transaction);
jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid);
spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED;
+ trace_jbd_commit_locking(journal, commit_transaction);
spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) {
DEFINE_WAIT(wait);
*/
journal_switch_revoke_table(journal);
+ trace_jbd_commit_flushing(journal, commit_transaction);
commit_transaction->t_state = T_FLUSH;
journal->j_committing_transaction = commit_transaction;
journal->j_running_transaction = NULL;
}
if (buffer_jbd(bh) && bh2jh(bh) == jh &&
jh->b_transaction == commit_transaction &&
- jh->b_jlist == BJ_Locked) {
+ jh->b_jlist == BJ_Locked)
__journal_unfile_buffer(jh);
- jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
- put_bh(bh);
- } else {
- jbd_unlock_bh_state(bh);
- }
+ jbd_unlock_bh_state(bh);
release_data_buffer(bh);
cond_resched_lock(&journal->j_list_lock);
}
commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock);
+ trace_jbd_commit_logging(journal, commit_transaction);
J_ASSERT(commit_transaction->t_nr_buffers <=
commit_transaction->t_outstanding_credits);
while (commit_transaction->t_forget) {
transaction_t *cp_transaction;
struct buffer_head *bh;
+ int try_to_free = 0;
jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
bh = jh2bh(jh);
+ /*
+ * Get a reference so that bh cannot be freed before we are
+ * done with it.
+ */
+ get_bh(bh);
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
jh->b_transaction == journal->j_running_transaction);
__journal_insert_checkpoint(jh, commit_transaction);
if (is_journal_aborted(journal))
clear_buffer_jbddirty(bh);
- JBUFFER_TRACE(jh, "refile for checkpoint writeback");
- __journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
} else {
J_ASSERT_BH(bh, !buffer_dirty(bh));
- /* The buffer on BJ_Forget list and not jbddirty means
+ /*
+ * The buffer on BJ_Forget list and not jbddirty means
* it has been freed by this transaction and hence it
* could not have been reallocated until this
* transaction has committed. *BUT* it could be
* reallocated once we have written all the data to
* disk and before we process the buffer on BJ_Forget
- * list. */
- JBUFFER_TRACE(jh, "refile or unfile freed buffer");
- __journal_refile_buffer(jh);
- if (!jh->b_transaction) {
- jbd_unlock_bh_state(bh);
- /* needs a brelse */
- journal_remove_journal_head(bh);
- release_buffer_page(bh);
- } else
- jbd_unlock_bh_state(bh);
+ * list.
+ */
+ if (!jh->b_next_transaction)
+ try_to_free = 1;
}
+ JBUFFER_TRACE(jh, "refile or unfile freed buffer");
+ __journal_refile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+ if (try_to_free)
+ release_buffer_page(bh);
+ else
+ __brelse(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
+ trace_jbd_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
#include <linux/debugfs.h>
#include <linux/ratelimit.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/jbd.h>
+
#include <asm/uaccess.h>
#include <asm/page.h>
} else
write_dirty_buffer(bh, WRITE);
+ trace_jbd_update_superblock_end(journal, wait);
out:
/* If we have just flushed the log (by marking s_start==0), then
* any future commit will have to be careful to update the
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code, mainly via ->b_count.
*
- * A journal_head may be detached from its buffer_head when the journal_head's
- * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call journal_remove_journal_head() to indicate that the
- * journal_head can be dropped if needed.
+ * A journal_head is detached from its buffer_head when the journal_head's
+ * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
+ * transaction (b_cp_transaction) hold their references to b_jcount.
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction. To protect the
* (Attach a journal_head if needed. Increments b_jcount)
* struct journal_head *jh = journal_add_journal_head(bh);
* ...
- * jh->b_transaction = xxx;
- * journal_put_journal_head(jh);
- *
- * Now, the journal_head's b_jcount is zero, but it is safe from being released
- * because it has a non-zero b_transaction.
+ * (Get another reference for transaction)
+ * journal_grab_journal_head(bh);
+ * jh->b_transaction = xxx;
+ * (Put original reference)
+ * journal_put_journal_head(jh);
*/
/*
* Give a buffer_head a journal_head.
*
- * Doesn't need the journal lock.
* May sleep.
*/
struct journal_head *journal_add_journal_head(struct buffer_head *bh)
struct journal_head *jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_jcount >= 0);
-
- get_bh(bh);
- if (jh->b_jcount == 0) {
- if (jh->b_transaction == NULL &&
- jh->b_next_transaction == NULL &&
- jh->b_cp_transaction == NULL) {
- J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
- J_ASSERT_BH(bh, buffer_jbd(bh));
- J_ASSERT_BH(bh, jh2bh(jh) == bh);
- BUFFER_TRACE(bh, "remove journal_head");
- if (jh->b_frozen_data) {
- printk(KERN_WARNING "%s: freeing "
- "b_frozen_data\n",
- __func__);
- jbd_free(jh->b_frozen_data, bh->b_size);
- }
- if (jh->b_committed_data) {
- printk(KERN_WARNING "%s: freeing "
- "b_committed_data\n",
- __func__);
- jbd_free(jh->b_committed_data, bh->b_size);
- }
- bh->b_private = NULL;
- jh->b_bh = NULL; /* debug, really */
- clear_buffer_jbd(bh);
- __brelse(bh);
- journal_free_journal_head(jh);
- } else {
- BUFFER_TRACE(bh, "journal_head was locked");
- }
+ J_ASSERT_JH(jh, jh->b_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+ J_ASSERT_BH(bh, buffer_jbd(bh));
+ J_ASSERT_BH(bh, jh2bh(jh) == bh);
+ BUFFER_TRACE(bh, "remove journal_head");
+ if (jh->b_frozen_data) {
+ printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+ jbd_free(jh->b_frozen_data, bh->b_size);
}
+ if (jh->b_committed_data) {
+ printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+ jbd_free(jh->b_committed_data, bh->b_size);
+ }
+ bh->b_private = NULL;
+ jh->b_bh = NULL; /* debug, really */
+ clear_buffer_jbd(bh);
+ journal_free_journal_head(jh);
}
/*
- * journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head. If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
- * time. Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- */
-void journal_remove_journal_head(struct buffer_head *bh)
-{
- jbd_lock_bh_journal_head(bh);
- __journal_remove_journal_head(bh);
- jbd_unlock_bh_journal_head(bh);
-}
-
-/*
- * Drop a reference on the passed journal_head. If it fell to zero then try to
+ * Drop a reference on the passed journal_head. If it fell to zero then
* release the journal_head from the buffer_head.
*/
void journal_put_journal_head(struct journal_head *jh)
jbd_lock_bh_journal_head(bh);
J_ASSERT_JH(jh, jh->b_jcount > 0);
--jh->b_jcount;
- if (!jh->b_jcount && !jh->b_transaction) {
+ if (!jh->b_jcount) {
__journal_remove_journal_head(bh);
+ jbd_unlock_bh_journal_head(bh);
__brelse(bh);
- }
- jbd_unlock_bh_journal_head(bh);
+ } else
+ jbd_unlock_bh_journal_head(bh);
}
/*
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
static void __journal_temp_unlink_buffer(struct journal_head *jh);
alloc_transaction:
if (!journal->j_running_transaction) {
- new_transaction = kzalloc(sizeof(*new_transaction),
- GFP_NOFS|__GFP_NOFAIL);
+ new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS);
if (!new_transaction) {
- ret = -ENOMEM;
- goto out;
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto alloc_transaction;
}
}
if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction);
- jh->b_transaction = transaction;
JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__journal_file_buffer(jh, transaction, BJ_Reserved);
* committed and so it's safe to clear the dirty bit.
*/
clear_buffer_dirty(jh2bh(jh));
- jh->b_transaction = transaction;
/* first access by this transaction */
jh->b_modified = 0;
*/
JBUFFER_TRACE(jh, "cancelling revoke");
journal_cancel_revoke(handle, jh);
- journal_put_journal_head(jh);
out:
+ journal_put_journal_head(jh);
return err;
}
ret = -EIO;
goto no_journal;
}
-
- if (jh->b_transaction != NULL) {
+ /* We might have slept so buffer could be refiled now */
+ if (jh->b_transaction != NULL &&
+ jh->b_transaction != handle->h_transaction) {
JBUFFER_TRACE(jh, "unfile from commit");
__journal_temp_unlink_buffer(jh);
/* It still points to the committing
if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
JBUFFER_TRACE(jh, "not on correct data list: unfile");
J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
- __journal_temp_unlink_buffer(jh);
- jh->b_transaction = handle->h_transaction;
JBUFFER_TRACE(jh, "file as data");
__journal_file_buffer(jh, handle->h_transaction,
BJ_SyncData);
__journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__journal_unfile_buffer(jh);
- journal_remove_journal_head(bh);
- __brelse(bh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
mark_buffer_dirty(bh); /* Expose it to the VM */
}
+/*
+ * Remove buffer from all transactions.
+ *
+ * Called with bh_state lock and j_list_lock
+ *
+ * jh and bh may be already freed when this function returns.
+ */
void __journal_unfile_buffer(struct journal_head *jh)
{
__journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
+ journal_put_journal_head(jh);
}
void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
- jbd_lock_bh_state(jh2bh(jh));
+ struct buffer_head *bh = jh2bh(jh);
+
+ /* Get reference so that buffer cannot be freed before we unlock it */
+ get_bh(bh);
+ jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
__journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(jh2bh(jh));
+ jbd_unlock_bh_state(bh);
+ __brelse(bh);
}
/*
/* A written-back ordered data buffer */
JBUFFER_TRACE(jh, "release data");
__journal_unfile_buffer(jh);
- journal_remove_journal_head(bh);
- __brelse(bh);
}
} else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
__journal_remove_checkpoint(jh);
- journal_remove_journal_head(bh);
- __brelse(bh);
}
}
spin_unlock(&journal->j_list_lock);
/*
* We take our own ref against the journal_head here to avoid
* having to add tons of locking around each instance of
- * journal_remove_journal_head() and journal_put_journal_head().
+ * journal_put_journal_head().
*/
jh = journal_grab_journal_head(bh);
if (!jh)
int may_free = 1;
struct buffer_head *bh = jh2bh(jh);
- __journal_unfile_buffer(jh);
-
if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction");
+ __journal_temp_unlink_buffer(jh);
/*
* We don't want to write the buffer anymore, clear the
* bit so that we don't confuse checks in
may_free = 0;
} else {
JBUFFER_TRACE(jh, "on running transaction");
- journal_remove_journal_head(bh);
- __brelse(bh);
+ __journal_unfile_buffer(jh);
}
return may_free;
}
if (jh->b_transaction)
__journal_temp_unlink_buffer(jh);
+ else
+ journal_grab_journal_head(bh);
jh->b_transaction = transaction;
switch (jlist) {
* already started to be used by a subsequent transaction, refile the
* buffer on that transaction's metadata list.
*
- * Called under journal->j_list_lock
- *
+ * Called under j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh))
+ *
+ * jh and bh may be already free when this function returns
*/
void __journal_refile_buffer(struct journal_head *jh)
{
was_dirty = test_clear_buffer_jbddirty(bh);
__journal_temp_unlink_buffer(jh);
+ /*
+ * We set b_transaction here because b_next_transaction will inherit
+ * our jh reference and thus __journal_file_buffer() must not take a
+ * new one.
+ */
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
if (buffer_freed(bh))
}
/*
- * For the unlocked version of this call, also make sure that any
- * hanging journal_head is cleaned up if necessary.
- *
- * __journal_refile_buffer is usually called as part of a single locked
- * operation on a buffer_head, in which the caller is probably going to
- * be hooking the journal_head onto other lists. In that case it is up
- * to the caller to remove the journal_head if necessary. For the
- * unlocked journal_refile_buffer call, the caller isn't going to be
- * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
- *
- * *** The journal_head may be freed by this call! ***
+ * __journal_refile_buffer() with necessary locking added. We take our bh
+ * reference so that we can safely unlock bh.
+ *
+ * The jh and bh may be freed by this call.
*/
void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
+ /* Get reference so that buffer cannot be freed before we unlock it */
+ get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
-
__journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
- journal_remove_journal_head(bh);
-
spin_unlock(&journal->j_list_lock);
__brelse(bh);
}
#include <linux/types.h>
#include <linux/magic.h>
+#include <linux/fs.h>
/*
* The second extended filesystem constants/structures
#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
#endif
-#define ext3_set_bit __test_and_set_bit_le
+#define ext3_set_bit __set_bit_le
#define ext3_set_bit_atomic ext2_set_bit_atomic
-#define ext3_clear_bit __test_and_clear_bit_le
+#define ext3_clear_bit __clear_bit_le
#define ext3_clear_bit_atomic ext2_clear_bit_atomic
#define ext3_test_bit test_bit_le
-#define ext3_find_first_zero_bit find_first_zero_bit_le
#define ext3_find_next_zero_bit find_next_zero_bit_le
/*
extern int ext3_change_inode_journal_flag(struct inode *, int);
extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
extern int ext3_can_truncate(struct inode *inode);
-extern void ext3_truncate (struct inode *);
+extern void ext3_truncate(struct inode *inode);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_get_inode_flags(struct ext3_inode_info *);
extern void ext3_set_aops(struct inode *inode);
*/
struct journal_head *journal_add_journal_head(struct buffer_head *bh);
struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
-void journal_remove_journal_head(struct buffer_head *bh);
void journal_put_journal_head(struct journal_head *jh);
/*
* has been cowed
* [jbd_lock_bh_state()]
*/
- unsigned b_cow_tid;
+ tid_t b_cow_tid;
/*
* Copy of the buffer data frozen for writing to the log.
{QFMT_VFS_V0, "quota_v2"},\
{0, NULL}}
-#else
-
-# /* nodep */ include <sys/cdefs.h>
-
-__BEGIN_DECLS
-long quotactl __P ((unsigned int, const char *, int, caddr_t));
-__END_DECLS
-
#endif /* __KERNEL__ */
#endif /* _QUOTA_ */
--- /dev/null
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ext3
+
+#if !defined(_TRACE_EXT3_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EXT3_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(ext3_free_inode,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( umode_t, mode )
+ __field( uid_t, uid )
+ __field( gid_t, gid )
+ __field( blkcnt_t, blocks )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->mode = inode->i_mode;
+ __entry->uid = inode->i_uid;
+ __entry->gid = inode->i_gid;
+ __entry->blocks = inode->i_blocks;
+ ),
+
+ TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->mode, __entry->uid, __entry->gid,
+ (unsigned long) __entry->blocks)
+);
+
+TRACE_EVENT(ext3_request_inode,
+ TP_PROTO(struct inode *dir, int mode),
+
+ TP_ARGS(dir, mode),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, dir )
+ __field( umode_t, mode )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = dir->i_ino;
+ __entry->mode = mode;
+ ),
+
+ TP_printk("dev %d,%d dir %lu mode 0%o",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->dir, __entry->mode)
+);
+
+TRACE_EVENT(ext3_allocate_inode,
+ TP_PROTO(struct inode *inode, struct inode *dir, int mode),
+
+ TP_ARGS(inode, dir, mode),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( ino_t, dir )
+ __field( umode_t, mode )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->dir = dir->i_ino;
+ __entry->mode = mode;
+ ),
+
+ TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long) __entry->dir, __entry->mode)
+);
+
+TRACE_EVENT(ext3_evict_inode,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( int, nlink )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->nlink = inode->i_nlink;
+ ),
+
+ TP_printk("dev %d,%d ino %lu nlink %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, __entry->nlink)
+);
+
+TRACE_EVENT(ext3_drop_inode,
+ TP_PROTO(struct inode *inode, int drop),
+
+ TP_ARGS(inode, drop),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( int, drop )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->drop = drop;
+ ),
+
+ TP_printk("dev %d,%d ino %lu drop %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, __entry->drop)
+);
+
+TRACE_EVENT(ext3_mark_inode_dirty,
+ TP_PROTO(struct inode *inode, unsigned long IP),
+
+ TP_ARGS(inode, IP),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field(unsigned long, ip )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->ip = IP;
+ ),
+
+ TP_printk("dev %d,%d ino %lu caller %pF",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, (void *)__entry->ip)
+);
+
+TRACE_EVENT(ext3_write_begin,
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int flags),
+
+ TP_ARGS(inode, pos, len, flags),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( loff_t, pos )
+ __field( unsigned int, len )
+ __field( unsigned int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = pos;
+ __entry->len = len;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long) __entry->pos, __entry->len,
+ __entry->flags)
+);
+
+DECLARE_EVENT_CLASS(ext3__write_end,
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int copied),
+
+ TP_ARGS(inode, pos, len, copied),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( loff_t, pos )
+ __field( unsigned int, len )
+ __field( unsigned int, copied )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = pos;
+ __entry->len = len;
+ __entry->copied = copied;
+ ),
+
+ TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long) __entry->pos, __entry->len,
+ __entry->copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_ordered_write_end,
+
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int copied),
+
+ TP_ARGS(inode, pos, len, copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_writeback_write_end,
+
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int copied),
+
+ TP_ARGS(inode, pos, len, copied)
+);
+
+DEFINE_EVENT(ext3__write_end, ext3_journalled_write_end,
+
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int copied),
+
+ TP_ARGS(inode, pos, len, copied)
+);
+
+DECLARE_EVENT_CLASS(ext3__page_op,
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( pgoff_t, index )
+
+ ),
+
+ TP_fast_assign(
+ __entry->index = page->index;
+ __entry->ino = page->mapping->host->i_ino;
+ __entry->dev = page->mapping->host->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %lu page_index %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, __entry->index)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_ordered_writepage,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_writeback_writepage,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_journalled_writepage,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_readpage,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext3__page_op, ext3_releasepage,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
+TRACE_EVENT(ext3_invalidatepage,
+ TP_PROTO(struct page *page, unsigned long offset),
+
+ TP_ARGS(page, offset),
+
+ TP_STRUCT__entry(
+ __field( pgoff_t, index )
+ __field( unsigned long, offset )
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+
+ ),
+
+ TP_fast_assign(
+ __entry->index = page->index;
+ __entry->offset = offset;
+ __entry->ino = page->mapping->host->i_ino;
+ __entry->dev = page->mapping->host->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->index, __entry->offset)
+);
+
+TRACE_EVENT(ext3_discard_blocks,
+ TP_PROTO(struct super_block *sb, unsigned long blk,
+ unsigned long count),
+
+ TP_ARGS(sb, blk, count),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( unsigned long, blk )
+ __field( unsigned long, count )
+
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->blk = blk;
+ __entry->count = count;
+ ),
+
+ TP_printk("dev %d,%d blk %lu count %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->blk, __entry->count)
+);
+
+TRACE_EVENT(ext3_request_blocks,
+ TP_PROTO(struct inode *inode, unsigned long goal,
+ unsigned long count),
+
+ TP_ARGS(inode, goal, count),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( unsigned long, count )
+ __field( unsigned long, goal )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->count = count;
+ __entry->goal = goal;
+ ),
+
+ TP_printk("dev %d,%d ino %lu count %lu goal %lu ",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->count, __entry->goal)
+);
+
+TRACE_EVENT(ext3_allocate_blocks,
+ TP_PROTO(struct inode *inode, unsigned long goal,
+ unsigned long count, unsigned long block),
+
+ TP_ARGS(inode, goal, count, block),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( unsigned long, block )
+ __field( unsigned long, count )
+ __field( unsigned long, goal )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->block = block;
+ __entry->count = count;
+ __entry->goal = goal;
+ ),
+
+ TP_printk("dev %d,%d ino %lu count %lu block %lu goal %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->count, __entry->block,
+ __entry->goal)
+);
+
+TRACE_EVENT(ext3_free_blocks,
+ TP_PROTO(struct inode *inode, unsigned long block,
+ unsigned long count),
+
+ TP_ARGS(inode, block, count),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( umode_t, mode )
+ __field( unsigned long, block )
+ __field( unsigned long, count )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->mode = inode->i_mode;
+ __entry->block = block;
+ __entry->count = count;
+ ),
+
+ TP_printk("dev %d,%d ino %lu mode 0%o block %lu count %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->mode, __entry->block, __entry->count)
+);
+
+TRACE_EVENT(ext3_sync_file_enter,
+ TP_PROTO(struct file *file, int datasync),
+
+ TP_ARGS(file, datasync),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( ino_t, parent )
+ __field( int, datasync )
+ ),
+
+ TP_fast_assign(
+ struct dentry *dentry = file->f_path.dentry;
+
+ __entry->dev = dentry->d_inode->i_sb->s_dev;
+ __entry->ino = dentry->d_inode->i_ino;
+ __entry->datasync = datasync;
+ __entry->parent = dentry->d_parent->d_inode->i_ino;
+ ),
+
+ TP_printk("dev %d,%d ino %lu parent %ld datasync %d ",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long) __entry->parent, __entry->datasync)
+);
+
+TRACE_EVENT(ext3_sync_file_exit,
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret),
+
+ TP_STRUCT__entry(
+ __field( int, ret )
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->ret = ret;
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %lu ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->ret)
+);
+
+TRACE_EVENT(ext3_sync_fs,
+ TP_PROTO(struct super_block *sb, int wait),
+
+ TP_ARGS(sb, wait),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, wait )
+
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->wait = wait;
+ ),
+
+ TP_printk("dev %d,%d wait %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->wait)
+);
+
+TRACE_EVENT(ext3_rsv_window_add,
+ TP_PROTO(struct super_block *sb,
+ struct ext3_reserve_window_node *rsv_node),
+
+ TP_ARGS(sb, rsv_node),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, start )
+ __field( unsigned long, end )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->start = rsv_node->rsv_window._rsv_start;
+ __entry->end = rsv_node->rsv_window._rsv_end;
+ ),
+
+ TP_printk("dev %d,%d start %lu end %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->start, __entry->end)
+);
+
+TRACE_EVENT(ext3_discard_reservation,
+ TP_PROTO(struct inode *inode,
+ struct ext3_reserve_window_node *rsv_node),
+
+ TP_ARGS(inode, rsv_node),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, start )
+ __field( unsigned long, end )
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->start = rsv_node->rsv_window._rsv_start;
+ __entry->end = rsv_node->rsv_window._rsv_end;
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %lu start %lu end %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long)__entry->ino, __entry->start,
+ __entry->end)
+);
+
+TRACE_EVENT(ext3_alloc_new_reservation,
+ TP_PROTO(struct super_block *sb, unsigned long goal),
+
+ TP_ARGS(sb, goal),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( unsigned long, goal )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->goal = goal;
+ ),
+
+ TP_printk("dev %d,%d goal %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->goal)
+);
+
+TRACE_EVENT(ext3_reserved,
+ TP_PROTO(struct super_block *sb, unsigned long block,
+ struct ext3_reserve_window_node *rsv_node),
+
+ TP_ARGS(sb, block, rsv_node),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, block )
+ __field( unsigned long, start )
+ __field( unsigned long, end )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->block = block;
+ __entry->start = rsv_node->rsv_window._rsv_start;
+ __entry->end = rsv_node->rsv_window._rsv_end;
+ __entry->dev = sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d block %lu, start %lu end %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->block, __entry->start, __entry->end)
+);
+
+TRACE_EVENT(ext3_forget,
+ TP_PROTO(struct inode *inode, int is_metadata, unsigned long block),
+
+ TP_ARGS(inode, is_metadata, block),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( ino_t, ino )
+ __field( umode_t, mode )
+ __field( int, is_metadata )
+ __field( unsigned long, block )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->mode = inode->i_mode;
+ __entry->is_metadata = is_metadata;
+ __entry->block = block;
+ ),
+
+ TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->mode, __entry->is_metadata, __entry->block)
+);
+
+TRACE_EVENT(ext3_read_block_bitmap,
+ TP_PROTO(struct super_block *sb, unsigned int group),
+
+ TP_ARGS(sb, group),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( __u32, group )
+
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->group = group;
+ ),
+
+ TP_printk("dev %d,%d group %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->group)
+);
+
+TRACE_EVENT(ext3_direct_IO_enter,
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+
+ TP_ARGS(inode, offset, len, rw),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( loff_t, pos )
+ __field( unsigned long, len )
+ __field( int, rw )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->rw = rw;
+ ),
+
+ TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long) __entry->pos, __entry->len,
+ __entry->rw)
+);
+
+TRACE_EVENT(ext3_direct_IO_exit,
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
+ int rw, int ret),
+
+ TP_ARGS(inode, offset, len, rw, ret),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( loff_t, pos )
+ __field( unsigned long, len )
+ __field( int, rw )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->rw = rw;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long) __entry->pos, __entry->len,
+ __entry->rw, __entry->ret)
+);
+
+TRACE_EVENT(ext3_unlink_enter,
+ TP_PROTO(struct inode *parent, struct dentry *dentry),
+
+ TP_ARGS(parent, dentry),
+
+ TP_STRUCT__entry(
+ __field( ino_t, parent )
+ __field( ino_t, ino )
+ __field( loff_t, size )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->parent = parent->i_ino;
+ __entry->ino = dentry->d_inode->i_ino;
+ __entry->size = dentry->d_inode->i_size;
+ __entry->dev = dentry->d_inode->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %lu size %lld parent %ld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long)__entry->size,
+ (unsigned long) __entry->parent)
+);
+
+TRACE_EVENT(ext3_unlink_exit,
+ TP_PROTO(struct dentry *dentry, int ret),
+
+ TP_ARGS(dentry, ret),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = dentry->d_inode->i_ino;
+ __entry->dev = dentry->d_inode->i_sb->s_dev;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev %d,%d ino %lu ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->ret)
+);
+
+DECLARE_EVENT_CLASS(ext3__truncate,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( blkcnt_t, blocks )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->blocks = inode->i_blocks;
+ ),
+
+ TP_printk("dev %d,%d ino %lu blocks %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, (unsigned long) __entry->blocks)
+);
+
+DEFINE_EVENT(ext3__truncate, ext3_truncate_enter,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+DEFINE_EVENT(ext3__truncate, ext3_truncate_exit,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+TRACE_EVENT(ext3_get_blocks_enter,
+ TP_PROTO(struct inode *inode, unsigned long lblk,
+ unsigned long len, int create),
+
+ TP_ARGS(inode, lblk, len, create),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( unsigned long, lblk )
+ __field( unsigned long, len )
+ __field( int, create )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->lblk = lblk;
+ __entry->len = len;
+ __entry->create = create;
+ ),
+
+ TP_printk("dev %d,%d ino %lu lblk %lu len %lu create %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->lblk, __entry->len, __entry->create)
+);
+
+TRACE_EVENT(ext3_get_blocks_exit,
+ TP_PROTO(struct inode *inode, unsigned long lblk,
+ unsigned long pblk, unsigned long len, int ret),
+
+ TP_ARGS(inode, lblk, pblk, len, ret),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( unsigned long, lblk )
+ __field( unsigned long, pblk )
+ __field( unsigned long, len )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->lblk = lblk;
+ __entry->pblk = pblk;
+ __entry->len = len;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev %d,%d ino %lu lblk %lu pblk %lu len %lu ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->lblk, __entry->pblk,
+ __entry->len, __entry->ret)
+);
+
+TRACE_EVENT(ext3_load_inode,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino)
+);
+
+#endif /* _TRACE_EXT3_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--- /dev/null
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM jbd
+
+#if !defined(_TRACE_JBD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_JBD_H
+
+#include <linux/jbd.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(jbd_checkpoint,
+
+ TP_PROTO(journal_t *journal, int result),
+
+ TP_ARGS(journal, result),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, result )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->result = result;
+ ),
+
+ TP_printk("dev %d,%d result %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->result)
+);
+
+DECLARE_EVENT_CLASS(jbd_commit,
+
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( char, sync_commit )
+ __field( int, transaction )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->sync_commit = commit_transaction->t_synchronous_commit;
+ __entry->transaction = commit_transaction->t_tid;
+ ),
+
+ TP_printk("dev %d,%d transaction %d sync %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->transaction, __entry->sync_commit)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_start_commit,
+
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_locking,
+
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_flushing,
+
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction)
+);
+
+DEFINE_EVENT(jbd_commit, jbd_commit_logging,
+
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction)
+);
+
+TRACE_EVENT(jbd_drop_transaction,
+
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( char, sync_commit )
+ __field( int, transaction )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->sync_commit = commit_transaction->t_synchronous_commit;
+ __entry->transaction = commit_transaction->t_tid;
+ ),
+
+ TP_printk("dev %d,%d transaction %d sync %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->transaction, __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd_end_commit,
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( char, sync_commit )
+ __field( int, transaction )
+ __field( int, head )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->sync_commit = commit_transaction->t_synchronous_commit;
+ __entry->transaction = commit_transaction->t_tid;
+ __entry->head = journal->j_tail_sequence;
+ ),
+
+ TP_printk("dev %d,%d transaction %d sync %d head %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->transaction, __entry->sync_commit, __entry->head)
+);
+
+TRACE_EVENT(jbd_do_submit_data,
+ TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
+
+ TP_ARGS(journal, commit_transaction),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( char, sync_commit )
+ __field( int, transaction )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->sync_commit = commit_transaction->t_synchronous_commit;
+ __entry->transaction = commit_transaction->t_tid;
+ ),
+
+ TP_printk("dev %d,%d transaction %d sync %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->transaction, __entry->sync_commit)
+);
+
+TRACE_EVENT(jbd_cleanup_journal_tail,
+
+ TP_PROTO(journal_t *journal, tid_t first_tid,
+ unsigned long block_nr, unsigned long freed),
+
+ TP_ARGS(journal, first_tid, block_nr, freed),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( tid_t, tail_sequence )
+ __field( tid_t, first_tid )
+ __field(unsigned long, block_nr )
+ __field(unsigned long, freed )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->tail_sequence = journal->j_tail_sequence;
+ __entry->first_tid = first_tid;
+ __entry->block_nr = block_nr;
+ __entry->freed = freed;
+ ),
+
+ TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->tail_sequence, __entry->first_tid,
+ __entry->block_nr, __entry->freed)
+);
+
+TRACE_EVENT(jbd_update_superblock_end,
+ TP_PROTO(journal_t *journal, int wait),
+
+ TP_ARGS(journal, wait),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, wait )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = journal->j_fs_dev->bd_dev;
+ __entry->wait = wait;
+ ),
+
+ TP_printk("dev %d,%d wait %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->wait)
+);
+
+#endif /* _TRACE_JBD_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>