]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
vfs: skip I_CLEAR state inodes
authorWu Fengguang <fengguang.wu@intel.com>
Thu, 2 Apr 2009 23:56:34 +0000 (16:56 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Apr 2009 02:04:48 +0000 (19:04 -0700)
clear_inode() will switch inode state from I_FREEING to I_CLEAR, and do so
_outside_ of inode_lock.  So any I_FREEING testing is incomplete without a
coupled testing of I_CLEAR.

So add I_CLEAR tests to drop_pagecache_sb(), generic_sync_sb_inodes() and
add_dquot_ref().

Masayoshi MIZUMA discovered the bug in drop_pagecache_sb() and Jan Kara
reminds fixing the other two cases.

Masayoshi MIZUMA has a nice panic flow:

=====================================================================
            [process A]               |        [process B]
 |                                    |
 |    prune_icache()                  | drop_pagecache()
 |      spin_lock(&inode_lock)        |   drop_pagecache_sb()
 |      inode->i_state |= I_FREEING;  |       |
 |      spin_unlock(&inode_lock)      |       V
 |          |                         |     spin_lock(&inode_lock)
 |          V                         |         |
 |      dispose_list()                |         |
 |        list_del()                  |         |
 |        clear_inode()               |         |
 |          inode->i_state = I_CLEAR  |         |
 |            |                       |         V
 |            |                       |      if (inode->i_state & (I_FREEING|I_WILL_FREE))
 |            |                       |              continue;           <==== NOT MATCH
 |            |                       |
 |            |                       | (DANGER from here on! Accessing disposing inode!)
 |            |                       |
 |            |                       |      __iget()
 |            |                       |        list_move() <===== PANIC on poisoned list !!
 V            V                       |
(time)
=====================================================================

Reported-by: Masayoshi MIZUMA <m.mizuma@jp.fujitsu.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/drop_caches.c
fs/fs-writeback.c
fs/quota/dquot.c

index 44d725f612cf346ec2e4d9bf2801913de393b967..b6a719a909f8eee1829764ea770e879c1c9c73fe 100644 (file)
@@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb)
 
        spin_lock(&inode_lock);
        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-               if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+               if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
                        continue;
                if (inode->i_mapping->nrpages == 0)
                        continue;
index e3fe9918faafb710446f5bd73445eeca5ce5b0d7..f81f9e71871e6645e04746ec99c3682a272e275e 100644 (file)
@@ -538,7 +538,8 @@ void generic_sync_sb_inodes(struct super_block *sb,
                list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
                        struct address_space *mapping;
 
-                       if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+                       if (inode->i_state &
+                                       (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
                                continue;
                        mapping = inode->i_mapping;
                        if (mapping->nrpages == 0)
index 2ca967a5ef77de2011d9896695bf80ad4f0e155b..607c579e5eca0cc0427c53a844d68ff159186e47 100644 (file)
@@ -823,7 +823,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 
        spin_lock(&inode_lock);
        list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-               if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
+               if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
                        continue;
                if (!atomic_read(&inode->i_writecount))
                        continue;