]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/commitdiff
dax: prevent invalidation of mapped DAX entries
authorRoss Zwisler <ross.zwisler@linux.intel.com>
Fri, 12 May 2017 22:46:47 +0000 (15:46 -0700)
committerStefan Bader <stefan.bader@canonical.com>
Tue, 20 Jun 2017 08:50:46 +0000 (10:50 +0200)
BugLink: http://bugs.launchpad.net/bugs/1692898
commit 4636e70bb0a8b871998b6841a2e4b205cf2bc863 upstream.

Patch series "mm,dax: Fix data corruption due to mmap inconsistency",
v4.

This series fixes data corruption that can happen for DAX mounts when
page faults race with write(2) and as a result page tables get out of
sync with block mappings in the filesystem and thus data seen through
mmap is different from data seen through read(2).

The series passes testing with t_mmap_stale test program from Ross and
also other mmap related tests on DAX filesystem.

This patch (of 4):

dax_invalidate_mapping_entry() currently removes DAX exceptional entries
only if they are clean and unlocked.  This is done via:

  invalidate_mapping_pages()
    invalidate_exceptional_entry()
      dax_invalidate_mapping_entry()

However, for page cache pages removed in invalidate_mapping_pages()
there is an additional criteria which is that the page must not be
mapped.  This is noted in the comments above invalidate_mapping_pages()
and is checked in invalidate_inode_page().

For DAX entries this means that we can can end up in a situation where a
DAX exceptional entry, either a huge zero page or a regular DAX entry,
could end up mapped but without an associated radix tree entry.  This is
inconsistent with the rest of the DAX code and with what happens in the
page cache case.

We aren't able to unmap the DAX exceptional entry because according to
its comments invalidate_mapping_pages() isn't allowed to block, and
unmap_mapping_range() takes a write lock on the mapping->i_mmap_rwsem.

Since we essentially never have unmapped DAX entries to evict from the
radix tree, just remove dax_invalidate_mapping_entry().

Fixes: c6dcf52c23d2 ("mm: Invalidate DAX radix tree entries only if appropriate")
Link: http://lkml.kernel.org/r/20170510085419.27601-2-jack@suse.cz
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Reported-by: Jan Kara <jack@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
fs/dax.c
include/linux/dax.h
mm/truncate.c

index a39b404b646a30f09a8655c1d93d04af011724c0..5e6d7982b4db300a4cfb0bcdfd49591fbff087d1 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -502,35 +502,6 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
        return ret;
 }
 
-/*
- * Invalidate exceptional DAX entry if easily possible. This handles DAX
- * entries for invalidate_inode_pages() so we evict the entry only if we can
- * do so without blocking.
- */
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
-       int ret = 0;
-       void *entry, **slot;
-       struct radix_tree_root *page_tree = &mapping->page_tree;
-
-       spin_lock_irq(&mapping->tree_lock);
-       entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
-       if (!entry || !radix_tree_exceptional_entry(entry) ||
-           slot_locked(mapping, slot))
-               goto out;
-       if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-           radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-               goto out;
-       radix_tree_delete(page_tree, index);
-       mapping->nrexceptional--;
-       ret = 1;
-out:
-       spin_unlock_irq(&mapping->tree_lock);
-       if (ret)
-               dax_wake_mapping_entry_waiter(mapping, index, entry, true);
-       return ret;
-}
-
 /*
  * Invalidate exceptional DAX entry if it is clean.
  */
index 24ad711739955e573aefea713852b2f49a8fed85..f6cc5ef33e359293794efb457dc72a94713f89bd 100644 (file)
@@ -41,7 +41,6 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                        struct iomap_ops *ops);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
                                      pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
index dd7b24e083c5b1f76851eb0b5e3359dde92d910e..97a5cf0d735cc14fd8350320e239dbffb617f100 100644 (file)
@@ -66,17 +66,14 @@ static void truncate_exceptional_entry(struct address_space *mapping,
 
 /*
  * Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
- * clean entries.
+ * entries for invalidate_inode_pages().
  */
 static int invalidate_exceptional_entry(struct address_space *mapping,
                                        pgoff_t index, void *entry)
 {
-       /* Handled by shmem itself */
-       if (shmem_mapping(mapping))
+       /* Handled by shmem itself, or for DAX we do nothing. */
+       if (shmem_mapping(mapping) || dax_mapping(mapping))
                return 1;
-       if (dax_mapping(mapping))
-               return dax_invalidate_mapping_entry(mapping, index);
        clear_shadow_entry(mapping, index, entry);
        return 1;
 }