]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
mm,ksm: FOLL_MIGRATION do migration_entry_wait
authorHugh Dickins <hughd@google.com>
Sat, 23 Feb 2013 00:36:07 +0000 (16:36 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Feb 2013 01:50:23 +0000 (17:50 -0800)
In "ksm: remove old stable nodes more thoroughly" I said that I'd never
seen its WARN_ON_ONCE(page_mapped(page)).  True at the time of writing,
but it soon appeared once I tried fuller tests on the whole series.

It turned out to be due to the KSM page migration itself: unmerge_and_
remove_all_rmap_items() failed to locate and replace all the KSM pages,
because of that hiatus in page migration when old pte has been replaced
by migration entry, but not yet by new pte.  follow_page() finds no page
at that instant, but a KSM page reappears shortly after, without a
fault.

Add FOLL_MIGRATION flag, so follow_page() can do migration_entry_wait()
for KSM's break_cow().  I'd have preferred to avoid another flag, and do
it every time, in case someone else makes the same easy mistake; but did
not find another transgressor (the common get_user_pages() is of course
safe), and cannot be sure that every follow_page() caller is prepared to
sleep - ia64's xencomm_vtop()? Now, THP's wait_split_huge_page() can
already sleep there, since anon_vma locking was changed to mutex, but
maybe that's somehow excluded.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/mm.h
mm/ksm.c
mm/memory.c

index 6124f1db50fe90d9f94bbfcb20b069a6df905e31..e7c3f9a0111aa3b2fcb7b145477709208d668bcc 100644 (file)
@@ -1651,6 +1651,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_SPLIT     0x80    /* don't return transhuge pages, split them */
 #define FOLL_HWPOISON  0x100   /* check page is hwpoisoned */
 #define FOLL_NUMA      0x200   /* force NUMA hinting page fault */
+#define FOLL_MIGRATION 0x400   /* wait for page to replace migration entry */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
                        void *data);
index 0320327b8a6c94a8463bfb8a601277d5ac10b422..d61cba6fa1dc4380677dd03f6e092b12e208c881 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -364,7 +364,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 
        do {
                cond_resched();
-               page = follow_page(vma, addr, FOLL_GET);
+               page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION);
                if (IS_ERR_OR_NULL(page))
                        break;
                if (PageKsm(page))
index 5d2ef1217d0c6f8747f3efc448a8cc65b27c2867..ec8ba011fa7dd3397a3ba629b40082eabf6c45d3 100644 (file)
@@ -1548,8 +1548,24 @@ split_fallthrough:
        ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
 
        pte = *ptep;
-       if (!pte_present(pte))
-               goto no_page;
+       if (!pte_present(pte)) {
+               swp_entry_t entry;
+               /*
+                * KSM's break_ksm() relies upon recognizing a ksm page
+                * even while it is being migrated, so for that case we
+                * need migration_entry_wait().
+                */
+               if (likely(!(flags & FOLL_MIGRATION)))
+                       goto no_page;
+               if (pte_none(pte) || pte_file(pte))
+                       goto no_page;
+               entry = pte_to_swp_entry(pte);
+               if (!is_migration_entry(entry))
+                       goto no_page;
+               pte_unmap_unlock(ptep, ptl);
+               migration_entry_wait(mm, pmd, address);
+               goto split_fallthrough;
+       }
        if ((flags & FOLL_NUMA) && pte_numa(pte))
                goto no_page;
        if ((flags & FOLL_WRITE) && !pte_write(pte))