mm/autonuma: let architecture override how the write bit should be stashed in a protnone pte
author    Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
          Fri, 24 Feb 2017 22:59:16 +0000 (14:59 -0800)
committer Tim Gardner <tim.gardner@canonical.com>
          Fri, 10 Mar 2017 13:22:05 +0000 (06:22 -0700)
BugLink: http://bugs.launchpad.net/bugs/1671613
Patch series "Numabalancing preserve write fix", v2.

This patch series addresses an issue with THP migration and the
autonuma preserve-write feature.  migrate_misplaced_transhuge_page()
cannot deal with concurrent modification of the page.  It does a page
copy without following the migration pte sequence.  IIUC, this was
done to keep the migration simpler, and at the time of implementation
we didn't have THP page cache, which would have required a more
elaborate migration scheme.  That means THP autonuma migration expects
the protnone-with-saved-write conversion to be done such that neither
the kernel nor userspace can update the page content.  This patch
series enables archs like ppc64 to do that.  We are fine with the
current code in hash translation mode, because we never create a
hardware page table entry for a protnone pte.
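
To make that handshake concrete, here is a condensed, paraphrased
sketch of its two sides: the NUMA hinting protection applied in
change_pte_range() and the restore path in do_numa_page().  The helper
names make_numa_protnone() and restore_after_numa_fault() are invented
for illustration, and the bodies are simplified from the hunks below
rather than verbatim kernel code.

#include <linux/mm.h>

/*
 * NUMA hinting side: make the pte protnone but remember that it was
 * writable, using the new pte_mk_savedwrite() hook.
 */
static void make_numa_protnone(struct mm_struct *mm, unsigned long addr,
			       pte_t *pte, pgprot_t prot_numa)
{
	pte_t ptent = ptep_modify_prot_start(mm, addr, pte);
	bool preserve_write = pte_write(ptent);

	ptent = pte_modify(ptent, prot_numa);	  /* now protnone */
	if (preserve_write)
		ptent = pte_mk_savedwrite(ptent); /* stash the write bit */
	ptep_modify_prot_commit(mm, addr, pte, ptent);
}

/*
 * Fault side: restore the vma protection and only restore the write
 * bit if it was saved, so no extra write fault is taken afterwards.
 */
static pte_t restore_after_numa_fault(struct vm_area_struct *vma, pte_t pte)
{
	bool was_writable = pte_savedwrite(pte);

	pte = pte_modify(pte, vma->vm_page_prot);
	pte = pte_mkyoung(pte);
	if (was_writable)
		pte = pte_mkwrite(pte);
	return pte;
}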

This patch (of 2):

Autonuma preserves the write permission across a numa fault to avoid
taking a write fault after the numa fault (commit b191f9b106ea "mm:
numa: preserve PTE write permissions across a NUMA hinting fault").
Architectures can implement protnone in different ways, and some may
choose to do so by clearing the Read/Write/Exec bits of the pte.
Setting the write bit on such a pte can result in wrong behaviour.
Fix this up by allowing the arch to override how the write bit is
saved on a protnone pte.
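
The generic fallbacks added by this patch simply alias
pte_savedwrite()/pte_mk_savedwrite() (and the pmd variants) to
pte_write()/pte_mkwrite().  For illustration only, an architecture
whose protnone ptes clear the hardware R/W/X bits could instead stash
the write permission in a spare software pte bit, roughly as below.
This is a hypothetical override, not the ppc64 implementation, and
_PAGE_SW_SAVEDWRITE is an invented bit name:

#define _PAGE_SW_SAVEDWRITE	(1UL << 58)	/* spare software pte bit */

#define pte_savedwrite pte_savedwrite
static inline int pte_savedwrite(pte_t pte)
{
	/* report the stashed write permission, not the hardware bit */
	return !!(pte_val(pte) & _PAGE_SW_SAVEDWRITE);
}

#define pte_mk_savedwrite pte_mk_savedwrite
static inline pte_t pte_mk_savedwrite(pte_t pte)
{
	/* remember the write permission without granting it to hardware */
	return __pte(pte_val(pte) | _PAGE_SW_SAVEDWRITE);
}

With the names defined in the arch header before
include/asm-generic/pgtable.h is pulled in, the #ifndef fallbacks
below become no-ops, and the autonuma paths that previously called
pte_write()/pte_mkwrite() on protnone ptes go through these hooks
instead.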

[aneesh.kumar@linux.vnet.ibm.com: don't mark pte saved write in case of dirty_accountable]
Link: http://lkml.kernel.org/r/1487942884-16517-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com
[aneesh.kumar@linux.vnet.ibm.com: v3]
Link: http://lkml.kernel.org/r/1487498625-10891-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1487050314-3892-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Michael Neuling <mikey@neuling.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Ellerman <michaele@au1.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 288bc54949fc2625a4fd811a188fb200cc498946)
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
include/asm-generic/pgtable.h
mm/huge_memory.c
mm/memory.c
mm/mprotect.c

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 18af2bcefe6a7cc564934479d8c0390cb77498fa..b6f3a8a4b73829e9bc62f6fde9b2d14d2e2f506d 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -192,6 +192,22 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 }
 #endif
 
+#ifndef pte_savedwrite
+#define pte_savedwrite pte_write
+#endif
+
+#ifndef pte_mk_savedwrite
+#define pte_mk_savedwrite pte_mkwrite
+#endif
+
+#ifndef pmd_savedwrite
+#define pmd_savedwrite pmd_write
+#endif
+
+#ifndef pmd_mk_savedwrite
+#define pmd_mk_savedwrite pmd_mkwrite
+#endif
+
 #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5f3ad65c85de01fa6e4c8a07ef9494410bf2b133..6498fb3ac83f629df9db0c6f50a996d510424bc1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1253,7 +1253,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
        }
 
        /* See similar comment in do_numa_page for explanation */
-       if (!pmd_write(pmd))
+       if (!pmd_savedwrite(pmd))
                flags |= TNF_NO_GROUP;
 
        /*
@@ -1316,7 +1316,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
        goto out;
 clear_pmdnuma:
        BUG_ON(!PageLocked(page));
-       was_writable = pmd_write(pmd);
+       was_writable = pmd_savedwrite(pmd);
        pmd = pmd_modify(pmd, vma->vm_page_prot);
        pmd = pmd_mkyoung(pmd);
        if (was_writable)
@@ -1571,7 +1571,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                        entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
                        entry = pmd_modify(entry, newprot);
                        if (preserve_write)
-                               entry = pmd_mkwrite(entry);
+                               entry = pmd_mk_savedwrite(entry);
                        ret = HPAGE_PMD_NR;
                        set_pmd_at(mm, addr, pmd, entry);
                        BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
diff --git a/mm/memory.c b/mm/memory.c
index 77f6a2a2c4e8d76fffc2fb2c02c5520467e8424f..c89214451507b530568b0ee766c542e4f1d637a2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3388,7 +3388,7 @@ static int do_numa_page(struct vm_fault *vmf)
        int target_nid;
        bool migrated = false;
        pte_t pte;
-       bool was_writable = pte_write(vmf->orig_pte);
+       bool was_writable = pte_savedwrite(vmf->orig_pte);
        int flags = 0;
 
        /*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index f9c07f54dd62d928187985554695c3c1117c75a7..bccccc73080d015ba5259d04a5c313203674facf 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -113,7 +113,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                        ptent = ptep_modify_prot_start(mm, addr, pte);
                        ptent = pte_modify(ptent, newprot);
                        if (preserve_write)
-                               ptent = pte_mkwrite(ptent);
+                               ptent = pte_mk_savedwrite(ptent);
 
                        /* Avoid taking write faults for known dirty pages */
                        if (dirty_accountable && pte_dirty(ptent) &&