#include "internal.h"
-#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
#endif
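The warning covers the case where NUMA balancing's last_cpupid can no longer be packed into page->flags, so struct page grows an extra field. A minimal sketch of how that configuration arises, assuming the usual page-flags layout macros (illustrative, not quoted verbatim from mm.h / mm_types.h):

#if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0
#define LAST_CPUPID_NOT_IN_PAGE_FLAGS	/* no room left in page->flags */
#endif

struct page {
	/* ... */
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
	int _last_cpupid;	/* the "growing page-frame" cost the warning refers to */
#endif
	/* ... */
};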
tlb_flush(tlb);
mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb_table_flush(tlb);
-#endif
__tlb_reset_range(tlb);
}
{
struct mmu_gather_batch *batch;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb_table_flush(tlb);
+#endif
for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
free_pages_and_swap_cache(batch->pages, batch->nr);
batch->nr = 0;
* See the comment near struct mmu_table_batch.
*/
+/*
+ * If we want tlb_remove_table() to imply TLB invalidates.
+ */
+static inline void tlb_table_invalidate(struct mmu_gather *tlb)
+{
+#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
+ /*
+ * Invalidate page-table caches used by hardware walkers. Then we still
+ * need to RCU-sched wait while freeing the pages because software
+ * walkers can still be in-flight.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+#endif
+}
+
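With CONFIG_HAVE_RCU_TABLE_INVALIDATE selected, the batch flush now orders an explicit invalidate ahead of the RCU-sched free; a sketch of the resulting sequence:

	/*
	 * tlb_table_flush(tlb)
	 *   tlb_table_invalidate(tlb)     -> hardware walkers stop fetching
	 *                                    through cached intermediate entries
	 *   call_rcu_sched(&batch->rcu, tlb_remove_table_rcu)
	 *                                    -> software walkers drain before the
	 *                                       table pages are actually freed
	 */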
static void tlb_remove_table_smp_sync(void *arg)
{
/* Simply deliver the interrupt */
struct mmu_table_batch **batch = &tlb->batch;
if (*batch) {
+ tlb_table_invalidate(tlb);
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
}
{
struct mmu_table_batch **batch = &tlb->batch;
- /*
- * When there's less then two users of this mm there cannot be a
- * concurrent page-table walk.
- */
- if (atomic_read(&tlb->mm->mm_users) < 2) {
- __tlb_remove_table(table);
- return;
- }
-
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
+ tlb_table_invalidate(tlb);
tlb_remove_table_one(table);
return;
}
(*batch)->nr = 0;
}
+
(*batch)->tables[(*batch)->nr++] = table;
if ((*batch)->nr == MAX_TABLE_BATCH)
tlb_table_flush(tlb);
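/*
 * Note on the shortcut removed above: mm_users does not count CPUs that
 * only hold a lazy-TLB reference to the mm (those are tracked in mm_count),
 * and their hardware walkers may still traverse these page tables
 * speculatively. Always batching through call_rcu_sched() keeps such
 * walkers covered, at the cost of an RCU grace period even for
 * single-user mms.
 */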
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
+ if (!pfn_modify_allowed(pfn, pgprot))
+ return -EACCES;
+
track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
}
EXPORT_SYMBOL(vm_insert_pfn_prot);
+static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
+{
+ /* these checks mirror the abort conditions in vm_normal_page */
+ if (vma->vm_flags & VM_MIXEDMAP)
+ return true;
+ if (pfn_t_devmap(pfn))
+ return true;
+ if (pfn_t_special(pfn))
+ return true;
+ if (is_zero_pfn(pfn_t_to_pfn(pfn)))
+ return true;
+ return false;
+}
+
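A hedged usage sketch of what the relaxed check permits: a fault handler can now insert a pte-special or devmap pfn through vm_insert_mixed() even when the vma is not VM_MIXEDMAP. The device name and pfn below are made up; only the vm_insert_mixed()/pfn_t plumbing reflects the interfaces touched here.

#include <linux/mm.h>
#include <linux/pfn_t.h>

/* hypothetical fault handler for a device that maps raw pfns */
static int example_dev_fault(struct vm_fault *vmf)
{
	unsigned long pfn = 0x100000;	/* illustrative frame number */
	int err;

	/* PFN_SPECIAL entries are skipped by vm_normal_page() */
	err = vm_insert_mixed(vmf->vma, vmf->address,
			      __pfn_to_pfn_t(pfn, PFN_SPECIAL));
	if (err == -ENOMEM)
		return VM_FAULT_OOM;
	if (err < 0 && err != -EBUSY)
		return VM_FAULT_SIGBUS;
	return VM_FAULT_NOPAGE;
}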
static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn, bool mkwrite)
{
pgprot_t pgprot = vma->vm_page_prot;
- BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
+ BUG_ON(!vm_mixed_ok(vma, pfn));
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
track_pfn_insert(vma, &pgprot, pfn);
+ if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+ return -EACCES;
+
/*
* If we don't have pte special, then we have to use the pfn_valid()
* based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
{
pte_t *pte;
spinlock_t *ptl;
+ int err = 0;
pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
	if (!pte)
		return -ENOMEM;
arch_enter_lazy_mmu_mode();
do {
BUG_ON(!pte_none(*pte));
+ if (!pfn_modify_allowed(pfn, prot)) {
+ err = -EACCES;
+ break;
+ }
set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);
- return 0;
+ return err;
}
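pfn_modify_allowed() is an arch hook; only architectures that supply their own version (x86 does, for the L1TF mitigation) can actually refuse a pfn. A minimal sketch of the generic fallback, assuming the usual __HAVE_ARCH_* override convention:

/* asm-generic fallback (sketch): without an arch override, every pfn is allowed */
#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
{
	return true;
}
#endif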
static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
			unsigned long addr, unsigned long end,
			unsigned long pfn, pgprot_t prot)
{
pmd_t *pmd;
unsigned long next;
+ int err;
pfn -= addr >> PAGE_SHIFT;
pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	VM_BUG_ON(pmd_trans_huge(*pmd));
do {
next = pmd_addr_end(addr, end);
- if (remap_pte_range(mm, pmd, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
+ err = remap_pte_range(mm, pmd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ return err;
} while (pmd++, addr = next, addr != end);
return 0;
}
{
pud_t *pud;
unsigned long next;
+ int err;
pfn -= addr >> PAGE_SHIFT;
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (remap_pmd_range(mm, pud, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
+ err = remap_pmd_range(mm, pud, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ return err;
} while (pud++, addr = next, addr != end);
return 0;
}
{
p4d_t *p4d;
unsigned long next;
+ int err;
pfn -= addr >> PAGE_SHIFT;
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return -ENOMEM;
do {
next = p4d_addr_end(addr, end);
- if (remap_pud_range(mm, p4d, addr, next,
- pfn + (addr >> PAGE_SHIFT), prot))
- return -ENOMEM;
+ err = remap_pud_range(mm, p4d, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot);
+ if (err)
+ return err;
} while (p4d++, addr = next, addr != end);
return 0;
}
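With -EACCES now propagated up through remap_pud_range()/remap_p4d_range(), a caller of remap_pfn_range() sees the refusal directly. A hedged example of a driver mmap handler; the handler name and physical base are illustrative only:

#include <linux/fs.h>
#include <linux/mm.h>

#define EXAMPLE_PHYS_BASE	0xfd000000UL	/* made-up device address */

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* returns -EACCES if pfn_modify_allowed() rejects any pfn in the range */
	return remap_pfn_range(vma, vma->vm_start,
			       EXAMPLE_PHYS_BASE >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}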
int ret = 0;
bool vma_readahead = swap_use_vma_readahead();
- if (vma_readahead)
+ if (vma_readahead) {
page = swap_readahead_detect(vmf, &swap_ra);
+ swapcache = page;
+ }
+
if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
if (page)
put_page(page);
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
- if (!page)
+ if (!page) {
page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
vmf->address);
+ swapcache = page;
+ }
+
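/*
 * Keep swapcache tracking page here: the rest of do_swap_page() uses
 * swapcache to decide whether the page came from (or was read into) the
 * swap cache, so the VMA-readahead path must publish the same page that
 * the plain lookup_swap_cache() path does.
 */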
if (!page) {
struct swap_info_struct *si = swp_swap_info(entry);