mm: migration: fix migration of huge PMD shared pages

author Mike Kravetz <mike.kravetz@oracle.com>

Fri, 5 Oct 2018 22:51:29 +0000 (15:51 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 5 Oct 2018 23:32:04 +0000 (16:32 -0700)
author Mike Kravetz <mike.kravetz@oracle.com>
Fri, 5 Oct 2018 22:51:29 +0000 (15:51 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Oct 2018 23:32:04 +0000 (16:32 -0700)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index 6b68e345f0ca64da6590817f719796471d0c5c2d..087fd5f48c9128752cf7ff8a872f30afab057381 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
  pte_t *huge_pte_offset(struct mm_struct *mm,
                        unsigned long addr, unsigned long sz);
  int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end);
  struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
                               int write);
  struct page *follow_huge_pd(struct vm_area_struct *vma,
@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void)
         return 0;
  }
  
+/* Stub variant: performs no unsharing and always returns 0. */
+static inline int
+huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+       return 0;
+}
+
+/* No-op variant: the values behind start and end are left as passed in. */
+static inline void
+adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                                    unsigned long *start, unsigned long *end)
+{
+}
+
  #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)        ({ BUG(); 0; })
  #define follow_huge_addr(mm, addr, write)      ERR_PTR(-EINVAL)
  #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
diff --git a/include/linux/mm.h b/include/linux/mm.h

index a61ebe8ad4ca92e72e23855c17f8e7c9ad059a54..0416a7204be37b331a506efedc5c4c1333633a6a 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
         return vma;
  }
  
+/*
+ * Report whether start..end falls inside vma, i.e. vma is non-NULL,
+ * vma->vm_start <= start and end <= vma->vm_end.
+ */
+static inline bool range_in_vma(struct vm_area_struct *vma,
+                               unsigned long start, unsigned long end)
+{
+       if (!vma)
+               return false;
+       if (start < vma->vm_start)
+               return false;
+       return end <= vma->vm_end;
+}
+
  #ifdef CONFIG_MMU
  pgprot_t vm_get_page_prot(unsigned long vm_flags);
  void vma_set_page_prot(struct vm_area_struct *vma);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 3c21775f196b2f38a25ed05687791a4c79a3d3d4..b903d746e1320209e15c22e681f606982f73af50 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4545,12 +4545,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
         /*
          * check on proper vm_flags and page table alignment
          */
-       if (vma->vm_flags & VM_MAYSHARE &&
-           vma->vm_start <= base && end <= vma->vm_end)
+       if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
                 return true;
         return false;
  }
  
+/*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ *
+ * Every PUD_SIZE aligned region that overlaps the range passed in is
+ * checked.  start and end are only widened to a region that lies entirely
+ * inside vma, and are left untouched when vma lacks VM_MAYSHARE.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{
+       unsigned long a_start, a_end;
+       unsigned long orig_end = *end;
+
+       if (!(vma->vm_flags & VM_MAYSHARE))
+               return;
+
+       /*
+        * Walk PUD aligned bases, bounded by the end originally passed in.
+        * Stepping by PUD_SIZE from an unaligned start can stop before the
+        * region holding the tail of the range has been examined.
+        */
+       for (a_start = *start & PUD_MASK; a_start < orig_end;
+            a_start += PUD_SIZE) {
+               a_end = a_start + PUD_SIZE;
+
+               /* Sharing is only possible for regions fully inside vma. */
+               if (!range_in_vma(vma, a_start, a_end))
+                       continue;
+
+               if (a_start < *start)
+                       *start = a_start;
+               if (a_end > *end)
+                       *end = a_end;
+       }
+}
+
  /*
   * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
   * and returns the corresponding pte. While this is not necessary for the
@@ -4648,6 +4676,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
  {
         return 0;
  }
+
+/* Empty variant: start and end are never modified here. */
+void
+adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                                    unsigned long *start, unsigned long *end)
+{
+}
  #define want_pmd_share()       (0)
  #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
  
diff --git a/mm/rmap.c b/mm/rmap.c

index eb477809a5c0a534e2977f6fd6c1df74a05bc170..1e79fac3186b63208cbe37a8c05597c44d2234c9 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         }
  
         /*
-        * We have to assume the worse case ie pmd for invalidation. Note that
-        * the page can not be free in this function as call of try_to_unmap()
-        * must hold a reference on the page.
+        * For THP, we have to assume the worst case ie pmd for invalidation.
+        * For hugetlb, it could be much worse if we need to do pud
+        * invalidation in the case of pmd sharing.
+        *
+        * Note that the page cannot be freed in this function, as the caller
+        * of try_to_unmap() must hold a reference on the page.
          */
         end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+       if (PageHuge(page)) {
+               /*
+                * If sharing is possible, start and end will be adjusted
+                * accordingly.
+                */
+               adjust_range_if_pmd_sharing_possible(vma, &start, &end);
+       }
         mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
  
         while (page_vma_mapped_walk(&pvmw)) {
@@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
                 address = pvmw.address;
  
+               if (PageHuge(page)) {
+                       if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+                               /*
+                                * huge_pmd_unshare unmapped an entire PMD
+                                * page.  There is no way of knowing exactly
+                                * which PMDs may be cached for this mm, so
+                                * we must flush them all.  start/end were
+                                * already adjusted above to cover this range.
+                                */
+                               flush_cache_range(vma, start, end);
+                               flush_tlb_range(vma, start, end);
+                               mmu_notifier_invalidate_range(mm, start, end);
+
+                               /*
+                                * The ref count of the PMD page was dropped
+                                * which is part of the way map counting
+                                * is done for shared PMDs.  Return 'true'
+                                * here.  When there is no other sharing,
+                                * huge_pmd_unshare returns false and we will
+                                * unmap the actual page and drop map count
+                                * to zero.
+                                */
+                               page_vma_mapped_walk_done(&pvmw);
+                               break;
+                       }
+               }
  
                 if (IS_ENABLED(CONFIG_MIGRATION) &&
                     (flags & TTU_MIGRATION) &&
author	Mike Kravetz <mike.kravetz@oracle.com>
	Fri, 5 Oct 2018 22:51:29 +0000 (15:51 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 5 Oct 2018 23:32:04 +0000 (16:32 -0700)
include/linux/hugetlb.h		patch \| blob \| blame \| history
include/linux/mm.h		patch \| blob \| blame \| history
mm/hugetlb.c		patch \| blob \| blame \| history
mm/rmap.c		patch \| blob \| blame \| history