]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - mm/hugetlb.c
ARM: dts: Cygnus: Fix MDIO node address/size cells
[mirror_ubuntu-bionic-kernel.git] / mm / hugetlb.c
index 9a334f5fb730873190a57648bc0f040f91ac0ed6..d9b59501c8a307c7d5b1e6e1eeb66e23847c178b 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
 #include <linux/string_helpers.h>
@@ -1080,11 +1081,10 @@ static bool pfn_range_valid_gigantic(struct zone *z,
        struct page *page;
 
        for (i = start_pfn; i < end_pfn; i++) {
-               if (!pfn_valid(i))
+               page = pfn_to_online_page(i);
+               if (!page)
                        return false;
 
-               page = pfn_to_page(i);
-
                if (page_zone(page) != z)
                        return false;
 
@@ -1270,12 +1270,23 @@ void free_huge_page(struct page *page)
        ClearPagePrivate(page);
 
        /*
-        * A return code of zero implies that the subpool will be under its
-        * minimum size if the reservation is not restored after page is free.
-        * Therefore, force restore_reserve operation.
+        * If PagePrivate() was set on page, page allocation consumed a
+        * reservation.  If the page was associated with a subpool, there
+        * would have been a page reserved in the subpool before allocation
+        * via hugepage_subpool_get_pages().  Since we are 'restoring' the
+        * reservtion, do not call hugepage_subpool_put_pages() as this will
+        * remove the reserved page from the subpool.
         */
-       if (hugepage_subpool_put_pages(spool, 1) == 0)
-               restore_reserve = true;
+       if (!restore_reserve) {
+               /*
+                * A return code of zero implies that the subpool will be
+                * under its minimum size if the reservation is not restored
+                * after page is free.  Therefore, force restore_reserve
+                * operation.
+                */
+               if (hugepage_subpool_put_pages(spool, 1) == 0)
+                       restore_reserve = true;
+       }
 
        spin_lock(&hugetlb_lock);
        clear_page_huge_active(page);
@@ -2158,6 +2169,7 @@ static void __init gather_bootmem_prealloc(void)
                 */
                if (hstate_is_gigantic(h))
                        adjust_managed_page_count(page, 1 << h->order);
+               cond_resched();
        }
 }
 
@@ -3209,7 +3221,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                            struct vm_area_struct *vma)
 {
-       pte_t *src_pte, *dst_pte, entry;
+       pte_t *src_pte, *dst_pte, entry, dst_entry;
        struct page *ptepage;
        unsigned long addr;
        int cow;
@@ -3237,15 +3249,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        break;
                }
 
-               /* If the pagetables are shared don't copy or take references */
-               if (dst_pte == src_pte)
+               /*
+                * If the pagetables are shared don't copy or take references.
+                * dst_pte == src_pte is the common case of src/dest sharing.
+                *
+                * However, src could have 'unshared' and dst shares with
+                * another vma.  If dst_pte !none, this implies sharing.
+                * Check here before taking page table lock, and once again
+                * after taking the lock below.
+                */
+               dst_entry = huge_ptep_get(dst_pte);
+               if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
                        continue;
 
                dst_ptl = huge_pte_lock(h, dst, dst_pte);
                src_ptl = huge_pte_lockptr(h, src, src_pte);
                spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                entry = huge_ptep_get(src_pte);
-               if (huge_pte_none(entry)) { /* skip none entry */
+               dst_entry = huge_ptep_get(dst_pte);
+               if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+                       /*
+                        * Skip if src entry none.  Also, skip in the
+                        * unlikely case dst entry !none as this implies
+                        * sharing with another vma.
+                        */
                        ;
                } else if (unlikely(is_hugetlb_entry_migration(entry) ||
                                    is_hugetlb_entry_hwpoisoned(entry))) {
@@ -3565,7 +3592,6 @@ retry_avoidcopy:
        copy_user_huge_page(new_page, old_page, address, vma,
                            pages_per_huge_page(h));
        __SetPageUptodate(new_page);
-       set_page_huge_active(new_page);
 
        mmun_start = address & huge_page_mask(h);
        mmun_end = mmun_start + huge_page_size(h);
@@ -3588,6 +3614,7 @@ retry_avoidcopy:
                                make_huge_pte(vma, new_page, 1));
                page_remove_rmap(old_page, true);
                hugepage_add_new_anon_rmap(new_page, vma, address);
+               set_page_huge_active(new_page);
                /* Make the old page be freed below */
                new_page = old_page;
        }
@@ -3647,6 +3674,12 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                return err;
        ClearPagePrivate(page);
 
+       /*
+        * set page dirty so that it will not be removed from cache/file
+        * by non-hugetlbfs specific code paths.
+        */
+       set_page_dirty(page);
+
        spin_lock(&inode->i_lock);
        inode->i_blocks += blocks_per_huge_page(h);
        spin_unlock(&inode->i_lock);
@@ -3664,6 +3697,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
        struct page *page;
        pte_t new_pte;
        spinlock_t *ptl;
+       bool new_page = false;
 
        /*
         * Currently, we are forced to kill the process in the event the
@@ -3710,8 +3744,7 @@ retry:
                         * handling userfault.  Reacquire after handling
                         * fault to make calling code simpler.
                         */
-                       hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping,
-                                                       idx, address);
+                       hash = hugetlb_fault_mutex_hash(h, mapping, idx, address);
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                        ret = handle_userfault(&vmf, VM_UFFD_MISSING);
                        mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -3729,7 +3762,7 @@ retry:
                }
                clear_huge_page(page, address, pages_per_huge_page(h));
                __SetPageUptodate(page);
-               set_page_huge_active(page);
+               new_page = true;
 
                if (vma->vm_flags & VM_MAYSHARE) {
                        int err = huge_add_to_page_cache(page, mapping, idx);
@@ -3800,6 +3833,15 @@ retry:
        }
 
        spin_unlock(ptl);
+
+       /*
+        * Only make newly allocated pages active.  Existing pages found
+        * in the pagecache could be !page_huge_active() if they have been
+        * isolated for migration.
+        */
+       if (new_page)
+               set_page_huge_active(page);
+
        unlock_page(page);
 out:
        return ret;
@@ -3814,21 +3856,14 @@ backout_unlocked:
 }
 
 #ifdef CONFIG_SMP
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-                           struct vm_area_struct *vma,
-                           struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
                            pgoff_t idx, unsigned long address)
 {
        unsigned long key[2];
        u32 hash;
 
-       if (vma->vm_flags & VM_SHARED) {
-               key[0] = (unsigned long) mapping;
-               key[1] = idx;
-       } else {
-               key[0] = (unsigned long) mm;
-               key[1] = address >> huge_page_shift(h);
-       }
+       key[0] = (unsigned long) mapping;
+       key[1] = idx;
 
        hash = jhash2((u32 *)&key, sizeof(key)/sizeof(u32), 0);
 
@@ -3839,9 +3874,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
  * For uniprocesor systems we always use a single mutex, so just
  * return 0 and avoid the hashing overhead.
  */
-u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
-                           struct vm_area_struct *vma,
-                           struct address_space *mapping,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping,
                            pgoff_t idx, unsigned long address)
 {
        return 0;
@@ -3887,7 +3920,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         * get spurious allocation failures if two CPUs race to instantiate
         * the same page in the page cache.
         */
-       hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, address);
+       hash = hugetlb_fault_mutex_hash(h, mapping, idx, address);
        mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
        entry = huge_ptep_get(ptep);
@@ -4019,7 +4052,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 
                /* fallback to copy_from_user outside mmap_sem */
                if (unlikely(ret)) {
-                       ret = -EFAULT;
+                       ret = -ENOENT;
                        *pagep = page;
                        /* don't free the page */
                        goto out;
@@ -4035,7 +4068,6 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
         * the set_pte_at() write.
         */
        __SetPageUptodate(page);
-       set_page_huge_active(page);
 
        mapping = dst_vma->vm_file->f_mapping;
        idx = vma_hugecache_offset(h, dst_vma, dst_addr);
@@ -4103,6 +4135,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
        update_mmu_cache(dst_vma, dst_addr, dst_pte);
 
        spin_unlock(ptl);
+       set_page_huge_active(page);
        if (vm_shared)
                unlock_page(page);
        ret = 0;
@@ -4208,7 +4241,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                break;
                        }
                        if (ret & VM_FAULT_RETRY) {
-                               if (nonblocking)
+                               if (nonblocking &&
+                                   !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
                                        *nonblocking = 0;
                                *nr_pages = 0;
                                /*
@@ -4227,6 +4261,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
                pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
                page = pte_page(huge_ptep_get(pte));
+
+               /*
+                * Instead of doing 'try_get_page()' below in the same_page
+                * loop, just check the count once here.
+                */
+               if (unlikely(page_count(page) <= 0)) {
+                       if (pages) {
+                               spin_unlock(ptl);
+                               remainder = 0;
+                               err = -ENOMEM;
+                               break;
+                       }
+               }
 same_page:
                if (pages) {
                        pages[i] = mem_map_offset(page, pfn_offset);
@@ -4354,6 +4401,12 @@ int hugetlb_reserve_pages(struct inode *inode,
        struct resv_map *resv_map;
        long gbl_reserve;
 
+       /* This should never happen */
+       if (from > to) {
+               VM_WARN(1, "%s called with a negative range\n", __func__);
+               return -EINVAL;
+       }
+
        /*
         * Only apply hugepage reservation if asked. At fault time, an
         * attempt will be made for VM_NORESERVE to allocate a page
@@ -4519,12 +4572,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
        /*
         * check on proper vm_flags and page table alignment
         */
-       if (vma->vm_flags & VM_MAYSHARE &&
-           vma->vm_start <= base && end <= vma->vm_end)
+       if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
                return true;
        return false;
 }
 
+/*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{
+       unsigned long check_addr = *start;
+
+       if (!(vma->vm_flags & VM_MAYSHARE))
+               return;
+
+       for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+               unsigned long a_start = check_addr & PUD_MASK;
+               unsigned long a_end = a_start + PUD_SIZE;
+
+               /*
+                * If sharing is possible, adjust start/end if necessary.
+                */
+               if (range_in_vma(vma, a_start, a_end)) {
+                       if (a_start < *start)
+                               *start = a_start;
+                       if (a_end > *end)
+                               *end = a_end;
+               }
+       }
+}
+
 /*
  * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
  * and returns the corresponding pte. While this is not necessary for the
@@ -4622,6 +4703,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
        return 0;
 }
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{
+}
 #define want_pmd_share()       (0)
 #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */