mm, hugetlb: get rid of surplus page accounting tricks

author Michal Hocko <mhocko@suse.com>

Thu, 1 Feb 2018 00:20:52 +0000 (16:20 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 1 Feb 2018 01:18:40 +0000 (17:18 -0800)
author Michal Hocko <mhocko@suse.com>
Thu, 1 Feb 2018 00:20:52 +0000 (16:20 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 1 Feb 2018 01:18:40 +0000 (17:18 -0800)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index f260ffa263633a35e14a380f1a35d9aa47f7afa5..7dc80cbe8e89fe536c88f5346987cd69abd96cd1 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1540,62 +1540,46 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
  static struct page *__alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                 int nid, nodemask_t *nmask)
  {
-       struct page *page;
-       unsigned int r_nid;
+       struct page *page = NULL;
  
         if (hstate_is_gigantic(h))
                 return NULL;
  
-       /*
-        * Assume we will successfully allocate the surplus page to
-        * prevent racing processes from causing the surplus to exceed
-        * overcommit
-        *
-        * This however introduces a different race, where a process B
-        * tries to grow the static hugepage pool while alloc_pages() is
-        * called by process A. B will only examine the per-node
-        * counters in determining if surplus huge pages can be
-        * converted to normal huge pages in adjust_pool_surplus(). A
-        * won't be able to increment the per-node counter, until the
-        * lock is dropped by B, but B doesn't drop hugetlb_lock until
-        * no more huge pages can be converted from surplus to normal
-        * state (and doesn't try to convert again). Thus, we have a
-        * case where a surplus huge page exists, the pool is grown, and
-        * the surplus huge page still exists after, even though it
-        * should just have been converted to a normal huge page. This
-        * does not leak memory, though, as the hugepage will be freed
-        * once it is out of use. It also does not allow the counters to
-        * go out of whack in adjust_pool_surplus() as we don't modify
-        * the node values until we've gotten the hugepage and only the
-        * per-node value is checked there.
-        */
         spin_lock(&hugetlb_lock);
-       if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-               spin_unlock(&hugetlb_lock);
-               return NULL;
-       } else {
-               h->nr_huge_pages++;
-               h->surplus_huge_pages++;
-       }
+       if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
+               goto out_unlock;
         spin_unlock(&hugetlb_lock);
  
         page = __hugetlb_alloc_buddy_huge_page(h, gfp_mask, nid, nmask);
+       if (!page)
+               goto out_unlock;
  
         spin_lock(&hugetlb_lock);
-       if (page) {
+       /*
+        * We could have raced with the pool size change.
+        * Double check that and simply deallocate the new page
+        * if we would end up overcommitting the surpluses. Abuse
+        * temporary page to work around the nasty free_huge_page
+        * codeflow
+        */
+       if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
+               SetPageHugeTemporary(page);
+               put_page(page);
+               page = NULL;
+       } else {
+               int r_nid;
+
+               h->surplus_huge_pages++;
+               h->nr_huge_pages++;
                 INIT_LIST_HEAD(&page->lru);
                 r_nid = page_to_nid(page);
                 set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
                 set_hugetlb_cgroup(page, NULL);
-               /*
-                * We incremented the global counters already
-                */
                 h->nr_huge_pages_node[r_nid]++;
                 h->surplus_huge_pages_node[r_nid]++;
-       } else {
-               h->nr_huge_pages--;
-               h->surplus_huge_pages--;
         }
+
+out_unlock:
         spin_unlock(&hugetlb_lock);
  
         return page;
author	Michal Hocko <mhocko@suse.com>
	Thu, 1 Feb 2018 00:20:52 +0000 (16:20 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 1 Feb 2018 01:18:40 +0000 (17:18 -0800)