mm, hugetlb: decrement reserve count if VM_NORESERVE alloc page cache

author Joonsoo Kim <iamjoonsoo.kim@lge.com>

Wed, 11 Sep 2013 21:21:18 +0000 (14:21 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 11 Sep 2013 22:57:28 +0000 (15:57 -0700)
author Joonsoo Kim <iamjoonsoo.kim@lge.com>
Wed, 11 Sep 2013 21:21:18 +0000 (14:21 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 11 Sep 2013 22:57:28 +0000 (15:57 -0700)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index dacf0d2256d9790c669b88867a5c277b7a423991..5b084c7b34c6ba2232d4afcfc8729d3b87b65f5b 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -443,10 +443,23 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
  }
  
  /* Returns true if the VMA has associated reserve pages */
-static int vma_has_reserves(struct vm_area_struct *vma)
+static int vma_has_reserves(struct vm_area_struct *vma, long chg)
  {
-       if (vma->vm_flags & VM_NORESERVE)
-               return 0;
+       if (vma->vm_flags & VM_NORESERVE) {
+               /*
+                * This address is already reserved by another process
+                * (chg == 0), so we should decrement the reserve count. Without
+                * decrementing, the reserve count remains after the inode is
+                * released, because this allocated page will go into the page
+                * cache and is regarded as coming from the reserved pool in the
+                * release step.  Currently, we don't have any other solution to
+                * deal with this situation properly, so add a work-around here.
+                */
+               if (vma->vm_flags & VM_MAYSHARE && chg == 0)
+                       return 1;
+               else
+                       return 0;
+       }
  
         /* Shared mappings always use reserves */
         if (vma->vm_flags & VM_MAYSHARE)
@@ -520,7 +533,8 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
  
  static struct page *dequeue_huge_page_vma(struct hstate *h,
                                 struct vm_area_struct *vma,
-                               unsigned long address, int avoid_reserve)
+                               unsigned long address, int avoid_reserve,
+                               long chg)
  {
         struct page *page = NULL;
         struct mempolicy *mpol;
@@ -535,7 +549,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
          * have no page reserves. This check ensures that reservations are
          * not "stolen". The child may still get SIGKILLed
          */
-       if (!vma_has_reserves(vma) &&
+       if (!vma_has_reserves(vma, chg) &&
                         h->free_huge_pages - h->resv_huge_pages == 0)
                 goto err;
  
@@ -553,8 +567,12 @@ retry_cpuset:
                 if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
                         page = dequeue_huge_page_node(h, zone_to_nid(zone));
                         if (page) {
-                               if (!avoid_reserve && vma_has_reserves(vma))
-                                       h->resv_huge_pages--;
+                               if (avoid_reserve)
+                                       break;
+                               if (!vma_has_reserves(vma, chg))
+                                       break;
+
+                               h->resv_huge_pages--;
                                 break;
                         }
                 }
@@ -1155,7 +1173,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
                 return ERR_PTR(-ENOSPC);
         }
         spin_lock(&hugetlb_lock);
-       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
+       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
         if (!page) {
                 spin_unlock(&hugetlb_lock);
                 page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
author	Joonsoo Kim <iamjoonsoo.kim@lge.com>
	Wed, 11 Sep 2013 21:21:18 +0000 (14:21 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 11 Sep 2013 22:57:28 +0000 (15:57 -0700)