UBUNTU: Ubuntu-4.15.0-96.97

[mirror_ubuntu-bionic-kernel.git] / mm / oom_kill.c
diff --git a/mm/oom_kill.c b/mm/oom_kill.c

index 29f855551efef89d6c251075828bc0cd79da1842..4591ba7fd951338b0761ec74e9114c33d91fe84b 100644 (file)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -474,7 +474,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
         return false;
  }
  
-
  #ifdef CONFIG_MMU
  /*
   * OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -485,16 +484,51 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
  static struct task_struct *oom_reaper_list;
  static DEFINE_SPINLOCK(oom_reaper_lock);
  
-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
  {
-       struct mmu_gather tlb;
         struct vm_area_struct *vma;
+
+       /*
+        * Tell all users of get_user/copy_from_user etc... that the content
+        * is no longer stable. No barriers really needed because unmapping
+        * should imply barriers already and the reader would hit a page fault
+        * if it stumbled over a reaped memory.
+        */
+       set_bit(MMF_UNSTABLE, &mm->flags);
+
+       for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+               if (!can_madv_dontneed_vma(vma))
+                       continue;
+
+               /*
+                * Only anonymous pages have a good chance to be dropped
+                * without additional steps which we cannot afford as we
+                * are OOM already.
+                *
+                * We do not even care about fs backed pages because all
+                * which are reclaimable have already been reclaimed and
+                * we do not want to block exit_mmap by keeping mm ref
+                * count elevated without a good reason.
+                */
+               if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+                       struct mmu_gather tlb;
+
+                       tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
+                       unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
+                                        NULL);
+                       tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
+               }
+       }
+}
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
         bool ret = true;
  
         /*
          * We have to make sure to not race with the victim exit path
          * and cause premature new oom victim selection:
-        * __oom_reap_task_mm           exit_mm
+        * oom_reap_task_mm             exit_mm
          *   mmget_not_zero
          *                                mmput
          *                                  atomic_dec_and_test
@@ -542,35 +576,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
  
         trace_start_task_reaping(tsk->pid);
  
-       /*
-        * Tell all users of get_user/copy_from_user etc... that the content
-        * is no longer stable. No barriers really needed because unmapping
-        * should imply barriers already and the reader would hit a page fault
-        * if it stumbled over a reaped memory.
-        */
-       set_bit(MMF_UNSTABLE, &mm->flags);
+       __oom_reap_task_mm(mm);
  
-       for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-               if (!can_madv_dontneed_vma(vma))
-                       continue;
-
-               /*
-                * Only anonymous pages have a good chance to be dropped
-                * without additional steps which we cannot afford as we
-                * are OOM already.
-                *
-                * We do not even care about fs backed pages because all
-                * which are reclaimable have already been reclaimed and
-                * we do not want to block exit_mmap by keeping mm ref
-                * count elevated without a good reason.
-                */
-               if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
-                       tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
-                       unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
-                                        NULL);
-                       tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
-               }
-       }
         pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
                         task_pid_nr(tsk), tsk->comm,
                         K(get_mm_counter(mm, MM_ANONPAGES)),
@@ -591,13 +598,12 @@ static void oom_reap_task(struct task_struct *tsk)
         struct mm_struct *mm = tsk->signal->oom_mm;
  
         /* Retry the down_read_trylock(mmap_sem) a few times */
-       while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+       while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
                 schedule_timeout_idle(HZ/10);
  
         if (attempts <= MAX_OOM_REAP_RETRIES)
                 goto done;
  
-
         pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
                 task_pid_nr(tsk), tsk->comm);
         debug_show_all_locks();
@@ -637,8 +643,8 @@ static int oom_reaper(void *unused)
  
  static void wake_oom_reaper(struct task_struct *tsk)
  {
-       /* tsk is already queued? */
-       if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+       /* mm is already queued? */
+       if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
                 return;
  
         get_task_struct(tsk);
@@ -874,6 +880,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
          * still freeing memory.
          */
         read_lock(&tasklist_lock);
+
+       /*
+        * The task 'p' might have already exited before reaching here. The
+        * put_task_struct() would free task_struct 'p' while the loop still
+        * tries to access the fields of 'p', so take an extra reference.
+        */
+       get_task_struct(p);
         for_each_thread(p, t) {
                 list_for_each_entry(child, &t->children, sibling) {
                         unsigned int child_points;
@@ -893,6 +906,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
                         }
                 }
         }
+       put_task_struct(p);
         read_unlock(&tasklist_lock);
  
         p = find_lock_task_mm(victim);
@@ -1046,9 +1060,10 @@ bool out_of_memory(struct oom_control *oc)
          * The OOM killer does not compensate for IO-less reclaim.
          * pagefault_out_of_memory lost its gfp context so we have to
          * make sure exclude 0 mask - all other users should have at least
-        * ___GFP_DIRECT_RECLAIM to get here.
+        * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
+        * invoke the OOM killer even if it is a GFP_NOFS allocation.
          */
-       if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
+       if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
                 return true;
  
         /*