]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - mm/oom_kill.c
oom: select task from tasklist for mempolicy ooms
[mirror_ubuntu-zesty-kernel.git] / mm / oom_kill.c
index 7c8488f6a3f5fd3443a718505c4d6d16368236fa..13ceed78bc452754b7cffd03a971717b9f5efbc4 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/memcontrol.h>
+#include <linux/mempolicy.h>
 #include <linux/security.h>
 
 int sysctl_panic_on_oom;
@@ -35,23 +36,57 @@ int sysctl_oom_dump_tasks;
 static DEFINE_SPINLOCK(zone_scan_lock);
 /* #define DEBUG */
 
-/*
- * Is all threads of the target process nodes overlap ours?
+#ifdef CONFIG_NUMA
+/**
+ * has_intersects_mems_allowed() - check task eligibility for kill
+ * @tsk: task struct of the task to consider
+ * @mask: nodemask passed to page allocator for mempolicy ooms
+ *
+ * Task eligibility is determined by whether or not a candidate task, @tsk,
+ * shares mempolicy nodes with @mask when the oom is mempolicy constrained,
+ * or otherwise whether its allowed cpuset nodes intersect those of current.
  */
-static int has_intersects_mems_allowed(struct task_struct *tsk)
+static bool has_intersects_mems_allowed(struct task_struct *tsk,
+                                       const nodemask_t *mask)
 {
-       struct task_struct *t;
+       struct task_struct *start = tsk;
 
-       t = tsk;
        do {
-               if (cpuset_mems_allowed_intersects(current, t))
-                       return 1;
-               t = next_thread(t);
-       } while (t != tsk);
-
-       return 0;
+               if (mask) {
+                       /*
+                        * If this is a mempolicy constrained oom, tsk's
+                        * cpuset is irrelevant.  Only return true if its
+                        * mempolicy intersects current, otherwise it may be
+                        * needlessly killed.
+                        */
+                       if (mempolicy_nodemask_intersects(tsk, mask))
+                               return true;
+               } else {
+                       /*
+                        * This is not a mempolicy constrained oom, so only
+                        * check the mems of tsk's cpuset.
+                        */
+                       if (cpuset_mems_allowed_intersects(current, tsk))
+                               return true;
+               }
+               tsk = next_thread(tsk);
+       } while (tsk != start);
+       return false;
+}
+#else
+static bool has_intersects_mems_allowed(struct task_struct *tsk,
+                                       const nodemask_t *mask)
+{
+       return true;
 }
+#endif /* CONFIG_NUMA */
 
+/*
+ * The process p may have detached its own ->mm while exiting or through
+ * use_mm(), but one or more of its subthreads may still have a valid
+ * pointer.  Return p, or any of its subthreads with a valid ->mm, with
+ * task_lock() held.
+ */
 static struct task_struct *find_lock_task_mm(struct task_struct *p)
 {
        struct task_struct *t = p;
@@ -106,10 +141,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
         * The memory size of the process is the basis for the badness.
         */
        points = p->mm->total_vm;
-
-       /*
-        * After this unlock we can no longer dereference local variable `mm'
-        */
        task_unlock(p);
 
        /*
@@ -253,7 +284,8 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
  * (not docbooked, we don't want this one cluttering up the manual)
  */
 static struct task_struct *select_bad_process(unsigned long *ppoints,
-                                               struct mem_cgroup *mem)
+               struct mem_cgroup *mem, enum oom_constraint constraint,
+               const nodemask_t *mask)
 {
        struct task_struct *p;
        struct task_struct *chosen = NULL;
@@ -269,7 +301,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
                        continue;
                if (mem && !task_in_mem_cgroup(p, mem))
                        continue;
-               if (!has_intersects_mems_allowed(p))
+               if (!has_intersects_mems_allowed(p,
+                               constraint == CONSTRAINT_MEMORY_POLICY ? mask :
+                                                                        NULL))
                        continue;
 
                /*
@@ -497,7 +531,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
                panic("out of memory(memcg). panic_on_oom is selected.\n");
        read_lock(&tasklist_lock);
 retry:
-       p = select_bad_process(&points, mem);
+       p = select_bad_process(&points, mem, CONSTRAINT_NONE, NULL);
        if (!p || PTR_ERR(p) == -1UL)
                goto out;
 
@@ -576,7 +610,8 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 /*
  * Must be called with tasklist_lock held for read.
  */
-static void __out_of_memory(gfp_t gfp_mask, int order)
+static void __out_of_memory(gfp_t gfp_mask, int order,
+                       enum oom_constraint constraint, const nodemask_t *mask)
 {
        struct task_struct *p;
        unsigned long points;
@@ -590,7 +625,7 @@ retry:
         * Rambo mode: Shoot down a process and hope it solves whatever
         * issues we may have.
         */
-       p = select_bad_process(&points, NULL);
+       p = select_bad_process(&points, NULL, constraint, mask);
 
        if (PTR_ERR(p) == -1UL)
                return;
@@ -624,7 +659,8 @@ void pagefault_out_of_memory(void)
                panic("out of memory from page fault. panic_on_oom is selected.\n");
 
        read_lock(&tasklist_lock);
-       __out_of_memory(0, 0); /* unknown gfp_mask and order */
+       /* unknown gfp_mask and order */
+       __out_of_memory(0, 0, CONSTRAINT_NONE, NULL);
        read_unlock(&tasklist_lock);
 
        /*
@@ -640,6 +676,7 @@ void pagefault_out_of_memory(void)
  * @zonelist: zonelist pointer
  * @gfp_mask: memory allocation flags
  * @order: amount of memory being requested as a power of 2
+ * @nodemask: nodemask passed to page allocator
  *
  * If we run out of memory, we have the choice between either
  * killing a random task (bad), letting the system crash (worse)
@@ -678,24 +715,19 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
         */
        constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
        read_lock(&tasklist_lock);
-
-       switch (constraint) {
-       case CONSTRAINT_MEMORY_POLICY:
-               oom_kill_process(current, gfp_mask, order, 0, NULL,
-                               "No available memory (MPOL_BIND)");
-               break;
-
-       case CONSTRAINT_NONE:
-               if (sysctl_panic_on_oom) {
+       if (unlikely(sysctl_panic_on_oom)) {
+               /*
+                * panic_on_oom only affects CONSTRAINT_NONE, the kernel
+                * should not panic for cpuset or mempolicy induced memory
+                * failures.
+                */
+               if (constraint == CONSTRAINT_NONE) {
                        dump_header(NULL, gfp_mask, order, NULL);
-                       panic("out of memory. panic_on_oom is selected\n");
+                       read_unlock(&tasklist_lock);
+                       panic("Out of memory: panic_on_oom is enabled\n");
                }
-               /* Fall-through */
-       case CONSTRAINT_CPUSET:
-               __out_of_memory(gfp_mask, order);
-               break;
        }
-
+       __out_of_memory(gfp_mask, order, constraint, nodemask);
        read_unlock(&tasklist_lock);
 
        /*