UBUNTU: Ubuntu-4.15.0-96.97

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ac2ffd5e02b914fb9564649c9475babc51119de6..1a34cd1d17a3c65edd42f35b3f9de0b240f64873 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -233,6 +233,12 @@ enum res_type {
 /* Used for OOM notifier */
 #define OOM_CONTROL            (0)
 
+static inline bool should_force_charge(void)
+{
+       return tsk_is_oom_victim(current) || fatal_signal_pending(current) ||
+               (current->flags & PF_EXITING);
+}
+
 /* Some nice accessors for the vmpressure. */
 struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
 {
@@ -725,7 +731,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
                        if (unlikely(!memcg))
                                memcg = root_mem_cgroup;
                }
-       } while (!css_tryget_online(&memcg->css));
+       } while (!css_tryget(&memcg->css));
        rcu_read_unlock();
        return memcg;
 }
@@ -871,26 +877,45 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
                css_put(&prev->css);
 }
 
-static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+static void __invalidate_reclaim_iterators(struct mem_cgroup *from,
+                                       struct mem_cgroup *dead_memcg)
 {
-       struct mem_cgroup *memcg = dead_memcg;
        struct mem_cgroup_reclaim_iter *iter;
        struct mem_cgroup_per_node *mz;
        int nid;
        int i;
 
-       while ((memcg = parent_mem_cgroup(memcg))) {
-               for_each_node(nid) {
-                       mz = mem_cgroup_nodeinfo(memcg, nid);
-                       for (i = 0; i <= DEF_PRIORITY; i++) {
-                               iter = &mz->iter[i];
-                               cmpxchg(&iter->position,
-                                       dead_memcg, NULL);
-                       }
+       for_each_node(nid) {
+               mz = mem_cgroup_nodeinfo(from, nid);
+               for (i = 0; i <= DEF_PRIORITY; i++) {
+                       iter = &mz->iter[i];
+                       cmpxchg(&iter->position,
+                               dead_memcg, NULL);
                }
        }
 }
 
+static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+{
+       struct mem_cgroup *memcg = dead_memcg;
+       struct mem_cgroup *last;
+
+       do {
+               __invalidate_reclaim_iterators(memcg, dead_memcg);
+               last = memcg;
+       } while ((memcg = parent_mem_cgroup(memcg)));
+
+       /*
+        * When cgroup1 non-hierarchical mode is used,
+        * parent_mem_cgroup() does not walk all the way up to the
+        * cgroup root (root_mem_cgroup). So we have to handle
+        * dead_memcg from cgroup root separately.
+        */
+       if (last != root_mem_cgroup)
+               __invalidate_reclaim_iterators(root_mem_cgroup,
+                                               dead_memcg);
+}
+
 /*
  * Iteration constructs for visiting all cgroups (under a tree).  If
  * loops are exited prematurely (break), mem_cgroup_iter_break() must
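
Taken together, the rewritten walk starts at dead_memcg itself and only then climbs through the parents, with the root handled as an explicit extra step. A minimal userspace sketch of that shape; the toy_cgroup type and the invalidate_one() helper are illustrative only, not kernel API:

    #include <stdio.h>

    /* Toy model: a group with a parent pointer; the root has parent == NULL. */
    struct toy_cgroup {
        const char *name;
        struct toy_cgroup *parent;
    };

    static struct toy_cgroup toy_root = { "root", NULL };

    /* Stand-in for __invalidate_reclaim_iterators(): per-group cleanup. */
    static void invalidate_one(struct toy_cgroup *cg, struct toy_cgroup *dead)
    {
        printf("clear iterators in %s pointing at %s\n", cg->name, dead->name);
    }

    static void invalidate_all(struct toy_cgroup *dead)
    {
        struct toy_cgroup *cg = dead;
        struct toy_cgroup *last;

        /* Start at the dead group itself, then walk up through the parents. */
        do {
            invalidate_one(cg, dead);
            last = cg;
        } while ((cg = cg->parent));

        /*
         * In cgroup1 non-hierarchical mode the walk can stop before the
         * root, so the root is handled as an explicit extra pass.
         */
        if (last != &toy_root)
            invalidate_one(&toy_root, dead);
    }

    int main(void)
    {
        struct toy_cgroup orphan = { "A", NULL }; /* not wired to toy_root */
        invalidate_all(&orphan);
        return 0;
    }
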
@@ -1251,8 +1276,13 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
        };
        bool ret;
 
-       mutex_lock(&oom_lock);
-       ret = out_of_memory(&oc);
+       if (mutex_lock_killable(&oom_lock))
+               return true;
+       /*
+        * A few threads that were not waiting at mutex_lock_killable() can
+        * fail to bail out there. Therefore, re-check the bail-out condition
+        * after acquiring oom_lock.
+        */
+       ret = should_force_charge() || out_of_memory(&oc);
        mutex_unlock(&oom_lock);
        return ret;
 }
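
The comment above captures a general locking pattern: a bail-out condition checked before sleeping on a lock can become true while the thread is blocked, so it must be re-checked once the lock is held. A small pthread sketch of the same pattern, assuming illustrative names (stop_requested, do_expensive_work):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;
    static atomic_bool stop_requested;

    /* Stand-in for out_of_memory(): the expensive work the lock serializes. */
    static bool do_expensive_work(void) { return true; }

    static bool try_work(void)
    {
        if (pthread_mutex_lock(&work_lock) != 0)
            return true;    /* treat lock failure like a bail-out */

        /*
         * Another thread may have set stop_requested while we slept
         * waiting for work_lock, so the flag must be tested again under
         * the lock, not only before taking it.
         */
        bool ret = atomic_load(&stop_requested) || do_expensive_work();
        pthread_mutex_unlock(&work_lock);
        return ret;
    }
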
@@ -1936,15 +1966,22 @@ retry:
                goto retry;
        }
 
+       /*
+        * Memcg doesn't have a dedicated reserve for atomic
+        * allocations. But like the global atomic pool, we need to
+        * put the burden of reclaim on regular allocation requests
+        * and let these go through as privileged allocations.
+        */
+       if (gfp_mask & __GFP_ATOMIC)
+               goto force;
+
        /*
         * Unlike in global OOM situations, memcg is not in a physical
         * memory shortage.  Allow dying and OOM-killed tasks to
         * bypass the last charges so that they can exit quickly and
         * free their memory.
         */
-       if (unlikely(tsk_is_oom_victim(current) ||
-                    fatal_signal_pending(current) ||
-                    current->flags & PF_EXITING))
+       if (unlikely(should_force_charge()))
                goto force;
 
        /*
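
A compressed model of the resulting charge policy: regular requests pay for reclaim, while atomic requests and dying tasks may overrun the limit. The sketch below is a userspace approximation; FLAG_ATOMIC and caller_is_dying() are stand-ins for __GFP_ATOMIC and should_force_charge(), and struct counter is a toy, not page_counter:

    #include <stdatomic.h>
    #include <stdbool.h>

    #define FLAG_ATOMIC (1u << 0)        /* stand-in for __GFP_ATOMIC */

    struct counter {
        _Atomic long usage;
        long limit;
    };

    /* Stand-in for should_force_charge(): is the caller dying/exiting? */
    static bool caller_is_dying(void) { return false; }

    static bool try_charge(struct counter *c, unsigned int flags, long nr)
    {
        long usage = atomic_fetch_add(&c->usage, nr) + nr;

        if (usage <= c->limit)
            return true;                 /* within limit: charge succeeds */

        /*
         * Over limit.  Atomic requests cannot reclaim, and dying tasks
         * should exit quickly, so both are allowed through; the reclaim
         * burden stays on regular requests.
         */
        if ((flags & FLAG_ATOMIC) || caller_is_dying())
            return true;                 /* forced charge, above the limit */

        atomic_fetch_sub(&c->usage, nr); /* undo; caller reclaims and retries */
        return false;
    }
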
@@ -2205,7 +2242,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
 {
        struct memcg_kmem_cache_create_work *cw;
 
-       cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
+       cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
        if (!cw)
                return;
 
@@ -2333,6 +2370,16 @@ int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
            !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) {
+
+               /*
+                * Enforce __GFP_NOFAIL allocation because callers are not
+                * prepared to see failures and likely do not have any failure
+                * handling code.
+                */
+               if (gfp & __GFP_NOFAIL) {
+                       page_counter_charge(&memcg->kmem, nr_pages);
+                       return 0;
+               }
                cancel_charge(memcg, nr_pages);
                return -ENOMEM;
        }
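
The same idea in isolation: when a caller has declared it cannot handle failure, the counter is charged past its limit instead of returning an error. A minimal single-threaded sketch with a toy limit_counter type; nofail plays the role of __GFP_NOFAIL:

    #include <stdbool.h>

    struct limit_counter {
        long usage;
        long limit;
    };

    static bool counter_try_charge(struct limit_counter *c, long nr)
    {
        if (c->usage + nr > c->limit)
            return false;
        c->usage += nr;
        return true;
    }

    /* Unconditional variant, mirroring page_counter_charge(). */
    static void counter_force_charge(struct limit_counter *c, long nr)
    {
        c->usage += nr;                  /* may push usage above the limit */
    }

    static int charge(struct limit_counter *c, long nr, bool nofail)
    {
        if (counter_try_charge(c, nr))
            return 0;
        if (nofail) {
            /* Caller cannot handle failure: overrun the limit instead. */
            counter_force_charge(c, nr);
            return 0;
        }
        return -1;                       /* -ENOMEM analogue */
    }
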
@@ -2355,7 +2402,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
        struct mem_cgroup *memcg;
        int ret = 0;
 
-       if (memcg_kmem_bypass())
+       if (mem_cgroup_disabled() || memcg_kmem_bypass())
                return 0;
 
        memcg = get_mem_cgroup_from_mm(current->mm);
@@ -4110,6 +4157,14 @@ static struct cftype mem_cgroup_legacy_files[] = {
 
 static DEFINE_IDR(mem_cgroup_idr);
 
+static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
+{
+       if (memcg->id.id > 0) {
+               idr_remove(&mem_cgroup_idr, memcg->id.id);
+               memcg->id.id = 0;
+       }
+}
+
 static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
        VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
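
The guard on id.id > 0 together with zeroing the field makes mem_cgroup_id_remove() idempotent, which is what lets the later hunks call it from several overlapping error paths. A tiny sketch of that release shape, with a hypothetical array registry standing in for mem_cgroup_idr:

    #include <stddef.h>

    #define MAX_IDS 64
    static void *registry[MAX_IDS];      /* hypothetical stand-in for an IDR */

    struct object {
        int id;                          /* 0 means "not registered" */
    };

    static void object_id_remove(struct object *obj)
    {
        /*
         * Guard + reset make this safe to call twice, e.g. from an
         * allocation error path and again from the final teardown: the
         * second call sees id == 0 and does nothing.
         */
        if (obj->id > 0) {
            registry[obj->id] = NULL;
            obj->id = 0;
        }
    }
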
@@ -4120,8 +4175,7 @@ static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
        VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
        if (atomic_sub_and_test(n, &memcg->id.ref)) {
-               idr_remove(&mem_cgroup_idr, memcg->id.id);
-               memcg->id.id = 0;
+               mem_cgroup_id_remove(memcg);
 
                /* Memcg ID pins CSS */
                css_put(&memcg->css);
@@ -4187,6 +4241,9 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
        struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
+       if (!pn)
+               return;
+
        free_percpu(pn->lruvec_stat);
        kfree(pn);
 }
@@ -4255,8 +4312,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
        idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
        return memcg;
 fail:
-       if (memcg->id.id > 0)
-               idr_remove(&mem_cgroup_idr, memcg->id.id);
+       mem_cgroup_id_remove(memcg);
        __mem_cgroup_free(memcg);
        return NULL;
 }
@@ -4315,6 +4371,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 
        return &memcg->css;
 fail:
+       mem_cgroup_id_remove(memcg);
        mem_cgroup_free(memcg);
        return ERR_PTR(-ENOMEM);
 }
@@ -5828,6 +5885,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
        if (!mem_cgroup_sockets_enabled)
                return;
 
+       /*
+        * Socket cloning can throw us here with sk_memcg already
+        * filled. It won't, however, necessarily happen from
+        * process context. So the test for root memcg given
+        * the current task's memcg won't help us in this case.
+        *
+        * Respecting the original socket's memcg is a better
+        * decision in this case.
+        */
+       if (sk->sk_memcg) {
+               css_get(&sk->sk_memcg->css);
+               return;
+       }
+
        rcu_read_lock();
        memcg = mem_cgroup_from_task(current);
        if (memcg == root_mem_cgroup)
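
The early return added above illustrates a broader rule for cloned state: pin the reference the clone inherited rather than rederiving it from the current task, which may be unrelated to the socket. A toy sketch of that choice; struct toy_sock, struct group, and current_task_group() are illustrative stand-ins for struct sock, the memcg css, and mem_cgroup_from_task(current):

    #include <stdatomic.h>
    #include <stddef.h>

    struct group {
        _Atomic int refcnt;
    };

    struct toy_sock {
        struct group *memcg;             /* stand-in for sk->sk_memcg */
    };

    static void group_get(struct group *g) { atomic_fetch_add(&g->refcnt, 1); }

    /* Stand-in for mem_cgroup_from_task(current); trivially NULL here. */
    static struct group *current_task_group(void) { return NULL; }

    static void sock_group_alloc(struct toy_sock *sk)
    {
        /*
         * A cloned socket arrives here with memcg already copied from
         * its parent, possibly from a context unrelated to the current
         * task.  Pin the inherited group instead of consulting the task.
         */
        if (sk->memcg) {
            group_get(sk->memcg);
            return;
        }

        sk->memcg = current_task_group();
        if (sk->memcg)
            group_get(sk->memcg);
    }
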