]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - mm/memcontrol.c
memcg: add mem_cgroup_zone_nr_pages()
[mirror_ubuntu-zesty-kernel.git] / mm / memcontrol.c
index b837900830871ad19ccf714c62b6f49756c36a85..313247e6c503db056535df40ed8c520272bc389a 100644 (file)
@@ -156,6 +156,9 @@ struct mem_cgroup {
        unsigned long   last_oom_jiffies;
        int             obsolete;
        atomic_t        refcnt;
+
+       unsigned int inactive_ratio;
+
        /*
         * statistics. This must be placed at the end of memcg.
         */
@@ -183,7 +186,6 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
        0, /* FORCE */
 };
 
-
 /* for encoding cft->private value on file */
 #define _MEM                   (0)
 #define _MEMSWAP               (1)
@@ -231,6 +233,9 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
        int nid = page_cgroup_nid(pc);
        int zid = page_cgroup_zid(pc);
 
+       if (!mem)
+               return NULL;
+
        return mem_cgroup_zoneinfo(mem, nid, zid);
 }
 
@@ -428,6 +433,31 @@ long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
        return (nr_pages >> priority);
 }
 
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+{
+       unsigned long active;
+       unsigned long inactive;
+       /* Totals come from mem_cgroup_get_all_zonestat(); @zone is not used here. */
+       inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON);
+       active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON);
+       /* Low means inactive scaled by memcg->inactive_ratio is below active. */
+       if (inactive * memcg->inactive_ratio < active)
+               return 1;
+       /* Otherwise report that inactive anon is not low. */
+       return 0;
+}
+
+unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
+                                      struct zone *zone,
+                                      enum lru_list lru)
+{
+       int nid = zone->zone_pgdat->node_id;    /* node id of the zone's pgdat */
+       int zid = zone_idx(zone);               /* zone index from zone_idx() */
+       struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+       /* NOTE(review): mz is used without a NULL check - confirm it cannot be NULL. */
+       return MEM_CGROUP_ZSTAT(mz, lru);       /* per-zone counter for @lru */
+}
+
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                        struct list_head *dst,
                                        unsigned long *scanned, int order,
@@ -571,6 +601,18 @@ done:
        return ret;
 }
 
+static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
+{       /* true only if every counter consulted below passes its check */
+       if (do_swap_account) {  /* both mem->res and mem->memsw must pass */
+               if (res_counter_check_under_limit(&mem->res) &&
+                       res_counter_check_under_limit(&mem->memsw))
+                       return true;
+       } else  /* without do_swap_account only mem->res is consulted */
+               if (res_counter_check_under_limit(&mem->res))
+                       return true;
+       return false;   /* a consulted counter did not pass */
+}
+
 /*
  * Dance down the hierarchy if needed to reclaim memory. We remember the
  * last child we reclaimed from, so that we don't end up penalizing
@@ -592,8 +634,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
         * have left.
         */
        ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap);
-       if (res_counter_check_under_limit(&root_mem->res))
+       if (mem_cgroup_check_under_limit(root_mem))
                return 0;
+       if (!root_mem->use_hierarchy)
+               return ret;
 
        next_mem = mem_cgroup_get_first_node(root_mem);
 
@@ -606,7 +650,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                        continue;
                }
                ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap);
-               if (res_counter_check_under_limit(&root_mem->res))
+               if (mem_cgroup_check_under_limit(root_mem))
                        return 0;
                cgroup_lock();
                next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
@@ -709,12 +753,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                 * current usage of the cgroup before giving up
                 *
                 */
-               if (do_swap_account) {
-                       if (res_counter_check_under_limit(&mem_over_limit->res) &&
-                           res_counter_check_under_limit(&mem_over_limit->memsw))
-                               continue;
-               } else if (res_counter_check_under_limit(&mem_over_limit->res))
-                               continue;
+               if (mem_cgroup_check_under_limit(mem_over_limit))
+                       continue;
 
                if (!nr_retries--) {
                        if (oom) {
@@ -1163,7 +1203,9 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        mz = page_cgroup_zoneinfo(pc);
        unlock_page_cgroup(pc);
 
-       css_put(&mem->css);
+       /* at swapout, this memcg will be accessed to record to swap */
+       if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
+               css_put(&mem->css);
 
        return mem;
 
@@ -1204,6 +1246,8 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
                swap_cgroup_record(ent, memcg);
                mem_cgroup_get(memcg);
        }
+       if (memcg)
+               css_put(&memcg->css);
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -1334,7 +1378,7 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 
        do {
                progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true);
-               progress += res_counter_check_under_limit(&mem->res);
+               progress += mem_cgroup_check_under_limit(mem);
        } while (!progress && --retry);
 
        css_put(&mem->css);
@@ -1343,6 +1387,29 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
        return 0;
 }
 
+/*
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * This calculation is a straightforward port of
+ * page_alloc.c::setup_per_zone_inactive_ratio(); see that function
+ * for a more detailed description.
+ */
+static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg)
+{
+       unsigned int gb, ratio;
+       /* RES_LIMIT of memcg->res shifted right by 30 (presumably bytes to GiB). */
+       gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30;
+       if (gb)
+               ratio = int_sqrt(10 * gb);
+       else
+               ratio = 1;      /* floor of 1 when gb is zero */
+       /* Stored on the memcg; mem_cgroup_inactive_anon_is_low() reads it. */
+       memcg->inactive_ratio = ratio;
+       /* NOTE(review): gb is unsigned int - confirm gb and 10 * gb cannot overflow. */
+}
+
 static DEFINE_MUTEX(set_limit_mutex);
 
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
@@ -1381,6 +1448,10 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                                GFP_KERNEL, false);
                if (!progress)                  retry_count--;
        }
+
+       if (!ret)
+               mem_cgroup_set_inactive_ratio(memcg);
+
        return ret;
 }
 
@@ -1965,7 +2036,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                res_counter_init(&mem->res, NULL);
                res_counter_init(&mem->memsw, NULL);
        }
-
+       mem_cgroup_set_inactive_ratio(mem);
        mem->last_scanned_child = NULL;
 
        return &mem->css;