memcg: add mem_cgroup_zone_nr_pages()

[mirror_ubuntu-zesty-kernel.git] / mm / memcontrol.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index b837900830871ad19ccf714c62b6f49756c36a85..313247e6c503db056535df40ed8c520272bc389a 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -156,6 +156,9 @@ struct mem_cgroup {
         unsigned long   last_oom_jiffies;
         int             obsolete;
         atomic_t        refcnt;
+
+       unsigned int inactive_ratio;
+
         /*
          * statistics. This must be placed at the end of memcg.
          */
@@ -183,7 +186,6 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
         0, /* FORCE */
  };
  
-
  /* for encoding cft->private value on file */
  #define _MEM                   (0)
  #define _MEMSWAP               (1)
@@ -231,6 +233,9 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
         int nid = page_cgroup_nid(pc);
         int zid = page_cgroup_zid(pc);
  
+       if (!mem)
+               return NULL;
+
         return mem_cgroup_zoneinfo(mem, nid, zid);
  }
  
@@ -428,6 +433,31 @@ long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
         return (nr_pages >> priority);
  }
  
+/*
+ * Report whether the memcg's inactive anon LRU is small relative to its
+ * active anon LRU: returns 1 if inactive * inactive_ratio < active, else 0.
+ * Both counts come from mem_cgroup_get_all_zonestat(); @zone is not read.
+ */
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+{
+       unsigned long nr_active, nr_inactive;
+
+       nr_inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON);
+       nr_active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON);
+       return nr_inactive * memcg->inactive_ratio < nr_active;
+}
+
+/* Return the MEM_CGROUP_ZSTAT count for @lru from @memcg's info on @zone. */
+unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
+                                      struct zone *zone, enum lru_list lru)
+{
+       struct mem_cgroup_per_zone *mz;
+
+       mz = mem_cgroup_zoneinfo(memcg, zone->zone_pgdat->node_id,
+                                zone_idx(zone));
+       return MEM_CGROUP_ZSTAT(mz, lru);
+}
+
  unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                         struct list_head *dst,
                                         unsigned long *scanned, int order,
@@ -571,6 +601,18 @@ done:
         return ret;
  }
  
+/*
+ * True when @mem is under its res limit and, if do_swap_account is set,
+ * also under its memsw limit.
+ */
+static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
+{
+       if (!res_counter_check_under_limit(&mem->res))
+               return false;
+
+       return !do_swap_account || res_counter_check_under_limit(&mem->memsw);
+}
+
  /*
   * Dance down the hierarchy if needed to reclaim memory. We remember the
   * last child we reclaimed from, so that we don't end up penalizing
@@ -592,8 +634,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
          * have left.
          */
         ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap);
-       if (res_counter_check_under_limit(&root_mem->res))
+       if (mem_cgroup_check_under_limit(root_mem))
                 return 0;
+       if (!root_mem->use_hierarchy)
+               return ret;
  
         next_mem = mem_cgroup_get_first_node(root_mem);
  
@@ -606,7 +650,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                         continue;
                 }
                 ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap);
-               if (res_counter_check_under_limit(&root_mem->res))
+               if (mem_cgroup_check_under_limit(root_mem))
                         return 0;
                 cgroup_lock();
                 next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
@@ -709,12 +753,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                  * current usage of the cgroup before giving up
                  *
                  */
-               if (do_swap_account) {
-                       if (res_counter_check_under_limit(&mem_over_limit->res) &&
-                           res_counter_check_under_limit(&mem_over_limit->memsw))
-                               continue;
-               } else if (res_counter_check_under_limit(&mem_over_limit->res))
-                               continue;
+               if (mem_cgroup_check_under_limit(mem_over_limit))
+                       continue;
  
                 if (!nr_retries--) {
                         if (oom) {
@@ -1163,7 +1203,9 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
         mz = page_cgroup_zoneinfo(pc);
         unlock_page_cgroup(pc);
  
-       css_put(&mem->css);
+       /* at swapout, this memcg will be accessed to record to swap */
+       if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
+               css_put(&mem->css);
  
         return mem;
  
@@ -1204,6 +1246,8 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
                 swap_cgroup_record(ent, memcg);
                 mem_cgroup_get(memcg);
         }
+       if (memcg)
+               css_put(&memcg->css);
  }
  
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -1334,7 +1378,7 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
  
         do {
                 progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true);
-               progress += res_counter_check_under_limit(&mem->res);
+               progress += mem_cgroup_check_under_limit(mem);
         } while (!progress && --retry);
  
         css_put(&mem->css);
@@ -1343,6 +1387,29 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
         return 0;
  }
  
+/*
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * This calculation is a straightforward port of
+ * page_alloc.c::setup_per_zone_inactive_ratio(); see that function for a
+ * more detailed description.
+ */
+static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg)
+{
+       /* Keep 64 bits: limit >> 30 can exceed what unsigned int holds. */
+       unsigned long long gb;
+
+       gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30;
+       /* Clamp so 10 * gb cannot wrap int_sqrt()'s unsigned long argument. */
+       if (gb > ~0UL / 10)
+               gb = ~0UL / 10;
+
+       /* When limit >> 30 is zero, fall back to a ratio of 1. */
+       memcg->inactive_ratio = gb ? int_sqrt(10 * gb) : 1;
+}
+
  static DEFINE_MUTEX(set_limit_mutex);
  
  static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
@@ -1381,6 +1448,10 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                                 GFP_KERNEL, false);
                 if (!progress)                  retry_count--;
         }
+
+       if (!ret)
+               mem_cgroup_set_inactive_ratio(memcg);
+
         return ret;
  }
  
@@ -1965,7 +2036,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                 res_counter_init(&mem->res, NULL);
                 res_counter_init(&mem->memsw, NULL);
         }
-
+       mem_cgroup_set_inactive_ratio(mem);
         mem->last_scanned_child = NULL;
  
         return &mem->css;