memcg: punt high overage reclaim to return-to-userland path

author Tejun Heo <tj@kernel.org>
Fri, 6 Nov 2015 02:46:11 +0000 (18:46 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Nov 2015 03:34:48 +0000 (19:34 -0800)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index 56174c7199ee530e2ded7e6b7f7afd0396190aea..77bf429662000b43c8ba727a793d7faf53e28455 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -401,6 +401,8 @@ static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
         return inactive * inactive_ratio < active;
  }
  
+void mem_cgroup_handle_over_high(void);
+
  void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                 struct task_struct *p);
  
@@ -620,6 +622,10 @@ static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
  {
  }
  
+static inline void mem_cgroup_handle_over_high(void)
+{
+}
+
  static inline void mem_cgroup_oom_enable(void)
  {
  }
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 17bf8b845aa0f457b30f2033c5b88f3a93aea999..055f2ee3b0f0587c16f7bbf42b8834f84a4a6917 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1809,6 +1809,9 @@ struct task_struct {
         struct mem_cgroup *memcg_in_oom;
         gfp_t memcg_oom_gfp_mask;
         int memcg_oom_order;
+
+       /* number of pages to reclaim on returning to userland */
+       unsigned int memcg_nr_pages_over_high;
  #endif
  #ifdef CONFIG_UPROBES
         struct uprobe_task *utask;
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h

index 84d497297c5f44af0bcc1ace0a0584092d77ab92..26c152122a424dc337764357db5bb297a40074fd 100644 (file)
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -50,6 +50,7 @@
  #include <linux/ptrace.h>
  #include <linux/security.h>
  #include <linux/task_work.h>
+#include <linux/memcontrol.h>
  struct linux_binprm;
  
  /*
@@ -188,6 +189,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
         smp_mb__after_atomic();
         if (unlikely(current->task_works))
                 task_work_run();
+
+       mem_cgroup_handle_over_high();
  }
  
  #endif /* <linux/tracehook.h> */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 47bd7f13f526a7deb4a1a8e1c91f4d3edc7cd2de..327dcda3ebf681d30c3255b8d48f2352dc85bdca 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -62,6 +62,7 @@
  #include <linux/oom.h>
  #include <linux/lockdep.h>
  #include <linux/file.h>
+#include <linux/tracehook.h>
  #include "internal.h"
  #include <net/sock.h>
  #include <net/ip.h>
@@ -1972,6 +1973,31 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
         return NOTIFY_OK;
  }
  
+/*
+ * Scheduled by try_charge() to be executed from the userland return path
+ * and reclaims memory over the high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+       unsigned int nr_pages = current->memcg_nr_pages_over_high;
+       struct mem_cgroup *memcg, *pos;
+
+       if (likely(!nr_pages))
+               return;
+
+       pos = memcg = get_mem_cgroup_from_mm(current->mm);
+
+       do {
+               if (page_counter_read(&pos->memory) <= pos->high)
+                       continue;
+               mem_cgroup_events(pos, MEMCG_HIGH, 1);
+               try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
+       } while ((pos = parent_mem_cgroup(pos)));
+
+       css_put(&memcg->css);
+       current->memcg_nr_pages_over_high = 0;
+}
+
  static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
                       unsigned int nr_pages)
  {
@@ -2080,17 +2106,22 @@ done_restock:
         css_get_many(&memcg->css, batch);
         if (batch > nr_pages)
                 refill_stock(memcg, batch - nr_pages);
-       if (!(gfp_mask & __GFP_WAIT))
-               goto done;
+
         /*
-        * If the hierarchy is above the normal consumption range,
-        * make the charging task trim their excess contribution.
+        * If the hierarchy is above the normal consumption range, schedule
+        * reclaim on returning to userland.  We can perform reclaim here
+        * if __GFP_WAIT but let's always punt for simplicity and so that
+        * GFP_KERNEL can consistently be used during reclaim.  @memcg is
+        * not recorded as it most likely matches current's and won't
+        * change in the meantime.  As high limit is checked again before
+        * reclaim, the cost of mismatch is negligible.
          */
         do {
-               if (page_counter_read(&memcg->memory) <= memcg->high)
-                       continue;
-               mem_cgroup_events(memcg, MEMCG_HIGH, 1);
-               try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+               if (page_counter_read(&memcg->memory) > memcg->high) {
+                       current->memcg_nr_pages_over_high += nr_pages;
+                       set_notify_resume(current);
+                       break;
+               }
         } while ((memcg = parent_mem_cgroup(memcg)));
  done:
         return ret;
author	Tejun Heo <tj@kernel.org>
	Fri, 6 Nov 2015 02:46:11 +0000 (18:46 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 6 Nov 2015 03:34:48 +0000 (19:34 -0800)
include/linux/memcontrol.h		patch | blob | blame | history
include/linux/sched.h		patch | blob | blame | history
include/linux/tracehook.h		patch | blob | blame | history
mm/memcontrol.c		patch | blob | blame | history