]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blobdiff - mm/memcontrol.c
mm: memcontrol: dump memory.stat during cgroup OOM
[mirror_ubuntu-hirsute-kernel.git] / mm / memcontrol.c
index ba9138a4a1de37f744eaf488fc0930b82f60b3e1..6de79ec3cd214054068af30ac17f98439a761b3f 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/lockdep.h>
 #include <linux/file.h>
 #include <linux/tracehook.h>
+#include <linux/seq_buf.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -1356,27 +1357,114 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
        return false;
 }
 
-static const unsigned int memcg1_stats[] = {
-       MEMCG_CACHE,
-       MEMCG_RSS,
-       MEMCG_RSS_HUGE,
-       NR_SHMEM,
-       NR_FILE_MAPPED,
-       NR_FILE_DIRTY,
-       NR_WRITEBACK,
-       MEMCG_SWAP,
-};
+/*
+ * memory_stat_format - format the memory.stat contents for @memcg
+ *
+ * Returns a kmalloc()-allocated buffer holding the statistics text;
+ * the caller must kfree() it.  Returns NULL if the one-page buffer
+ * allocation fails.
+ */
+static char *memory_stat_format(struct mem_cgroup *memcg)
+{
+       struct seq_buf s;
+       int i;
 
-static const char *const memcg1_stat_names[] = {
-       "cache",
-       "rss",
-       "rss_huge",
-       "shmem",
-       "mapped_file",
-       "dirty",
-       "writeback",
-       "swap",
-};
+       seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
+       if (!s.buffer)
+               return NULL;
+
+       /*
+        * Provide statistics on the state of the memory subsystem as
+        * well as cumulative event counters that show past behavior.
+        *
+        * This list is ordered following a combination of these gradients:
+        * 1) generic big picture -> specifics and details
+        * 2) reflecting userspace activity -> reflecting kernel heuristics
+        *
+        * Current memory state:
+        */
+
+       seq_buf_printf(&s, "anon %llu\n",
+                      (u64)memcg_page_state(memcg, MEMCG_RSS) *
+                      PAGE_SIZE);
+       seq_buf_printf(&s, "file %llu\n",
+                      (u64)memcg_page_state(memcg, MEMCG_CACHE) *
+                      PAGE_SIZE);
+       /* kernel_stack is tracked in KiB, not pages — hence * 1024 */
+       seq_buf_printf(&s, "kernel_stack %llu\n",
+                      (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) *
+                      1024);
+       seq_buf_printf(&s, "slab %llu\n",
+                      (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) +
+                            memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE)) *
+                      PAGE_SIZE);
+       seq_buf_printf(&s, "sock %llu\n",
+                      (u64)memcg_page_state(memcg, MEMCG_SOCK) *
+                      PAGE_SIZE);
+
+       seq_buf_printf(&s, "shmem %llu\n",
+                      (u64)memcg_page_state(memcg, NR_SHMEM) *
+                      PAGE_SIZE);
+       seq_buf_printf(&s, "file_mapped %llu\n",
+                      (u64)memcg_page_state(memcg, NR_FILE_MAPPED) *
+                      PAGE_SIZE);
+       seq_buf_printf(&s, "file_dirty %llu\n",
+                      (u64)memcg_page_state(memcg, NR_FILE_DIRTY) *
+                      PAGE_SIZE);
+       seq_buf_printf(&s, "file_writeback %llu\n",
+                      (u64)memcg_page_state(memcg, NR_WRITEBACK) *
+                      PAGE_SIZE);
+
+       /*
+        * TODO: We should eventually replace our own MEMCG_RSS_HUGE counter
+        * with the NR_ANON_THP vm counter, but right now it's a pain in the
+        * arse because it requires migrating the work out of rmap to a place
+        * where the page->mem_cgroup is set up and stable.
+        */
+       seq_buf_printf(&s, "anon_thp %llu\n",
+                      (u64)memcg_page_state(memcg, MEMCG_RSS_HUGE) *
+                      PAGE_SIZE);
+
+       for (i = 0; i < NR_LRU_LISTS; i++)
+               seq_buf_printf(&s, "%s %llu\n", mem_cgroup_lru_names[i],
+                              (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
+                              PAGE_SIZE);
+
+       seq_buf_printf(&s, "slab_reclaimable %llu\n",
+                      (u64)memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) *
+                      PAGE_SIZE);
+       seq_buf_printf(&s, "slab_unreclaimable %llu\n",
+                      (u64)memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE) *
+                      PAGE_SIZE);
+
+       /* Accumulated memory events */
+
+       seq_buf_printf(&s, "pgfault %lu\n", memcg_events(memcg, PGFAULT));
+       seq_buf_printf(&s, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT));
+
+       seq_buf_printf(&s, "workingset_refault %lu\n",
+                      memcg_page_state(memcg, WORKINGSET_REFAULT));
+       seq_buf_printf(&s, "workingset_activate %lu\n",
+                      memcg_page_state(memcg, WORKINGSET_ACTIVATE));
+       seq_buf_printf(&s, "workingset_nodereclaim %lu\n",
+                      memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
+
+       seq_buf_printf(&s, "pgrefill %lu\n", memcg_events(memcg, PGREFILL));
+       seq_buf_printf(&s, "pgscan %lu\n",
+                      memcg_events(memcg, PGSCAN_KSWAPD) +
+                      memcg_events(memcg, PGSCAN_DIRECT));
+       seq_buf_printf(&s, "pgsteal %lu\n",
+                      memcg_events(memcg, PGSTEAL_KSWAPD) +
+                      memcg_events(memcg, PGSTEAL_DIRECT));
+       seq_buf_printf(&s, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE));
+       seq_buf_printf(&s, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE));
+       seq_buf_printf(&s, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE));
+       seq_buf_printf(&s, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED));
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       seq_buf_printf(&s, "thp_fault_alloc %lu\n",
+                      memcg_events(memcg, THP_FAULT_ALLOC));
+       seq_buf_printf(&s, "thp_collapse_alloc %lu\n",
+                      memcg_events(memcg, THP_COLLAPSE_ALLOC));
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+       /* The above should easily fit into one page */
+       WARN_ON_ONCE(seq_buf_has_overflowed(&s));
+
+       return s.buffer;
+}
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /**
@@ -1411,39 +1499,32 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *
  */
 void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 {
-       struct mem_cgroup *iter;
-       unsigned int i;
+       char *buf;
 
        pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n",
                K((u64)page_counter_read(&memcg->memory)),
                K((u64)memcg->memory.max), memcg->memory.failcnt);
-       pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n",
-               K((u64)page_counter_read(&memcg->memsw)),
-               K((u64)memcg->memsw.max), memcg->memsw.failcnt);
-       pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n",
-               K((u64)page_counter_read(&memcg->kmem)),
-               K((u64)memcg->kmem.max), memcg->kmem.failcnt);
-
-       for_each_mem_cgroup_tree(iter, memcg) {
-               pr_info("Memory cgroup stats for ");
-               pr_cont_cgroup_path(iter->css.cgroup);
-               pr_cont(":");
-
-               for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
-                       if (memcg1_stats[i] == MEMCG_SWAP && !do_swap_account)
-                               continue;
-                       pr_cont(" %s:%luKB", memcg1_stat_names[i],
-                               K(memcg_page_state_local(iter,
-                                                        memcg1_stats[i])));
-               }
-
-               for (i = 0; i < NR_LRU_LISTS; i++)
-                       pr_cont(" %s:%luKB", mem_cgroup_lru_names[i],
-                               K(memcg_page_state_local(iter,
-                                                        NR_LRU_BASE + i)));
-
-               pr_cont("\n");
+       /*
+        * On the default (v2) hierarchy swap has its own page_counter;
+        * legacy (v1) exposes the combined memsw counter plus kmem.
+        */
+       if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+               pr_info("swap: usage %llukB, limit %llukB, failcnt %lu\n",
+                       K((u64)page_counter_read(&memcg->swap)),
+                       K((u64)memcg->swap.max), memcg->swap.failcnt);
+       else {
+               pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n",
+                       K((u64)page_counter_read(&memcg->memsw)),
+                       K((u64)memcg->memsw.max), memcg->memsw.failcnt);
+               pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n",
+                       K((u64)page_counter_read(&memcg->kmem)),
+                       K((u64)memcg->kmem.max), memcg->kmem.failcnt);
        }
+
+       pr_info("Memory cgroup stats for ");
+       pr_cont_cgroup_path(memcg->css.cgroup);
+       pr_cont(":");
+       /* Best effort: silently skip the stat dump if the buffer allocation fails */
+       buf = memory_stat_format(memcg);
+       if (!buf)
+               return;
+       pr_info("%s", buf);
+       kfree(buf);
 }
 
 /*
@@ -2279,7 +2360,6 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
        unsigned long nr_reclaimed;
        bool may_swap = true;
        bool drained = false;
-       bool oomed = false;
        enum oom_status oom_status;
 
        if (mem_cgroup_is_root(memcg))
@@ -2366,7 +2446,7 @@ retry:
        if (nr_retries--)
                goto retry;
 
-       if (gfp_mask & __GFP_RETRY_MAYFAIL && oomed)
+       if (gfp_mask & __GFP_RETRY_MAYFAIL)
                goto nomem;
 
        if (gfp_mask & __GFP_NOFAIL)
@@ -2385,7 +2465,6 @@ retry:
        switch (oom_status) {
        case OOM_SUCCESS:
                nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-               oomed = true;
                goto retry;
        case OOM_FAILED:
                goto force;
@@ -3472,6 +3551,28 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
 }
 #endif /* CONFIG_NUMA */
 
+/*
+ * Legacy (cgroup v1) statistics shown by memcg_stat_show():
+ * memcg1_stat_names[i] is the label printed for memcg1_stats[i],
+ * so the two arrays below must stay index-aligned.
+ */
+static const unsigned int memcg1_stats[] = {
+       MEMCG_CACHE,
+       MEMCG_RSS,
+       MEMCG_RSS_HUGE,
+       NR_SHMEM,
+       NR_FILE_MAPPED,
+       NR_FILE_DIRTY,
+       NR_WRITEBACK,
+       MEMCG_SWAP,
+};
+
+static const char *const memcg1_stat_names[] = {
+       "cache",
+       "rss",
+       "rss_huge",
+       "shmem",
+       "mapped_file",
+       "dirty",
+       "writeback",
+       "swap",
+};
+
 /* Universal VM events cgroup1 shows, original sort order */
 static const unsigned int memcg1_events[] = {
        PGPGIN,
@@ -3530,12 +3631,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
                if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
                        continue;
                seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
-                          (u64)memcg_page_state(memcg, i) * PAGE_SIZE);
+                          (u64)memcg_page_state(memcg, memcg1_stats[i]) *
+                          PAGE_SIZE);
        }
 
        for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
                seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
-                          (u64)memcg_events(memcg, i));
+                          (u64)memcg_events(memcg, memcg1_events[i]));
 
        for (i = 0; i < NR_LRU_LISTS; i++)
                seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
@@ -5625,112 +5727,42 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
        return nbytes;
 }
 
+/*
+ * Emit one memory.events-style record: the LOW/HIGH/MAX/OOM/OOM_KILL
+ * counters read from @events.
+ */
+static void __memory_events_show(struct seq_file *m, atomic_long_t *events)
+{
+       seq_printf(m, "low %lu\n", atomic_long_read(&events[MEMCG_LOW]));
+       seq_printf(m, "high %lu\n", atomic_long_read(&events[MEMCG_HIGH]));
+       seq_printf(m, "max %lu\n", atomic_long_read(&events[MEMCG_MAX]));
+       seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
+       seq_printf(m, "oom_kill %lu\n",
+                  atomic_long_read(&events[MEMCG_OOM_KILL]));
+}
+
+/*
+ * memory.events handler.  NOTE(review): ->memory_events is presumably the
+ * hierarchy-wide counter set, as opposed to ->memory_events_local used by
+ * memory.events.local below — confirm against the struct definition.
+ */
 static int memory_events_show(struct seq_file *m, void *v)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
-       seq_printf(m, "low %lu\n",
-                  atomic_long_read(&memcg->memory_events[MEMCG_LOW]));
-       seq_printf(m, "high %lu\n",
-                  atomic_long_read(&memcg->memory_events[MEMCG_HIGH]));
-       seq_printf(m, "max %lu\n",
-                  atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
-       seq_printf(m, "oom %lu\n",
-                  atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
-       seq_printf(m, "oom_kill %lu\n",
-                  atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
-
+       __memory_events_show(m, memcg->memory_events);
        return 0;
 }
 
-static int memory_stat_show(struct seq_file *m, void *v)
+/* memory.events.local handler: same counters, from ->memory_events_local */
+static int memory_events_local_show(struct seq_file *m, void *v)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
-       int i;
-
-       /*
-        * Provide statistics on the state of the memory subsystem as
-        * well as cumulative event counters that show past behavior.
-        *
-        * This list is ordered following a combination of these gradients:
-        * 1) generic big picture -> specifics and details
-        * 2) reflecting userspace activity -> reflecting kernel heuristics
-        *
-        * Current memory state:
-        */
-
-       seq_printf(m, "anon %llu\n",
-                  (u64)memcg_page_state(memcg, MEMCG_RSS) * PAGE_SIZE);
-       seq_printf(m, "file %llu\n",
-                  (u64)memcg_page_state(memcg, MEMCG_CACHE) * PAGE_SIZE);
-       seq_printf(m, "kernel_stack %llu\n",
-                  (u64)memcg_page_state(memcg, MEMCG_KERNEL_STACK_KB) * 1024);
-       seq_printf(m, "slab %llu\n",
-                  (u64)(memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) +
-                        memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE)) *
-                  PAGE_SIZE);
-       seq_printf(m, "sock %llu\n",
-                  (u64)memcg_page_state(memcg, MEMCG_SOCK) * PAGE_SIZE);
 
-       seq_printf(m, "shmem %llu\n",
-                  (u64)memcg_page_state(memcg, NR_SHMEM) * PAGE_SIZE);
-       seq_printf(m, "file_mapped %llu\n",
-                  (u64)memcg_page_state(memcg, NR_FILE_MAPPED) * PAGE_SIZE);
-       seq_printf(m, "file_dirty %llu\n",
-                  (u64)memcg_page_state(memcg, NR_FILE_DIRTY) * PAGE_SIZE);
-       seq_printf(m, "file_writeback %llu\n",
-                  (u64)memcg_page_state(memcg, NR_WRITEBACK) * PAGE_SIZE);
-
-       /*
-        * TODO: We should eventually replace our own MEMCG_RSS_HUGE counter
-        * with the NR_ANON_THP vm counter, but right now it's a pain in the
-        * arse because it requires migrating the work out of rmap to a place
-        * where the page->mem_cgroup is set up and stable.
-        */
-       seq_printf(m, "anon_thp %llu\n",
-                  (u64)memcg_page_state(memcg, MEMCG_RSS_HUGE) * PAGE_SIZE);
-
-       for (i = 0; i < NR_LRU_LISTS; i++)
-               seq_printf(m, "%s %llu\n", mem_cgroup_lru_names[i],
-                          (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
-                          PAGE_SIZE);
-
-       seq_printf(m, "slab_reclaimable %llu\n",
-                  (u64)memcg_page_state(memcg, NR_SLAB_RECLAIMABLE) *
-                  PAGE_SIZE);
-       seq_printf(m, "slab_unreclaimable %llu\n",
-                  (u64)memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE) *
-                  PAGE_SIZE);
-
-       /* Accumulated memory events */
-
-       seq_printf(m, "pgfault %lu\n", memcg_events(memcg, PGFAULT));
-       seq_printf(m, "pgmajfault %lu\n", memcg_events(memcg, PGMAJFAULT));
-
-       seq_printf(m, "workingset_refault %lu\n",
-                  memcg_page_state(memcg, WORKINGSET_REFAULT));
-       seq_printf(m, "workingset_activate %lu\n",
-                  memcg_page_state(memcg, WORKINGSET_ACTIVATE));
-       seq_printf(m, "workingset_nodereclaim %lu\n",
-                  memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
-
-       seq_printf(m, "pgrefill %lu\n", memcg_events(memcg, PGREFILL));
-       seq_printf(m, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) +
-                  memcg_events(memcg, PGSCAN_DIRECT));
-       seq_printf(m, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) +
-                  memcg_events(memcg, PGSTEAL_DIRECT));
-       seq_printf(m, "pgactivate %lu\n", memcg_events(memcg, PGACTIVATE));
-       seq_printf(m, "pgdeactivate %lu\n", memcg_events(memcg, PGDEACTIVATE));
-       seq_printf(m, "pglazyfree %lu\n", memcg_events(memcg, PGLAZYFREE));
-       seq_printf(m, "pglazyfreed %lu\n", memcg_events(memcg, PGLAZYFREED));
+       __memory_events_show(m, memcg->memory_events_local);
+       return 0;
+}
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       seq_printf(m, "thp_fault_alloc %lu\n",
-                  memcg_events(memcg, THP_FAULT_ALLOC));
-       seq_printf(m, "thp_collapse_alloc %lu\n",
-                  memcg_events(memcg, THP_COLLAPSE_ALLOC));
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+/* memory.stat handler: print the formatted buffer, -ENOMEM if it failed */
+static int memory_stat_show(struct seq_file *m, void *v)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+       char *buf;
 
+       buf = memory_stat_format(memcg);
+       if (!buf)
+               return -ENOMEM;
+       seq_puts(m, buf);
+       kfree(buf);
        return 0;
 }
 
@@ -5801,6 +5833,12 @@ static struct cftype memory_files[] = {
                .file_offset = offsetof(struct mem_cgroup, events_file),
                .seq_show = memory_events_show,
        },
+       {
+               .name = "events.local",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .file_offset = offsetof(struct mem_cgroup, events_local_file),
+               .seq_show = memory_events_local_show,
+       },
        {
                .name = "stat",
                .flags = CFTYPE_NOT_ON_ROOT,