]> git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
memcg: extract memcg_vmstats from struct mem_cgroup
authorShakeel Butt <shakeelb@google.com>
Wed, 7 Sep 2022 04:35:35 +0000 (04:35 +0000)
committerAndrew Morton <akpm@linux-foundation.org>
Mon, 3 Oct 2022 21:03:04 +0000 (14:03 -0700)
Patch series "memcg: reduce memory overhead of memory cgroups".

Currently a lot of memory is wasted to maintain the vmevents for memory
cgroups as we have multiple arrays of size NR_VM_EVENT_ITEMS which can be
as large as 110.  However memcg code uses small portion of those entries.
This patch series eliminate this overhead by removing the unneeded vmevent
entries from memory cgroup data structures.

This patch (of 3):

This is a preparatory patch to reduce the memory overhead of memory
cgroup. The struct memcg_vmstats is the largest object embedded into the
struct mem_cgroup. This patch extracts struct memcg_vmstats from struct
mem_cgroup to ease the following patches in reducing the size of struct
memcg_vmstats.

Link: https://lkml.kernel.org/r/20220907043537.3457014-1-shakeelb@google.com
Link: https://lkml.kernel.org/r/20220907043537.3457014-2-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/memcontrol.h
mm/memcontrol.c

index ca0df42662ad171c54768b95ad67c27e4d9c82e0..dc7d40e575d5f1f42f9a6ab43945daee1252ac6c 100644 (file)
@@ -80,29 +80,8 @@ enum mem_cgroup_events_target {
        MEM_CGROUP_NTARGETS,
 };
 
-struct memcg_vmstats_percpu {
-       /* Local (CPU and cgroup) page state & events */
-       long                    state[MEMCG_NR_STAT];
-       unsigned long           events[NR_VM_EVENT_ITEMS];
-
-       /* Delta calculation for lockless upward propagation */
-       long                    state_prev[MEMCG_NR_STAT];
-       unsigned long           events_prev[NR_VM_EVENT_ITEMS];
-
-       /* Cgroup1: threshold notifications & softlimit tree updates */
-       unsigned long           nr_page_events;
-       unsigned long           targets[MEM_CGROUP_NTARGETS];
-};
-
-struct memcg_vmstats {
-       /* Aggregated (CPU and subtree) page state & events */
-       long                    state[MEMCG_NR_STAT];
-       unsigned long           events[NR_VM_EVENT_ITEMS];
-
-       /* Pending child counts during tree propagation */
-       long                    state_pending[MEMCG_NR_STAT];
-       unsigned long           events_pending[NR_VM_EVENT_ITEMS];
-};
+struct memcg_vmstats_percpu;
+struct memcg_vmstats;
 
 struct mem_cgroup_reclaim_iter {
        struct mem_cgroup *position;
@@ -298,7 +277,7 @@ struct mem_cgroup {
        CACHELINE_PADDING(_pad1_);
 
        /* memory.stat */
-       struct memcg_vmstats    vmstats;
+       struct memcg_vmstats    *vmstats;
 
        /* memory.events */
        atomic_long_t           memory_events[MEMCG_NR_MEMORY_EVENTS];
@@ -1001,15 +980,7 @@ static inline void mod_memcg_page_state(struct page *page,
        rcu_read_unlock();
 }
 
-static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
-       long x = READ_ONCE(memcg->vmstats.state[idx]);
-#ifdef CONFIG_SMP
-       if (x < 0)
-               x = 0;
-#endif
-       return x;
-}
+unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
 
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
index 632402001bca16ba8ca93110faeceec150c69b6a..0a44a733bb03cc0236298431217b00fef7e43c0f 100644 (file)
@@ -669,6 +669,40 @@ static void flush_memcg_stats_dwork(struct work_struct *w)
        queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
 }
 
+struct memcg_vmstats_percpu {
+       /* Local (CPU and cgroup) page state & events */
+       long                    state[MEMCG_NR_STAT];
+       unsigned long           events[NR_VM_EVENT_ITEMS];
+
+       /* Delta calculation for lockless upward propagation */
+       long                    state_prev[MEMCG_NR_STAT];
+       unsigned long           events_prev[NR_VM_EVENT_ITEMS];
+
+       /* Cgroup1: threshold notifications & softlimit tree updates */
+       unsigned long           nr_page_events;
+       unsigned long           targets[MEM_CGROUP_NTARGETS];
+};
+
+struct memcg_vmstats {
+       /* Aggregated (CPU and subtree) page state & events */
+       long                    state[MEMCG_NR_STAT];
+       unsigned long           events[NR_VM_EVENT_ITEMS];
+
+       /* Pending child counts during tree propagation */
+       long                    state_pending[MEMCG_NR_STAT];
+       unsigned long           events_pending[NR_VM_EVENT_ITEMS];
+};
+
+unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+       long x = READ_ONCE(memcg->vmstats->state[idx]);
+#ifdef CONFIG_SMP
+       if (x < 0)
+               x = 0;
+#endif
+       return x;
+}
+
 /**
  * __mod_memcg_state - update cgroup memory statistics
  * @memcg: the memory cgroup
@@ -827,7 +861,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 
 static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 {
-       return READ_ONCE(memcg->vmstats.events[event]);
+       return READ_ONCE(memcg->vmstats->events[event]);
 }
 
 static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
@@ -5170,6 +5204,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
        for_each_node(node)
                free_mem_cgroup_per_node_info(memcg, node);
+       kfree(memcg->vmstats);
        free_percpu(memcg->vmstats_percpu);
        kfree(memcg);
 }
@@ -5199,6 +5234,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
                goto fail;
        }
 
+       memcg->vmstats = kzalloc(sizeof(struct memcg_vmstats), GFP_KERNEL);
+       if (!memcg->vmstats)
+               goto fail;
+
        memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu,
                                                 GFP_KERNEL_ACCOUNT);
        if (!memcg->vmstats_percpu)
@@ -5418,9 +5457,9 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
                 * below us. We're in a per-cpu loop here and this is
                 * a global counter, so the first cycle will get them.
                 */
-               delta = memcg->vmstats.state_pending[i];
+               delta = memcg->vmstats->state_pending[i];
                if (delta)
-                       memcg->vmstats.state_pending[i] = 0;
+                       memcg->vmstats->state_pending[i] = 0;
 
                /* Add CPU changes on this level since the last flush */
                v = READ_ONCE(statc->state[i]);
@@ -5433,15 +5472,15 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
                        continue;
 
                /* Aggregate counts on this level and propagate upwards */
-               memcg->vmstats.state[i] += delta;
+               memcg->vmstats->state[i] += delta;
                if (parent)
-                       parent->vmstats.state_pending[i] += delta;
+                       parent->vmstats->state_pending[i] += delta;
        }
 
        for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
-               delta = memcg->vmstats.events_pending[i];
+               delta = memcg->vmstats->events_pending[i];
                if (delta)
-                       memcg->vmstats.events_pending[i] = 0;
+                       memcg->vmstats->events_pending[i] = 0;
 
                v = READ_ONCE(statc->events[i]);
                if (v != statc->events_prev[i]) {
@@ -5452,9 +5491,9 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
                if (!delta)
                        continue;
 
-               memcg->vmstats.events[i] += delta;
+               memcg->vmstats->events[i] += delta;
                if (parent)
-                       parent->vmstats.events_pending[i] += delta;
+                       parent->vmstats->events_pending[i] += delta;
        }
 
        for_each_node_state(nid, N_MEMORY) {