/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to be always present, even
	 * on systems that support sparse numa ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_add_tail(item, &l->list);
		l->nr_items++;
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
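
/*
 * Illustrative usage sketch (not part of this file): a typical caller embeds
 * a struct list_head in its objects and parks them on the LRU when they
 * become unused, pulling them back off when they are referenced again.  The
 * names below (struct my_object, my_lru, my_object_*) are hypothetical.
 *
 *	struct my_object {
 *		struct list_head lru;	// empty while the object is in use
 *		...
 *	};
 *
 *	static struct list_lru my_lru;
 *
 *	static void my_object_last_put(struct my_object *obj)
 *	{
 *		// last reference dropped: make the object reclaimable
 *		list_lru_add(&my_lru, &obj->lru);
 *	}
 *
 *	static void my_object_reuse(struct my_object *obj)
 *	{
 *		// object is live again: remove it from the LRU, if present
 *		list_lru_del(&my_lru, &obj->lru);
 *	}
 *
 * Both helpers take the per-node lock internally and return whether the
 * item was actually added or removed.
 */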

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
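
/*
 * Sketch of an isolate callback such as a shrinker might pass to
 * list_lru_walk_one() (hypothetical names; real users like the dentry and
 * inode caches add their own reference-count and state checks):
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *lru,
 *					  spinlock_t *lru_lock, void *cb_arg)
 *	{
 *		struct list_head *freeable = cb_arg;
 *		struct my_object *obj = container_of(item, struct my_object, lru);
 *
 *		if (my_object_is_busy(obj))
 *			return LRU_SKIP;	// leave it on the LRU
 *
 *		// still under lru_lock: move it to a private list for disposal
 *		list_lru_isolate_move(lru, item, freeable);
 *		return LRU_REMOVED;
 *	}
 *
 * The caller walks with a local "freeable" list as cb_arg and frees the
 * collected objects after the walk, outside the LRU lock.
 */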

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
						isolate, cb_arg, nr_to_walk);
			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* only [begin, i) were allocated before the failure */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	dst->nr_items += src->nr_items;
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
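
/*
 * Initialization sketch (illustrative, with hypothetical my_lru/my_shrinker
 * names): callers normally go through the wrappers in <linux/list_lru.h>
 * rather than calling __list_lru_init() directly.  The memcg-aware variant
 * assumes the shrinker was set up as memcg aware (SHRINKER_MEMCG_AWARE) so
 * that its id is valid by the time the LRU is initialized.
 *
 *	static struct list_lru my_lru;
 *	static struct shrinker my_shrinker;
 *
 *	// plain, per-node lists only:
 *	err = list_lru_init(&my_lru);
 *
 *	// memcg aware: charged objects get their own per-cgroup lists
 *	err = list_lru_init_memcg(&my_lru, &my_shrinker);
 *
 *	...
 *	list_lru_destroy(&my_lru);
 */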

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);