]>
Commit | Line | Data |
---|---|---|
a38e4082 DC |
1 | /* |
2 | * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. | |
3 | * Authors: David Chinner and Glauber Costa | |
4 | * | |
5 | * Generic LRU infrastructure | |
6 | */ | |
7 | #include <linux/kernel.h> | |
8 | #include <linux/module.h> | |
3b1d58a4 | 9 | #include <linux/mm.h> |
a38e4082 | 10 | #include <linux/list_lru.h> |
5ca302c8 | 11 | #include <linux/slab.h> |
c0a5b560 | 12 | #include <linux/mutex.h> |
60d3fd32 | 13 | #include <linux/memcontrol.h> |
c0a5b560 VD |
14 | |
15 | #ifdef CONFIG_MEMCG_KMEM | |
16 | static LIST_HEAD(list_lrus); | |
17 | static DEFINE_MUTEX(list_lrus_mutex); | |
18 | ||
19 | static void list_lru_register(struct list_lru *lru) | |
20 | { | |
21 | mutex_lock(&list_lrus_mutex); | |
22 | list_add(&lru->list, &list_lrus); | |
23 | mutex_unlock(&list_lrus_mutex); | |
24 | } | |
25 | ||
26 | static void list_lru_unregister(struct list_lru *lru) | |
27 | { | |
28 | mutex_lock(&list_lrus_mutex); | |
29 | list_del(&lru->list); | |
30 | mutex_unlock(&list_lrus_mutex); | |
31 | } | |
32 | #else | |
33 | static void list_lru_register(struct list_lru *lru) | |
34 | { | |
35 | } | |
36 | ||
37 | static void list_lru_unregister(struct list_lru *lru) | |
38 | { | |
39 | } | |
40 | #endif /* CONFIG_MEMCG_KMEM */ | |
a38e4082 | 41 | |
60d3fd32 VD |
42 | #ifdef CONFIG_MEMCG_KMEM |
43 | static inline bool list_lru_memcg_aware(struct list_lru *lru) | |
44 | { | |
45 | return !!lru->node[0].memcg_lrus; | |
46 | } | |
47 | ||
48 | static inline struct list_lru_one * | |
49 | list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) | |
50 | { | |
51 | /* | |
52 | * The lock protects the array of per cgroup lists from relocation | |
53 | * (see memcg_update_list_lru_node). | |
54 | */ | |
55 | lockdep_assert_held(&nlru->lock); | |
56 | if (nlru->memcg_lrus && idx >= 0) | |
57 | return nlru->memcg_lrus->lru[idx]; | |
58 | ||
59 | return &nlru->lru; | |
60 | } | |
61 | ||
62 | static inline struct list_lru_one * | |
63 | list_lru_from_kmem(struct list_lru_node *nlru, void *ptr) | |
64 | { | |
65 | struct mem_cgroup *memcg; | |
66 | ||
67 | if (!nlru->memcg_lrus) | |
68 | return &nlru->lru; | |
69 | ||
70 | memcg = mem_cgroup_from_kmem(ptr); | |
71 | if (!memcg) | |
72 | return &nlru->lru; | |
73 | ||
74 | return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg)); | |
75 | } | |
76 | #else | |
77 | static inline bool list_lru_memcg_aware(struct list_lru *lru) | |
78 | { | |
79 | return false; | |
80 | } | |
81 | ||
82 | static inline struct list_lru_one * | |
83 | list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx) | |
84 | { | |
85 | return &nlru->lru; | |
86 | } | |
87 | ||
88 | static inline struct list_lru_one * | |
89 | list_lru_from_kmem(struct list_lru_node *nlru, void *ptr) | |
90 | { | |
91 | return &nlru->lru; | |
92 | } | |
93 | #endif /* CONFIG_MEMCG_KMEM */ | |
94 | ||
a38e4082 DC |
95 | bool list_lru_add(struct list_lru *lru, struct list_head *item) |
96 | { | |
3b1d58a4 DC |
97 | int nid = page_to_nid(virt_to_page(item)); |
98 | struct list_lru_node *nlru = &lru->node[nid]; | |
60d3fd32 | 99 | struct list_lru_one *l; |
3b1d58a4 DC |
100 | |
101 | spin_lock(&nlru->lock); | |
60d3fd32 VD |
102 | l = list_lru_from_kmem(nlru, item); |
103 | WARN_ON_ONCE(l->nr_items < 0); | |
a38e4082 | 104 | if (list_empty(item)) { |
60d3fd32 VD |
105 | list_add_tail(item, &l->list); |
106 | l->nr_items++; | |
3b1d58a4 | 107 | spin_unlock(&nlru->lock); |
a38e4082 DC |
108 | return true; |
109 | } | |
3b1d58a4 | 110 | spin_unlock(&nlru->lock); |
a38e4082 DC |
111 | return false; |
112 | } | |
113 | EXPORT_SYMBOL_GPL(list_lru_add); | |
114 | ||
115 | bool list_lru_del(struct list_lru *lru, struct list_head *item) | |
116 | { | |
3b1d58a4 DC |
117 | int nid = page_to_nid(virt_to_page(item)); |
118 | struct list_lru_node *nlru = &lru->node[nid]; | |
60d3fd32 | 119 | struct list_lru_one *l; |
3b1d58a4 DC |
120 | |
121 | spin_lock(&nlru->lock); | |
60d3fd32 | 122 | l = list_lru_from_kmem(nlru, item); |
a38e4082 DC |
123 | if (!list_empty(item)) { |
124 | list_del_init(item); | |
60d3fd32 VD |
125 | l->nr_items--; |
126 | WARN_ON_ONCE(l->nr_items < 0); | |
3b1d58a4 | 127 | spin_unlock(&nlru->lock); |
a38e4082 DC |
128 | return true; |
129 | } | |
3b1d58a4 | 130 | spin_unlock(&nlru->lock); |
a38e4082 DC |
131 | return false; |
132 | } | |
133 | EXPORT_SYMBOL_GPL(list_lru_del); | |
134 | ||
3f97b163 VD |
135 | void list_lru_isolate(struct list_lru_one *list, struct list_head *item) |
136 | { | |
137 | list_del_init(item); | |
138 | list->nr_items--; | |
139 | } | |
140 | EXPORT_SYMBOL_GPL(list_lru_isolate); | |
141 | ||
142 | void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, | |
143 | struct list_head *head) | |
144 | { | |
145 | list_move(item, head); | |
146 | list->nr_items--; | |
147 | } | |
148 | EXPORT_SYMBOL_GPL(list_lru_isolate_move); | |
149 | ||
60d3fd32 VD |
150 | static unsigned long __list_lru_count_one(struct list_lru *lru, |
151 | int nid, int memcg_idx) | |
a38e4082 | 152 | { |
6a4f496f | 153 | struct list_lru_node *nlru = &lru->node[nid]; |
60d3fd32 VD |
154 | struct list_lru_one *l; |
155 | unsigned long count; | |
3b1d58a4 | 156 | |
6a4f496f | 157 | spin_lock(&nlru->lock); |
60d3fd32 VD |
158 | l = list_lru_from_memcg_idx(nlru, memcg_idx); |
159 | WARN_ON_ONCE(l->nr_items < 0); | |
160 | count = l->nr_items; | |
6a4f496f | 161 | spin_unlock(&nlru->lock); |
3b1d58a4 DC |
162 | |
163 | return count; | |
164 | } | |
60d3fd32 VD |
165 | |
166 | unsigned long list_lru_count_one(struct list_lru *lru, | |
167 | int nid, struct mem_cgroup *memcg) | |
168 | { | |
169 | return __list_lru_count_one(lru, nid, memcg_cache_id(memcg)); | |
170 | } | |
171 | EXPORT_SYMBOL_GPL(list_lru_count_one); | |
172 | ||
173 | unsigned long list_lru_count_node(struct list_lru *lru, int nid) | |
174 | { | |
175 | long count = 0; | |
176 | int memcg_idx; | |
177 | ||
178 | count += __list_lru_count_one(lru, nid, -1); | |
179 | if (list_lru_memcg_aware(lru)) { | |
180 | for_each_memcg_cache_index(memcg_idx) | |
181 | count += __list_lru_count_one(lru, nid, memcg_idx); | |
182 | } | |
183 | return count; | |
184 | } | |
6a4f496f | 185 | EXPORT_SYMBOL_GPL(list_lru_count_node); |
3b1d58a4 | 186 | |
60d3fd32 VD |
187 | static unsigned long |
188 | __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx, | |
189 | list_lru_walk_cb isolate, void *cb_arg, | |
190 | unsigned long *nr_to_walk) | |
3b1d58a4 DC |
191 | { |
192 | ||
60d3fd32 VD |
193 | struct list_lru_node *nlru = &lru->node[nid]; |
194 | struct list_lru_one *l; | |
a38e4082 | 195 | struct list_head *item, *n; |
3b1d58a4 | 196 | unsigned long isolated = 0; |
a38e4082 | 197 | |
3b1d58a4 | 198 | spin_lock(&nlru->lock); |
60d3fd32 | 199 | l = list_lru_from_memcg_idx(nlru, memcg_idx); |
a38e4082 | 200 | restart: |
60d3fd32 | 201 | list_for_each_safe(item, n, &l->list) { |
a38e4082 | 202 | enum lru_status ret; |
5cedf721 DC |
203 | |
204 | /* | |
205 | * decrement nr_to_walk first so that we don't livelock if we | |
206 | * get stuck on large numbesr of LRU_RETRY items | |
207 | */ | |
c56b097a | 208 | if (!*nr_to_walk) |
5cedf721 | 209 | break; |
c56b097a | 210 | --*nr_to_walk; |
5cedf721 | 211 | |
3f97b163 | 212 | ret = isolate(item, l, &nlru->lock, cb_arg); |
a38e4082 | 213 | switch (ret) { |
449dd698 JW |
214 | case LRU_REMOVED_RETRY: |
215 | assert_spin_locked(&nlru->lock); | |
a38e4082 | 216 | case LRU_REMOVED: |
3b1d58a4 | 217 | isolated++; |
449dd698 JW |
218 | /* |
219 | * If the lru lock has been dropped, our list | |
220 | * traversal is now invalid and so we have to | |
221 | * restart from scratch. | |
222 | */ | |
223 | if (ret == LRU_REMOVED_RETRY) | |
224 | goto restart; | |
a38e4082 DC |
225 | break; |
226 | case LRU_ROTATE: | |
60d3fd32 | 227 | list_move_tail(item, &l->list); |
a38e4082 DC |
228 | break; |
229 | case LRU_SKIP: | |
230 | break; | |
231 | case LRU_RETRY: | |
5cedf721 DC |
232 | /* |
233 | * The lru lock has been dropped, our list traversal is | |
234 | * now invalid and so we have to restart from scratch. | |
235 | */ | |
449dd698 | 236 | assert_spin_locked(&nlru->lock); |
a38e4082 DC |
237 | goto restart; |
238 | default: | |
239 | BUG(); | |
240 | } | |
a38e4082 | 241 | } |
3b1d58a4 DC |
242 | |
243 | spin_unlock(&nlru->lock); | |
244 | return isolated; | |
245 | } | |
60d3fd32 VD |
246 | |
247 | unsigned long | |
248 | list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, | |
249 | list_lru_walk_cb isolate, void *cb_arg, | |
250 | unsigned long *nr_to_walk) | |
251 | { | |
252 | return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg), | |
253 | isolate, cb_arg, nr_to_walk); | |
254 | } | |
255 | EXPORT_SYMBOL_GPL(list_lru_walk_one); | |
256 | ||
257 | unsigned long list_lru_walk_node(struct list_lru *lru, int nid, | |
258 | list_lru_walk_cb isolate, void *cb_arg, | |
259 | unsigned long *nr_to_walk) | |
260 | { | |
261 | long isolated = 0; | |
262 | int memcg_idx; | |
263 | ||
264 | isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg, | |
265 | nr_to_walk); | |
266 | if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) { | |
267 | for_each_memcg_cache_index(memcg_idx) { | |
268 | isolated += __list_lru_walk_one(lru, nid, memcg_idx, | |
269 | isolate, cb_arg, nr_to_walk); | |
270 | if (*nr_to_walk <= 0) | |
271 | break; | |
272 | } | |
273 | } | |
274 | return isolated; | |
275 | } | |
3b1d58a4 DC |
276 | EXPORT_SYMBOL_GPL(list_lru_walk_node); |
277 | ||
60d3fd32 VD |
278 | static void init_one_lru(struct list_lru_one *l) |
279 | { | |
280 | INIT_LIST_HEAD(&l->list); | |
281 | l->nr_items = 0; | |
282 | } | |
283 | ||
284 | #ifdef CONFIG_MEMCG_KMEM | |
285 | static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus, | |
286 | int begin, int end) | |
287 | { | |
288 | int i; | |
289 | ||
290 | for (i = begin; i < end; i++) | |
291 | kfree(memcg_lrus->lru[i]); | |
292 | } | |
293 | ||
294 | static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus, | |
295 | int begin, int end) | |
296 | { | |
297 | int i; | |
298 | ||
299 | for (i = begin; i < end; i++) { | |
300 | struct list_lru_one *l; | |
301 | ||
302 | l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL); | |
303 | if (!l) | |
304 | goto fail; | |
305 | ||
306 | init_one_lru(l); | |
307 | memcg_lrus->lru[i] = l; | |
308 | } | |
309 | return 0; | |
310 | fail: | |
311 | __memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1); | |
312 | return -ENOMEM; | |
313 | } | |
314 | ||
315 | static int memcg_init_list_lru_node(struct list_lru_node *nlru) | |
316 | { | |
317 | int size = memcg_nr_cache_ids; | |
318 | ||
319 | nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL); | |
320 | if (!nlru->memcg_lrus) | |
321 | return -ENOMEM; | |
322 | ||
323 | if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) { | |
324 | kfree(nlru->memcg_lrus); | |
325 | return -ENOMEM; | |
326 | } | |
327 | ||
328 | return 0; | |
329 | } | |
330 | ||
331 | static void memcg_destroy_list_lru_node(struct list_lru_node *nlru) | |
332 | { | |
333 | __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids); | |
334 | kfree(nlru->memcg_lrus); | |
335 | } | |
336 | ||
337 | static int memcg_update_list_lru_node(struct list_lru_node *nlru, | |
338 | int old_size, int new_size) | |
339 | { | |
340 | struct list_lru_memcg *old, *new; | |
341 | ||
342 | BUG_ON(old_size > new_size); | |
343 | ||
344 | old = nlru->memcg_lrus; | |
345 | new = kmalloc(new_size * sizeof(void *), GFP_KERNEL); | |
346 | if (!new) | |
347 | return -ENOMEM; | |
348 | ||
349 | if (__memcg_init_list_lru_node(new, old_size, new_size)) { | |
350 | kfree(new); | |
351 | return -ENOMEM; | |
352 | } | |
353 | ||
354 | memcpy(new, old, old_size * sizeof(void *)); | |
355 | ||
356 | /* | |
357 | * The lock guarantees that we won't race with a reader | |
358 | * (see list_lru_from_memcg_idx). | |
359 | * | |
360 | * Since list_lru_{add,del} may be called under an IRQ-safe lock, | |
361 | * we have to use IRQ-safe primitives here to avoid deadlock. | |
362 | */ | |
363 | spin_lock_irq(&nlru->lock); | |
364 | nlru->memcg_lrus = new; | |
365 | spin_unlock_irq(&nlru->lock); | |
366 | ||
367 | kfree(old); | |
368 | return 0; | |
369 | } | |
370 | ||
371 | static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru, | |
372 | int old_size, int new_size) | |
373 | { | |
374 | /* do not bother shrinking the array back to the old size, because we | |
375 | * cannot handle allocation failures here */ | |
376 | __memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size); | |
377 | } | |
378 | ||
379 | static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) | |
380 | { | |
381 | int i; | |
382 | ||
383 | for (i = 0; i < nr_node_ids; i++) { | |
384 | if (!memcg_aware) | |
385 | lru->node[i].memcg_lrus = NULL; | |
386 | else if (memcg_init_list_lru_node(&lru->node[i])) | |
387 | goto fail; | |
388 | } | |
389 | return 0; | |
390 | fail: | |
391 | for (i = i - 1; i >= 0; i--) | |
392 | memcg_destroy_list_lru_node(&lru->node[i]); | |
393 | return -ENOMEM; | |
394 | } | |
395 | ||
396 | static void memcg_destroy_list_lru(struct list_lru *lru) | |
397 | { | |
398 | int i; | |
399 | ||
400 | if (!list_lru_memcg_aware(lru)) | |
401 | return; | |
402 | ||
403 | for (i = 0; i < nr_node_ids; i++) | |
404 | memcg_destroy_list_lru_node(&lru->node[i]); | |
405 | } | |
406 | ||
407 | static int memcg_update_list_lru(struct list_lru *lru, | |
408 | int old_size, int new_size) | |
409 | { | |
410 | int i; | |
411 | ||
412 | if (!list_lru_memcg_aware(lru)) | |
413 | return 0; | |
414 | ||
415 | for (i = 0; i < nr_node_ids; i++) { | |
416 | if (memcg_update_list_lru_node(&lru->node[i], | |
417 | old_size, new_size)) | |
418 | goto fail; | |
419 | } | |
420 | return 0; | |
421 | fail: | |
422 | for (i = i - 1; i >= 0; i--) | |
423 | memcg_cancel_update_list_lru_node(&lru->node[i], | |
424 | old_size, new_size); | |
425 | return -ENOMEM; | |
426 | } | |
427 | ||
428 | static void memcg_cancel_update_list_lru(struct list_lru *lru, | |
429 | int old_size, int new_size) | |
430 | { | |
431 | int i; | |
432 | ||
433 | if (!list_lru_memcg_aware(lru)) | |
434 | return; | |
435 | ||
436 | for (i = 0; i < nr_node_ids; i++) | |
437 | memcg_cancel_update_list_lru_node(&lru->node[i], | |
438 | old_size, new_size); | |
439 | } | |
440 | ||
441 | int memcg_update_all_list_lrus(int new_size) | |
442 | { | |
443 | int ret = 0; | |
444 | struct list_lru *lru; | |
445 | int old_size = memcg_nr_cache_ids; | |
446 | ||
447 | mutex_lock(&list_lrus_mutex); | |
448 | list_for_each_entry(lru, &list_lrus, list) { | |
449 | ret = memcg_update_list_lru(lru, old_size, new_size); | |
450 | if (ret) | |
451 | goto fail; | |
452 | } | |
453 | out: | |
454 | mutex_unlock(&list_lrus_mutex); | |
455 | return ret; | |
456 | fail: | |
457 | list_for_each_entry_continue_reverse(lru, &list_lrus, list) | |
458 | memcg_cancel_update_list_lru(lru, old_size, new_size); | |
459 | goto out; | |
460 | } | |
461 | #else | |
462 | static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware) | |
463 | { | |
464 | return 0; | |
465 | } | |
466 | ||
467 | static void memcg_destroy_list_lru(struct list_lru *lru) | |
468 | { | |
469 | } | |
470 | #endif /* CONFIG_MEMCG_KMEM */ | |
471 | ||
472 | int __list_lru_init(struct list_lru *lru, bool memcg_aware, | |
473 | struct lock_class_key *key) | |
a38e4082 | 474 | { |
3b1d58a4 | 475 | int i; |
5ca302c8 | 476 | size_t size = sizeof(*lru->node) * nr_node_ids; |
60d3fd32 VD |
477 | int err = -ENOMEM; |
478 | ||
479 | memcg_get_cache_ids(); | |
5ca302c8 GC |
480 | |
481 | lru->node = kzalloc(size, GFP_KERNEL); | |
482 | if (!lru->node) | |
60d3fd32 | 483 | goto out; |
a38e4082 | 484 | |
5ca302c8 | 485 | for (i = 0; i < nr_node_ids; i++) { |
3b1d58a4 | 486 | spin_lock_init(&lru->node[i].lock); |
449dd698 JW |
487 | if (key) |
488 | lockdep_set_class(&lru->node[i].lock, key); | |
60d3fd32 VD |
489 | init_one_lru(&lru->node[i].lru); |
490 | } | |
491 | ||
492 | err = memcg_init_list_lru(lru, memcg_aware); | |
493 | if (err) { | |
494 | kfree(lru->node); | |
495 | goto out; | |
3b1d58a4 | 496 | } |
60d3fd32 | 497 | |
c0a5b560 | 498 | list_lru_register(lru); |
60d3fd32 VD |
499 | out: |
500 | memcg_put_cache_ids(); | |
501 | return err; | |
a38e4082 | 502 | } |
60d3fd32 | 503 | EXPORT_SYMBOL_GPL(__list_lru_init); |
5ca302c8 GC |
504 | |
505 | void list_lru_destroy(struct list_lru *lru) | |
506 | { | |
c0a5b560 VD |
507 | /* Already destroyed or not yet initialized? */ |
508 | if (!lru->node) | |
509 | return; | |
60d3fd32 VD |
510 | |
511 | memcg_get_cache_ids(); | |
512 | ||
c0a5b560 | 513 | list_lru_unregister(lru); |
60d3fd32 VD |
514 | |
515 | memcg_destroy_list_lru(lru); | |
5ca302c8 | 516 | kfree(lru->node); |
c0a5b560 | 517 | lru->node = NULL; |
60d3fd32 VD |
518 | |
519 | memcg_put_cache_ids(); | |
5ca302c8 GC |
520 | } |
521 | EXPORT_SYMBOL_GPL(list_lru_destroy); |