]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - mm/hugetlb_cgroup.c
hugetlb_cgroup: add hugetlb_cgroup reservation counter
[mirror_ubuntu-jammy-kernel.git] / mm / hugetlb_cgroup.c
CommitLineData
2bc64a20
AK
1/*
2 *
3 * Copyright IBM Corporation, 2012
4 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
5 *
faced7e0
GS
6 * Cgroup v2
7 * Copyright (C) 2019 Red Hat, Inc.
8 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
9 *
2bc64a20
AK
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of version 2.1 of the GNU Lesser General Public License
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it would be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
17 *
18 */
19
20#include <linux/cgroup.h>
71f87bee 21#include <linux/page_counter.h>
2bc64a20
AK
22#include <linux/slab.h>
23#include <linux/hugetlb.h>
24#include <linux/hugetlb_cgroup.h>
25
/* Event types reported through the hugetlb.events{,.local} files. */
enum hugetlb_memory_event {
	HUGETLB_MAX,			/* limit was hit; a charge failed */
	HUGETLB_NR_MEMORY_EVENTS,	/* number of event types */
};
30
2bc64a20
AK
31struct hugetlb_cgroup {
32 struct cgroup_subsys_state css;
faced7e0 33
2bc64a20
AK
34 /*
35 * the counter to account for hugepages from hugetlb.
36 */
71f87bee 37 struct page_counter hugepage[HUGE_MAX_HSTATE];
faced7e0 38
cdc2fcfe
MA
39 /*
40 * the counter to account for hugepage reservations from hugetlb.
41 */
42 struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];
43
faced7e0
GS
44 atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
45 atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
46
47 /* Handle for "hugetlb.events" */
48 struct cgroup_file events_file[HUGE_MAX_HSTATE];
49
50 /* Handle for "hugetlb.events.local" */
51 struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
2bc64a20
AK
52};
53
/*
 * Pack an hstate index (upper 16 bits) and a resource attribute
 * (lower 16 bits) into one cftype->private word, and unpack them.
 */
#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)
57
/* Map a page_counter embedded in hugepage[idx] back to its hugetlb_cgroup. */
#define hugetlb_cgroup_from_counter(counter, idx)                   \
	container_of(counter, struct hugetlb_cgroup, hugepage[idx])
60
2bc64a20
AK
61static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
62
cdc2fcfe
MA
63static inline struct page_counter *
64hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
65 bool rsvd)
66{
67 if (rsvd)
68 return &h_cg->rsvd_hugepage[idx];
69 return &h_cg->hugepage[idx];
70}
71
2bc64a20
AK
72static inline
73struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
74{
a7c6d554 75 return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
2bc64a20
AK
76}
77
2bc64a20
AK
78static inline
79struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
80{
073219e9 81 return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
2bc64a20
AK
82}
83
84static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
85{
86 return (h_cg == root_h_cgroup);
87}
88
3f798518
TH
89static inline struct hugetlb_cgroup *
90parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
2bc64a20 91{
5c9d535b 92 return hugetlb_cgroup_from_css(h_cg->css.parent);
2bc64a20
AK
93}
94
3f798518 95static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
2bc64a20
AK
96{
97 int idx;
2bc64a20
AK
98
99 for (idx = 0; idx < hugetlb_max_hstate; idx++) {
71f87bee 100 if (page_counter_read(&h_cg->hugepage[idx]))
2bc64a20
AK
101 return true;
102 }
103 return false;
104}
105
297880f4
DR
106static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
107 struct hugetlb_cgroup *parent_h_cgroup)
108{
109 int idx;
110
111 for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
112 struct page_counter *counter = &h_cgroup->hugepage[idx];
113 struct page_counter *parent = NULL;
114 unsigned long limit;
115 int ret;
116
117 if (parent_h_cgroup)
118 parent = &parent_h_cgroup->hugepage[idx];
119 page_counter_init(counter, parent);
120
121 limit = round_down(PAGE_COUNTER_MAX,
122 1 << huge_page_order(&hstates[idx]));
bbec2e15 123 ret = page_counter_set_max(counter, limit);
297880f4
DR
124 VM_BUG_ON(ret);
125 }
126}
127
eb95419b
TH
128static struct cgroup_subsys_state *
129hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
2bc64a20 130{
eb95419b
TH
131 struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
132 struct hugetlb_cgroup *h_cgroup;
2bc64a20
AK
133
134 h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
135 if (!h_cgroup)
136 return ERR_PTR(-ENOMEM);
137
297880f4 138 if (!parent_h_cgroup)
2bc64a20 139 root_h_cgroup = h_cgroup;
297880f4
DR
140
141 hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
2bc64a20
AK
142 return &h_cgroup->css;
143}
144
/* Release the hugetlb cgroup state allocated in css_alloc(). */
static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	kfree(hugetlb_cgroup_from_css(css));
}
152
da1def55
AK
153
154/*
155 * Should be called with hugetlb_lock held.
156 * Since we are holding hugetlb_lock, pages cannot get moved from
157 * active list or uncharged from the cgroup, So no need to get
158 * page reference and test for page active here. This function
159 * cannot fail.
160 */
3f798518 161static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
da1def55
AK
162 struct page *page)
163{
71f87bee
JW
164 unsigned int nr_pages;
165 struct page_counter *counter;
da1def55 166 struct hugetlb_cgroup *page_hcg;
3f798518 167 struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
da1def55
AK
168
169 page_hcg = hugetlb_cgroup_from_page(page);
170 /*
171 * We can have pages in active list without any cgroup
172 * ie, hugepage with less than 3 pages. We can safely
173 * ignore those pages.
174 */
175 if (!page_hcg || page_hcg != h_cg)
176 goto out;
177
d8c6546b 178 nr_pages = compound_nr(page);
da1def55
AK
179 if (!parent) {
180 parent = root_h_cgroup;
181 /* root has no limit */
71f87bee 182 page_counter_charge(&parent->hugepage[idx], nr_pages);
da1def55
AK
183 }
184 counter = &h_cg->hugepage[idx];
71f87bee
JW
185 /* Take the pages off the local counter */
186 page_counter_cancel(counter, nr_pages);
da1def55
AK
187
188 set_hugetlb_cgroup(page, parent);
189out:
190 return;
191}
192
193/*
194 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
195 * the parent cgroup.
196 */
eb95419b 197static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
2bc64a20 198{
eb95419b 199 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
da1def55
AK
200 struct hstate *h;
201 struct page *page;
9d093cb1 202 int idx = 0;
da1def55
AK
203
204 do {
da1def55
AK
205 for_each_hstate(h) {
206 spin_lock(&hugetlb_lock);
207 list_for_each_entry(page, &h->hugepage_activelist, lru)
3f798518 208 hugetlb_cgroup_move_parent(idx, h_cg, page);
da1def55
AK
209
210 spin_unlock(&hugetlb_lock);
211 idx++;
212 }
213 cond_resched();
3f798518 214 } while (hugetlb_cgroup_have_usage(h_cg));
2bc64a20
AK
215}
216
faced7e0
GS
217static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
218 enum hugetlb_memory_event event)
219{
220 atomic_long_inc(&hugetlb->events_local[idx][event]);
221 cgroup_file_notify(&hugetlb->events_local_file[idx]);
222
223 do {
224 atomic_long_inc(&hugetlb->events[idx][event]);
225 cgroup_file_notify(&hugetlb->events_file[idx]);
226 } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
227 !hugetlb_cgroup_is_root(hugetlb));
228}
229
6d76dcf4
AK
230int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
231 struct hugetlb_cgroup **ptr)
232{
233 int ret = 0;
71f87bee 234 struct page_counter *counter;
6d76dcf4 235 struct hugetlb_cgroup *h_cg = NULL;
6d76dcf4
AK
236
237 if (hugetlb_cgroup_disabled())
238 goto done;
239 /*
240 * We don't charge any cgroup if the compound page have less
241 * than 3 pages.
242 */
243 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
244 goto done;
245again:
246 rcu_read_lock();
247 h_cg = hugetlb_cgroup_from_task(current);
0362f326 248 if (!css_tryget(&h_cg->css)) {
6d76dcf4
AK
249 rcu_read_unlock();
250 goto again;
251 }
252 rcu_read_unlock();
253
faced7e0
GS
254 if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
255 &counter)) {
6071ca52 256 ret = -ENOMEM;
726b7bbe 257 hugetlb_event(h_cg, idx, HUGETLB_MAX);
faced7e0 258 }
6d76dcf4
AK
259 css_put(&h_cg->css);
260done:
261 *ptr = h_cg;
262 return ret;
263}
264
/*
 * Record @h_cg as the owning cgroup of @page after a successful charge.
 * Should be called with hugetlb_lock held.
 */
void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct page *page)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	set_hugetlb_cgroup(page, h_cg);
}
276
277/*
278 * Should be called with hugetlb_lock held
279 */
280void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
281 struct page *page)
282{
283 struct hugetlb_cgroup *h_cg;
6d76dcf4
AK
284
285 if (hugetlb_cgroup_disabled())
286 return;
7ea8574e 287 lockdep_assert_held(&hugetlb_lock);
6d76dcf4
AK
288 h_cg = hugetlb_cgroup_from_page(page);
289 if (unlikely(!h_cg))
290 return;
291 set_hugetlb_cgroup(page, NULL);
71f87bee 292 page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
6d76dcf4
AK
293 return;
294}
295
296void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
297 struct hugetlb_cgroup *h_cg)
298{
6d76dcf4
AK
299 if (hugetlb_cgroup_disabled() || !h_cg)
300 return;
301
302 if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
303 return;
304
71f87bee 305 page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
6d76dcf4
AK
306 return;
307}
308
/* Resource attributes encoded in cftype->private (see MEMFILE_ATTR). */
enum {
	RES_USAGE,		/* current usage, bytes */
	RES_RSVD_USAGE,		/* current reservation usage, bytes */
	RES_LIMIT,		/* usage limit, bytes */
	RES_RSVD_LIMIT,		/* reservation limit, bytes */
	RES_MAX_USAGE,		/* usage watermark, bytes */
	RES_RSVD_MAX_USAGE,	/* reservation watermark, bytes */
	RES_FAILCNT,		/* failed charge count */
	RES_RSVD_FAILCNT,	/* failed reservation charge count */
};
319
716f479d
TH
320static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
321 struct cftype *cft)
abb8206c 322{
71f87bee 323 struct page_counter *counter;
cdc2fcfe 324 struct page_counter *rsvd_counter;
182446d0 325 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
abb8206c 326
71f87bee 327 counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
cdc2fcfe 328 rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];
abb8206c 329
71f87bee
JW
330 switch (MEMFILE_ATTR(cft->private)) {
331 case RES_USAGE:
332 return (u64)page_counter_read(counter) * PAGE_SIZE;
cdc2fcfe
MA
333 case RES_RSVD_USAGE:
334 return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
71f87bee 335 case RES_LIMIT:
bbec2e15 336 return (u64)counter->max * PAGE_SIZE;
cdc2fcfe
MA
337 case RES_RSVD_LIMIT:
338 return (u64)rsvd_counter->max * PAGE_SIZE;
71f87bee
JW
339 case RES_MAX_USAGE:
340 return (u64)counter->watermark * PAGE_SIZE;
cdc2fcfe
MA
341 case RES_RSVD_MAX_USAGE:
342 return (u64)rsvd_counter->watermark * PAGE_SIZE;
71f87bee
JW
343 case RES_FAILCNT:
344 return counter->failcnt;
cdc2fcfe
MA
345 case RES_RSVD_FAILCNT:
346 return rsvd_counter->failcnt;
71f87bee
JW
347 default:
348 BUG();
349 }
abb8206c
AK
350}
351
faced7e0
GS
352static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
353{
354 int idx;
355 u64 val;
356 struct cftype *cft = seq_cft(seq);
357 unsigned long limit;
358 struct page_counter *counter;
359 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
360
361 idx = MEMFILE_IDX(cft->private);
362 counter = &h_cg->hugepage[idx];
363
364 limit = round_down(PAGE_COUNTER_MAX,
365 1 << huge_page_order(&hstates[idx]));
366
367 switch (MEMFILE_ATTR(cft->private)) {
cdc2fcfe
MA
368 case RES_RSVD_USAGE:
369 counter = &h_cg->rsvd_hugepage[idx];
370 /* Fall through. */
faced7e0
GS
371 case RES_USAGE:
372 val = (u64)page_counter_read(counter);
373 seq_printf(seq, "%llu\n", val * PAGE_SIZE);
374 break;
cdc2fcfe
MA
375 case RES_RSVD_LIMIT:
376 counter = &h_cg->rsvd_hugepage[idx];
377 /* Fall through. */
faced7e0
GS
378 case RES_LIMIT:
379 val = (u64)counter->max;
380 if (val == limit)
381 seq_puts(seq, "max\n");
382 else
383 seq_printf(seq, "%llu\n", val * PAGE_SIZE);
384 break;
385 default:
386 BUG();
387 }
388
389 return 0;
390}
391
/* Serialises limit updates across all hugetlb cgroup limit files. */
static DEFINE_MUTEX(hugetlb_limit_mutex);
393
451af504 394static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
faced7e0
GS
395 char *buf, size_t nbytes, loff_t off,
396 const char *max)
abb8206c 397{
71f87bee
JW
398 int ret, idx;
399 unsigned long nr_pages;
451af504 400 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
cdc2fcfe 401 bool rsvd = false;
abb8206c 402
71f87bee
JW
403 if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
404 return -EINVAL;
405
451af504 406 buf = strstrip(buf);
faced7e0 407 ret = page_counter_memparse(buf, max, &nr_pages);
71f87bee
JW
408 if (ret)
409 return ret;
410
451af504 411 idx = MEMFILE_IDX(of_cft(of)->private);
297880f4 412 nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));
abb8206c 413
71f87bee 414 switch (MEMFILE_ATTR(of_cft(of)->private)) {
cdc2fcfe
MA
415 case RES_RSVD_LIMIT:
416 rsvd = true;
417 /* Fall through. */
abb8206c 418 case RES_LIMIT:
71f87bee 419 mutex_lock(&hugetlb_limit_mutex);
cdc2fcfe
MA
420 ret = page_counter_set_max(
421 hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
422 nr_pages);
71f87bee 423 mutex_unlock(&hugetlb_limit_mutex);
abb8206c
AK
424 break;
425 default:
426 ret = -EINVAL;
427 break;
428 }
451af504 429 return ret ?: nbytes;
abb8206c
AK
430}
431
faced7e0
GS
432static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
433 char *buf, size_t nbytes, loff_t off)
434{
435 return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
436}
437
438static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
439 char *buf, size_t nbytes, loff_t off)
440{
441 return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
442}
443
6770c64e
TH
444static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
445 char *buf, size_t nbytes, loff_t off)
abb8206c 446{
71f87bee 447 int ret = 0;
cdc2fcfe 448 struct page_counter *counter, *rsvd_counter;
6770c64e 449 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
abb8206c 450
71f87bee 451 counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
cdc2fcfe 452 rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];
abb8206c 453
71f87bee 454 switch (MEMFILE_ATTR(of_cft(of)->private)) {
abb8206c 455 case RES_MAX_USAGE:
71f87bee 456 page_counter_reset_watermark(counter);
abb8206c 457 break;
cdc2fcfe
MA
458 case RES_RSVD_MAX_USAGE:
459 page_counter_reset_watermark(rsvd_counter);
460 break;
abb8206c 461 case RES_FAILCNT:
71f87bee 462 counter->failcnt = 0;
abb8206c 463 break;
cdc2fcfe
MA
464 case RES_RSVD_FAILCNT:
465 rsvd_counter->failcnt = 0;
466 break;
abb8206c
AK
467 default:
468 ret = -EINVAL;
469 break;
470 }
6770c64e 471 return ret ?: nbytes;
abb8206c
AK
472}
473
/*
 * Format @hsize (bytes) into @buf as a human readable size, using the
 * largest of GB/MB/KB that fits.  Huge page sizes are powers of two of
 * at least a few KB, so the shifted value is always a whole number.
 */
static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	const char *unit;
	unsigned long scaled;

	if (hsize >= (1UL << 30)) {
		unit = "GB";
		scaled = hsize >> 30;
	} else if (hsize >= (1UL << 20)) {
		unit = "MB";
		scaled = hsize >> 20;
	} else {
		unit = "KB";
		scaled = hsize >> 10;
	}
	snprintf(buf, size, "%lu%s", scaled, unit);
	return buf;
}
484
faced7e0
GS
485static int __hugetlb_events_show(struct seq_file *seq, bool local)
486{
487 int idx;
488 long max;
489 struct cftype *cft = seq_cft(seq);
490 struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
491
492 idx = MEMFILE_IDX(cft->private);
493
494 if (local)
495 max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
496 else
497 max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);
498
499 seq_printf(seq, "max %lu\n", max);
500
501 return 0;
502}
503
504static int hugetlb_events_show(struct seq_file *seq, void *v)
505{
506 return __hugetlb_events_show(seq, false);
507}
508
509static int hugetlb_events_local_show(struct seq_file *seq, void *v)
510{
511 return __hugetlb_events_show(seq, true);
512}
513
514static void __init __hugetlb_cgroup_file_dfl_init(int idx)
abb8206c
AK
515{
516 char buf[32];
517 struct cftype *cft;
518 struct hstate *h = &hstates[idx];
519
520 /* format the size */
cdc2fcfe 521 mem_fmt(buf, sizeof(buf), huge_page_size(h));
abb8206c
AK
522
523 /* Add the limit file */
faced7e0
GS
524 cft = &h->cgroup_files_dfl[0];
525 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
526 cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
527 cft->seq_show = hugetlb_cgroup_read_u64_max;
528 cft->write = hugetlb_cgroup_write_dfl;
529 cft->flags = CFTYPE_NOT_ON_ROOT;
530
cdc2fcfe 531 /* Add the reservation limit file */
faced7e0 532 cft = &h->cgroup_files_dfl[1];
cdc2fcfe
MA
533 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
534 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
535 cft->seq_show = hugetlb_cgroup_read_u64_max;
536 cft->write = hugetlb_cgroup_write_dfl;
537 cft->flags = CFTYPE_NOT_ON_ROOT;
538
539 /* Add the current usage file */
540 cft = &h->cgroup_files_dfl[2];
faced7e0
GS
541 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
542 cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
543 cft->seq_show = hugetlb_cgroup_read_u64_max;
544 cft->flags = CFTYPE_NOT_ON_ROOT;
545
cdc2fcfe
MA
546 /* Add the current reservation usage file */
547 cft = &h->cgroup_files_dfl[3];
548 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
549 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
550 cft->seq_show = hugetlb_cgroup_read_u64_max;
551 cft->flags = CFTYPE_NOT_ON_ROOT;
552
faced7e0 553 /* Add the events file */
cdc2fcfe 554 cft = &h->cgroup_files_dfl[4];
faced7e0
GS
555 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
556 cft->private = MEMFILE_PRIVATE(idx, 0);
557 cft->seq_show = hugetlb_events_show;
558 cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]),
559 cft->flags = CFTYPE_NOT_ON_ROOT;
560
561 /* Add the events.local file */
cdc2fcfe 562 cft = &h->cgroup_files_dfl[5];
faced7e0
GS
563 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
564 cft->private = MEMFILE_PRIVATE(idx, 0);
565 cft->seq_show = hugetlb_events_local_show;
566 cft->file_offset = offsetof(struct hugetlb_cgroup,
567 events_local_file[idx]),
568 cft->flags = CFTYPE_NOT_ON_ROOT;
569
570 /* NULL terminate the last cft */
cdc2fcfe 571 cft = &h->cgroup_files_dfl[6];
faced7e0
GS
572 memset(cft, 0, sizeof(*cft));
573
574 WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
575 h->cgroup_files_dfl));
576}
577
578static void __init __hugetlb_cgroup_file_legacy_init(int idx)
579{
580 char buf[32];
581 struct cftype *cft;
582 struct hstate *h = &hstates[idx];
583
584 /* format the size */
cdc2fcfe 585 mem_fmt(buf, sizeof(buf), huge_page_size(h));
faced7e0
GS
586
587 /* Add the limit file */
588 cft = &h->cgroup_files_legacy[0];
abb8206c
AK
589 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
590 cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
716f479d 591 cft->read_u64 = hugetlb_cgroup_read_u64;
faced7e0 592 cft->write = hugetlb_cgroup_write_legacy;
abb8206c 593
cdc2fcfe 594 /* Add the reservation limit file */
faced7e0 595 cft = &h->cgroup_files_legacy[1];
cdc2fcfe
MA
596 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
597 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
598 cft->read_u64 = hugetlb_cgroup_read_u64;
599 cft->write = hugetlb_cgroup_write_legacy;
600
601 /* Add the usage file */
602 cft = &h->cgroup_files_legacy[2];
abb8206c
AK
603 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
604 cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
716f479d 605 cft->read_u64 = hugetlb_cgroup_read_u64;
abb8206c 606
cdc2fcfe
MA
607 /* Add the reservation usage file */
608 cft = &h->cgroup_files_legacy[3];
609 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
610 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
611 cft->read_u64 = hugetlb_cgroup_read_u64;
612
abb8206c 613 /* Add the MAX usage file */
cdc2fcfe 614 cft = &h->cgroup_files_legacy[4];
abb8206c
AK
615 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
616 cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
6770c64e 617 cft->write = hugetlb_cgroup_reset;
716f479d 618 cft->read_u64 = hugetlb_cgroup_read_u64;
abb8206c 619
cdc2fcfe
MA
620 /* Add the MAX reservation usage file */
621 cft = &h->cgroup_files_legacy[5];
622 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
623 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
624 cft->write = hugetlb_cgroup_reset;
625 cft->read_u64 = hugetlb_cgroup_read_u64;
626
abb8206c 627 /* Add the failcntfile */
cdc2fcfe 628 cft = &h->cgroup_files_legacy[6];
abb8206c 629 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
cdc2fcfe
MA
630 cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
631 cft->write = hugetlb_cgroup_reset;
632 cft->read_u64 = hugetlb_cgroup_read_u64;
633
634 /* Add the reservation failcntfile */
635 cft = &h->cgroup_files_legacy[7];
636 snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
637 cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
6770c64e 638 cft->write = hugetlb_cgroup_reset;
716f479d 639 cft->read_u64 = hugetlb_cgroup_read_u64;
abb8206c
AK
640
641 /* NULL terminate the last cft */
cdc2fcfe 642 cft = &h->cgroup_files_legacy[8];
abb8206c
AK
643 memset(cft, 0, sizeof(*cft));
644
2cf669a5 645 WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
faced7e0
GS
646 h->cgroup_files_legacy));
647}
648
649static void __init __hugetlb_cgroup_file_init(int idx)
650{
651 __hugetlb_cgroup_file_dfl_init(idx);
652 __hugetlb_cgroup_file_legacy_init(idx);
7179e7bf
JW
653}
654
655void __init hugetlb_cgroup_file_init(void)
656{
657 struct hstate *h;
658
659 for_each_hstate(h) {
660 /*
661 * Add cgroup control files only if the huge page consists
662 * of more than two normal pages. This is because we use
1d798ca3 663 * page[2].private for storing cgroup details.
7179e7bf
JW
664 */
665 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
666 __hugetlb_cgroup_file_init(hstate_index(h));
667 }
abb8206c
AK
668}
669
75754681
AK
670/*
671 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
672 * when we migrate hugepages
673 */
8e6ac7fa
AK
674void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
675{
676 struct hugetlb_cgroup *h_cg;
94ae8ba7 677 struct hstate *h = page_hstate(oldhpage);
8e6ac7fa
AK
678
679 if (hugetlb_cgroup_disabled())
680 return;
681
309381fe 682 VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
8e6ac7fa
AK
683 spin_lock(&hugetlb_lock);
684 h_cg = hugetlb_cgroup_from_page(oldhpage);
685 set_hugetlb_cgroup(oldhpage, NULL);
8e6ac7fa
AK
686
687 /* move the h_cg details to new cgroup */
688 set_hugetlb_cgroup(newhpage, h_cg);
94ae8ba7 689 list_move(&newhpage->lru, &h->hugepage_activelist);
8e6ac7fa 690 spin_unlock(&hugetlb_lock);
8e6ac7fa
AK
691 return;
692}
693
faced7e0
GS
694static struct cftype hugetlb_files[] = {
695 {} /* terminate */
696};
697
073219e9 698struct cgroup_subsys hugetlb_cgrp_subsys = {
92fb9748
TH
699 .css_alloc = hugetlb_cgroup_css_alloc,
700 .css_offline = hugetlb_cgroup_css_offline,
701 .css_free = hugetlb_cgroup_css_free,
faced7e0
GS
702 .dfl_cftypes = hugetlb_files,
703 .legacy_cftypes = hugetlb_files,
2bc64a20 704};