mm/hugetlb_cgroup.c

   1 /*
   2  *
   3  * Copyright IBM Corporation, 2012
   4  * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
   5  *
   6  * Cgroup v2
   7  * Copyright (C) 2019 Red Hat, Inc.
   8  * Author: Giuseppe Scrivano <gscrivan@redhat.com>
   9  *
  10  * This program is free software; you can redistribute it and/or modify it
  11  * under the terms of version 2.1 of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation.
  13  *
  14  * This program is distributed in the hope that it would be useful, but
  15  * WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  17  *
  18  */
  19
  20 #include <linux/cgroup.h>
  21 #include <linux/page_counter.h>
  22 #include <linux/slab.h>
  23 #include <linux/hugetlb.h>
  24 #include <linux/hugetlb_cgroup.h>
  25
  26 #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
  27 #define MEMFILE_IDX(val)        (((val) >> 16) & 0xffff)
  28 #define MEMFILE_ATTR(val)       ((val) & 0xffff)
  29
  30 #define hugetlb_cgroup_from_counter(counter, idx)                   \
  31         container_of(counter, struct hugetlb_cgroup, hugepage[idx])
  32
  33 static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
  34
  35 static inline struct page_counter *
  36 __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
  37                                      bool rsvd)
  38 {
  39         if (rsvd)
  40                 return &h_cg->rsvd_hugepage[idx];
  41         return &h_cg->hugepage[idx];
  42 }
  43
  44 static inline struct page_counter *
  45 hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
  46 {
  47         return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
  48 }
  49
  50 static inline struct page_counter *
  51 hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
  52 {
  53         return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
  54 }
  55
  56 static inline
  57 struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
  58 {
  59         return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
  60 }
  61
  62 static inline
  63 struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
  64 {
  65         return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
  66 }
  67
  68 static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
  69 {
  70         return (h_cg == root_h_cgroup);
  71 }
  72
  73 static inline struct hugetlb_cgroup *
  74 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
  75 {
  76         return hugetlb_cgroup_from_css(h_cg->css.parent);
  77 }
  78
  79 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
  80 {
  81         int idx;
  82
  83         for (idx = 0; idx < hugetlb_max_hstate; idx++) {
  84                 if (page_counter_read(
  85                             hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) ||
  86                     page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd(
  87                             h_cg, idx))) {
  88                         return true;
  89                 }
  90         }
  91         return false;
  92 }
  93
  94 static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
  95                                 struct hugetlb_cgroup *parent_h_cgroup)
  96 {
  97         int idx;
  98
  99         for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
 100                 struct page_counter *fault_parent = NULL;
 101                 struct page_counter *rsvd_parent = NULL;
 102                 unsigned long limit;
 103                 int ret;
 104
 105                 if (parent_h_cgroup) {
 106                         fault_parent = hugetlb_cgroup_counter_from_cgroup(
 107                                 parent_h_cgroup, idx);
 108                         rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
 109                                 parent_h_cgroup, idx);
 110                 }
 111                 page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
 112                                                                      idx),
 113                                   fault_parent);
 114                 page_counter_init(
 115                         hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
 116                         rsvd_parent);
 117
 118                 limit = round_down(PAGE_COUNTER_MAX,
 119                                    1 << huge_page_order(&hstates[idx]));
 120
 121                 ret = page_counter_set_max(
 122                         hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
 123                         limit);
 124                 VM_BUG_ON(ret);
 125                 ret = page_counter_set_max(
 126                         hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
 127                         limit);
 128                 VM_BUG_ON(ret);
 129         }
 130 }
 131
 132 static struct cgroup_subsys_state *
 133 hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 134 {
 135         struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
 136         struct hugetlb_cgroup *h_cgroup;
 137
 138         h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
 139         if (!h_cgroup)
 140                 return ERR_PTR(-ENOMEM);
 141
 142         if (!parent_h_cgroup)
 143                 root_h_cgroup = h_cgroup;
 144
 145         hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
 146         return &h_cgroup->css;
 147 }
 148
 149 static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
 150 {
 151         struct hugetlb_cgroup *h_cgroup;
 152
 153         h_cgroup = hugetlb_cgroup_from_css(css);
 154         kfree(h_cgroup);
 155 }
 156
 157 /*
 158  * Should be called with hugetlb_lock held.
 159  * Since we are holding hugetlb_lock, pages cannot get moved from
 160  * active list or uncharged from the cgroup, So no need to get
 161  * page reference and test for page active here. This function
 162  * cannot fail.
 163  */
 164 static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
 165                                        struct page *page)
 166 {
 167         unsigned int nr_pages;
 168         struct page_counter *counter;
 169         struct hugetlb_cgroup *page_hcg;
 170         struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
 171
 172         page_hcg = hugetlb_cgroup_from_page(page);
 173         /*
 174          * We can have pages in active list without any cgroup
 175          * ie, hugepage with less than 3 pages. We can safely
 176          * ignore those pages.
 177          */
 178         if (!page_hcg || page_hcg != h_cg)
 179                 goto out;
 180
 181         nr_pages = compound_nr(page);
 182         if (!parent) {
 183                 parent = root_h_cgroup;
 184                 /* root has no limit */
 185                 page_counter_charge(&parent->hugepage[idx], nr_pages);
 186         }
 187         counter = &h_cg->hugepage[idx];
 188         /* Take the pages off the local counter */
 189         page_counter_cancel(counter, nr_pages);
 190
 191         set_hugetlb_cgroup(page, parent);
 192 out:
 193         return;
 194 }
 195
 196 /*
 197  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 198  * the parent cgroup.
 199  */
 200 static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
 201 {
 202         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 203         struct hstate *h;
 204         struct page *page;
 205         int idx = 0;
 206
 207         do {
 208                 for_each_hstate(h) {
 209                         spin_lock(&hugetlb_lock);
 210                         list_for_each_entry(page, &h->hugepage_activelist, lru)
 211                                 hugetlb_cgroup_move_parent(idx, h_cg, page);
 212
 213                         spin_unlock(&hugetlb_lock);
 214                         idx++;
 215                 }
 216                 cond_resched();
 217         } while (hugetlb_cgroup_have_usage(h_cg));
 218 }
 219
 220 static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
 221                                  enum hugetlb_memory_event event)
 222 {
 223         atomic_long_inc(&hugetlb->events_local[idx][event]);
 224         cgroup_file_notify(&hugetlb->events_local_file[idx]);
 225
 226         do {
 227                 atomic_long_inc(&hugetlb->events[idx][event]);
 228                 cgroup_file_notify(&hugetlb->events_file[idx]);
 229         } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
 230                  !hugetlb_cgroup_is_root(hugetlb));
 231 }
 232
 233 static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 234                                           struct hugetlb_cgroup **ptr,
 235                                           bool rsvd)
 236 {
 237         int ret = 0;
 238         struct page_counter *counter;
 239         struct hugetlb_cgroup *h_cg = NULL;
 240
 241         if (hugetlb_cgroup_disabled())
 242                 goto done;
 243         /*
 244          * We don't charge any cgroup if the compound page have less
 245          * than 3 pages.
 246          */
 247         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
 248                 goto done;
 249 again:
 250         rcu_read_lock();
 251         h_cg = hugetlb_cgroup_from_task(current);
 252         if (!css_tryget(&h_cg->css)) {
 253                 rcu_read_unlock();
 254                 goto again;
 255         }
 256         rcu_read_unlock();
 257
 258         if (!page_counter_try_charge(
 259                     __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
 260                     nr_pages, &counter)) {
 261                 ret = -ENOMEM;
 262                 hugetlb_event(h_cg, idx, HUGETLB_MAX);
 263                 css_put(&h_cg->css);
 264                 goto done;
 265         }
 266         /* Reservations take a reference to the css because they do not get
 267          * reparented.
 268          */
 269         if (!rsvd)
 270                 css_put(&h_cg->css);
 271 done:
 272         *ptr = h_cg;
 273         return ret;
 274 }
 275
 276 int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 277                                  struct hugetlb_cgroup **ptr)
 278 {
 279         return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
 280 }
 281
 282 int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
 283                                       struct hugetlb_cgroup **ptr)
 284 {
 285         return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
 286 }
 287
 288 /* Should be called with hugetlb_lock held */
 289 static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 290                                            struct hugetlb_cgroup *h_cg,
 291                                            struct page *page, bool rsvd)
 292 {
 293         if (hugetlb_cgroup_disabled() || !h_cg)
 294                 return;
 295
 296         __set_hugetlb_cgroup(page, h_cg, rsvd);
 297         return;
 298 }
 299
 300 void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 301                                   struct hugetlb_cgroup *h_cg,
 302                                   struct page *page)
 303 {
 304         __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
 305 }
 306
 307 void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
 308                                        struct hugetlb_cgroup *h_cg,
 309                                        struct page *page)
 310 {
 311         __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
 312 }
 313
 314 /*
 315  * Should be called with hugetlb_lock held
 316  */
 317 static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 318                                            struct page *page, bool rsvd)
 319 {
 320         struct hugetlb_cgroup *h_cg;
 321
 322         if (hugetlb_cgroup_disabled())
 323                 return;
 324         lockdep_assert_held(&hugetlb_lock);
 325         h_cg = __hugetlb_cgroup_from_page(page, rsvd);
 326         if (unlikely(!h_cg))
 327                 return;
 328         __set_hugetlb_cgroup(page, NULL, rsvd);
 329
 330         page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 331                                                                    rsvd),
 332                               nr_pages);
 333
 334         if (rsvd)
 335                 css_put(&h_cg->css);
 336
 337         return;
 338 }
 339
 340 void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 341                                   struct page *page)
 342 {
 343         __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
 344 }
 345
 346 void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
 347                                        struct page *page)
 348 {
 349         __hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
 350 }
 351
 352 static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 353                                              struct hugetlb_cgroup *h_cg,
 354                                              bool rsvd)
 355 {
 356         if (hugetlb_cgroup_disabled() || !h_cg)
 357                 return;
 358
 359         if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
 360                 return;
 361
 362         page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 363                                                                    rsvd),
 364                               nr_pages);
 365
 366         if (rsvd)
 367                 css_put(&h_cg->css);
 368 }
 369
 370 void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 371                                     struct hugetlb_cgroup *h_cg)
 372 {
 373         __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
 374 }
 375
 376 void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
 377                                          struct hugetlb_cgroup *h_cg)
 378 {
 379         __hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
 380 }
 381
 382 void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
 383                                      unsigned long end)
 384 {
 385         if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
 386             !resv->css)
 387                 return;
 388
 389         page_counter_uncharge(resv->reservation_counter,
 390                               (end - start) * resv->pages_per_hpage);
 391         css_put(resv->css);
 392 }
 393
 394 void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
 395                                          struct file_region *rg,
 396                                          unsigned long nr_pages)
 397 {
 398         if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
 399                 return;
 400
 401         if (rg->reservation_counter && resv->pages_per_hpage && nr_pages > 0 &&
 402             !resv->reservation_counter) {
 403                 page_counter_uncharge(rg->reservation_counter,
 404                                       nr_pages * resv->pages_per_hpage);
 405                 css_put(rg->css);
 406         }
 407 }
 408
 409 enum {
 410         RES_USAGE,
 411         RES_RSVD_USAGE,
 412         RES_LIMIT,
 413         RES_RSVD_LIMIT,
 414         RES_MAX_USAGE,
 415         RES_RSVD_MAX_USAGE,
 416         RES_FAILCNT,
 417         RES_RSVD_FAILCNT,
 418 };
 419
 420 static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
 421                                    struct cftype *cft)
 422 {
 423         struct page_counter *counter;
 424         struct page_counter *rsvd_counter;
 425         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 426
 427         counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
 428         rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];
 429
 430         switch (MEMFILE_ATTR(cft->private)) {
 431         case RES_USAGE:
 432                 return (u64)page_counter_read(counter) * PAGE_SIZE;
 433         case RES_RSVD_USAGE:
 434                 return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
 435         case RES_LIMIT:
 436                 return (u64)counter->max * PAGE_SIZE;
 437         case RES_RSVD_LIMIT:
 438                 return (u64)rsvd_counter->max * PAGE_SIZE;
 439         case RES_MAX_USAGE:
 440                 return (u64)counter->watermark * PAGE_SIZE;
 441         case RES_RSVD_MAX_USAGE:
 442                 return (u64)rsvd_counter->watermark * PAGE_SIZE;
 443         case RES_FAILCNT:
 444                 return counter->failcnt;
 445         case RES_RSVD_FAILCNT:
 446                 return rsvd_counter->failcnt;
 447         default:
 448                 BUG();
 449         }
 450 }
 451
 452 static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
 453 {
 454         int idx;
 455         u64 val;
 456         struct cftype *cft = seq_cft(seq);
 457         unsigned long limit;
 458         struct page_counter *counter;
 459         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
 460
 461         idx = MEMFILE_IDX(cft->private);
 462         counter = &h_cg->hugepage[idx];
 463
 464         limit = round_down(PAGE_COUNTER_MAX,
 465                            1 << huge_page_order(&hstates[idx]));
 466
 467         switch (MEMFILE_ATTR(cft->private)) {
 468         case RES_RSVD_USAGE:
 469                 counter = &h_cg->rsvd_hugepage[idx];
 470                 fallthrough;
 471         case RES_USAGE:
 472                 val = (u64)page_counter_read(counter);
 473                 seq_printf(seq, "%llu\n", val * PAGE_SIZE);
 474                 break;
 475         case RES_RSVD_LIMIT:
 476                 counter = &h_cg->rsvd_hugepage[idx];
 477                 fallthrough;
 478         case RES_LIMIT:
 479                 val = (u64)counter->max;
 480                 if (val == limit)
 481                         seq_puts(seq, "max\n");
 482                 else
 483                         seq_printf(seq, "%llu\n", val * PAGE_SIZE);
 484                 break;
 485         default:
 486                 BUG();
 487         }
 488
 489         return 0;
 490 }
 491
 492 static DEFINE_MUTEX(hugetlb_limit_mutex);
 493
 494 static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
 495                                     char *buf, size_t nbytes, loff_t off,
 496                                     const char *max)
 497 {
 498         int ret, idx;
 499         unsigned long nr_pages;
 500         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 501         bool rsvd = false;
 502
 503         if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
 504                 return -EINVAL;
 505
 506         buf = strstrip(buf);
 507         ret = page_counter_memparse(buf, max, &nr_pages);
 508         if (ret)
 509                 return ret;
 510
 511         idx = MEMFILE_IDX(of_cft(of)->private);
 512         nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));
 513
 514         switch (MEMFILE_ATTR(of_cft(of)->private)) {
 515         case RES_RSVD_LIMIT:
 516                 rsvd = true;
 517                 fallthrough;
 518         case RES_LIMIT:
 519                 mutex_lock(&hugetlb_limit_mutex);
 520                 ret = page_counter_set_max(
 521                         __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
 522                         nr_pages);
 523                 mutex_unlock(&hugetlb_limit_mutex);
 524                 break;
 525         default:
 526                 ret = -EINVAL;
 527                 break;
 528         }
 529         return ret ?: nbytes;
 530 }
 531
 532 static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
 533                                            char *buf, size_t nbytes, loff_t off)
 534 {
 535         return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
 536 }
 537
 538 static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
 539                                         char *buf, size_t nbytes, loff_t off)
 540 {
 541         return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
 542 }
 543
 544 static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
 545                                     char *buf, size_t nbytes, loff_t off)
 546 {
 547         int ret = 0;
 548         struct page_counter *counter, *rsvd_counter;
 549         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 550
 551         counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
 552         rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];
 553
 554         switch (MEMFILE_ATTR(of_cft(of)->private)) {
 555         case RES_MAX_USAGE:
 556                 page_counter_reset_watermark(counter);
 557                 break;
 558         case RES_RSVD_MAX_USAGE:
 559                 page_counter_reset_watermark(rsvd_counter);
 560                 break;
 561         case RES_FAILCNT:
 562                 counter->failcnt = 0;
 563                 break;
 564         case RES_RSVD_FAILCNT:
 565                 rsvd_counter->failcnt = 0;
 566                 break;
 567         default:
 568                 ret = -EINVAL;
 569                 break;
 570         }
 571         return ret ?: nbytes;
 572 }
 573
 574 static char *mem_fmt(char *buf, int size, unsigned long hsize)
 575 {
 576         if (hsize >= (1UL << 30))
 577                 snprintf(buf, size, "%luGB", hsize >> 30);
 578         else if (hsize >= (1UL << 20))
 579                 snprintf(buf, size, "%luMB", hsize >> 20);
 580         else
 581                 snprintf(buf, size, "%luKB", hsize >> 10);
 582         return buf;
 583 }
 584
 585 static int __hugetlb_events_show(struct seq_file *seq, bool local)
 586 {
 587         int idx;
 588         long max;
 589         struct cftype *cft = seq_cft(seq);
 590         struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
 591
 592         idx = MEMFILE_IDX(cft->private);
 593
 594         if (local)
 595                 max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
 596         else
 597                 max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);
 598
 599         seq_printf(seq, "max %lu\n", max);
 600
 601         return 0;
 602 }
 603
 604 static int hugetlb_events_show(struct seq_file *seq, void *v)
 605 {
 606         return __hugetlb_events_show(seq, false);
 607 }
 608
 609 static int hugetlb_events_local_show(struct seq_file *seq, void *v)
 610 {
 611         return __hugetlb_events_show(seq, true);
 612 }
 613
 614 static void __init __hugetlb_cgroup_file_dfl_init(int idx)
 615 {
 616         char buf[32];
 617         struct cftype *cft;
 618         struct hstate *h = &hstates[idx];
 619
 620         /* format the size */
 621         mem_fmt(buf, sizeof(buf), huge_page_size(h));
 622
 623         /* Add the limit file */
 624         cft = &h->cgroup_files_dfl[0];
 625         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
 626         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 627         cft->seq_show = hugetlb_cgroup_read_u64_max;
 628         cft->write = hugetlb_cgroup_write_dfl;
 629         cft->flags = CFTYPE_NOT_ON_ROOT;
 630
 631         /* Add the reservation limit file */
 632         cft = &h->cgroup_files_dfl[1];
 633         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
 634         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
 635         cft->seq_show = hugetlb_cgroup_read_u64_max;
 636         cft->write = hugetlb_cgroup_write_dfl;
 637         cft->flags = CFTYPE_NOT_ON_ROOT;
 638
 639         /* Add the current usage file */
 640         cft = &h->cgroup_files_dfl[2];
 641         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
 642         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
 643         cft->seq_show = hugetlb_cgroup_read_u64_max;
 644         cft->flags = CFTYPE_NOT_ON_ROOT;
 645
 646         /* Add the current reservation usage file */
 647         cft = &h->cgroup_files_dfl[3];
 648         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
 649         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
 650         cft->seq_show = hugetlb_cgroup_read_u64_max;
 651         cft->flags = CFTYPE_NOT_ON_ROOT;
 652
 653         /* Add the events file */
 654         cft = &h->cgroup_files_dfl[4];
 655         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
 656         cft->private = MEMFILE_PRIVATE(idx, 0);
 657         cft->seq_show = hugetlb_events_show;
 658         cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
 659         cft->flags = CFTYPE_NOT_ON_ROOT;
 660
 661         /* Add the events.local file */
 662         cft = &h->cgroup_files_dfl[5];
 663         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
 664         cft->private = MEMFILE_PRIVATE(idx, 0);
 665         cft->seq_show = hugetlb_events_local_show;
 666         cft->file_offset = offsetof(struct hugetlb_cgroup,
 667                                     events_local_file[idx]);
 668         cft->flags = CFTYPE_NOT_ON_ROOT;
 669
 670         /* NULL terminate the last cft */
 671         cft = &h->cgroup_files_dfl[6];
 672         memset(cft, 0, sizeof(*cft));
 673
 674         WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
 675                                        h->cgroup_files_dfl));
 676 }
 677
 678 static void __init __hugetlb_cgroup_file_legacy_init(int idx)
 679 {
 680         char buf[32];
 681         struct cftype *cft;
 682         struct hstate *h = &hstates[idx];
 683
 684         /* format the size */
 685         mem_fmt(buf, sizeof(buf), huge_page_size(h));
 686
 687         /* Add the limit file */
 688         cft = &h->cgroup_files_legacy[0];
 689         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
 690         cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 691         cft->read_u64 = hugetlb_cgroup_read_u64;
 692         cft->write = hugetlb_cgroup_write_legacy;
 693
 694         /* Add the reservation limit file */
 695         cft = &h->cgroup_files_legacy[1];
 696         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
 697         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
 698         cft->read_u64 = hugetlb_cgroup_read_u64;
 699         cft->write = hugetlb_cgroup_write_legacy;
 700
 701         /* Add the usage file */
 702         cft = &h->cgroup_files_legacy[2];
 703         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
 704         cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
 705         cft->read_u64 = hugetlb_cgroup_read_u64;
 706
 707         /* Add the reservation usage file */
 708         cft = &h->cgroup_files_legacy[3];
 709         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
 710         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
 711         cft->read_u64 = hugetlb_cgroup_read_u64;
 712
 713         /* Add the MAX usage file */
 714         cft = &h->cgroup_files_legacy[4];
 715         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
 716         cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
 717         cft->write = hugetlb_cgroup_reset;
 718         cft->read_u64 = hugetlb_cgroup_read_u64;
 719
 720         /* Add the MAX reservation usage file */
 721         cft = &h->cgroup_files_legacy[5];
 722         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
 723         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
 724         cft->write = hugetlb_cgroup_reset;
 725         cft->read_u64 = hugetlb_cgroup_read_u64;
 726
 727         /* Add the failcntfile */
 728         cft = &h->cgroup_files_legacy[6];
 729         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
 730         cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
 731         cft->write = hugetlb_cgroup_reset;
 732         cft->read_u64 = hugetlb_cgroup_read_u64;
 733
 734         /* Add the reservation failcntfile */
 735         cft = &h->cgroup_files_legacy[7];
 736         snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
 737         cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
 738         cft->write = hugetlb_cgroup_reset;
 739         cft->read_u64 = hugetlb_cgroup_read_u64;
 740
 741         /* NULL terminate the last cft */
 742         cft = &h->cgroup_files_legacy[8];
 743         memset(cft, 0, sizeof(*cft));
 744
 745         WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
 746                                           h->cgroup_files_legacy));
 747 }
 748
 749 static void __init __hugetlb_cgroup_file_init(int idx)
 750 {
 751         __hugetlb_cgroup_file_dfl_init(idx);
 752         __hugetlb_cgroup_file_legacy_init(idx);
 753 }
 754
 755 void __init hugetlb_cgroup_file_init(void)
 756 {
 757         struct hstate *h;
 758
 759         for_each_hstate(h) {
 760                 /*
 761                  * Add cgroup control files only if the huge page consists
 762                  * of more than two normal pages. This is because we use
 763                  * page[2].private for storing cgroup details.
 764                  */
 765                 if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
 766                         __hugetlb_cgroup_file_init(hstate_index(h));
 767         }
 768 }
 769
 770 /*
 771  * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 772  * when we migrate hugepages
 773  */
 774 void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 775 {
 776         struct hugetlb_cgroup *h_cg;
 777         struct hugetlb_cgroup *h_cg_rsvd;
 778         struct hstate *h = page_hstate(oldhpage);
 779
 780         if (hugetlb_cgroup_disabled())
 781                 return;
 782
 783         VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
 784         spin_lock(&hugetlb_lock);
 785         h_cg = hugetlb_cgroup_from_page(oldhpage);
 786         h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
 787         set_hugetlb_cgroup(oldhpage, NULL);
 788         set_hugetlb_cgroup_rsvd(oldhpage, NULL);
 789
 790         /* move the h_cg details to new cgroup */
 791         set_hugetlb_cgroup(newhpage, h_cg);
 792         set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
 793         list_move(&newhpage->lru, &h->hugepage_activelist);
 794         spin_unlock(&hugetlb_lock);
 795         return;
 796 }
 797
 798 static struct cftype hugetlb_files[] = {
 799         {} /* terminate */
 800 };
 801
 802 struct cgroup_subsys hugetlb_cgrp_subsys = {
 803         .css_alloc      = hugetlb_cgroup_css_alloc,
 804         .css_offline    = hugetlb_cgroup_css_offline,
 805         .css_free       = hugetlb_cgroup_css_free,
 806         .dfl_cftypes    = hugetlb_files,
 807         .legacy_cftypes = hugetlb_files,
 808 };