/*
 * User interface for Resource Allocation in Resource Director Technology (RDT)
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * More information about RDT can be found in the Intel(R) x86 Architecture
 * Software Developer Manual.
 */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/cpu.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>

#include <uapi/linux/magic.h>

#include <asm/intel_rdt.h>
#include <asm/intel_rdt_common.h>
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);

/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;
/*
 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
 * we can keep a bitmap of free CLOSIDs in a single integer.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set "current->closid" to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 */
static int closid_free_map;
static void closid_init(void)
{
	struct rdt_resource *r;
	int rdt_min_closid = 32;

	/* Compute rdt_min_closid across all resources */
	for_each_enabled_rdt_resource(r)
		rdt_min_closid = min(rdt_min_closid, r->num_closid);

	closid_free_map = BIT_MASK(rdt_min_closid) - 1;

	/* CLOSID 0 is always reserved for the default group */
	closid_free_map &= ~1;
}
int closid_alloc(void)
{
	int closid = ffs(closid_free_map);

	if (closid == 0)
		return -ENOSPC;
	closid--;
	closid_free_map &= ~(1 << closid);

	return closid;
}

static void closid_free(int closid)
{
	closid_free_map |= 1 << closid;
}
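/*
 * Worked example (illustrative note, not in the original source): with
 * rdt_min_closid == 4, closid_init() leaves closid_free_map == 0b1110,
 * since bit 0 stays reserved for the default group. Successive
 * closid_alloc() calls then return 1, 2 and 3 in ffs() order, and a later
 * closid_free(2) sets bit 2 again so CLOSID 2 can be reused by the next
 * group creation.
 */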
/* set uid and gid of rdtgroup dirs and files to that of the creator */
static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
{
	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
				.ia_uid = current_fsuid(),
				.ia_gid = current_fsgid(), };

	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
		return 0;

	return kernfs_setattr(kn, &iattr);
}
static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
{
	struct kernfs_node *kn;
	int ret;

	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
				  0, rft->kf_ops, rft, NULL, NULL);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	ret = rdtgroup_kn_set_ugid(kn);
	if (ret) {
		kernfs_remove(kn);
		return ret;
	}

	return 0;
}
static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts,
			      int len)
{
	struct rftype *rft;
	int ret;

	lockdep_assert_held(&rdtgroup_mutex);

	for (rft = rfts; rft < rfts + len; rft++) {
		ret = rdtgroup_add_file(kn, rft);
		if (ret)
			goto error;
	}

	return 0;
error:
	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
	while (--rft >= rfts)
		kernfs_remove_by_name(kn, rft->name);
	return ret;
}
static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct kernfs_open_file *of = m->private;
	struct rftype *rft = of->kn->priv;

	if (rft->seq_show)
		return rft->seq_show(of, m, arg);

	return 0;
}
static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
				   size_t nbytes, loff_t off)
{
	struct rftype *rft = of->kn->priv;

	if (rft->write)
		return rft->write(of, buf, nbytes, off);

	return -EINVAL;
}
static struct kernfs_ops rdtgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= rdtgroup_file_write,
	.seq_show		= rdtgroup_seqfile_show,
};
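/*
 * Note (added for clarity): a single set of kernfs_ops serves every file.
 * The struct rftype for a file is stashed in kn->priv when the file is
 * created, so the generic write/seq_show handlers above can dispatch to
 * the per-file callbacks without needing a separate kernfs_ops per file.
 */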
static int rdtgroup_cpus_show(struct kernfs_open_file *of,
			      struct seq_file *s, void *v)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (rdtgrp)
		seq_printf(s, "%*pb\n", cpumask_pr_args(&rdtgrp->cpu_mask));
	else
		ret = -ENOENT;
	rdtgroup_kn_unlock(of->kn);

	return ret;
}
/*
 * This is safe against intel_rdt_sched_in() called from __switch_to()
 * because __switch_to() is executed with interrupts disabled. A local call
 * from rdt_update_closid() is protected against __switch_to() because
 * preemption is disabled.
 */
static void rdt_update_cpu_closid(void *closid)
{
	if (closid)
		this_cpu_write(cpu_closid, *(int *)closid);
	/*
	 * We cannot unconditionally write the MSR because the current
	 * executing task might have its own closid selected. Just reuse
	 * the context switch code.
	 */
	intel_rdt_sched_in();
}
/*
 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask,
 *
 * Per task closids must have been set up before calling this function.
 *
 * The per cpu closids are updated with the smp function call, when @closid
 * is not NULL. If @closid is NULL then all affected percpu closids must
 * have been set up before calling this function.
 */
static void
rdt_update_closid(const struct cpumask *cpu_mask, int *closid)
{
	int cpu = get_cpu();

	if (cpumask_test_cpu(cpu, cpu_mask))
		rdt_update_cpu_closid(closid);
	smp_call_function_many(cpu_mask, rdt_update_cpu_closid, closid, 1);
	put_cpu();
}
static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	cpumask_var_t tmpmask, newmask;
	struct rdtgroup *rdtgrp, *r;
	int ret;

	if (!buf)
		return -EINVAL;

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;
	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
		free_cpumask_var(tmpmask);
		return -ENOMEM;
	}

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = cpumask_parse(buf, newmask);
	if (ret)
		goto unlock;

	/* check that user didn't specify any offline cpus */
	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
	if (cpumask_weight(tmpmask)) {
		ret = -EINVAL;
		goto unlock;
	}

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (cpumask_weight(tmpmask)) {
		/* Can't drop from default group */
		if (rdtgrp == &rdtgroup_default) {
			ret = -EINVAL;
			goto unlock;
		}
		/* Give any dropped cpus to rdtgroup_default */
		cpumask_or(&rdtgroup_default.cpu_mask,
			   &rdtgroup_default.cpu_mask, tmpmask);
		rdt_update_closid(tmpmask, &rdtgroup_default.closid);
	}

	/*
	 * If we added cpus, remove them from previous group that owned them
	 * and update per-cpu closid
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (cpumask_weight(tmpmask)) {
		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
			if (r == rdtgrp)
				continue;
			cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask);
		}
		rdt_update_closid(tmpmask, &rdtgrp->closid);
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

unlock:
	rdtgroup_kn_unlock(of->kn);
	free_cpumask_var(tmpmask);
	free_cpumask_var(newmask);

	return ret ?: nbytes;
}
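/*
 * Usage example (illustrative note): CPUs 0-3 are assigned to a group by
 * writing a hex mask to its "cpus" file, e.g.
 *	echo f > /sys/fs/resctrl/p1/cpus
 * CPUs are pulled from whichever group previously owned them, and CPUs
 * dropped from a group fall back to the default group.
 */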
struct task_move_callback {
	struct callback_head	work;
	struct rdtgroup		*rdtgrp;
};

static void move_myself(struct callback_head *head)
{
	struct task_move_callback *callback;
	struct rdtgroup *rdtgrp;

	callback = container_of(head, struct task_move_callback, work);
	rdtgrp = callback->rdtgrp;

	/*
	 * If resource group was deleted before this task work callback
	 * was invoked, then assign the task to root group and free the
	 * resource group.
	 */
	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
	    (rdtgrp->flags & RDT_DELETED)) {
		current->closid = 0;
		kfree(rdtgrp);
	}

	preempt_disable();
	/* update PQR_ASSOC MSR to make resource group go into effect */
	intel_rdt_sched_in();
	preempt_enable();

	kfree(callback);
}
static int __rdtgroup_move_task(struct task_struct *tsk,
				struct rdtgroup *rdtgrp)
{
	struct task_move_callback *callback;
	int ret;

	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
	if (!callback)
		return -ENOMEM;
	callback->work.func = move_myself;
	callback->rdtgrp = rdtgrp;

	/*
	 * Take a refcount, so rdtgrp cannot be freed before the
	 * callback has been invoked.
	 */
	atomic_inc(&rdtgrp->waitcount);
	ret = task_work_add(tsk, &callback->work, true);
	if (ret) {
		/*
		 * Task is exiting. Drop the refcount and free the callback.
		 * No need to check the refcount as the group cannot be
		 * deleted before the write function unlocks rdtgroup_mutex.
		 */
		atomic_dec(&rdtgrp->waitcount);
		kfree(callback);
	} else {
		tsk->closid = rdtgrp->closid;
	}
	return ret;
}
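/*
 * Note (added for clarity): the MSR update is done from a task_work
 * callback rather than directly because PQR_ASSOC is a per-CPU MSR.
 * tsk->closid is set here immediately, but move_myself() runs in the
 * context of the moved task as it next returns to user space, which
 * guarantees the MSR is written on the CPU the task actually runs on.
 */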
static int rdtgroup_task_write_permission(struct task_struct *task,
					  struct kernfs_open_file *of)
{
	const struct cred *tcred = get_task_cred(task);
	const struct cred *cred = current_cred();
	int ret = 0;

	/*
	 * Even if we're attaching all tasks in the thread group, we only
	 * need to check permissions on one of them.
	 */
	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
	    !uid_eq(cred->euid, tcred->uid) &&
	    !uid_eq(cred->euid, tcred->suid))
		ret = -EPERM;

	put_cred(tcred);
	return ret;
}
static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
			      struct kernfs_open_file *of)
{
	struct task_struct *tsk;
	int ret;

	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			return -ESRCH;
		}
	} else {
		tsk = current;
	}

	get_task_struct(tsk);
	rcu_read_unlock();

	ret = rdtgroup_task_write_permission(tsk, of);
	if (!ret)
		ret = __rdtgroup_move_task(tsk, rdtgrp);

	put_task_struct(tsk);
	return ret;
}
static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;
	pid_t pid;

	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return -EINVAL;
	rdtgrp = rdtgroup_kn_lock_live(of->kn);

	if (rdtgrp)
		ret = rdtgroup_move_task(pid, rdtgrp, of);
	else
		ret = -ENOENT;

	rdtgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}
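/*
 * Usage example (illustrative note): a task is moved into a group by
 * writing its pid to the group's "tasks" file, e.g.
 *	echo 1234 > /sys/fs/resctrl/p1/tasks
 * Writing pid 0 moves the writing task itself.
 */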
static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
{
	struct task_struct *p, *t;

	rcu_read_lock();
	for_each_process_thread(p, t) {
		if (t->closid == r->closid)
			seq_printf(s, "%d\n", t->pid);
	}
	rcu_read_unlock();
}
static int rdtgroup_tasks_show(struct kernfs_open_file *of,
			       struct seq_file *s, void *v)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (rdtgrp)
		show_rdt_tasks(rdtgrp, s);
	else
		ret = -ENOENT;
	rdtgroup_kn_unlock(of->kn);

	return ret;
}
/* Files in each rdtgroup */
static struct rftype rdtgroup_base_files[] = {
	{
		.name		= "cpus",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_cpus_write,
		.seq_show	= rdtgroup_cpus_show,
	},
	{
		.name		= "tasks",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_tasks_write,
		.seq_show	= rdtgroup_tasks_show,
	},
	{
		.name		= "schemata",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_schemata_write,
		.seq_show	= rdtgroup_schemata_show,
	},
};
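/*
 * Note (added for clarity): every resource group directory, including the
 * root, thus exposes three files: "cpus" (CPU ownership), "tasks" (task
 * membership) and "schemata" (the cache bit masks per domain).
 */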
static int rdt_num_closids_show(struct kernfs_open_file *of,
				struct seq_file *seq, void *v)
{
	struct rdt_resource *r = of->kn->parent->priv;

	seq_printf(seq, "%d\n", r->num_closid);

	return 0;
}

static int rdt_cbm_mask_show(struct kernfs_open_file *of,
			     struct seq_file *seq, void *v)
{
	struct rdt_resource *r = of->kn->parent->priv;

	seq_printf(seq, "%x\n", r->max_cbm);

	return 0;
}

static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
				 struct seq_file *seq, void *v)
{
	struct rdt_resource *r = of->kn->parent->priv;

	seq_printf(seq, "%d\n", r->min_cbm_bits);

	return 0;
}
/* rdtgroup information files for one cache resource. */
static struct rftype res_info_files[] = {
	{
		.name		= "num_closids",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_num_closids_show,
	},
	{
		.name		= "cbm_mask",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_cbm_mask_show,
	},
	{
		.name		= "min_cbm_bits",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_min_cbm_bits_show,
	},
};
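/*
 * Note (added for clarity): these read-only files appear once per enabled
 * resource, e.g. /sys/fs/resctrl/info/L3/{num_closids,cbm_mask,min_cbm_bits},
 * so user space can discover the hardware limits before writing schemata.
 */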
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
	struct kernfs_node *kn_subdir;
	struct rdt_resource *r;
	int ret;

	/* create the directory */
	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
	if (IS_ERR(kn_info))
		return PTR_ERR(kn_info);

	for_each_enabled_rdt_resource(r) {
		kn_subdir = kernfs_create_dir(kn_info, r->name,
					      kn_info->mode, r);
		if (IS_ERR(kn_subdir)) {
			ret = PTR_ERR(kn_subdir);
			goto out_destroy;
		}
		kernfs_get(kn_subdir);
		ret = rdtgroup_kn_set_ugid(kn_subdir);
		if (ret)
			goto out_destroy;
		ret = rdtgroup_add_files(kn_subdir, res_info_files,
					 ARRAY_SIZE(res_info_files));
		if (ret)
			goto out_destroy;
		kernfs_activate(kn_subdir);
	}

	/*
	 * This extra ref will be put in kernfs_remove() and guarantees
	 * that kn_info is always accessible.
	 */
	kernfs_get(kn_info);

	ret = rdtgroup_kn_set_ugid(kn_info);
	if (ret)
		goto out_destroy;

	kernfs_activate(kn_info);

	return 0;

out_destroy:
	kernfs_remove(kn_info);
	return ret;
}
static void l3_qos_cfg_update(void *arg)
{
	bool *enable = arg;

	wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
}
static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
{
	cpumask_var_t cpu_mask;
	struct rdt_domain *d;
	int cpu;

	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
		return -ENOMEM;

	list_for_each_entry(d, &r->domains, list) {
		/* Pick one CPU from each domain instance to update MSR */
		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
	}
	cpu = get_cpu();
	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
	if (cpumask_test_cpu(cpu, cpu_mask))
		l3_qos_cfg_update(&enable);
	/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
	smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1);
	put_cpu();

	free_cpumask_var(cpu_mask);

	return 0;
}
static int cdp_enable(void)
{
	struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA];
	struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE];
	struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
	int ret;

	if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable)
		return -EINVAL;

	ret = set_l3_qos_cfg(r_l3, true);
	if (!ret) {
		r_l3->enabled = false;
		r_l3data->enabled = true;
		r_l3code->enabled = true;
	}
	return ret;
}
static void cdp_disable(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];

	r->enabled = r->capable;

	if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) {
		rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false;
		rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false;
		set_l3_qos_cfg(r, false);
	}
}
static int parse_rdtgroupfs_options(char *data)
{
	char *token, *o = data;
	int ret = 0;

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;

		if (!strcmp(token, "cdp"))
			ret = cdp_enable();
	}

	return ret;
}
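/*
 * Usage example (illustrative note): Code/Data Prioritization is selected
 * at mount time, e.g.
 *	mount -t resctrl -o cdp resctrl /sys/fs/resctrl
 * which replaces the unified L3 resource with separate L3DATA and L3CODE
 * resources in the schemata.
 */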
/*
 * We don't allow rdtgroup directories to be created anywhere
 * except the root directory. Thus when looking for the rdtgroup
 * structure for a kernfs node we are either looking at a directory,
 * in which case the rdtgroup structure is pointed at by the "priv"
 * field, otherwise we have a file, and need only look to the parent
 * to find the rdtgroup.
 */
static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
{
	if (kernfs_type(kn) == KERNFS_DIR) {
		/*
		 * All the resource directories use "kn->priv"
		 * to point to the "struct rdtgroup" for the
		 * resource. "info" and its subdirectories don't
		 * have rdtgroup structures, so return NULL here.
		 */
		if (kn == kn_info || kn->parent == kn_info)
			return NULL;
		else
			return kn->priv;
	} else {
		return kn->parent->priv;
	}
}
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
{
	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

	if (!rdtgrp)
		return NULL;

	atomic_inc(&rdtgrp->waitcount);
	kernfs_break_active_protection(kn);

	mutex_lock(&rdtgroup_mutex);

	/* Was this group deleted while we waited? */
	if (rdtgrp->flags & RDT_DELETED)
		return NULL;

	return rdtgrp;
}
void rdtgroup_kn_unlock(struct kernfs_node *kn)
{
	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

	if (!rdtgrp)
		return;

	mutex_unlock(&rdtgroup_mutex);

	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
	    (rdtgrp->flags & RDT_DELETED)) {
		kernfs_unbreak_active_protection(kn);
		kernfs_put(kn);
		kfree(rdtgrp);
	} else {
		kernfs_unbreak_active_protection(kn);
	}
}
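/*
 * Note (added for clarity): rdtgroup_kn_lock_live() and
 * rdtgroup_kn_unlock() must be called in pairs, even when lock_live
 * returns NULL. The unlock side re-resolves the group from the kernfs
 * node and drops the waitcount reference, freeing a deleted group once
 * its last user goes away.
 */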
static struct dentry *rdt_mount(struct file_system_type *fs_type,
				int flags, const char *unused_dev_name,
				void *data)
{
	struct dentry *dentry;
	int ret;

	mutex_lock(&rdtgroup_mutex);
	/*
	 * resctrl file system can only be mounted once.
	 */
	if (static_branch_unlikely(&rdt_enable_key)) {
		dentry = ERR_PTR(-EBUSY);
		goto out;
	}

	ret = parse_rdtgroupfs_options(data);
	if (ret) {
		dentry = ERR_PTR(ret);
		goto out_cdp;
	}

	closid_init();

	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
	if (ret) {
		dentry = ERR_PTR(ret);
		goto out_cdp;
	}

	dentry = kernfs_mount(fs_type, flags, rdt_root,
			      RDTGROUP_SUPER_MAGIC, NULL);
	if (IS_ERR(dentry))
		goto out_cdp;

	static_branch_enable(&rdt_enable_key);
	goto out;

out_cdp:
	cdp_disable();
out:
	mutex_unlock(&rdtgroup_mutex);

	return dentry;
}
static int reset_all_cbms(struct rdt_resource *r)
{
	struct msr_param msr_param;
	cpumask_var_t cpu_mask;
	struct rdt_domain *d;
	int i, cpu;

	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
		return -ENOMEM;

	msr_param.res = r;
	msr_param.low = 0;
	msr_param.high = r->num_closid;

	/*
	 * Disable resource control for this resource by setting all
	 * CBMs in all domains to the maximum mask value. Pick one CPU
	 * from each domain to update the MSRs below.
	 */
	list_for_each_entry(d, &r->domains, list) {
		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);

		for (i = 0; i < r->num_closid; i++)
			d->cbm[i] = r->max_cbm;
	}
	cpu = get_cpu();
	/* Update CBM on this cpu if it's in cpu_mask. */
	if (cpumask_test_cpu(cpu, cpu_mask))
		rdt_cbm_update(&msr_param);
	/* Update CBM on all other cpus in cpu_mask. */
	smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
	put_cpu();

	free_cpumask_var(cpu_mask);

	return 0;
}
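/*
 * Note (added for clarity): r->max_cbm has all capacity bits set, so
 * after this every CLOSID in every domain may use the whole cache,
 * i.e. allocation enforcement is effectively off for this resource.
 */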
/*
 * Move tasks from one to the other group. If @from is NULL, then all tasks
 * in the system are moved unconditionally (used for teardown).
 *
 * If @mask is not NULL the cpus on which moved tasks are running are set
 * in that mask so the update smp function call is restricted to affected
 * cpus.
 */
static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
				 struct cpumask *mask)
{
	struct task_struct *p, *t;

	read_lock(&tasklist_lock);
	for_each_process_thread(p, t) {
		if (!from || t->closid == from->closid) {
			t->closid = to->closid;

#ifdef CONFIG_SMP
			/*
			 * This is safe on x86 w/o barriers as the ordering
			 * of writing to task_cpu() and t->on_cpu is
			 * reverse to the reading here. The detection is
			 * inaccurate as tasks might move or schedule
			 * before the smp function call takes place. In
			 * such a case the function call is pointless, but
			 * there is no other side effect.
			 */
			if (mask && t->on_cpu)
				cpumask_set_cpu(task_cpu(t), mask);
#endif
		}
	}
	read_unlock(&tasklist_lock);
}
/*
 * Forcibly remove all subdirectories under root.
 */
static void rmdir_all_sub(void)
{
	struct rdtgroup *rdtgrp, *tmp;

	/* Move all tasks to the default resource group */
	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);

	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
		/* Remove each rdtgroup other than root */
		if (rdtgrp == &rdtgroup_default)
			continue;

		/*
		 * Give any CPUs back to the default group. We cannot copy
		 * cpu_online_mask because a CPU might have executed the
		 * offline callback already, but is still marked online.
		 */
		cpumask_or(&rdtgroup_default.cpu_mask,
			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);

		kernfs_remove(rdtgrp->kn);
		list_del(&rdtgrp->rdtgroup_list);
		kfree(rdtgrp);
	}
	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
	get_online_cpus();
	rdt_update_closid(cpu_online_mask, &rdtgroup_default.closid);
	put_online_cpus();

	kernfs_remove(kn_info);
}
static void rdt_kill_sb(struct super_block *sb)
{
	struct rdt_resource *r;

	mutex_lock(&rdtgroup_mutex);

	/* Put everything back to default values. */
	for_each_enabled_rdt_resource(r)
		reset_all_cbms(r);
	cdp_disable();
	rmdir_all_sub();
	static_branch_disable(&rdt_enable_key);
	kernfs_kill_sb(sb);
	mutex_unlock(&rdtgroup_mutex);
}
static struct file_system_type rdt_fs_type = {
	.name		= "resctrl",
	.mount		= rdt_mount,
	.kill_sb	= rdt_kill_sb,
};
static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
			  umode_t mode)
{
	struct rdtgroup *parent, *rdtgrp;
	struct kernfs_node *kn;
	int ret, closid;

	/* Only allow mkdir in the root directory */
	if (parent_kn != rdtgroup_default.kn)
		return -EPERM;

	/* Do not accept '\n' to avoid unparsable situation. */
	if (strchr(name, '\n'))
		return -EINVAL;

	parent = rdtgroup_kn_lock_live(parent_kn);
	if (!parent) {
		ret = -ENODEV;
		goto out_unlock;
	}

	ret = closid_alloc();
	if (ret < 0)
		goto out_unlock;
	closid = ret;

	/* allocate the rdtgroup. */
	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
	if (!rdtgrp) {
		ret = -ENOSPC;
		goto out_closid_free;
	}
	rdtgrp->closid = closid;
	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);

	/* kernfs creates the directory for rdtgrp */
	kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp);
	if (IS_ERR(kn)) {
		ret = PTR_ERR(kn);
		goto out_cancel_ref;
	}
	rdtgrp->kn = kn;

	/*
	 * kernfs_remove() will drop the reference count on "kn" which
	 * will free it. But we still need it to stick around for the
	 * rdtgroup_kn_unlock(kn) call below. Take one extra reference
	 * here, which will be dropped inside rdtgroup_kn_unlock().
	 */
	kernfs_get(kn);

	ret = rdtgroup_kn_set_ugid(kn);
	if (ret)
		goto out_destroy;

	ret = rdtgroup_add_files(kn, rdtgroup_base_files,
				 ARRAY_SIZE(rdtgroup_base_files));
	if (ret)
		goto out_destroy;

	kernfs_activate(kn);

	ret = 0;
	goto out_unlock;

out_destroy:
	kernfs_remove(rdtgrp->kn);
out_cancel_ref:
	list_del(&rdtgrp->rdtgroup_list);
	kfree(rdtgrp);
out_closid_free:
	closid_free(closid);
out_unlock:
	rdtgroup_kn_unlock(parent_kn);
	return ret;
}
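/*
 * Usage example (illustrative note): a resource group is created simply
 * with
 *	mkdir /sys/fs/resctrl/p1
 * which allocates a free CLOSID and populates the new directory with the
 * cpus, tasks and schemata files.
 */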
static int rdtgroup_rmdir(struct kernfs_node *kn)
{
	int ret, cpu, closid = rdtgroup_default.closid;
	struct rdtgroup *rdtgrp;
	cpumask_var_t tmpmask;

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	rdtgrp = rdtgroup_kn_lock_live(kn);
	if (!rdtgrp) {
		ret = -EPERM;
		goto out;
	}

	/* Give any tasks back to the default group */
	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);

	/* Give any CPUs back to the default group */
	cpumask_or(&rdtgroup_default.cpu_mask,
		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);

	/* Update per cpu closid of the moved CPUs first */
	for_each_cpu(cpu, &rdtgrp->cpu_mask)
		per_cpu(cpu_closid, cpu) = closid;
	/*
	 * Update the MSR on moved CPUs and CPUs which have moved
	 * task running on them.
	 */
	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
	rdt_update_closid(tmpmask, NULL);

	rdtgrp->flags = RDT_DELETED;
	closid_free(rdtgrp->closid);
	list_del(&rdtgrp->rdtgroup_list);

	/*
	 * One extra hold on this, will drop when we kfree(rdtgrp)
	 * in rdtgroup_kn_unlock().
	 */
	kernfs_get(kn);
	kernfs_remove(rdtgrp->kn);
	ret = 0;
out:
	rdtgroup_kn_unlock(kn);
	free_cpumask_var(tmpmask);
	return ret;
}
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
	if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled)
		seq_puts(seq, ",cdp");
	return 0;
}
static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
	.mkdir		= rdtgroup_mkdir,
	.rmdir		= rdtgroup_rmdir,
	.show_options	= rdtgroup_show_options,
};
static int __init rdtgroup_setup_root(void)
{
	int ret;

	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
				      KERNFS_ROOT_CREATE_DEACTIVATED,
				      &rdtgroup_default);
	if (IS_ERR(rdt_root))
		return PTR_ERR(rdt_root);

	mutex_lock(&rdtgroup_mutex);

	rdtgroup_default.closid = 0;
	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);

	ret = rdtgroup_add_files(rdt_root->kn, rdtgroup_base_files,
				 ARRAY_SIZE(rdtgroup_base_files));
	if (ret) {
		kernfs_destroy_root(rdt_root);
		goto out;
	}

	rdtgroup_default.kn = rdt_root->kn;
	kernfs_activate(rdtgroup_default.kn);

out:
	mutex_unlock(&rdtgroup_mutex);

	return ret;
}
/*
 * rdtgroup_init - rdtgroup initialization
 *
 * Setup resctrl file system including set up root, create mount point,
 * register rdtgroup filesystem, and initialize files under root directory.
 *
 * Return: 0 on success or -errno
 */
int __init rdtgroup_init(void)
{
	int ret = 0;

	ret = rdtgroup_setup_root();
	if (ret)
		return ret;

	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
	if (ret)
		goto cleanup_root;

	ret = register_filesystem(&rdt_fs_type);
	if (ret)
		goto cleanup_mountpoint;

	return 0;

cleanup_mountpoint:
	sysfs_remove_mount_point(fs_kobj, "resctrl");
cleanup_root:
	kernfs_destroy_root(rdt_root);

	return ret;
}
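/*
 * End-to-end usage sketch (illustrative note, not part of the original
 * file; paths and mask values are examples):
 *	mount -t resctrl resctrl /sys/fs/resctrl
 *	mkdir /sys/fs/resctrl/p1
 *	echo "L3:0=0f;1=f0" > /sys/fs/resctrl/p1/schemata
 *	echo <pid> > /sys/fs/resctrl/p1/tasks
 * The exact schemata format depends on the enabled resources reported
 * under /sys/fs/resctrl/info.
 */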