/*
 * User interface for Resource Allocation in Resource Director Technology (RDT)
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/cpu.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>

#include <uapi/linux/magic.h>

#include <asm/intel_rdt.h>
#include <asm/intel_rdt_common.h>

DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);

/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;

/*
 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
 * we can keep a bitmap of free CLOSIDs in a single integer.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set "current->closid" to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 */
static int closid_free_map;

static void closid_init(void)
{
	struct rdt_resource *r;
	int rdt_min_closid = 32;

	/* Compute rdt_min_closid across all resources */
	for_each_enabled_rdt_resource(r)
		rdt_min_closid = min(rdt_min_closid, r->num_closid);

	closid_free_map = BIT_MASK(rdt_min_closid) - 1;

	/* CLOSID 0 is always reserved for the default group */
	closid_free_map &= ~1;
}

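/*
 * Example: if the smallest number of CLOSIDs supported by any enabled
 * resource is 4, closid_init() sets closid_free_map to
 * (BIT_MASK(4) - 1) & ~1 == 0xe, i.e. CLOSIDs 1-3 are free for new
 * resource groups and CLOSID 0 remains reserved for rdtgroup_default.
 */
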
int closid_alloc(void)
{
	int closid = ffs(closid_free_map);

	if (closid == 0)
		return -ENOSPC;
	closid--;
	closid_free_map &= ~(1 << closid);

	return closid;
}

static void closid_free(int closid)
{
	closid_free_map |= 1 << closid;
}

/* set uid and gid of rdtgroup dirs and files to that of the creator */
static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
{
	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
				.ia_uid = current_fsuid(),
				.ia_gid = current_fsgid(), };

	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
		return 0;

	return kernfs_setattr(kn, &iattr);
}

static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
{
	struct kernfs_node *kn;
	int ret;

	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
				  0, rft->kf_ops, rft, NULL, NULL);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	ret = rdtgroup_kn_set_ugid(kn);
	if (ret) {
		kernfs_remove(kn);
		return ret;
	}

	return 0;
}

static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts,
			      int len)
{
	struct rftype *rft;
	int ret;

	lockdep_assert_held(&rdtgroup_mutex);

	for (rft = rfts; rft < rfts + len; rft++) {
		ret = rdtgroup_add_file(kn, rft);
		if (ret)
			goto error;
	}

	return 0;
error:
	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
	while (--rft >= rfts)
		kernfs_remove_by_name(kn, rft->name);
	return ret;
}

static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct kernfs_open_file *of = m->private;
	struct rftype *rft = of->kn->priv;

	if (rft->seq_show)
		return rft->seq_show(of, m, arg);
	return 0;
}

static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
				   size_t nbytes, loff_t off)
{
	struct rftype *rft = of->kn->priv;

	if (rft->write)
		return rft->write(of, buf, nbytes, off);

	return -EINVAL;
}

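/*
 * All resctrl control files share this one set of kernfs ops; the per-file
 * struct rftype stashed in kn->priv by rdtgroup_add_file() supplies the
 * actual ->seq_show()/->write() handlers dispatched above.
 */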
static struct kernfs_ops rdtgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= rdtgroup_file_write,
	.seq_show		= rdtgroup_seqfile_show,
};

static int rdtgroup_cpus_show(struct kernfs_open_file *of,
			      struct seq_file *s, void *v)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);

	if (rdtgrp)
		seq_printf(s, "%*pb\n", cpumask_pr_args(&rdtgrp->cpu_mask));
	else
		ret = -ENOENT;
	rdtgroup_kn_unlock(of->kn);

	return ret;
}

/*
 * This is safe against intel_rdt_sched_in() called from __switch_to()
 * because __switch_to() is executed with interrupts disabled. A local call
 * from rdt_update_closid() is protected against __switch_to() because
 * preemption is disabled.
 */
static void rdt_update_cpu_closid(void *closid)
{
	if (closid)
		this_cpu_write(cpu_closid, *(int *)closid);
	/*
	 * We cannot unconditionally write the MSR because the current
	 * executing task might have its own closid selected. Just reuse
	 * the context switch code.
	 */
	intel_rdt_sched_in();
}

/*
 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
 *
 * Per task closids must have been set up before calling this function.
 *
 * The per cpu closids are updated with the smp function call, when @closid
 * is not NULL. If @closid is NULL then all affected percpu closids must
 * have been set up before calling this function.
 */
static void
rdt_update_closid(const struct cpumask *cpu_mask, int *closid)
{
	int cpu = get_cpu();

	if (cpumask_test_cpu(cpu, cpu_mask))
		rdt_update_cpu_closid(closid);
	smp_call_function_many(cpu_mask, rdt_update_cpu_closid, closid, 1);
	put_cpu();
}

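/*
 * Handler for writes to the "cpus" file of a resource group. The buffer is
 * parsed with cpumask_parse(), so user space writes a hex CPU mask, e.g.
 * (with resctrl mounted on /sys/fs/resctrl and an illustrative group "p1"):
 *
 *	# echo f > /sys/fs/resctrl/p1/cpus
 *
 * assigns CPUs 0-3 to group "p1", pulling them from whichever groups owned
 * them before.
 */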
static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
				   char *buf, size_t nbytes, loff_t off)
{
	cpumask_var_t tmpmask, newmask;
	struct rdtgroup *rdtgrp, *r;
	int ret;

	if (!buf)
		return -EINVAL;

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;
	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
		free_cpumask_var(tmpmask);
		return -ENOMEM;
	}

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (!rdtgrp) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = cpumask_parse(buf, newmask);
	if (ret)
		goto unlock;

	/* check that user didn't specify any offline cpus */
	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
	if (cpumask_weight(tmpmask)) {
		ret = -EINVAL;
		goto unlock;
	}

	/* Check whether cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (cpumask_weight(tmpmask)) {
		/* Can't drop from default group */
		if (rdtgrp == &rdtgroup_default) {
			ret = -EINVAL;
			goto unlock;
		}
		/* Give any dropped cpus to rdtgroup_default */
		cpumask_or(&rdtgroup_default.cpu_mask,
			   &rdtgroup_default.cpu_mask, tmpmask);
		rdt_update_closid(tmpmask, &rdtgroup_default.closid);
	}

	/*
	 * If we added cpus, remove them from previous group that owned them
	 * and update per-cpu closid
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (cpumask_weight(tmpmask)) {
		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
			if (r == rdtgrp)
				continue;
			cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask);
		}
		rdt_update_closid(tmpmask, &rdtgrp->closid);
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

unlock:
	rdtgroup_kn_unlock(of->kn);
	free_cpumask_var(tmpmask);
	free_cpumask_var(newmask);

	return ret ?: nbytes;
}

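/*
 * A task is moved to a resource group in two steps: __rdtgroup_move_task()
 * updates tsk->closid under rdtgroup_mutex, then queues move_myself() as
 * task_work so the moved task itself refreshes its PQR_ASSOC MSR via
 * intel_rdt_sched_in() the next time it runs.
 */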
struct task_move_callback {
	struct callback_head	work;
	struct rdtgroup		*rdtgrp;
};

static void move_myself(struct callback_head *head)
{
	struct task_move_callback *callback;
	struct rdtgroup *rdtgrp;

	callback = container_of(head, struct task_move_callback, work);
	rdtgrp = callback->rdtgrp;

	/*
	 * If resource group was deleted before this task work callback
	 * was invoked, then assign the task to root group and free the
	 * resource group.
	 */
	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
	    (rdtgrp->flags & RDT_DELETED)) {
		current->closid = 0;
		kfree(rdtgrp);
	}

	preempt_disable();
	/* update PQR_ASSOC MSR to make resource group go into effect */
	intel_rdt_sched_in();
	preempt_enable();

	kfree(callback);
}

static int __rdtgroup_move_task(struct task_struct *tsk,
				struct rdtgroup *rdtgrp)
{
	struct task_move_callback *callback;
	int ret;

	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
	if (!callback)
		return -ENOMEM;
	callback->work.func = move_myself;
	callback->rdtgrp = rdtgrp;

	/*
	 * Take a refcount, so rdtgrp cannot be freed before the
	 * callback has been invoked.
	 */
	atomic_inc(&rdtgrp->waitcount);
	ret = task_work_add(tsk, &callback->work, true);
	if (ret) {
		/*
		 * Task is exiting. Drop the refcount and free the callback.
		 * No need to check the refcount as the group cannot be
		 * deleted before the write function unlocks rdtgroup_mutex.
		 */
		atomic_dec(&rdtgrp->waitcount);
		kfree(callback);
	} else {
		tsk->closid = rdtgrp->closid;
	}
	return ret;
}

static int rdtgroup_task_write_permission(struct task_struct *task,
					  struct kernfs_open_file *of)
{
	const struct cred *tcred = get_task_cred(task);
	const struct cred *cred = current_cred();
	int ret = 0;

	/*
	 * Even if we're attaching all tasks in the thread group, we only
	 * need to check permissions on one of them.
	 */
	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
	    !uid_eq(cred->euid, tcred->uid) &&
	    !uid_eq(cred->euid, tcred->suid))
		ret = -EPERM;

	put_cred(tcred);
	return ret;
}

static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
			      struct kernfs_open_file *of)
{
	struct task_struct *tsk;
	int ret;

	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			return -ESRCH;
		}
	} else {
		tsk = current;
	}

	get_task_struct(tsk);
	rcu_read_unlock();

	ret = rdtgroup_task_write_permission(tsk, of);
	if (!ret)
		ret = __rdtgroup_move_task(tsk, rdtgrp);

	put_task_struct(tsk);
	return ret;
}

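/*
 * Handler for writes to the "tasks" file of a resource group: a single PID
 * per write(). For example (illustrative group name "p1", resctrl mounted
 * on /sys/fs/resctrl):
 *
 *	# echo 1234 > /sys/fs/resctrl/p1/tasks
 *
 * moves the task with PID 1234 into group "p1"; writing 0 moves the
 * writing task itself.
 */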
static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;
	pid_t pid;

	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return -EINVAL;
	rdtgrp = rdtgroup_kn_lock_live(of->kn);

	if (rdtgrp)
		ret = rdtgroup_move_task(pid, rdtgrp, of);
	else
		ret = -ENOENT;

	rdtgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}

static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
{
	struct task_struct *p, *t;

	rcu_read_lock();
	for_each_process_thread(p, t) {
		if (t->closid == r->closid)
			seq_printf(s, "%d\n", t->pid);
	}
	rcu_read_unlock();
}

static int rdtgroup_tasks_show(struct kernfs_open_file *of,
			       struct seq_file *s, void *v)
{
	struct rdtgroup *rdtgrp;
	int ret = 0;

	rdtgrp = rdtgroup_kn_lock_live(of->kn);
	if (rdtgrp)
		show_rdt_tasks(rdtgrp, s);
	else
		ret = -ENOENT;
	rdtgroup_kn_unlock(of->kn);

	return ret;
}

/* Files in each rdtgroup */
static struct rftype rdtgroup_base_files[] = {
	{
		.name		= "cpus",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_cpus_write,
		.seq_show	= rdtgroup_cpus_show,
	},
	{
		.name		= "tasks",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_tasks_write,
		.seq_show	= rdtgroup_tasks_show,
	},
	{
		.name		= "schemata",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_schemata_write,
		.seq_show	= rdtgroup_schemata_show,
	},
};

static int rdt_num_closids_show(struct kernfs_open_file *of,
				struct seq_file *seq, void *v)
{
	struct rdt_resource *r = of->kn->parent->priv;

	seq_printf(seq, "%d\n", r->num_closid);

	return 0;
}

static int rdt_cbm_mask_show(struct kernfs_open_file *of,
			     struct seq_file *seq, void *v)
{
	struct rdt_resource *r = of->kn->parent->priv;

	seq_printf(seq, "%x\n", r->max_cbm);

	return 0;
}

static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
				 struct seq_file *seq, void *v)
{
	struct rdt_resource *r = of->kn->parent->priv;

	seq_printf(seq, "%d\n", r->min_cbm_bits);

	return 0;
}

/* rdtgroup information files for one cache resource. */
static struct rftype res_info_files[] = {
	{
		.name		= "num_closids",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_num_closids_show,
	},
	{
		.name		= "cbm_mask",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_cbm_mask_show,
	},
	{
		.name		= "min_cbm_bits",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_min_cbm_bits_show,
	},
};

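/*
 * Build the read-only "info" hierarchy under the resctrl root, one
 * subdirectory per enabled resource, e.g.
 * /sys/fs/resctrl/info/L3/{num_closids,cbm_mask,min_cbm_bits} (the "L3"
 * component comes from the resource's r->name).
 */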
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
	struct kernfs_node *kn_subdir;
	struct rdt_resource *r;
	int ret;

	/* create the directory */
	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
	if (IS_ERR(kn_info))
		return PTR_ERR(kn_info);
	kernfs_get(kn_info);

	for_each_enabled_rdt_resource(r) {
		kn_subdir = kernfs_create_dir(kn_info, r->name,
					      kn_info->mode, r);
		if (IS_ERR(kn_subdir)) {
			ret = PTR_ERR(kn_subdir);
			goto out_destroy;
		}
		kernfs_get(kn_subdir);
		ret = rdtgroup_kn_set_ugid(kn_subdir);
		if (ret)
			goto out_destroy;
		ret = rdtgroup_add_files(kn_subdir, res_info_files,
					 ARRAY_SIZE(res_info_files));
		if (ret)
			goto out_destroy;
		kernfs_activate(kn_subdir);
	}

	/*
	 * This extra ref will be put in kernfs_remove() and guarantees
	 * that kn_info is always accessible.
	 */
	kernfs_get(kn_info);

	ret = rdtgroup_kn_set_ugid(kn_info);
	if (ret)
		goto out_destroy;

	kernfs_activate(kn_info);

	return 0;

out_destroy:
	kernfs_remove(kn_info);
	return ret;
}

static void l3_qos_cfg_update(void *arg)
{
	bool *enable = arg;

	wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
}

static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
{
	cpumask_var_t cpu_mask;
	struct rdt_domain *d;
	int cpu;

	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
		return -ENOMEM;

	list_for_each_entry(d, &r->domains, list) {
		/* Pick one CPU from each domain instance to update MSR */
		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
	}
	cpu = get_cpu();
	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
	if (cpumask_test_cpu(cpu, cpu_mask))
		l3_qos_cfg_update(&enable);
	/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
	smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1);
	put_cpu();

	free_cpumask_var(cpu_mask);

	return 0;
}

static int cdp_enable(void)
{
	struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA];
	struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE];
	struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
	int ret;

	if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable)
		return -EINVAL;

	ret = set_l3_qos_cfg(r_l3, true);
	if (!ret) {
		r_l3->enabled = false;
		r_l3data->enabled = true;
		r_l3code->enabled = true;
	}
	return ret;
}

static void cdp_disable(void)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];

	r->enabled = r->capable;

	if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) {
		rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false;
		rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false;
		set_l3_qos_cfg(r, false);
	}
}

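/*
 * Parse the comma-separated mount options of the resctrl filesystem. The
 * only option currently recognized is "cdp", which enables Code/Data
 * Prioritization on the L3 cache, e.g. (illustrative):
 *
 *	# mount -t resctrl resctrl -o cdp /sys/fs/resctrl
 */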
static int parse_rdtgroupfs_options(char *data)
{
	char *token, *o = data;
	int ret = 0;

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;

		if (!strcmp(token, "cdp"))
			ret = cdp_enable();
	}

	return ret;
}

/*
 * We don't allow rdtgroup directories to be created anywhere
 * except the root directory. Thus when looking for the rdtgroup
 * structure for a kernfs node we are either looking at a directory,
 * in which case the rdtgroup structure is pointed at by the "priv"
 * field, or at a file, in which case we need only look to the parent
 * directory to find the rdtgroup.
 */
static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
{
	if (kernfs_type(kn) == KERNFS_DIR) {
		/*
		 * All the resource directories use "kn->priv"
		 * to point to the "struct rdtgroup" for the
		 * resource. "info" and its subdirectories don't
		 * have rdtgroup structures, so return NULL here.
		 */
		if (kn == kn_info || kn->parent == kn_info)
			return NULL;
		else
			return kn->priv;
	} else {
		return kn->parent->priv;
	}
}

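/*
 * rdtgroup_kn_lock_live() and rdtgroup_kn_unlock() bracket file operations
 * on a resource group: the group is pinned via its waitcount, kernfs active
 * protection is dropped and rdtgroup_mutex is taken. A NULL return means
 * there is no live group for this node (e.g. it was deleted while waiting
 * for the mutex); callers report -ENOENT but still call
 * rdtgroup_kn_unlock(), which releases whatever was taken.
 */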
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
{
	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

	if (!rdtgrp)
		return NULL;

	atomic_inc(&rdtgrp->waitcount);
	kernfs_break_active_protection(kn);

	mutex_lock(&rdtgroup_mutex);

	/* Was this group deleted while we waited? */
	if (rdtgrp->flags & RDT_DELETED)
		return NULL;

	return rdtgrp;
}

void rdtgroup_kn_unlock(struct kernfs_node *kn)
{
	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

	if (!rdtgrp)
		return;

	mutex_unlock(&rdtgroup_mutex);

	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
	    (rdtgrp->flags & RDT_DELETED)) {
		kernfs_unbreak_active_protection(kn);
		kernfs_put(kn);
		kfree(rdtgrp);
	} else {
		kernfs_unbreak_active_protection(kn);
	}
}

static struct dentry *rdt_mount(struct file_system_type *fs_type,
				int flags, const char *unused_dev_name,
				void *data)
{
	struct dentry *dentry;
	int ret;

	mutex_lock(&rdtgroup_mutex);
	/*
	 * resctrl file system can only be mounted once.
	 */
	if (static_branch_unlikely(&rdt_enable_key)) {
		dentry = ERR_PTR(-EBUSY);
		goto out;
	}

	ret = parse_rdtgroupfs_options(data);
	if (ret) {
		dentry = ERR_PTR(ret);
		goto out_cdp;
	}

	closid_init();

	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
	if (ret) {
		dentry = ERR_PTR(ret);
		goto out_cdp;
	}

	dentry = kernfs_mount(fs_type, flags, rdt_root,
			      RDTGROUP_SUPER_MAGIC, NULL);
	if (IS_ERR(dentry))
		goto out_cdp;

	static_branch_enable(&rdt_enable_key);
	goto out;

out_cdp:
	cdp_disable();
out:
	mutex_unlock(&rdtgroup_mutex);

	return dentry;
}

static int reset_all_cbms(struct rdt_resource *r)
{
	struct msr_param msr_param;
	cpumask_var_t cpu_mask;
	struct rdt_domain *d;
	int i, cpu;

	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
		return -ENOMEM;

	msr_param.res = r;
	msr_param.low = 0;
	msr_param.high = r->num_closid;

	/*
	 * Disable resource control for this resource by setting all
	 * CBMs in all domains to the maximum mask value. Pick one CPU
	 * from each domain to update the MSRs below.
	 */
	list_for_each_entry(d, &r->domains, list) {
		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);

		for (i = 0; i < r->num_closid; i++)
			d->cbm[i] = r->max_cbm;
	}
	cpu = get_cpu();
	/* Update CBM on this cpu if it's in cpu_mask. */
	if (cpumask_test_cpu(cpu, cpu_mask))
		rdt_cbm_update(&msr_param);
	/* Update CBM on all other cpus in cpu_mask. */
	smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
	put_cpu();

	free_cpumask_var(cpu_mask);

	return 0;
}

/*
 * Move tasks from one group to the other. If @from is NULL, then all tasks
 * in the system are moved unconditionally (used for teardown).
 *
 * If @mask is not NULL the cpus on which moved tasks are running are set
 * in that mask so the update smp function call is restricted to affected
 * cpus.
 */
static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
				 struct cpumask *mask)
{
	struct task_struct *p, *t;

	read_lock(&tasklist_lock);
	for_each_process_thread(p, t) {
		if (!from || t->closid == from->closid) {
			t->closid = to->closid;
#ifdef CONFIG_SMP
			/*
			 * This is safe on x86 w/o barriers as the ordering
			 * of writing to task_cpu() and t->on_cpu is
			 * reverse to the reading here. The detection is
			 * inaccurate as tasks might move or schedule
			 * before the smp function call takes place. In
			 * such a case the function call is pointless, but
			 * there is no other side effect.
			 */
			if (mask && t->on_cpu)
				cpumask_set_cpu(task_cpu(t), mask);
#endif
		}
	}
	read_unlock(&tasklist_lock);
}

/*
 * Forcibly remove all subdirectories under root.
 */
static void rmdir_all_sub(void)
{
	struct rdtgroup *rdtgrp, *tmp;

	/* Move all tasks to the default resource group */
	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);

	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
		/* Remove each rdtgroup other than root */
		if (rdtgrp == &rdtgroup_default)
			continue;

		/*
		 * Give any CPUs back to the default group. We cannot copy
		 * cpu_online_mask because a CPU might have executed the
		 * offline callback already, but is still marked online.
		 */
		cpumask_or(&rdtgroup_default.cpu_mask,
			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);

		kernfs_remove(rdtgrp->kn);
		list_del(&rdtgrp->rdtgroup_list);
		kfree(rdtgrp);
	}
	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
	get_online_cpus();
	rdt_update_closid(cpu_online_mask, &rdtgroup_default.closid);
	put_online_cpus();

	kernfs_remove(kn_info);
}

static void rdt_kill_sb(struct super_block *sb)
{
	struct rdt_resource *r;

	mutex_lock(&rdtgroup_mutex);

	/* Put everything back to default values. */
	for_each_enabled_rdt_resource(r)
		reset_all_cbms(r);
	cdp_disable();
	rmdir_all_sub();
	static_branch_disable(&rdt_enable_key);
	kernfs_kill_sb(sb);
	mutex_unlock(&rdtgroup_mutex);
}

static struct file_system_type rdt_fs_type = {
	.name    = "resctrl",
	.mount   = rdt_mount,
	.kill_sb = rdt_kill_sb,
};

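/*
 * mkdir handler for the resctrl filesystem: creating a directory directly
 * under the mount root, e.g. "mkdir /sys/fs/resctrl/p1" (illustrative
 * name), allocates a new CLOSID and sets up a resource group with its own
 * cpus, tasks and schemata files.
 */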
static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
			  umode_t mode)
{
	struct rdtgroup *parent, *rdtgrp;
	struct kernfs_node *kn;
	int ret, closid;

	/* Only allow mkdir in the root directory */
	if (parent_kn != rdtgroup_default.kn)
		return -EPERM;

	/* Do not accept '\n' in the name to avoid an unparsable situation. */
	if (strchr(name, '\n'))
		return -EINVAL;

	parent = rdtgroup_kn_lock_live(parent_kn);
	if (!parent) {
		ret = -ENODEV;
		goto out_unlock;
	}

	ret = closid_alloc();
	if (ret < 0)
		goto out_unlock;
	closid = ret;

	/* allocate the rdtgroup. */
	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
	if (!rdtgrp) {
		ret = -ENOSPC;
		goto out_closid_free;
	}
	rdtgrp->closid = closid;
	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);

	/* kernfs creates the directory for rdtgrp */
	kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp);
	if (IS_ERR(kn)) {
		ret = PTR_ERR(kn);
		goto out_cancel_ref;
	}
	rdtgrp->kn = kn;

	/*
	 * kernfs_remove() will drop the reference count on "kn" which
	 * will free it. But we still need it to stick around for the
	 * rdtgroup_kn_unlock(kn) call below. Take one extra reference
	 * here, which will be dropped inside rdtgroup_kn_unlock().
	 */
	kernfs_get(kn);

	ret = rdtgroup_kn_set_ugid(kn);
	if (ret)
		goto out_destroy;

	ret = rdtgroup_add_files(kn, rdtgroup_base_files,
				 ARRAY_SIZE(rdtgroup_base_files));
	if (ret)
		goto out_destroy;

	kernfs_activate(kn);

	ret = 0;
	goto out_unlock;

out_destroy:
	kernfs_remove(rdtgrp->kn);
out_cancel_ref:
	list_del(&rdtgrp->rdtgroup_list);
	kfree(rdtgrp);
out_closid_free:
	closid_free(closid);
out_unlock:
	rdtgroup_kn_unlock(parent_kn);
	return ret;
}

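/*
 * rmdir handler: removing a group directory, e.g.
 * "rmdir /sys/fs/resctrl/p1" (illustrative name), hands the group's tasks
 * and CPUs back to the default group, frees its CLOSID and marks the
 * rdtgroup RDT_DELETED so the last reference holder frees it.
 */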
static int rdtgroup_rmdir(struct kernfs_node *kn)
{
	int ret, cpu, closid = rdtgroup_default.closid;
	struct rdtgroup *rdtgrp;
	cpumask_var_t tmpmask;

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	rdtgrp = rdtgroup_kn_lock_live(kn);
	if (!rdtgrp) {
		ret = -EPERM;
		goto out;
	}

	/* Give any tasks back to the default group */
	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);

	/* Give any CPUs back to the default group */
	cpumask_or(&rdtgroup_default.cpu_mask,
		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);

	/* Update per cpu closid of the moved CPUs first */
	for_each_cpu(cpu, &rdtgrp->cpu_mask)
		per_cpu(cpu_closid, cpu) = closid;
	/*
	 * Update the MSR on moved CPUs and CPUs which have a moved
	 * task running on them.
	 */
	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
	rdt_update_closid(tmpmask, NULL);

	rdtgrp->flags = RDT_DELETED;
	closid_free(rdtgrp->closid);
	list_del(&rdtgrp->rdtgroup_list);

	/*
	 * one extra hold on this, will drop when we kfree(rdtgrp)
	 * in rdtgroup_kn_unlock()
	 */
	kernfs_get(kn);
	kernfs_remove(rdtgrp->kn);
	ret = 0;
out:
	rdtgroup_kn_unlock(kn);
	free_cpumask_var(tmpmask);
	return ret;
}

static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
	if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled)
		seq_puts(seq, ",cdp");
	return 0;
}

static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
	.mkdir		= rdtgroup_mkdir,
	.rmdir		= rdtgroup_rmdir,
	.show_options	= rdtgroup_show_options,
};

static int __init rdtgroup_setup_root(void)
{
	int ret;

	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
				      KERNFS_ROOT_CREATE_DEACTIVATED,
				      &rdtgroup_default);
	if (IS_ERR(rdt_root))
		return PTR_ERR(rdt_root);

	mutex_lock(&rdtgroup_mutex);

	rdtgroup_default.closid = 0;
	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);

	ret = rdtgroup_add_files(rdt_root->kn, rdtgroup_base_files,
				 ARRAY_SIZE(rdtgroup_base_files));
	if (ret) {
		kernfs_destroy_root(rdt_root);
		goto out;
	}

	rdtgroup_default.kn = rdt_root->kn;
	kernfs_activate(rdtgroup_default.kn);

out:
	mutex_unlock(&rdtgroup_mutex);

	return ret;
}

/*
 * rdtgroup_init - rdtgroup initialization
 *
 * Set up the resctrl file system: set up the root, create the mount point,
 * register the rdtgroup filesystem, and initialize files under the root
 * directory.
 *
 * Return: 0 on success or -errno
 */
int __init rdtgroup_init(void)
{
	int ret = 0;

	ret = rdtgroup_setup_root();
	if (ret)
		return ret;

	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
	if (ret)
		goto cleanup_root;

	ret = register_filesystem(&rdt_fs_type);
	if (ret)
		goto cleanup_mountpoint;

	return 0;

cleanup_mountpoint:
	sysfs_remove_mount_point(fs_kobj, "resctrl");
cleanup_root:
	kernfs_destroy_root(rdt_root);

	return ret;
}