arch/x86/kernel/cpu/intel_rdt_rdtgroup.c (mirror_ubuntu-artful-kernel.git)
1 /*
2 * User interface for Resource Allocation in Resource Director Technology (RDT)
3 *
4 * Copyright (C) 2016 Intel Corporation
5 *
6 * Author: Fenghua Yu <fenghua.yu@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * More information about RDT can be found in the Intel(R) x86 Architecture
18 * Software Developer's Manual.
19 */
20
21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23 #include <linux/cpu.h>
24 #include <linux/fs.h>
25 #include <linux/sysfs.h>
26 #include <linux/kernfs.h>
27 #include <linux/seq_file.h>
28 #include <linux/sched/signal.h>
29 #include <linux/sched/task.h>
30 #include <linux/slab.h>
31 #include <linux/task_work.h>
32
33 #include <uapi/linux/magic.h>
34
35 #include <asm/intel_rdt_sched.h>
36 #include "intel_rdt.h"
37
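/*
 * The static keys below are enabled in rdt_mount() (while the resctrl
 * filesystem is mounted) and disabled again in rdt_kill_sb(). The
 * context-switch hook intel_rdt_sched_in() (asm/intel_rdt_sched.h)
 * checks them to decide whether PQR_ASSOC needs to be updated at all.
 */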
38 DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
39 DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
40 DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
41 static struct kernfs_root *rdt_root;
42 struct rdtgroup rdtgroup_default;
43 LIST_HEAD(rdt_all_groups);
44
45 /* Kernel fs node for "info" directory under root */
46 static struct kernfs_node *kn_info;
47
48 /* Kernel fs node for "mon_groups" directory under root */
49 static struct kernfs_node *kn_mongrp;
50
51 /* Kernel fs node for "mon_data" directory under root */
52 static struct kernfs_node *kn_mondata;
53
54 /*
55 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
56 * we can keep a bitmap of free CLOSIDs in a single integer.
57 *
58 * Using a global CLOSID across all resources has some advantages and
59 * some drawbacks:
60 * + We can simply set "current->closid" to assign a task to a resource
61 * group.
62 * + Context switch code can avoid extra memory references deciding which
63 * CLOSID to load into the PQR_ASSOC MSR
64 * - We give up some options in configuring resource groups across multi-socket
65 * systems.
66 * - Our choices on how to configure each resource become progressively more
67 * limited as the number of resources grows.
68 */
69 static int closid_free_map;
70
71 static void closid_init(void)
72 {
73 struct rdt_resource *r;
74 int rdt_min_closid = 32;
75
76 /* Compute rdt_min_closid across all resources */
77 for_each_alloc_enabled_rdt_resource(r)
78 rdt_min_closid = min(rdt_min_closid, r->num_closid);
79
80 closid_free_map = BIT_MASK(rdt_min_closid) - 1;
81
82 /* CLOSID 0 is always reserved for the default group */
83 closid_free_map &= ~1;
84 }
85
86 static int closid_alloc(void)
87 {
88 u32 closid = ffs(closid_free_map);
89
90 if (closid == 0)
91 return -ENOSPC;
92 closid--;
93 closid_free_map &= ~(1 << closid);
94
95 return closid;
96 }
97
98 static void closid_free(int closid)
99 {
100 closid_free_map |= 1 << closid;
101 }
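/*
 * Worked example of the allocator above: with rdt_min_closid == 4,
 * closid_init() leaves closid_free_map == 0b1110 (CLOSID 0 is reserved
 * for the default group), so successive closid_alloc() calls return
 * 1, 2, 3 and then -ENOSPC until closid_free() returns one of them.
 */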
102
103 /* set uid and gid of rdtgroup dirs and files to that of the creator */
104 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
105 {
106 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
107 .ia_uid = current_fsuid(),
108 .ia_gid = current_fsgid(), };
109
110 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
111 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
112 return 0;
113
114 return kernfs_setattr(kn, &iattr);
115 }
116
117 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
118 {
119 struct kernfs_node *kn;
120 int ret;
121
122 kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
123 0, rft->kf_ops, rft, NULL, NULL);
124 if (IS_ERR(kn))
125 return PTR_ERR(kn);
126
127 ret = rdtgroup_kn_set_ugid(kn);
128 if (ret) {
129 kernfs_remove(kn);
130 return ret;
131 }
132
133 return 0;
134 }
135
136 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
137 {
138 struct kernfs_open_file *of = m->private;
139 struct rftype *rft = of->kn->priv;
140
141 if (rft->seq_show)
142 return rft->seq_show(of, m, arg);
143 return 0;
144 }
145
146 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
147 size_t nbytes, loff_t off)
148 {
149 struct rftype *rft = of->kn->priv;
150
151 if (rft->write)
152 return rft->write(of, buf, nbytes, off);
153
154 return -EINVAL;
155 }
156
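/*
 * Common kernfs ops for the control/monitor files: writes and seq_file
 * reads are dispatched to the per-file "struct rftype" callbacks stored
 * in kn->priv (see rdtgroup_file_write() and rdtgroup_seqfile_show()).
 */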
157 static struct kernfs_ops rdtgroup_kf_single_ops = {
158 .atomic_write_len = PAGE_SIZE,
159 .write = rdtgroup_file_write,
160 .seq_show = rdtgroup_seqfile_show,
161 };
162
163 static struct kernfs_ops kf_mondata_ops = {
164 .atomic_write_len = PAGE_SIZE,
165 .seq_show = rdtgroup_mondata_show,
166 };
167
168 static bool is_cpu_list(struct kernfs_open_file *of)
169 {
170 struct rftype *rft = of->kn->priv;
171
172 return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
173 }
174
175 static int rdtgroup_cpus_show(struct kernfs_open_file *of,
176 struct seq_file *s, void *v)
177 {
178 struct rdtgroup *rdtgrp;
179 int ret = 0;
180
181 rdtgrp = rdtgroup_kn_lock_live(of->kn);
182
183 if (rdtgrp) {
184 seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
185 cpumask_pr_args(&rdtgrp->cpu_mask));
186 } else {
187 ret = -ENOENT;
188 }
189 rdtgroup_kn_unlock(of->kn);
190
191 return ret;
192 }
193
194 /*
195 * This is safe against intel_rdt_sched_in() called from __switch_to()
196 * because __switch_to() is executed with interrupts disabled. A local call
197 * from update_closid_rmid() is protected against __switch_to() because
198 * preemption is disabled.
199 */
200 static void update_cpu_closid_rmid(void *info)
201 {
202 struct rdtgroup *r = info;
203
204 if (r) {
205 this_cpu_write(pqr_state.default_closid, r->closid);
206 this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
207 }
208
209 /*
210 * We cannot unconditionally write the MSR because the current
211 * executing task might have its own closid selected. Just reuse
212 * the context switch code.
213 */
214 intel_rdt_sched_in();
215 }
216
217 /*
218 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
219 *
220 * Per task closids/rmids must have been set up before calling this function.
221 */
222 static void
223 update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
224 {
225 int cpu = get_cpu();
226
227 if (cpumask_test_cpu(cpu, cpu_mask))
228 update_cpu_closid_rmid(r);
229 smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
230 put_cpu();
231 }
232
233 static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
234 cpumask_var_t tmpmask)
235 {
236 struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
237 struct list_head *head;
238
239 /* Check whether cpus belong to parent ctrl group */
240 cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
241 if (cpumask_weight(tmpmask))
242 return -EINVAL;
243
244 /* Check whether cpus are dropped from this group */
245 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
246 if (cpumask_weight(tmpmask)) {
247 /* Give any dropped cpus to parent rdtgroup */
248 cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
249 update_closid_rmid(tmpmask, prgrp);
250 }
251
252 /*
253 * If we added cpus, remove them from previous group that owned them
254 * and update per-cpu rmid
255 */
256 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
257 if (cpumask_weight(tmpmask)) {
258 head = &prgrp->mon.crdtgrp_list;
259 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
260 if (crgrp == rdtgrp)
261 continue;
262 cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
263 tmpmask);
264 }
265 update_closid_rmid(tmpmask, rdtgrp);
266 }
267
268 /* Done pushing/pulling - update this group with new mask */
269 cpumask_copy(&rdtgrp->cpu_mask, newmask);
270
271 return 0;
272 }
273
274 static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
275 {
276 struct rdtgroup *crgrp;
277
278 cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
279 /* update the child mon group masks as well */
280 list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
281 cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
282 }
283
284 static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
285 cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
286 {
287 struct rdtgroup *r, *crgrp;
288 struct list_head *head;
289
290 /* Check whether cpus are dropped from this group */
291 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
292 if (cpumask_weight(tmpmask)) {
293 /* Can't drop from default group */
294 if (rdtgrp == &rdtgroup_default)
295 return -EINVAL;
296
297 /* Give any dropped cpus to rdtgroup_default */
298 cpumask_or(&rdtgroup_default.cpu_mask,
299 &rdtgroup_default.cpu_mask, tmpmask);
300 update_closid_rmid(tmpmask, &rdtgroup_default);
301 }
302
303 /*
304 * If we added cpus, remove them from previous group and
305 * the prev group's child groups that owned them
306 * and update per-cpu closid/rmid.
307 */
308 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
309 if (cpumask_weight(tmpmask)) {
310 list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
311 if (r == rdtgrp)
312 continue;
313 cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
314 if (cpumask_weight(tmpmask1))
315 cpumask_rdtgrp_clear(r, tmpmask1);
316 }
317 update_closid_rmid(tmpmask, rdtgrp);
318 }
319
320 /* Done pushing/pulling - update this group with new mask */
321 cpumask_copy(&rdtgrp->cpu_mask, newmask);
322
323 /*
324 * Clear child mon group masks since there is a new parent mask
325 * now and update the rmid for the cpus the child lost.
326 */
327 head = &rdtgrp->mon.crdtgrp_list;
328 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
329 cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
330 update_closid_rmid(tmpmask, rdtgrp);
331 cpumask_clear(&crgrp->cpu_mask);
332 }
333
334 return 0;
335 }
336
337 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
338 char *buf, size_t nbytes, loff_t off)
339 {
340 cpumask_var_t tmpmask, newmask, tmpmask1;
341 struct rdtgroup *rdtgrp;
342 int ret;
343
344 if (!buf)
345 return -EINVAL;
346
347 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
348 return -ENOMEM;
349 if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
350 free_cpumask_var(tmpmask);
351 return -ENOMEM;
352 }
353 if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
354 free_cpumask_var(tmpmask);
355 free_cpumask_var(newmask);
356 return -ENOMEM;
357 }
358
359 rdtgrp = rdtgroup_kn_lock_live(of->kn);
360 if (!rdtgrp) {
361 ret = -ENOENT;
362 goto unlock;
363 }
364
365 if (is_cpu_list(of))
366 ret = cpulist_parse(buf, newmask);
367 else
368 ret = cpumask_parse(buf, newmask);
369
370 if (ret)
371 goto unlock;
372
373 /* check that user didn't specify any offline cpus */
374 cpumask_andnot(tmpmask, newmask, cpu_online_mask);
375 if (cpumask_weight(tmpmask)) {
376 ret = -EINVAL;
377 goto unlock;
378 }
379
380 if (rdtgrp->type == RDTCTRL_GROUP)
381 ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
382 else if (rdtgrp->type == RDTMON_GROUP)
383 ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
384 else
385 ret = -EINVAL;
386
387 unlock:
388 rdtgroup_kn_unlock(of->kn);
389 free_cpumask_var(tmpmask);
390 free_cpumask_var(newmask);
391 free_cpumask_var(tmpmask1);
392
393 return ret ?: nbytes;
394 }
395
396 struct task_move_callback {
397 struct callback_head work;
398 struct rdtgroup *rdtgrp;
399 };
400
401 static void move_myself(struct callback_head *head)
402 {
403 struct task_move_callback *callback;
404 struct rdtgroup *rdtgrp;
405
406 callback = container_of(head, struct task_move_callback, work);
407 rdtgrp = callback->rdtgrp;
408
409 /*
410 * If resource group was deleted before this task work callback
411 * was invoked, then assign the task to root group and free the
412 * resource group.
413 */
414 if (atomic_dec_and_test(&rdtgrp->waitcount) &&
415 (rdtgrp->flags & RDT_DELETED)) {
416 current->closid = 0;
417 current->rmid = 0;
418 kfree(rdtgrp);
419 }
420
421 preempt_disable();
422 /* update PQR_ASSOC MSR to make resource group go into effect */
423 intel_rdt_sched_in();
424 preempt_enable();
425
426 kfree(callback);
427 }
428
429 static int __rdtgroup_move_task(struct task_struct *tsk,
430 struct rdtgroup *rdtgrp)
431 {
432 struct task_move_callback *callback;
433 int ret;
434
435 callback = kzalloc(sizeof(*callback), GFP_KERNEL);
436 if (!callback)
437 return -ENOMEM;
438 callback->work.func = move_myself;
439 callback->rdtgrp = rdtgrp;
440
441 /*
442 * Take a refcount, so rdtgrp cannot be freed before the
443 * callback has been invoked.
444 */
445 atomic_inc(&rdtgrp->waitcount);
446 ret = task_work_add(tsk, &callback->work, true);
447 if (ret) {
448 /*
449 * Task is exiting. Drop the refcount and free the callback.
450 * No need to check the refcount as the group cannot be
451 * deleted before the write function unlocks rdtgroup_mutex.
452 */
453 atomic_dec(&rdtgrp->waitcount);
454 kfree(callback);
455 } else {
456 /*
457 * For ctrl_mon groups move both closid and rmid.
458 * For monitor groups, can move the tasks only from
459 * their parent CTRL group.
460 */
461 if (rdtgrp->type == RDTCTRL_GROUP) {
462 tsk->closid = rdtgrp->closid;
463 tsk->rmid = rdtgrp->mon.rmid;
464 } else if (rdtgrp->type == RDTMON_GROUP) {
465 if (rdtgrp->mon.parent->closid == tsk->closid)
466 tsk->rmid = rdtgrp->mon.rmid;
467 else
468 ret = -EINVAL;
469 }
470 }
471 return ret;
472 }
473
474 static int rdtgroup_task_write_permission(struct task_struct *task,
475 struct kernfs_open_file *of)
476 {
477 const struct cred *tcred = get_task_cred(task);
478 const struct cred *cred = current_cred();
479 int ret = 0;
480
481 /*
482 * Even if we're attaching all tasks in the thread group, we only
483 * need to check permissions on one of them.
484 */
485 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
486 !uid_eq(cred->euid, tcred->uid) &&
487 !uid_eq(cred->euid, tcred->suid))
488 ret = -EPERM;
489
490 put_cred(tcred);
491 return ret;
492 }
493
494 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
495 struct kernfs_open_file *of)
496 {
497 struct task_struct *tsk;
498 int ret;
499
500 rcu_read_lock();
501 if (pid) {
502 tsk = find_task_by_vpid(pid);
503 if (!tsk) {
504 rcu_read_unlock();
505 return -ESRCH;
506 }
507 } else {
508 tsk = current;
509 }
510
511 get_task_struct(tsk);
512 rcu_read_unlock();
513
514 ret = rdtgroup_task_write_permission(tsk, of);
515 if (!ret)
516 ret = __rdtgroup_move_task(tsk, rdtgrp);
517
518 put_task_struct(tsk);
519 return ret;
520 }
521
522 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
523 char *buf, size_t nbytes, loff_t off)
524 {
525 struct rdtgroup *rdtgrp;
526 int ret = 0;
527 pid_t pid;
528
529 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
530 return -EINVAL;
531 rdtgrp = rdtgroup_kn_lock_live(of->kn);
532
533 if (rdtgrp)
534 ret = rdtgroup_move_task(pid, rdtgrp, of);
535 else
536 ret = -ENOENT;
537
538 rdtgroup_kn_unlock(of->kn);
539
540 return ret ?: nbytes;
541 }
542
543 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
544 {
545 struct task_struct *p, *t;
546
547 rcu_read_lock();
548 for_each_process_thread(p, t) {
549 if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
550 (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
551 seq_printf(s, "%d\n", t->pid);
552 }
553 rcu_read_unlock();
554 }
555
556 static int rdtgroup_tasks_show(struct kernfs_open_file *of,
557 struct seq_file *s, void *v)
558 {
559 struct rdtgroup *rdtgrp;
560 int ret = 0;
561
562 rdtgrp = rdtgroup_kn_lock_live(of->kn);
563 if (rdtgrp)
564 show_rdt_tasks(rdtgrp, s);
565 else
566 ret = -ENOENT;
567 rdtgroup_kn_unlock(of->kn);
568
569 return ret;
570 }
571
572 static int rdt_num_closids_show(struct kernfs_open_file *of,
573 struct seq_file *seq, void *v)
574 {
575 struct rdt_resource *r = of->kn->parent->priv;
576
577 seq_printf(seq, "%d\n", r->num_closid);
578 return 0;
579 }
580
581 static int rdt_default_ctrl_show(struct kernfs_open_file *of,
582 struct seq_file *seq, void *v)
583 {
584 struct rdt_resource *r = of->kn->parent->priv;
585
586 seq_printf(seq, "%x\n", r->default_ctrl);
587 return 0;
588 }
589
590 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
591 struct seq_file *seq, void *v)
592 {
593 struct rdt_resource *r = of->kn->parent->priv;
594
595 seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
596 return 0;
597 }
598
599 static int rdt_shareable_bits_show(struct kernfs_open_file *of,
600 struct seq_file *seq, void *v)
601 {
602 struct rdt_resource *r = of->kn->parent->priv;
603
604 seq_printf(seq, "%x\n", r->cache.shareable_bits);
605 return 0;
606 }
607
608 static int rdt_min_bw_show(struct kernfs_open_file *of,
609 struct seq_file *seq, void *v)
610 {
611 struct rdt_resource *r = of->kn->parent->priv;
612
613 seq_printf(seq, "%u\n", r->membw.min_bw);
614 return 0;
615 }
616
617 static int rdt_num_rmids_show(struct kernfs_open_file *of,
618 struct seq_file *seq, void *v)
619 {
620 struct rdt_resource *r = of->kn->parent->priv;
621
622 seq_printf(seq, "%d\n", r->num_rmid);
623
624 return 0;
625 }
626
627 static int rdt_mon_features_show(struct kernfs_open_file *of,
628 struct seq_file *seq, void *v)
629 {
630 struct rdt_resource *r = of->kn->parent->priv;
631 struct mon_evt *mevt;
632
633 list_for_each_entry(mevt, &r->evt_list, list)
634 seq_printf(seq, "%s\n", mevt->name);
635
636 return 0;
637 }
638
639 static int rdt_bw_gran_show(struct kernfs_open_file *of,
640 struct seq_file *seq, void *v)
641 {
642 struct rdt_resource *r = of->kn->parent->priv;
643
644 seq_printf(seq, "%u\n", r->membw.bw_gran);
645 return 0;
646 }
647
648 static int rdt_delay_linear_show(struct kernfs_open_file *of,
649 struct seq_file *seq, void *v)
650 {
651 struct rdt_resource *r = of->kn->parent->priv;
652
653 seq_printf(seq, "%u\n", r->membw.delay_linear);
654 return 0;
655 }
656
657 static int max_threshold_occ_show(struct kernfs_open_file *of,
658 struct seq_file *seq, void *v)
659 {
660 struct rdt_resource *r = of->kn->parent->priv;
661
662 seq_printf(seq, "%u\n", intel_cqm_threshold * r->mon_scale);
663
664 return 0;
665 }
666
667 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
668 char *buf, size_t nbytes, loff_t off)
669 {
670 struct rdt_resource *r = of->kn->parent->priv;
671 unsigned int bytes;
672 int ret;
673
674 ret = kstrtouint(buf, 0, &bytes);
675 if (ret)
676 return ret;
677
678 if (bytes > (boot_cpu_data.x86_cache_size * 1024))
679 return -EINVAL;
680
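/*
 * Note: the integer division below rounds the stored threshold down to
 * a multiple of the hardware counter scale (r->mon_scale).
 */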
681 intel_cqm_threshold = bytes / r->mon_scale;
682
683 return nbytes;
684 }
685
686 /* rdtgroup information files; fflags select the files for each directory. */
687 static struct rftype res_common_files[] = {
688 {
689 .name = "num_closids",
690 .mode = 0444,
691 .kf_ops = &rdtgroup_kf_single_ops,
692 .seq_show = rdt_num_closids_show,
693 .fflags = RF_CTRL_INFO,
694 },
695 {
696 .name = "mon_features",
697 .mode = 0444,
698 .kf_ops = &rdtgroup_kf_single_ops,
699 .seq_show = rdt_mon_features_show,
700 .fflags = RF_MON_INFO,
701 },
702 {
703 .name = "num_rmids",
704 .mode = 0444,
705 .kf_ops = &rdtgroup_kf_single_ops,
706 .seq_show = rdt_num_rmids_show,
707 .fflags = RF_MON_INFO,
708 },
709 {
710 .name = "cbm_mask",
711 .mode = 0444,
712 .kf_ops = &rdtgroup_kf_single_ops,
713 .seq_show = rdt_default_ctrl_show,
714 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
715 },
716 {
717 .name = "min_cbm_bits",
718 .mode = 0444,
719 .kf_ops = &rdtgroup_kf_single_ops,
720 .seq_show = rdt_min_cbm_bits_show,
721 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
722 },
723 {
724 .name = "shareable_bits",
725 .mode = 0444,
726 .kf_ops = &rdtgroup_kf_single_ops,
727 .seq_show = rdt_shareable_bits_show,
728 .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
729 },
730 {
731 .name = "min_bandwidth",
732 .mode = 0444,
733 .kf_ops = &rdtgroup_kf_single_ops,
734 .seq_show = rdt_min_bw_show,
735 .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
736 },
737 {
738 .name = "bandwidth_gran",
739 .mode = 0444,
740 .kf_ops = &rdtgroup_kf_single_ops,
741 .seq_show = rdt_bw_gran_show,
742 .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
743 },
744 {
745 .name = "delay_linear",
746 .mode = 0444,
747 .kf_ops = &rdtgroup_kf_single_ops,
748 .seq_show = rdt_delay_linear_show,
749 .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
750 },
751 {
752 .name = "max_threshold_occupancy",
753 .mode = 0644,
754 .kf_ops = &rdtgroup_kf_single_ops,
755 .write = max_threshold_occ_write,
756 .seq_show = max_threshold_occ_show,
757 .fflags = RF_MON_INFO | RFTYPE_RES_CACHE,
758 },
759 {
760 .name = "cpus",
761 .mode = 0644,
762 .kf_ops = &rdtgroup_kf_single_ops,
763 .write = rdtgroup_cpus_write,
764 .seq_show = rdtgroup_cpus_show,
765 .fflags = RFTYPE_BASE,
766 },
767 {
768 .name = "cpus_list",
769 .mode = 0644,
770 .kf_ops = &rdtgroup_kf_single_ops,
771 .write = rdtgroup_cpus_write,
772 .seq_show = rdtgroup_cpus_show,
773 .flags = RFTYPE_FLAGS_CPUS_LIST,
774 .fflags = RFTYPE_BASE,
775 },
776 {
777 .name = "tasks",
778 .mode = 0644,
779 .kf_ops = &rdtgroup_kf_single_ops,
780 .write = rdtgroup_tasks_write,
781 .seq_show = rdtgroup_tasks_show,
782 .fflags = RFTYPE_BASE,
783 },
784 {
785 .name = "schemata",
786 .mode = 0644,
787 .kf_ops = &rdtgroup_kf_single_ops,
788 .write = rdtgroup_schemata_write,
789 .seq_show = rdtgroup_schemata_show,
790 .fflags = RF_CTRL_BASE,
791 },
792 };
793
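/*
 * Add to @kn every entry of res_common_files[] whose fflags are fully
 * contained in @fflags; on error, remove the files added so far so the
 * directory is left in a clean state.
 */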
794 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
795 {
796 struct rftype *rfts, *rft;
797 int ret, len;
798
799 rfts = res_common_files;
800 len = ARRAY_SIZE(res_common_files);
801
802 lockdep_assert_held(&rdtgroup_mutex);
803
804 for (rft = rfts; rft < rfts + len; rft++) {
805 if ((fflags & rft->fflags) == rft->fflags) {
806 ret = rdtgroup_add_file(kn, rft);
807 if (ret)
808 goto error;
809 }
810 }
811
812 return 0;
813 error:
814 pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
815 while (--rft >= rfts) {
816 if ((fflags & rft->fflags) == rft->fflags)
817 kernfs_remove_by_name(kn, rft->name);
818 }
819 return ret;
820 }
821
822 static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
823 unsigned long fflags)
824 {
825 struct kernfs_node *kn_subdir;
826 int ret;
827
828 kn_subdir = kernfs_create_dir(kn_info, name,
829 kn_info->mode, r);
830 if (IS_ERR(kn_subdir))
831 return PTR_ERR(kn_subdir);
832
833 kernfs_get(kn_subdir);
834 ret = rdtgroup_kn_set_ugid(kn_subdir);
835 if (ret)
836 return ret;
837
838 ret = rdtgroup_add_files(kn_subdir, fflags);
839 if (!ret)
840 kernfs_activate(kn_subdir);
841
842 return ret;
843 }
844
845 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
846 {
847 struct rdt_resource *r;
848 unsigned long fflags;
849 char name[32];
850 int ret;
851
852 /* create the directory */
853 kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
854 if (IS_ERR(kn_info))
855 return PTR_ERR(kn_info);
856 kernfs_get(kn_info);
857
858 for_each_alloc_enabled_rdt_resource(r) {
859 fflags = r->fflags | RF_CTRL_INFO;
860 ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
861 if (ret)
862 goto out_destroy;
863 }
864
865 for_each_mon_enabled_rdt_resource(r) {
866 fflags = r->fflags | RF_MON_INFO;
867 sprintf(name, "%s_MON", r->name);
868 ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
869 if (ret)
870 goto out_destroy;
871 }
872
873 /*
874 * This extra ref will be put in kernfs_remove() and guarantees
875 * that kn_info is always accessible.
876 */
877 kernfs_get(kn_info);
878
879 ret = rdtgroup_kn_set_ugid(kn_info);
880 if (ret)
881 goto out_destroy;
882
883 kernfs_activate(kn_info);
884
885 return 0;
886
887 out_destroy:
888 kernfs_remove(kn_info);
889 return ret;
890 }
891
892 static int
893 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
894 char *name, struct kernfs_node **dest_kn)
895 {
896 struct kernfs_node *kn;
897 int ret;
898
899 /* create the directory */
900 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
901 if (IS_ERR(kn))
902 return PTR_ERR(kn);
903
904 if (dest_kn)
905 *dest_kn = kn;
906
907 /*
908 * This extra ref will be put in kernfs_remove() and guarantees
909 * that @rdtgrp->kn is always accessible.
910 */
911 kernfs_get(kn);
912
913 ret = rdtgroup_kn_set_ugid(kn);
914 if (ret)
915 goto out_destroy;
916
917 kernfs_activate(kn);
918
919 return 0;
920
921 out_destroy:
922 kernfs_remove(kn);
923 return ret;
924 }

925 static void l3_qos_cfg_update(void *arg)
926 {
927 bool *enable = arg;
928
929 wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
930 }
931
932 static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
933 {
934 cpumask_var_t cpu_mask;
935 struct rdt_domain *d;
936 int cpu;
937
938 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
939 return -ENOMEM;
940
941 list_for_each_entry(d, &r->domains, list) {
942 /* Pick one CPU from each domain instance to update MSR */
943 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
944 }
945 cpu = get_cpu();
946 /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
947 if (cpumask_test_cpu(cpu, cpu_mask))
948 l3_qos_cfg_update(&enable);
949 /* Update QOS_CFG MSR on all other cpus in cpu_mask. */
950 smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1);
951 put_cpu();
952
953 free_cpumask_var(cpu_mask);
954
955 return 0;
956 }
957
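/*
 * Code/Data Prioritization (CDP, selected with the "cdp" mount option):
 * the plain L3 resource is replaced by the separate L3DATA and L3CODE
 * resources, and IA32_L3_QOS_CFG is updated on one CPU of every L3
 * domain via set_l3_qos_cfg().
 */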
958 static int cdp_enable(void)
959 {
960 struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA];
961 struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE];
962 struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
963 int ret;
964
965 if (!r_l3->alloc_capable || !r_l3data->alloc_capable ||
966 !r_l3code->alloc_capable)
967 return -EINVAL;
968
969 ret = set_l3_qos_cfg(r_l3, true);
970 if (!ret) {
971 r_l3->alloc_enabled = false;
972 r_l3data->alloc_enabled = true;
973 r_l3code->alloc_enabled = true;
974 }
975 return ret;
976 }
977
978 static void cdp_disable(void)
979 {
980 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
981
982 r->alloc_enabled = r->alloc_capable;
983
984 if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) {
985 rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false;
986 rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false;
987 set_l3_qos_cfg(r, false);
988 }
989 }
990
991 static int parse_rdtgroupfs_options(char *data)
992 {
993 char *token, *o = data;
994 int ret = 0;
995
996 while ((token = strsep(&o, ",")) != NULL) {
997 if (!*token)
998 return -EINVAL;
999
1000 if (!strcmp(token, "cdp"))
1001 ret = cdp_enable();
1002 }
1003
1004 return ret;
1005 }
1006
1007 /*
1008 * We don't allow rdtgroup directories to be created anywhere
1009 * except the root directory. Thus when looking for the rdtgroup
1010 * structure for a kernfs node we are either looking at a directory,
1011 * in which case the rdtgroup structure is pointed at by the "priv"
1012 * field, otherwise we have a file, and need only look to the parent
1013 * to find the rdtgroup.
1014 */
1015 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
1016 {
1017 if (kernfs_type(kn) == KERNFS_DIR) {
1018 /*
1019 * All the resource directories use "kn->priv"
1020 * to point to the "struct rdtgroup" for the
1021 * resource. "info" and its subdirectories don't
1022 * have rdtgroup structures, so return NULL here.
1023 */
1024 if (kn == kn_info || kn->parent == kn_info)
1025 return NULL;
1026 else
1027 return kn->priv;
1028 } else {
1029 return kn->parent->priv;
1030 }
1031 }
1032
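/*
 * Pin the rdtgroup behind @kn (waitcount), break kernfs active
 * protection and take rdtgroup_mutex. May return NULL if @kn belongs to
 * the info directory or if the group was deleted while waiting; callers
 * must still pair this with rdtgroup_kn_unlock().
 */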
1033 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
1034 {
1035 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
1036
1037 if (!rdtgrp)
1038 return NULL;
1039
1040 atomic_inc(&rdtgrp->waitcount);
1041 kernfs_break_active_protection(kn);
1042
1043 mutex_lock(&rdtgroup_mutex);
1044
1045 /* Was this group deleted while we waited? */
1046 if (rdtgrp->flags & RDT_DELETED)
1047 return NULL;
1048
1049 return rdtgrp;
1050 }
1051
1052 void rdtgroup_kn_unlock(struct kernfs_node *kn)
1053 {
1054 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
1055
1056 if (!rdtgrp)
1057 return;
1058
1059 mutex_unlock(&rdtgroup_mutex);
1060
1061 if (atomic_dec_and_test(&rdtgrp->waitcount) &&
1062 (rdtgrp->flags & RDT_DELETED)) {
1063 kernfs_unbreak_active_protection(kn);
1064 kernfs_put(rdtgrp->kn);
1065 kfree(rdtgrp);
1066 } else {
1067 kernfs_unbreak_active_protection(kn);
1068 }
1069 }
1070
1071 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
1072 struct rdtgroup *prgrp,
1073 struct kernfs_node **mon_data_kn);
1074
1075 static struct dentry *rdt_mount(struct file_system_type *fs_type,
1076 int flags, const char *unused_dev_name,
1077 void *data)
1078 {
1079 struct rdt_domain *dom;
1080 struct rdt_resource *r;
1081 struct dentry *dentry;
1082 int ret;
1083
1084 mutex_lock(&rdtgroup_mutex);
1085 /*
1086 * resctrl file system can only be mounted once.
1087 */
1088 if (static_branch_unlikely(&rdt_enable_key)) {
1089 dentry = ERR_PTR(-EBUSY);
1090 goto out;
1091 }
1092
1093 ret = parse_rdtgroupfs_options(data);
1094 if (ret) {
1095 dentry = ERR_PTR(ret);
1096 goto out_cdp;
1097 }
1098
1099 closid_init();
1100
1101 ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
1102 if (ret) {
1103 dentry = ERR_PTR(ret);
1104 goto out_cdp;
1105 }
1106
1107 if (rdt_mon_capable) {
1108 ret = mongroup_create_dir(rdtgroup_default.kn,
1109 NULL, "mon_groups",
1110 &kn_mongrp);
1111 if (ret) {
1112 dentry = ERR_PTR(ret);
1113 goto out_info;
1114 }
1115 kernfs_get(kn_mongrp);
1116
1117 ret = mkdir_mondata_all(rdtgroup_default.kn,
1118 &rdtgroup_default, &kn_mondata);
1119 if (ret) {
1120 dentry = ERR_PTR(ret);
1121 goto out_mongrp;
1122 }
1123 kernfs_get(kn_mondata);
1124 rdtgroup_default.mon.mon_data_kn = kn_mondata;
1125 }
1126
1127 dentry = kernfs_mount(fs_type, flags, rdt_root,
1128 RDTGROUP_SUPER_MAGIC, NULL);
1129 if (IS_ERR(dentry))
1130 goto out_mondata;
1131
1132 if (rdt_alloc_capable)
1133 static_branch_enable(&rdt_alloc_enable_key);
1134 if (rdt_mon_capable)
1135 static_branch_enable(&rdt_mon_enable_key);
1136
1137 if (rdt_alloc_capable || rdt_mon_capable)
1138 static_branch_enable(&rdt_enable_key);
1139
1140 if (is_mbm_enabled()) {
1141 r = &rdt_resources_all[RDT_RESOURCE_L3];
1142 list_for_each_entry(dom, &r->domains, list)
1143 mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
1144 }
1145
1146 goto out;
1147
1148 out_mondata:
1149 if (rdt_mon_capable)
1150 kernfs_remove(kn_mondata);
1151 out_mongrp:
1152 if (rdt_mon_capable)
1153 kernfs_remove(kn_mongrp);
1154 out_info:
1155 kernfs_remove(kn_info);
1156 out_cdp:
1157 cdp_disable();
1158 out:
1159 mutex_unlock(&rdtgroup_mutex);
1160
1161 return dentry;
1162 }
1163
1164 static int reset_all_ctrls(struct rdt_resource *r)
1165 {
1166 struct msr_param msr_param;
1167 cpumask_var_t cpu_mask;
1168 struct rdt_domain *d;
1169 int i, cpu;
1170
1171 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1172 return -ENOMEM;
1173
1174 msr_param.res = r;
1175 msr_param.low = 0;
1176 msr_param.high = r->num_closid;
1177
1178 /*
1179 * Disable resource control for this resource by setting all
1180 * CBMs in all domains to the maximum mask value. Pick one CPU
1181 * from each domain to update the MSRs below.
1182 */
1183 list_for_each_entry(d, &r->domains, list) {
1184 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
1185
1186 for (i = 0; i < r->num_closid; i++)
1187 d->ctrl_val[i] = r->default_ctrl;
1188 }
1189 cpu = get_cpu();
1190 /* Update CBM on this cpu if it's in cpu_mask. */
1191 if (cpumask_test_cpu(cpu, cpu_mask))
1192 rdt_ctrl_update(&msr_param);
1193 /* Update CBM on all other cpus in cpu_mask. */
1194 smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
1195 put_cpu();
1196
1197 free_cpumask_var(cpu_mask);
1198
1199 return 0;
1200 }
1201
1202 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
1203 {
1204 return (rdt_alloc_capable &&
1205 (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
1206 }
1207
1208 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
1209 {
1210 return (rdt_mon_capable &&
1211 (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
1212 }
1213
1214 /*
1215 * Move tasks from one to the other group. If @from is NULL, then all tasks
1216 * in the system are moved unconditionally (used for teardown).
1217 *
1218 * If @mask is not NULL the cpus on which moved tasks are running are set
1219 * in that mask so the update smp function call is restricted to affected
1220 * cpus.
1221 */
1222 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
1223 struct cpumask *mask)
1224 {
1225 struct task_struct *p, *t;
1226
1227 read_lock(&tasklist_lock);
1228 for_each_process_thread(p, t) {
1229 if (!from || is_closid_match(t, from) ||
1230 is_rmid_match(t, from)) {
1231 t->closid = to->closid;
1232 t->rmid = to->mon.rmid;
1233
1234 #ifdef CONFIG_SMP
1235 /*
1236 * This is safe on x86 w/o barriers as the ordering
1237 * of writing to task_cpu() and t->on_cpu is
1238 * reverse to the reading here. The detection is
1239 * inaccurate as tasks might move or schedule
1240 * before the smp function call takes place. In
1241 * such a case the function call is pointless, but
1242 * there is no other side effect.
1243 */
1244 if (mask && t->on_cpu)
1245 cpumask_set_cpu(task_cpu(t), mask);
1246 #endif
1247 }
1248 }
1249 read_unlock(&tasklist_lock);
1250 }
1251
1252 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
1253 {
1254 struct rdtgroup *sentry, *stmp;
1255 struct list_head *head;
1256
1257 head = &rdtgrp->mon.crdtgrp_list;
1258 list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
1259 free_rmid(sentry->mon.rmid);
1260 list_del(&sentry->mon.crdtgrp_list);
1261 kfree(sentry);
1262 }
1263 }
1264
1265 /*
1266 * Forcibly remove all subdirectories under root.
1267 */
1268 static void rmdir_all_sub(void)
1269 {
1270 struct rdtgroup *rdtgrp, *tmp;
1271
1272 /* Move all tasks to the default resource group */
1273 rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
1274
1275 list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
1276 /* Free any child rmids */
1277 free_all_child_rdtgrp(rdtgrp);
1278
1279 /* Remove each rdtgroup other than root */
1280 if (rdtgrp == &rdtgroup_default)
1281 continue;
1282
1283 /*
1284 * Give any CPUs back to the default group. We cannot copy
1285 * cpu_online_mask because a CPU might have executed the
1286 * offline callback already, but is still marked online.
1287 */
1288 cpumask_or(&rdtgroup_default.cpu_mask,
1289 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
1290
1291 free_rmid(rdtgrp->mon.rmid);
1292
1293 kernfs_remove(rdtgrp->kn);
1294 list_del(&rdtgrp->rdtgroup_list);
1295 kfree(rdtgrp);
1296 }
1297 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
1298 get_online_cpus();
1299 update_closid_rmid(cpu_online_mask, &rdtgroup_default);
1300 put_online_cpus();
1301
1302 kernfs_remove(kn_info);
1303 kernfs_remove(kn_mongrp);
1304 kernfs_remove(kn_mondata);
1305 }
1306
1307 static void rdt_kill_sb(struct super_block *sb)
1308 {
1309 struct rdt_resource *r;
1310
1311 mutex_lock(&rdtgroup_mutex);
1312
1313 /* Put everything back to default values. */
1314 for_each_alloc_enabled_rdt_resource(r)
1315 reset_all_ctrls(r);
1316 cdp_disable();
1317 rmdir_all_sub();
1318 static_branch_disable(&rdt_alloc_enable_key);
1319 static_branch_disable(&rdt_mon_enable_key);
1320 static_branch_disable(&rdt_enable_key);
1321 kernfs_kill_sb(sb);
1322 mutex_unlock(&rdtgroup_mutex);
1323 }
1324
1325 static struct file_system_type rdt_fs_type = {
1326 .name = "resctrl",
1327 .mount = rdt_mount,
1328 .kill_sb = rdt_kill_sb,
1329 };
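/*
 * Typical usage from user space (see Documentation/x86/intel_rdt_ui.txt):
 *
 *   # mount -t resctrl resctrl /sys/fs/resctrl
 *   # mount -t resctrl resctrl -o cdp /sys/fs/resctrl	(with L3 CDP)
 *
 * Resource groups are then created with mkdir under the mount point.
 */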
1330
1331 static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
1332 void *priv)
1333 {
1334 struct kernfs_node *kn;
1335 int ret = 0;
1336
1337 kn = __kernfs_create_file(parent_kn, name, 0444, 0,
1338 &kf_mondata_ops, priv, NULL, NULL);
1339 if (IS_ERR(kn))
1340 return PTR_ERR(kn);
1341
1342 ret = rdtgroup_kn_set_ugid(kn);
1343 if (ret) {
1344 kernfs_remove(kn);
1345 return ret;
1346 }
1347
1348 return ret;
1349 }
1350
1351 /*
1352 * Remove all subdirectories of mon_data of ctrl_mon groups
1353 * and monitor groups with given domain id.
1354 */
1355 void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
1356 {
1357 struct rdtgroup *prgrp, *crgrp;
1358 char name[32];
1359
1360 if (!r->mon_enabled)
1361 return;
1362
1363 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
1364 sprintf(name, "mon_%s_%02d", r->name, dom_id);
1365 kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
1366
1367 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
1368 kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
1369 }
1370 }
1371
1372 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
1373 struct rdt_domain *d,
1374 struct rdt_resource *r, struct rdtgroup *prgrp)
1375 {
1376 union mon_data_bits priv;
1377 struct kernfs_node *kn;
1378 struct mon_evt *mevt;
1379 struct rmid_read rr;
1380 char name[32];
1381 int ret;
1382
1383 sprintf(name, "mon_%s_%02d", r->name, d->id);
1384 /* create the directory */
1385 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
1386 if (IS_ERR(kn))
1387 return PTR_ERR(kn);
1388
1389 /*
1390 * This extra ref will be put in kernfs_remove() and guarantees
1391 * that kn is always accessible.
1392 */
1393 kernfs_get(kn);
1394 ret = rdtgroup_kn_set_ugid(kn);
1395 if (ret)
1396 goto out_destroy;
1397
1398 if (WARN_ON(list_empty(&r->evt_list))) {
1399 ret = -EPERM;
1400 goto out_destroy;
1401 }
1402
1403 priv.u.rid = r->rid;
1404 priv.u.domid = d->id;
1405 list_for_each_entry(mevt, &r->evt_list, list) {
1406 priv.u.evtid = mevt->evtid;
1407 ret = mon_addfile(kn, mevt->name, priv.priv);
1408 if (ret)
1409 goto out_destroy;
1410
1411 if (is_mbm_event(mevt->evtid))
1412 mon_event_read(&rr, d, prgrp, mevt->evtid, true);
1413 }
1414 kernfs_activate(kn);
1415 return 0;
1416
1417 out_destroy:
1418 kernfs_remove(kn);
1419 return ret;
1420 }
1421
1422 /*
1423 * Add all subdirectories of mon_data for "ctrl_mon" groups
1424 * and "monitor" groups with given domain id.
1425 */
1426 void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
1427 struct rdt_domain *d)
1428 {
1429 struct kernfs_node *parent_kn;
1430 struct rdtgroup *prgrp, *crgrp;
1431 struct list_head *head;
1432
1433 if (!r->mon_enabled)
1434 return;
1435
1436 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
1437 parent_kn = prgrp->mon.mon_data_kn;
1438 mkdir_mondata_subdir(parent_kn, d, r, prgrp);
1439
1440 head = &prgrp->mon.crdtgrp_list;
1441 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
1442 parent_kn = crgrp->mon.mon_data_kn;
1443 mkdir_mondata_subdir(parent_kn, d, r, crgrp);
1444 }
1445 }
1446 }
1447
1448 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
1449 struct rdt_resource *r,
1450 struct rdtgroup *prgrp)
1451 {
1452 struct rdt_domain *dom;
1453 int ret;
1454
1455 list_for_each_entry(dom, &r->domains, list) {
1456 ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
1457 if (ret)
1458 return ret;
1459 }
1460
1461 return 0;
1462 }
1463
1464 /*
1465 * This creates a directory mon_data which contains the monitored data.
1466 *
1467 * mon_data has one directory for each domain, which is named
1468 * in the format mon_<domain_name>_<domain_id>. For example, a mon_data
1469 * with L3 domain looks as below:
1470 * ./mon_data:
1471 * mon_L3_00
1472 * mon_L3_01
1473 * mon_L3_02
1474 * ...
1475 *
1476 * Each domain directory has one file per event:
1477 * ./mon_L3_00/:
1478 * llc_occupancy
1479 *
1480 */
1481 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
1482 struct rdtgroup *prgrp,
1483 struct kernfs_node **dest_kn)
1484 {
1485 struct rdt_resource *r;
1486 struct kernfs_node *kn;
1487 int ret;
1488
1489 /*
1490 * Create the mon_data directory first.
1491 */
1492 ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
1493 if (ret)
1494 return ret;
1495
1496 if (dest_kn)
1497 *dest_kn = kn;
1498
1499 /*
1500 * Create the subdirectories for each domain. Note that all events
1501 * in a domain like L3 are grouped into a resource whose domain is L3
1502 */
1503 for_each_mon_enabled_rdt_resource(r) {
1504 ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
1505 if (ret)
1506 goto out_destroy;
1507 }
1508
1509 return 0;
1510
1511 out_destroy:
1512 kernfs_remove(kn);
1513 return ret;
1514 }
1515
1516 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
1517 struct kernfs_node *prgrp_kn,
1518 const char *name, umode_t mode,
1519 enum rdt_group_type rtype, struct rdtgroup **r)
1520 {
1521 struct rdtgroup *prdtgrp, *rdtgrp;
1522 struct kernfs_node *kn;
1523 uint files = 0;
1524 int ret;
1525
1526 prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
1527 if (!prdtgrp) {
1528 ret = -ENODEV;
1529 goto out_unlock;
1530 }
1531
1532 /* allocate the rdtgroup. */
1533 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
1534 if (!rdtgrp) {
1535 ret = -ENOSPC;
1536 goto out_unlock;
1537 }
1538 *r = rdtgrp;
1539 rdtgrp->mon.parent = prdtgrp;
1540 rdtgrp->type = rtype;
1541 INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
1542
1543 /* kernfs creates the directory for rdtgrp */
1544 kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
1545 if (IS_ERR(kn)) {
1546 ret = PTR_ERR(kn);
1547 goto out_free_rgrp;
1548 }
1549 rdtgrp->kn = kn;
1550
1551 /*
1552 * kernfs_remove() will drop the reference count on "kn" which
1553 * will free it. But we still need it to stick around for the
1554 * rdtgroup_kn_unlock(kn) call below. Take one extra reference
1555 * here, which will be dropped inside rdtgroup_kn_unlock().
1556 */
1557 kernfs_get(kn);
1558
1559 ret = rdtgroup_kn_set_ugid(kn);
1560 if (ret)
1561 goto out_destroy;
1562
1563 /* Select the base files plus those specific to this group type */
1564 files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
1565 ret = rdtgroup_add_files(kn, files);
1566 if (ret)
1567 goto out_destroy;
1568
1569 if (rdt_mon_capable) {
1570 ret = alloc_rmid();
1571 if (ret < 0)
1572 goto out_destroy;
1573 rdtgrp->mon.rmid = ret;
1574
1575 ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
1576 if (ret)
1577 goto out_idfree;
1578 }
1579 kernfs_activate(kn);
1580
1581 /*
1582 * The caller unlocks the prgrp_kn upon success.
1583 */
1584 return 0;
1585
1586 out_idfree:
1587 free_rmid(rdtgrp->mon.rmid);
1588 out_destroy:
1589 kernfs_remove(rdtgrp->kn);
1590 out_free_rgrp:
1591 kfree(rdtgrp);
1592 out_unlock:
1593 rdtgroup_kn_unlock(prgrp_kn);
1594 return ret;
1595 }
1596
1597 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
1598 {
1599 kernfs_remove(rgrp->kn);
1600 free_rmid(rgrp->mon.rmid);
1601 kfree(rgrp);
1602 }
1603
1604 /*
1605 * Create a monitor group under "mon_groups" directory of a control
1606 * and monitor group (ctrl_mon). This is a resource group
1607 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
1608 */
1609 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
1610 struct kernfs_node *prgrp_kn,
1611 const char *name,
1612 umode_t mode)
1613 {
1614 struct rdtgroup *rdtgrp, *prgrp;
1615 int ret;
1616
1617 ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
1618 &rdtgrp);
1619 if (ret)
1620 return ret;
1621
1622 prgrp = rdtgrp->mon.parent;
1623 rdtgrp->closid = prgrp->closid;
1624
1625 /*
1626 * Add the rdtgrp to the list of rdtgrps the parent
1627 * ctrl_mon group has to track.
1628 */
1629 list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
1630
1631 rdtgroup_kn_unlock(prgrp_kn);
1632 return ret;
1633 }
1634
1635 /*
1636 * These are rdtgroups created under the root directory. Can be used
1637 * to allocate and monitor resources.
1638 */
1639 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
1640 struct kernfs_node *prgrp_kn,
1641 const char *name, umode_t mode)
1642 {
1643 struct rdtgroup *rdtgrp;
1644 struct kernfs_node *kn;
1645 u32 closid;
1646 int ret;
1647
1648 ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
1649 &rdtgrp);
1650 if (ret)
1651 return ret;
1652
1653 kn = rdtgrp->kn;
1654 ret = closid_alloc();
1655 if (ret < 0)
1656 goto out_common_fail;
1657 closid = ret;
1658
1659 rdtgrp->closid = closid;
1660 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
1661
1662 if (rdt_mon_capable) {
1663 /*
1664 * Create an empty mon_groups directory to hold the subset
1665 * of tasks and cpus to monitor.
1666 */
1667 ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
1668 if (ret)
1669 goto out_id_free;
1670 }
1671
1672 goto out_unlock;
1673
1674 out_id_free:
1675 closid_free(closid);
1676 list_del(&rdtgrp->rdtgroup_list);
1677 out_common_fail:
1678 mkdir_rdt_prepare_clean(rdtgrp);
1679 out_unlock:
1680 rdtgroup_kn_unlock(prgrp_kn);
1681 return ret;
1682 }
1683
1684 /*
1685 * We allow creating mon groups only within a directory called "mon_groups"
1686 * which is present in every ctrl_mon group. Check if this is a valid
1687 * "mon_groups" directory.
1688 *
1689 * 1. The directory should be named "mon_groups".
1690 * 2. The mon group itself should "not" be named "mon_groups".
1691 * This makes sure "mon_groups" directory always has a ctrl_mon group
1692 * as parent.
1693 */
1694 static bool is_mon_groups(struct kernfs_node *kn, const char *name)
1695 {
1696 return (!strcmp(kn->name, "mon_groups") &&
1697 strcmp(name, "mon_groups"));
1698 }
1699
1700 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
1701 umode_t mode)
1702 {
1703 /* Do not accept '\n' to avoid an unparsable situation. */
1704 if (strchr(name, '\n'))
1705 return -EINVAL;
1706
1707 /*
1708 * If the parent directory is the root directory and RDT
1709 * allocation is supported, add a control and monitoring
1710 * subdirectory
1711 */
1712 if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
1713 return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
1714
1715 /*
1716 * If RDT monitoring is supported and the parent directory is a valid
1717 * "mon_groups" directory, add a monitoring subdirectory.
1718 */
1719 if (rdt_mon_capable && is_mon_groups(parent_kn, name))
1720 return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
1721
1722 return -EPERM;
1723 }
1724
1725 static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
1726 cpumask_var_t tmpmask)
1727 {
1728 struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
1729 int cpu;
1730
1731 /* Give any tasks back to the parent group */
1732 rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
1733
1734 /* Update per cpu rmid of the moved CPUs first */
1735 for_each_cpu(cpu, &rdtgrp->cpu_mask)
1736 per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
1737 /*
1738 * Update the MSR on moved CPUs and CPUs which have moved
1739 * task running on them.
1740 */
1741 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
1742 update_closid_rmid(tmpmask, NULL);
1743
1744 rdtgrp->flags = RDT_DELETED;
1745 free_rmid(rdtgrp->mon.rmid);
1746
1747 /*
1748 * Remove the rdtgrp from the parent ctrl_mon group's list
1749 */
1750 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
1751 list_del(&rdtgrp->mon.crdtgrp_list);
1752
1753 /*
1754 * one extra hold on this, will drop when we kfree(rdtgrp)
1755 * in rdtgroup_kn_unlock()
1756 */
1757 kernfs_get(kn);
1758 kernfs_remove(rdtgrp->kn);
1759
1760 return 0;
1761 }
1762
1763 static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
1764 cpumask_var_t tmpmask)
1765 {
1766 int cpu;
1767
1768 /* Give any tasks back to the default group */
1769 rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
1770
1771 /* Give any CPUs back to the default group */
1772 cpumask_or(&rdtgroup_default.cpu_mask,
1773 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
1774
1775 /* Update per cpu closid and rmid of the moved CPUs first */
1776 for_each_cpu(cpu, &rdtgrp->cpu_mask) {
1777 per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
1778 per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
1779 }
1780
1781 /*
1782 * Update the MSR on moved CPUs and CPUs which have moved
1783 * task running on them.
1784 */
1785 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
1786 update_closid_rmid(tmpmask, NULL);
1787
1788 rdtgrp->flags = RDT_DELETED;
1789 closid_free(rdtgrp->closid);
1790 free_rmid(rdtgrp->mon.rmid);
1791
1792 /*
1793 * Free all the child monitor group rmids.
1794 */
1795 free_all_child_rdtgrp(rdtgrp);
1796
1797 list_del(&rdtgrp->rdtgroup_list);
1798
1799 /*
1800 * one extra hold on this, will drop when we kfree(rdtgrp)
1801 * in rdtgroup_kn_unlock()
1802 */
1803 kernfs_get(kn);
1804 kernfs_remove(rdtgrp->kn);
1805
1806 return 0;
1807 }
1808
1809 static int rdtgroup_rmdir(struct kernfs_node *kn)
1810 {
1811 struct kernfs_node *parent_kn = kn->parent;
1812 struct rdtgroup *rdtgrp;
1813 cpumask_var_t tmpmask;
1814 int ret = 0;
1815
1816 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
1817 return -ENOMEM;
1818
1819 rdtgrp = rdtgroup_kn_lock_live(kn);
1820 if (!rdtgrp) {
1821 ret = -EPERM;
1822 goto out;
1823 }
1824
1825 /*
1826 * If the rdtgroup is a ctrl_mon group and parent directory
1827 * is the root directory, remove the ctrl_mon group.
1828 *
1829 * If the rdtgroup is a mon group and parent directory
1830 * is a valid "mon_groups" directory, remove the mon group.
1831 */
1832 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn)
1833 ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
1834 else if (rdtgrp->type == RDTMON_GROUP &&
1835 is_mon_groups(parent_kn, kn->name))
1836 ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
1837 else
1838 ret = -EPERM;
1839
1840 out:
1841 rdtgroup_kn_unlock(kn);
1842 free_cpumask_var(tmpmask);
1843 return ret;
1844 }
1845
1846 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
1847 {
1848 if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
1849 seq_puts(seq, ",cdp");
1850 return 0;
1851 }
1852
1853 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
1854 .mkdir = rdtgroup_mkdir,
1855 .rmdir = rdtgroup_rmdir,
1856 .show_options = rdtgroup_show_options,
1857 };
1858
1859 static int __init rdtgroup_setup_root(void)
1860 {
1861 int ret;
1862
1863 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
1864 KERNFS_ROOT_CREATE_DEACTIVATED,
1865 &rdtgroup_default);
1866 if (IS_ERR(rdt_root))
1867 return PTR_ERR(rdt_root);
1868
1869 mutex_lock(&rdtgroup_mutex);
1870
1871 rdtgroup_default.closid = 0;
1872 rdtgroup_default.mon.rmid = 0;
1873 rdtgroup_default.type = RDTCTRL_GROUP;
1874 INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
1875
1876 list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
1877
1878 ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
1879 if (ret) {
1880 kernfs_destroy_root(rdt_root);
1881 goto out;
1882 }
1883
1884 rdtgroup_default.kn = rdt_root->kn;
1885 kernfs_activate(rdtgroup_default.kn);
1886
1887 out:
1888 mutex_unlock(&rdtgroup_mutex);
1889
1890 return ret;
1891 }
1892
1893 /*
1894 * rdtgroup_init - rdtgroup initialization
1895 *
1896 * Setup resctrl file system including set up root, create mount point,
1897 * register rdtgroup filesystem, and initialize files under root directory.
1898 *
1899 * Return: 0 on success or -errno
1900 */
1901 int __init rdtgroup_init(void)
1902 {
1903 int ret = 0;
1904
1905 ret = rdtgroup_setup_root();
1906 if (ret)
1907 return ret;
1908
1909 ret = sysfs_create_mount_point(fs_kobj, "resctrl");
1910 if (ret)
1911 goto cleanup_root;
1912
1913 ret = register_filesystem(&rdt_fs_type);
1914 if (ret)
1915 goto cleanup_mountpoint;
1916
1917 return 0;
1918
1919 cleanup_mountpoint:
1920 sysfs_remove_mount_point(fs_kobj, "resctrl");
1921 cleanup_root:
1922 kernfs_destroy_root(rdt_root);
1923
1924 return ret;
1925 }