 * User interface for Resource Allocation in Resource Director Technology (RDT)
4 * Copyright (C) 2016 Intel Corporation
6 * Author: Fenghua Yu <fenghua.yu@intel.com>
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * More information about RDT be found in the Intel (R) x86 Architecture
18 * Software Developer Manual.
21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
23 #include <linux/cpu.h>
25 #include <linux/sysfs.h>
26 #include <linux/kernfs.h>
27 #include <linux/seq_file.h>
28 #include <linux/sched/signal.h>
29 #include <linux/sched/task.h>
30 #include <linux/slab.h>
31 #include <linux/task_work.h>
33 #include <uapi/linux/magic.h>
35 #include <asm/intel_rdt_sched.h>
36 #include "intel_rdt.h"
38 DEFINE_STATIC_KEY_FALSE(rdt_enable_key
);
39 DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key
);
40 DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key
);
41 static struct kernfs_root
*rdt_root
;
42 struct rdtgroup rdtgroup_default
;
43 LIST_HEAD(rdt_all_groups
);
45 /* Kernel fs node for "info" directory under root */
46 static struct kernfs_node
*kn_info
;
48 /* Kernel fs node for "mon_groups" directory under root */
49 static struct kernfs_node
*kn_mongrp
;
51 /* Kernel fs node for "mon_data" directory under root */
52 static struct kernfs_node
*kn_mondata
;
55 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
56 * we can keep a bitmap of free CLOSIDs in a single integer.
58 * Using a global CLOSID across all resources has some advantages and
60 * + We can simply set "current->closid" to assign a task to a resource
62 * + Context switch code can avoid extra memory references deciding which
63 * CLOSID to load into the PQR_ASSOC MSR
64 * - We give up some options in configuring resource groups across multi-socket
66 * - Our choices on how to configure each resource become progressively more
67 * limited as the number of resources grows.
69 static int closid_free_map
;
71 static void closid_init(void)
73 struct rdt_resource
*r
;
74 int rdt_min_closid
= 32;
76 /* Compute rdt_min_closid across all resources */
77 for_each_alloc_enabled_rdt_resource(r
)
78 rdt_min_closid
= min(rdt_min_closid
, r
->num_closid
);
80 closid_free_map
= BIT_MASK(rdt_min_closid
) - 1;
82 /* CLOSID 0 is always reserved for the default group */
83 closid_free_map
&= ~1;
86 static int closid_alloc(void)
88 u32 closid
= ffs(closid_free_map
);
93 closid_free_map
&= ~(1 << closid
);
98 static void closid_free(int closid
)
100 closid_free_map
|= 1 << closid
;
103 /* set uid and gid of rdtgroup dirs and files to that of the creator */
104 static int rdtgroup_kn_set_ugid(struct kernfs_node
*kn
)
106 struct iattr iattr
= { .ia_valid
= ATTR_UID
| ATTR_GID
,
107 .ia_uid
= current_fsuid(),
108 .ia_gid
= current_fsgid(), };
110 if (uid_eq(iattr
.ia_uid
, GLOBAL_ROOT_UID
) &&
111 gid_eq(iattr
.ia_gid
, GLOBAL_ROOT_GID
))
114 return kernfs_setattr(kn
, &iattr
);
117 static int rdtgroup_add_file(struct kernfs_node
*parent_kn
, struct rftype
*rft
)
119 struct kernfs_node
*kn
;
122 kn
= __kernfs_create_file(parent_kn
, rft
->name
, rft
->mode
,
123 0, rft
->kf_ops
, rft
, NULL
, NULL
);
127 ret
= rdtgroup_kn_set_ugid(kn
);
136 static int rdtgroup_seqfile_show(struct seq_file
*m
, void *arg
)
138 struct kernfs_open_file
*of
= m
->private;
139 struct rftype
*rft
= of
->kn
->priv
;
142 return rft
->seq_show(of
, m
, arg
);
146 static ssize_t
rdtgroup_file_write(struct kernfs_open_file
*of
, char *buf
,
147 size_t nbytes
, loff_t off
)
149 struct rftype
*rft
= of
->kn
->priv
;
152 return rft
->write(of
, buf
, nbytes
, off
);
157 static struct kernfs_ops rdtgroup_kf_single_ops
= {
158 .atomic_write_len
= PAGE_SIZE
,
159 .write
= rdtgroup_file_write
,
160 .seq_show
= rdtgroup_seqfile_show
,
163 static struct kernfs_ops kf_mondata_ops
= {
164 .atomic_write_len
= PAGE_SIZE
,
165 .seq_show
= rdtgroup_mondata_show
,
168 static bool is_cpu_list(struct kernfs_open_file
*of
)
170 struct rftype
*rft
= of
->kn
->priv
;
172 return rft
->flags
& RFTYPE_FLAGS_CPUS_LIST
;
175 static int rdtgroup_cpus_show(struct kernfs_open_file
*of
,
176 struct seq_file
*s
, void *v
)
178 struct rdtgroup
*rdtgrp
;
181 rdtgrp
= rdtgroup_kn_lock_live(of
->kn
);
184 seq_printf(s
, is_cpu_list(of
) ? "%*pbl\n" : "%*pb\n",
185 cpumask_pr_args(&rdtgrp
->cpu_mask
));
189 rdtgroup_kn_unlock(of
->kn
);
195 * This is safe against intel_rdt_sched_in() called from __switch_to()
196 * because __switch_to() is executed with interrupts disabled. A local call
197 * from update_closid_rmid() is proteced against __switch_to() because
198 * preemption is disabled.
200 static void update_cpu_closid_rmid(void *info
)
202 struct rdtgroup
*r
= info
;
205 this_cpu_write(pqr_state
.default_closid
, r
->closid
);
206 this_cpu_write(pqr_state
.default_rmid
, r
->mon
.rmid
);
210 * We cannot unconditionally write the MSR because the current
211 * executing task might have its own closid selected. Just reuse
212 * the context switch code.
214 intel_rdt_sched_in();
218 * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
220 * Per task closids/rmids must have been set up before calling this function.
223 update_closid_rmid(const struct cpumask
*cpu_mask
, struct rdtgroup
*r
)
227 if (cpumask_test_cpu(cpu
, cpu_mask
))
228 update_cpu_closid_rmid(r
);
229 smp_call_function_many(cpu_mask
, update_cpu_closid_rmid
, r
, 1);
233 static int cpus_mon_write(struct rdtgroup
*rdtgrp
, cpumask_var_t newmask
,
234 cpumask_var_t tmpmask
)
236 struct rdtgroup
*prgrp
= rdtgrp
->mon
.parent
, *crgrp
;
237 struct list_head
*head
;
239 /* Check whether cpus belong to parent ctrl group */
240 cpumask_andnot(tmpmask
, newmask
, &prgrp
->cpu_mask
);
241 if (cpumask_weight(tmpmask
))
244 /* Check whether cpus are dropped from this group */
245 cpumask_andnot(tmpmask
, &rdtgrp
->cpu_mask
, newmask
);
246 if (cpumask_weight(tmpmask
)) {
247 /* Give any dropped cpus to parent rdtgroup */
248 cpumask_or(&prgrp
->cpu_mask
, &prgrp
->cpu_mask
, tmpmask
);
249 update_closid_rmid(tmpmask
, prgrp
);
253 * If we added cpus, remove them from previous group that owned them
254 * and update per-cpu rmid
256 cpumask_andnot(tmpmask
, newmask
, &rdtgrp
->cpu_mask
);
257 if (cpumask_weight(tmpmask
)) {
258 head
= &prgrp
->mon
.crdtgrp_list
;
259 list_for_each_entry(crgrp
, head
, mon
.crdtgrp_list
) {
262 cpumask_andnot(&crgrp
->cpu_mask
, &crgrp
->cpu_mask
,
265 update_closid_rmid(tmpmask
, rdtgrp
);
268 /* Done pushing/pulling - update this group with new mask */
269 cpumask_copy(&rdtgrp
->cpu_mask
, newmask
);
274 static void cpumask_rdtgrp_clear(struct rdtgroup
*r
, struct cpumask
*m
)
276 struct rdtgroup
*crgrp
;
278 cpumask_andnot(&r
->cpu_mask
, &r
->cpu_mask
, m
);
279 /* update the child mon group masks as well*/
280 list_for_each_entry(crgrp
, &r
->mon
.crdtgrp_list
, mon
.crdtgrp_list
)
281 cpumask_and(&crgrp
->cpu_mask
, &r
->cpu_mask
, &crgrp
->cpu_mask
);
284 static int cpus_ctrl_write(struct rdtgroup
*rdtgrp
, cpumask_var_t newmask
,
285 cpumask_var_t tmpmask
, cpumask_var_t tmpmask1
)
287 struct rdtgroup
*r
, *crgrp
;
288 struct list_head
*head
;
290 /* Check whether cpus are dropped from this group */
291 cpumask_andnot(tmpmask
, &rdtgrp
->cpu_mask
, newmask
);
292 if (cpumask_weight(tmpmask
)) {
293 /* Can't drop from default group */
294 if (rdtgrp
== &rdtgroup_default
)
297 /* Give any dropped cpus to rdtgroup_default */
298 cpumask_or(&rdtgroup_default
.cpu_mask
,
299 &rdtgroup_default
.cpu_mask
, tmpmask
);
300 update_closid_rmid(tmpmask
, &rdtgroup_default
);
304 * If we added cpus, remove them from previous group and
305 * the prev group's child groups that owned them
306 * and update per-cpu closid/rmid.
308 cpumask_andnot(tmpmask
, newmask
, &rdtgrp
->cpu_mask
);
309 if (cpumask_weight(tmpmask
)) {
310 list_for_each_entry(r
, &rdt_all_groups
, rdtgroup_list
) {
313 cpumask_and(tmpmask1
, &r
->cpu_mask
, tmpmask
);
314 if (cpumask_weight(tmpmask1
))
315 cpumask_rdtgrp_clear(r
, tmpmask1
);
317 update_closid_rmid(tmpmask
, rdtgrp
);
320 /* Done pushing/pulling - update this group with new mask */
321 cpumask_copy(&rdtgrp
->cpu_mask
, newmask
);
324 * Clear child mon group masks since there is a new parent mask
325 * now and update the rmid for the cpus the child lost.
327 head
= &rdtgrp
->mon
.crdtgrp_list
;
328 list_for_each_entry(crgrp
, head
, mon
.crdtgrp_list
) {
329 cpumask_and(tmpmask
, &rdtgrp
->cpu_mask
, &crgrp
->cpu_mask
);
330 update_closid_rmid(tmpmask
, rdtgrp
);
331 cpumask_clear(&crgrp
->cpu_mask
);
337 static ssize_t
rdtgroup_cpus_write(struct kernfs_open_file
*of
,
338 char *buf
, size_t nbytes
, loff_t off
)
340 cpumask_var_t tmpmask
, newmask
, tmpmask1
;
341 struct rdtgroup
*rdtgrp
;
347 if (!zalloc_cpumask_var(&tmpmask
, GFP_KERNEL
))
349 if (!zalloc_cpumask_var(&newmask
, GFP_KERNEL
)) {
350 free_cpumask_var(tmpmask
);
353 if (!zalloc_cpumask_var(&tmpmask1
, GFP_KERNEL
)) {
354 free_cpumask_var(tmpmask
);
355 free_cpumask_var(newmask
);
359 rdtgrp
= rdtgroup_kn_lock_live(of
->kn
);
366 ret
= cpulist_parse(buf
, newmask
);
368 ret
= cpumask_parse(buf
, newmask
);
373 /* check that user didn't specify any offline cpus */
374 cpumask_andnot(tmpmask
, newmask
, cpu_online_mask
);
375 if (cpumask_weight(tmpmask
)) {
380 if (rdtgrp
->type
== RDTCTRL_GROUP
)
381 ret
= cpus_ctrl_write(rdtgrp
, newmask
, tmpmask
, tmpmask1
);
382 else if (rdtgrp
->type
== RDTMON_GROUP
)
383 ret
= cpus_mon_write(rdtgrp
, newmask
, tmpmask
);
388 rdtgroup_kn_unlock(of
->kn
);
389 free_cpumask_var(tmpmask
);
390 free_cpumask_var(newmask
);
391 free_cpumask_var(tmpmask1
);
393 return ret
?: nbytes
;
396 struct task_move_callback
{
397 struct callback_head work
;
398 struct rdtgroup
*rdtgrp
;
401 static void move_myself(struct callback_head
*head
)
403 struct task_move_callback
*callback
;
404 struct rdtgroup
*rdtgrp
;
406 callback
= container_of(head
, struct task_move_callback
, work
);
407 rdtgrp
= callback
->rdtgrp
;
410 * If resource group was deleted before this task work callback
411 * was invoked, then assign the task to root group and free the
414 if (atomic_dec_and_test(&rdtgrp
->waitcount
) &&
415 (rdtgrp
->flags
& RDT_DELETED
)) {
422 /* update PQR_ASSOC MSR to make resource group go into effect */
423 intel_rdt_sched_in();
429 static int __rdtgroup_move_task(struct task_struct
*tsk
,
430 struct rdtgroup
*rdtgrp
)
432 struct task_move_callback
*callback
;
435 callback
= kzalloc(sizeof(*callback
), GFP_KERNEL
);
438 callback
->work
.func
= move_myself
;
439 callback
->rdtgrp
= rdtgrp
;
442 * Take a refcount, so rdtgrp cannot be freed before the
443 * callback has been invoked.
445 atomic_inc(&rdtgrp
->waitcount
);
446 ret
= task_work_add(tsk
, &callback
->work
, true);
449 * Task is exiting. Drop the refcount and free the callback.
450 * No need to check the refcount as the group cannot be
451 * deleted before the write function unlocks rdtgroup_mutex.
453 atomic_dec(&rdtgrp
->waitcount
);
457 * For ctrl_mon groups move both closid and rmid.
458 * For monitor groups, can move the tasks only from
459 * their parent CTRL group.
461 if (rdtgrp
->type
== RDTCTRL_GROUP
) {
462 tsk
->closid
= rdtgrp
->closid
;
463 tsk
->rmid
= rdtgrp
->mon
.rmid
;
464 } else if (rdtgrp
->type
== RDTMON_GROUP
) {
465 if (rdtgrp
->mon
.parent
->closid
== tsk
->closid
)
466 tsk
->rmid
= rdtgrp
->mon
.rmid
;
474 static int rdtgroup_task_write_permission(struct task_struct
*task
,
475 struct kernfs_open_file
*of
)
477 const struct cred
*tcred
= get_task_cred(task
);
478 const struct cred
*cred
= current_cred();
482 * Even if we're attaching all tasks in the thread group, we only
483 * need to check permissions on one of them.
485 if (!uid_eq(cred
->euid
, GLOBAL_ROOT_UID
) &&
486 !uid_eq(cred
->euid
, tcred
->uid
) &&
487 !uid_eq(cred
->euid
, tcred
->suid
))
494 static int rdtgroup_move_task(pid_t pid
, struct rdtgroup
*rdtgrp
,
495 struct kernfs_open_file
*of
)
497 struct task_struct
*tsk
;
502 tsk
= find_task_by_vpid(pid
);
511 get_task_struct(tsk
);
514 ret
= rdtgroup_task_write_permission(tsk
, of
);
516 ret
= __rdtgroup_move_task(tsk
, rdtgrp
);
518 put_task_struct(tsk
);
522 static ssize_t
rdtgroup_tasks_write(struct kernfs_open_file
*of
,
523 char *buf
, size_t nbytes
, loff_t off
)
525 struct rdtgroup
*rdtgrp
;
529 if (kstrtoint(strstrip(buf
), 0, &pid
) || pid
< 0)
531 rdtgrp
= rdtgroup_kn_lock_live(of
->kn
);
534 ret
= rdtgroup_move_task(pid
, rdtgrp
, of
);
538 rdtgroup_kn_unlock(of
->kn
);
540 return ret
?: nbytes
;
543 static void show_rdt_tasks(struct rdtgroup
*r
, struct seq_file
*s
)
545 struct task_struct
*p
, *t
;
548 for_each_process_thread(p
, t
) {
549 if ((r
->type
== RDTCTRL_GROUP
&& t
->closid
== r
->closid
) ||
550 (r
->type
== RDTMON_GROUP
&& t
->rmid
== r
->mon
.rmid
))
551 seq_printf(s
, "%d\n", t
->pid
);
556 static int rdtgroup_tasks_show(struct kernfs_open_file
*of
,
557 struct seq_file
*s
, void *v
)
559 struct rdtgroup
*rdtgrp
;
562 rdtgrp
= rdtgroup_kn_lock_live(of
->kn
);
564 show_rdt_tasks(rdtgrp
, s
);
567 rdtgroup_kn_unlock(of
->kn
);
572 static int rdt_num_closids_show(struct kernfs_open_file
*of
,
573 struct seq_file
*seq
, void *v
)
575 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
577 seq_printf(seq
, "%d\n", r
->num_closid
);
581 static int rdt_default_ctrl_show(struct kernfs_open_file
*of
,
582 struct seq_file
*seq
, void *v
)
584 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
586 seq_printf(seq
, "%x\n", r
->default_ctrl
);
590 static int rdt_min_cbm_bits_show(struct kernfs_open_file
*of
,
591 struct seq_file
*seq
, void *v
)
593 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
595 seq_printf(seq
, "%u\n", r
->cache
.min_cbm_bits
);
599 static int rdt_shareable_bits_show(struct kernfs_open_file
*of
,
600 struct seq_file
*seq
, void *v
)
602 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
604 seq_printf(seq
, "%x\n", r
->cache
.shareable_bits
);
608 static int rdt_min_bw_show(struct kernfs_open_file
*of
,
609 struct seq_file
*seq
, void *v
)
611 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
613 seq_printf(seq
, "%u\n", r
->membw
.min_bw
);
617 static int rdt_num_rmids_show(struct kernfs_open_file
*of
,
618 struct seq_file
*seq
, void *v
)
620 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
622 seq_printf(seq
, "%d\n", r
->num_rmid
);
627 static int rdt_mon_features_show(struct kernfs_open_file
*of
,
628 struct seq_file
*seq
, void *v
)
630 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
631 struct mon_evt
*mevt
;
633 list_for_each_entry(mevt
, &r
->evt_list
, list
)
634 seq_printf(seq
, "%s\n", mevt
->name
);
639 static int rdt_bw_gran_show(struct kernfs_open_file
*of
,
640 struct seq_file
*seq
, void *v
)
642 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
644 seq_printf(seq
, "%u\n", r
->membw
.bw_gran
);
648 static int rdt_delay_linear_show(struct kernfs_open_file
*of
,
649 struct seq_file
*seq
, void *v
)
651 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
653 seq_printf(seq
, "%u\n", r
->membw
.delay_linear
);
657 static int max_threshold_occ_show(struct kernfs_open_file
*of
,
658 struct seq_file
*seq
, void *v
)
660 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
662 seq_printf(seq
, "%u\n", intel_cqm_threshold
* r
->mon_scale
);
667 static ssize_t
max_threshold_occ_write(struct kernfs_open_file
*of
,
668 char *buf
, size_t nbytes
, loff_t off
)
670 struct rdt_resource
*r
= of
->kn
->parent
->priv
;
674 ret
= kstrtouint(buf
, 0, &bytes
);
678 if (bytes
> (boot_cpu_data
.x86_cache_size
* 1024))
681 intel_cqm_threshold
= bytes
/ r
->mon_scale
;
686 /* rdtgroup information files for one cache resource. */
687 static struct rftype res_common_files
[] = {
689 .name
= "num_closids",
691 .kf_ops
= &rdtgroup_kf_single_ops
,
692 .seq_show
= rdt_num_closids_show
,
693 .fflags
= RF_CTRL_INFO
,
696 .name
= "mon_features",
698 .kf_ops
= &rdtgroup_kf_single_ops
,
699 .seq_show
= rdt_mon_features_show
,
700 .fflags
= RF_MON_INFO
,
705 .kf_ops
= &rdtgroup_kf_single_ops
,
706 .seq_show
= rdt_num_rmids_show
,
707 .fflags
= RF_MON_INFO
,
712 .kf_ops
= &rdtgroup_kf_single_ops
,
713 .seq_show
= rdt_default_ctrl_show
,
714 .fflags
= RF_CTRL_INFO
| RFTYPE_RES_CACHE
,
717 .name
= "min_cbm_bits",
719 .kf_ops
= &rdtgroup_kf_single_ops
,
720 .seq_show
= rdt_min_cbm_bits_show
,
721 .fflags
= RF_CTRL_INFO
| RFTYPE_RES_CACHE
,
724 .name
= "shareable_bits",
726 .kf_ops
= &rdtgroup_kf_single_ops
,
727 .seq_show
= rdt_shareable_bits_show
,
728 .fflags
= RF_CTRL_INFO
| RFTYPE_RES_CACHE
,
731 .name
= "min_bandwidth",
733 .kf_ops
= &rdtgroup_kf_single_ops
,
734 .seq_show
= rdt_min_bw_show
,
735 .fflags
= RF_CTRL_INFO
| RFTYPE_RES_MB
,
738 .name
= "bandwidth_gran",
740 .kf_ops
= &rdtgroup_kf_single_ops
,
741 .seq_show
= rdt_bw_gran_show
,
742 .fflags
= RF_CTRL_INFO
| RFTYPE_RES_MB
,
745 .name
= "delay_linear",
747 .kf_ops
= &rdtgroup_kf_single_ops
,
748 .seq_show
= rdt_delay_linear_show
,
749 .fflags
= RF_CTRL_INFO
| RFTYPE_RES_MB
,
752 .name
= "max_threshold_occupancy",
754 .kf_ops
= &rdtgroup_kf_single_ops
,
755 .write
= max_threshold_occ_write
,
756 .seq_show
= max_threshold_occ_show
,
757 .fflags
= RF_MON_INFO
| RFTYPE_RES_CACHE
,
762 .kf_ops
= &rdtgroup_kf_single_ops
,
763 .write
= rdtgroup_cpus_write
,
764 .seq_show
= rdtgroup_cpus_show
,
765 .fflags
= RFTYPE_BASE
,
770 .kf_ops
= &rdtgroup_kf_single_ops
,
771 .write
= rdtgroup_cpus_write
,
772 .seq_show
= rdtgroup_cpus_show
,
773 .flags
= RFTYPE_FLAGS_CPUS_LIST
,
774 .fflags
= RFTYPE_BASE
,
779 .kf_ops
= &rdtgroup_kf_single_ops
,
780 .write
= rdtgroup_tasks_write
,
781 .seq_show
= rdtgroup_tasks_show
,
782 .fflags
= RFTYPE_BASE
,
787 .kf_ops
= &rdtgroup_kf_single_ops
,
788 .write
= rdtgroup_schemata_write
,
789 .seq_show
= rdtgroup_schemata_show
,
790 .fflags
= RF_CTRL_BASE
,
794 static int rdtgroup_add_files(struct kernfs_node
*kn
, unsigned long fflags
)
796 struct rftype
*rfts
, *rft
;
799 rfts
= res_common_files
;
800 len
= ARRAY_SIZE(res_common_files
);
802 lockdep_assert_held(&rdtgroup_mutex
);
804 for (rft
= rfts
; rft
< rfts
+ len
; rft
++) {
805 if ((fflags
& rft
->fflags
) == rft
->fflags
) {
806 ret
= rdtgroup_add_file(kn
, rft
);
814 pr_warn("Failed to add %s, err=%d\n", rft
->name
, ret
);
815 while (--rft
>= rfts
) {
816 if ((fflags
& rft
->fflags
) == rft
->fflags
)
817 kernfs_remove_by_name(kn
, rft
->name
);
822 static int rdtgroup_mkdir_info_resdir(struct rdt_resource
*r
, char *name
,
823 unsigned long fflags
)
825 struct kernfs_node
*kn_subdir
;
828 kn_subdir
= kernfs_create_dir(kn_info
, name
,
830 if (IS_ERR(kn_subdir
))
831 return PTR_ERR(kn_subdir
);
833 kernfs_get(kn_subdir
);
834 ret
= rdtgroup_kn_set_ugid(kn_subdir
);
838 ret
= rdtgroup_add_files(kn_subdir
, fflags
);
840 kernfs_activate(kn_subdir
);
845 static int rdtgroup_create_info_dir(struct kernfs_node
*parent_kn
)
847 struct rdt_resource
*r
;
848 unsigned long fflags
;
852 /* create the directory */
853 kn_info
= kernfs_create_dir(parent_kn
, "info", parent_kn
->mode
, NULL
);
855 return PTR_ERR(kn_info
);
858 for_each_alloc_enabled_rdt_resource(r
) {
859 fflags
= r
->fflags
| RF_CTRL_INFO
;
860 ret
= rdtgroup_mkdir_info_resdir(r
, r
->name
, fflags
);
865 for_each_mon_enabled_rdt_resource(r
) {
866 fflags
= r
->fflags
| RF_MON_INFO
;
867 sprintf(name
, "%s_MON", r
->name
);
868 ret
= rdtgroup_mkdir_info_resdir(r
, name
, fflags
);
874 * This extra ref will be put in kernfs_remove() and guarantees
875 * that @rdtgrp->kn is always accessible.
879 ret
= rdtgroup_kn_set_ugid(kn_info
);
883 kernfs_activate(kn_info
);
888 kernfs_remove(kn_info
);
893 mongroup_create_dir(struct kernfs_node
*parent_kn
, struct rdtgroup
*prgrp
,
894 char *name
, struct kernfs_node
**dest_kn
)
896 struct kernfs_node
*kn
;
899 /* create the directory */
900 kn
= kernfs_create_dir(parent_kn
, name
, parent_kn
->mode
, prgrp
);
908 * This extra ref will be put in kernfs_remove() and guarantees
909 * that @rdtgrp->kn is always accessible.
913 ret
= rdtgroup_kn_set_ugid(kn
);
925 static void l3_qos_cfg_update(void *arg
)
929 wrmsrl(IA32_L3_QOS_CFG
, *enable
? L3_QOS_CDP_ENABLE
: 0ULL);
932 static int set_l3_qos_cfg(struct rdt_resource
*r
, bool enable
)
934 cpumask_var_t cpu_mask
;
935 struct rdt_domain
*d
;
938 if (!zalloc_cpumask_var(&cpu_mask
, GFP_KERNEL
))
941 list_for_each_entry(d
, &r
->domains
, list
) {
942 /* Pick one CPU from each domain instance to update MSR */
943 cpumask_set_cpu(cpumask_any(&d
->cpu_mask
), cpu_mask
);
946 /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
947 if (cpumask_test_cpu(cpu
, cpu_mask
))
948 l3_qos_cfg_update(&enable
);
949 /* Update QOS_CFG MSR on all other cpus in cpu_mask. */
950 smp_call_function_many(cpu_mask
, l3_qos_cfg_update
, &enable
, 1);
953 free_cpumask_var(cpu_mask
);
958 static int cdp_enable(void)
960 struct rdt_resource
*r_l3data
= &rdt_resources_all
[RDT_RESOURCE_L3DATA
];
961 struct rdt_resource
*r_l3code
= &rdt_resources_all
[RDT_RESOURCE_L3CODE
];
962 struct rdt_resource
*r_l3
= &rdt_resources_all
[RDT_RESOURCE_L3
];
965 if (!r_l3
->alloc_capable
|| !r_l3data
->alloc_capable
||
966 !r_l3code
->alloc_capable
)
969 ret
= set_l3_qos_cfg(r_l3
, true);
971 r_l3
->alloc_enabled
= false;
972 r_l3data
->alloc_enabled
= true;
973 r_l3code
->alloc_enabled
= true;
978 static void cdp_disable(void)
980 struct rdt_resource
*r
= &rdt_resources_all
[RDT_RESOURCE_L3
];
982 r
->alloc_enabled
= r
->alloc_capable
;
984 if (rdt_resources_all
[RDT_RESOURCE_L3DATA
].alloc_enabled
) {
985 rdt_resources_all
[RDT_RESOURCE_L3DATA
].alloc_enabled
= false;
986 rdt_resources_all
[RDT_RESOURCE_L3CODE
].alloc_enabled
= false;
987 set_l3_qos_cfg(r
, false);
991 static int parse_rdtgroupfs_options(char *data
)
993 char *token
, *o
= data
;
996 while ((token
= strsep(&o
, ",")) != NULL
) {
1000 if (!strcmp(token
, "cdp"))
1008 * We don't allow rdtgroup directories to be created anywhere
1009 * except the root directory. Thus when looking for the rdtgroup
1010 * structure for a kernfs node we are either looking at a directory,
1011 * in which case the rdtgroup structure is pointed at by the "priv"
1012 * field, otherwise we have a file, and need only look to the parent
1013 * to find the rdtgroup.
1015 static struct rdtgroup
*kernfs_to_rdtgroup(struct kernfs_node
*kn
)
1017 if (kernfs_type(kn
) == KERNFS_DIR
) {
1019 * All the resource directories use "kn->priv"
1020 * to point to the "struct rdtgroup" for the
1021 * resource. "info" and its subdirectories don't
1022 * have rdtgroup structures, so return NULL here.
1024 if (kn
== kn_info
|| kn
->parent
== kn_info
)
1029 return kn
->parent
->priv
;
1033 struct rdtgroup
*rdtgroup_kn_lock_live(struct kernfs_node
*kn
)
1035 struct rdtgroup
*rdtgrp
= kernfs_to_rdtgroup(kn
);
1040 atomic_inc(&rdtgrp
->waitcount
);
1041 kernfs_break_active_protection(kn
);
1043 mutex_lock(&rdtgroup_mutex
);
1045 /* Was this group deleted while we waited? */
1046 if (rdtgrp
->flags
& RDT_DELETED
)
1052 void rdtgroup_kn_unlock(struct kernfs_node
*kn
)
1054 struct rdtgroup
*rdtgrp
= kernfs_to_rdtgroup(kn
);
1059 mutex_unlock(&rdtgroup_mutex
);
1061 if (atomic_dec_and_test(&rdtgrp
->waitcount
) &&
1062 (rdtgrp
->flags
& RDT_DELETED
)) {
1063 kernfs_unbreak_active_protection(kn
);
1064 kernfs_put(rdtgrp
->kn
);
1067 kernfs_unbreak_active_protection(kn
);
/* Forward declaration: defined below, needed by rdt_mount(). */
static int mkdir_mondata_all(struct kernfs_node *parent_kn,
			     struct rdtgroup *prgrp,
			     struct kernfs_node **mon_data_kn);
1075 static struct dentry
*rdt_mount(struct file_system_type
*fs_type
,
1076 int flags
, const char *unused_dev_name
,
1079 struct rdt_domain
*dom
;
1080 struct rdt_resource
*r
;
1081 struct dentry
*dentry
;
1084 mutex_lock(&rdtgroup_mutex
);
1086 * resctrl file system can only be mounted once.
1088 if (static_branch_unlikely(&rdt_enable_key
)) {
1089 dentry
= ERR_PTR(-EBUSY
);
1093 ret
= parse_rdtgroupfs_options(data
);
1095 dentry
= ERR_PTR(ret
);
1101 ret
= rdtgroup_create_info_dir(rdtgroup_default
.kn
);
1103 dentry
= ERR_PTR(ret
);
1107 if (rdt_mon_capable
) {
1108 ret
= mongroup_create_dir(rdtgroup_default
.kn
,
1112 dentry
= ERR_PTR(ret
);
1115 kernfs_get(kn_mongrp
);
1117 ret
= mkdir_mondata_all(rdtgroup_default
.kn
,
1118 &rdtgroup_default
, &kn_mondata
);
1120 dentry
= ERR_PTR(ret
);
1123 kernfs_get(kn_mondata
);
1124 rdtgroup_default
.mon
.mon_data_kn
= kn_mondata
;
1127 dentry
= kernfs_mount(fs_type
, flags
, rdt_root
,
1128 RDTGROUP_SUPER_MAGIC
, NULL
);
1132 if (rdt_alloc_capable
)
1133 static_branch_enable(&rdt_alloc_enable_key
);
1134 if (rdt_mon_capable
)
1135 static_branch_enable(&rdt_mon_enable_key
);
1137 if (rdt_alloc_capable
|| rdt_mon_capable
)
1138 static_branch_enable(&rdt_enable_key
);
1140 if (is_mbm_enabled()) {
1141 r
= &rdt_resources_all
[RDT_RESOURCE_L3
];
1142 list_for_each_entry(dom
, &r
->domains
, list
)
1143 mbm_setup_overflow_handler(dom
, MBM_OVERFLOW_INTERVAL
);
1149 if (rdt_mon_capable
)
1150 kernfs_remove(kn_mondata
);
1152 if (rdt_mon_capable
)
1153 kernfs_remove(kn_mongrp
);
1155 kernfs_remove(kn_info
);
1159 mutex_unlock(&rdtgroup_mutex
);
1164 static int reset_all_ctrls(struct rdt_resource
*r
)
1166 struct msr_param msr_param
;
1167 cpumask_var_t cpu_mask
;
1168 struct rdt_domain
*d
;
1171 if (!zalloc_cpumask_var(&cpu_mask
, GFP_KERNEL
))
1176 msr_param
.high
= r
->num_closid
;
1179 * Disable resource control for this resource by setting all
1180 * CBMs in all domains to the maximum mask value. Pick one CPU
1181 * from each domain to update the MSRs below.
1183 list_for_each_entry(d
, &r
->domains
, list
) {
1184 cpumask_set_cpu(cpumask_any(&d
->cpu_mask
), cpu_mask
);
1186 for (i
= 0; i
< r
->num_closid
; i
++)
1187 d
->ctrl_val
[i
] = r
->default_ctrl
;
1190 /* Update CBM on this cpu if it's in cpu_mask. */
1191 if (cpumask_test_cpu(cpu
, cpu_mask
))
1192 rdt_ctrl_update(&msr_param
);
1193 /* Update CBM on all other cpus in cpu_mask. */
1194 smp_call_function_many(cpu_mask
, rdt_ctrl_update
, &msr_param
, 1);
1197 free_cpumask_var(cpu_mask
);
1202 static bool is_closid_match(struct task_struct
*t
, struct rdtgroup
*r
)
1204 return (rdt_alloc_capable
&&
1205 (r
->type
== RDTCTRL_GROUP
) && (t
->closid
== r
->closid
));
1208 static bool is_rmid_match(struct task_struct
*t
, struct rdtgroup
*r
)
1210 return (rdt_mon_capable
&&
1211 (r
->type
== RDTMON_GROUP
) && (t
->rmid
== r
->mon
.rmid
));
1215 * Move tasks from one to the other group. If @from is NULL, then all tasks
1216 * in the systems are moved unconditionally (used for teardown).
1218 * If @mask is not NULL the cpus on which moved tasks are running are set
1219 * in that mask so the update smp function call is restricted to affected
1222 static void rdt_move_group_tasks(struct rdtgroup
*from
, struct rdtgroup
*to
,
1223 struct cpumask
*mask
)
1225 struct task_struct
*p
, *t
;
1227 read_lock(&tasklist_lock
);
1228 for_each_process_thread(p
, t
) {
1229 if (!from
|| is_closid_match(t
, from
) ||
1230 is_rmid_match(t
, from
)) {
1231 t
->closid
= to
->closid
;
1232 t
->rmid
= to
->mon
.rmid
;
1236 * This is safe on x86 w/o barriers as the ordering
1237 * of writing to task_cpu() and t->on_cpu is
1238 * reverse to the reading here. The detection is
1239 * inaccurate as tasks might move or schedule
1240 * before the smp function call takes place. In
1241 * such a case the function call is pointless, but
1242 * there is no other side effect.
1244 if (mask
&& t
->on_cpu
)
1245 cpumask_set_cpu(task_cpu(t
), mask
);
1249 read_unlock(&tasklist_lock
);
1252 static void free_all_child_rdtgrp(struct rdtgroup
*rdtgrp
)
1254 struct rdtgroup
*sentry
, *stmp
;
1255 struct list_head
*head
;
1257 head
= &rdtgrp
->mon
.crdtgrp_list
;
1258 list_for_each_entry_safe(sentry
, stmp
, head
, mon
.crdtgrp_list
) {
1259 free_rmid(sentry
->mon
.rmid
);
1260 list_del(&sentry
->mon
.crdtgrp_list
);
1266 * Forcibly remove all of subdirectories under root.
1268 static void rmdir_all_sub(void)
1270 struct rdtgroup
*rdtgrp
, *tmp
;
1272 /* Move all tasks to the default resource group */
1273 rdt_move_group_tasks(NULL
, &rdtgroup_default
, NULL
);
1275 list_for_each_entry_safe(rdtgrp
, tmp
, &rdt_all_groups
, rdtgroup_list
) {
1276 /* Free any child rmids */
1277 free_all_child_rdtgrp(rdtgrp
);
1279 /* Remove each rdtgroup other than root */
1280 if (rdtgrp
== &rdtgroup_default
)
1284 * Give any CPUs back to the default group. We cannot copy
1285 * cpu_online_mask because a CPU might have executed the
1286 * offline callback already, but is still marked online.
1288 cpumask_or(&rdtgroup_default
.cpu_mask
,
1289 &rdtgroup_default
.cpu_mask
, &rdtgrp
->cpu_mask
);
1291 free_rmid(rdtgrp
->mon
.rmid
);
1293 kernfs_remove(rdtgrp
->kn
);
1294 list_del(&rdtgrp
->rdtgroup_list
);
1297 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
1299 update_closid_rmid(cpu_online_mask
, &rdtgroup_default
);
1302 kernfs_remove(kn_info
);
1303 kernfs_remove(kn_mongrp
);
1304 kernfs_remove(kn_mondata
);
1307 static void rdt_kill_sb(struct super_block
*sb
)
1309 struct rdt_resource
*r
;
1311 mutex_lock(&rdtgroup_mutex
);
1313 /*Put everything back to default values. */
1314 for_each_alloc_enabled_rdt_resource(r
)
1318 static_branch_disable(&rdt_alloc_enable_key
);
1319 static_branch_disable(&rdt_mon_enable_key
);
1320 static_branch_disable(&rdt_enable_key
);
1322 mutex_unlock(&rdtgroup_mutex
);
1325 static struct file_system_type rdt_fs_type
= {
1328 .kill_sb
= rdt_kill_sb
,
1331 static int mon_addfile(struct kernfs_node
*parent_kn
, const char *name
,
1334 struct kernfs_node
*kn
;
1337 kn
= __kernfs_create_file(parent_kn
, name
, 0444, 0,
1338 &kf_mondata_ops
, priv
, NULL
, NULL
);
1342 ret
= rdtgroup_kn_set_ugid(kn
);
1352 * Remove all subdirectories of mon_data of ctrl_mon groups
1353 * and monitor groups with given domain id.
1355 void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource
*r
, unsigned int dom_id
)
1357 struct rdtgroup
*prgrp
, *crgrp
;
1360 if (!r
->mon_enabled
)
1363 list_for_each_entry(prgrp
, &rdt_all_groups
, rdtgroup_list
) {
1364 sprintf(name
, "mon_%s_%02d", r
->name
, dom_id
);
1365 kernfs_remove_by_name(prgrp
->mon
.mon_data_kn
, name
);
1367 list_for_each_entry(crgrp
, &prgrp
->mon
.crdtgrp_list
, mon
.crdtgrp_list
)
1368 kernfs_remove_by_name(crgrp
->mon
.mon_data_kn
, name
);
1372 static int mkdir_mondata_subdir(struct kernfs_node
*parent_kn
,
1373 struct rdt_domain
*d
,
1374 struct rdt_resource
*r
, struct rdtgroup
*prgrp
)
1376 union mon_data_bits priv
;
1377 struct kernfs_node
*kn
;
1378 struct mon_evt
*mevt
;
1379 struct rmid_read rr
;
1383 sprintf(name
, "mon_%s_%02d", r
->name
, d
->id
);
1384 /* create the directory */
1385 kn
= kernfs_create_dir(parent_kn
, name
, parent_kn
->mode
, prgrp
);
1390 * This extra ref will be put in kernfs_remove() and guarantees
1391 * that kn is always accessible.
1394 ret
= rdtgroup_kn_set_ugid(kn
);
1398 if (WARN_ON(list_empty(&r
->evt_list
))) {
1403 priv
.u
.rid
= r
->rid
;
1404 priv
.u
.domid
= d
->id
;
1405 list_for_each_entry(mevt
, &r
->evt_list
, list
) {
1406 priv
.u
.evtid
= mevt
->evtid
;
1407 ret
= mon_addfile(kn
, mevt
->name
, priv
.priv
);
1411 if (is_mbm_event(mevt
->evtid
))
1412 mon_event_read(&rr
, d
, prgrp
, mevt
->evtid
, true);
1414 kernfs_activate(kn
);
1423 * Add all subdirectories of mon_data for "ctrl_mon" groups
1424 * and "monitor" groups with given domain id.
1426 void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource
*r
,
1427 struct rdt_domain
*d
)
1429 struct kernfs_node
*parent_kn
;
1430 struct rdtgroup
*prgrp
, *crgrp
;
1431 struct list_head
*head
;
1433 if (!r
->mon_enabled
)
1436 list_for_each_entry(prgrp
, &rdt_all_groups
, rdtgroup_list
) {
1437 parent_kn
= prgrp
->mon
.mon_data_kn
;
1438 mkdir_mondata_subdir(parent_kn
, d
, r
, prgrp
);
1440 head
= &prgrp
->mon
.crdtgrp_list
;
1441 list_for_each_entry(crgrp
, head
, mon
.crdtgrp_list
) {
1442 parent_kn
= crgrp
->mon
.mon_data_kn
;
1443 mkdir_mondata_subdir(parent_kn
, d
, r
, crgrp
);
1448 static int mkdir_mondata_subdir_alldom(struct kernfs_node
*parent_kn
,
1449 struct rdt_resource
*r
,
1450 struct rdtgroup
*prgrp
)
1452 struct rdt_domain
*dom
;
1455 list_for_each_entry(dom
, &r
->domains
, list
) {
1456 ret
= mkdir_mondata_subdir(parent_kn
, dom
, r
, prgrp
);
1465 * This creates a directory mon_data which contains the monitored data.
1467 * mon_data has one directory for each domain whic are named
1468 * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
1469 * with L3 domain looks as below:
1476 * Each domain directory has one file per event:
1481 static int mkdir_mondata_all(struct kernfs_node
*parent_kn
,
1482 struct rdtgroup
*prgrp
,
1483 struct kernfs_node
**dest_kn
)
1485 struct rdt_resource
*r
;
1486 struct kernfs_node
*kn
;
1490 * Create the mon_data directory first.
1492 ret
= mongroup_create_dir(parent_kn
, NULL
, "mon_data", &kn
);
1500 * Create the subdirectories for each domain. Note that all events
1501 * in a domain like L3 are grouped into a resource whose domain is L3
1503 for_each_mon_enabled_rdt_resource(r
) {
1504 ret
= mkdir_mondata_subdir_alldom(kn
, r
, prgrp
);
1516 static int mkdir_rdt_prepare(struct kernfs_node
*parent_kn
,
1517 struct kernfs_node
*prgrp_kn
,
1518 const char *name
, umode_t mode
,
1519 enum rdt_group_type rtype
, struct rdtgroup
**r
)
1521 struct rdtgroup
*prdtgrp
, *rdtgrp
;
1522 struct kernfs_node
*kn
;
1526 prdtgrp
= rdtgroup_kn_lock_live(prgrp_kn
);
1532 /* allocate the rdtgroup. */
1533 rdtgrp
= kzalloc(sizeof(*rdtgrp
), GFP_KERNEL
);
1539 rdtgrp
->mon
.parent
= prdtgrp
;
1540 rdtgrp
->type
= rtype
;
1541 INIT_LIST_HEAD(&rdtgrp
->mon
.crdtgrp_list
);
1543 /* kernfs creates the directory for rdtgrp */
1544 kn
= kernfs_create_dir(parent_kn
, name
, mode
, rdtgrp
);
1552 * kernfs_remove() will drop the reference count on "kn" which
1553 * will free it. But we still need it to stick around for the
1554 * rdtgroup_kn_unlock(kn} call below. Take one extra reference
1555 * here, which will be dropped inside rdtgroup_kn_unlock().
1559 ret
= rdtgroup_kn_set_ugid(kn
);
1563 files
= RFTYPE_BASE
| RFTYPE_CTRL
;
1564 files
= RFTYPE_BASE
| BIT(RF_CTRLSHIFT
+ rtype
);
1565 ret
= rdtgroup_add_files(kn
, files
);
1569 if (rdt_mon_capable
) {
1573 rdtgrp
->mon
.rmid
= ret
;
1575 ret
= mkdir_mondata_all(kn
, rdtgrp
, &rdtgrp
->mon
.mon_data_kn
);
1579 kernfs_activate(kn
);
1582 * The caller unlocks the prgrp_kn upon success.
1587 free_rmid(rdtgrp
->mon
.rmid
);
1589 kernfs_remove(rdtgrp
->kn
);
1593 rdtgroup_kn_unlock(prgrp_kn
);
1597 static void mkdir_rdt_prepare_clean(struct rdtgroup
*rgrp
)
1599 kernfs_remove(rgrp
->kn
);
1600 free_rmid(rgrp
->mon
.rmid
);
1605 * Create a monitor group under "mon_groups" directory of a control
1606 * and monitor group(ctrl_mon). This is a resource group
1607 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
1609 static int rdtgroup_mkdir_mon(struct kernfs_node
*parent_kn
,
1610 struct kernfs_node
*prgrp_kn
,
1614 struct rdtgroup
*rdtgrp
, *prgrp
;
1617 ret
= mkdir_rdt_prepare(parent_kn
, prgrp_kn
, name
, mode
, RDTMON_GROUP
,
1622 prgrp
= rdtgrp
->mon
.parent
;
1623 rdtgrp
->closid
= prgrp
->closid
;
1626 * Add the rdtgrp to the list of rdtgrps the parent
1627 * ctrl_mon group has to track.
1629 list_add_tail(&rdtgrp
->mon
.crdtgrp_list
, &prgrp
->mon
.crdtgrp_list
);
1631 rdtgroup_kn_unlock(prgrp_kn
);
1636 * These are rdtgroups created under the root directory. Can be used
1637 * to allocate and monitor resources.
1639 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node
*parent_kn
,
1640 struct kernfs_node
*prgrp_kn
,
1641 const char *name
, umode_t mode
)
1643 struct rdtgroup
*rdtgrp
;
1644 struct kernfs_node
*kn
;
1648 ret
= mkdir_rdt_prepare(parent_kn
, prgrp_kn
, name
, mode
, RDTCTRL_GROUP
,
1654 ret
= closid_alloc();
1656 goto out_common_fail
;
1659 rdtgrp
->closid
= closid
;
1660 list_add(&rdtgrp
->rdtgroup_list
, &rdt_all_groups
);
1662 if (rdt_mon_capable
) {
1664 * Create an empty mon_groups directory to hold the subset
1665 * of tasks and cpus to monitor.
1667 ret
= mongroup_create_dir(kn
, NULL
, "mon_groups", NULL
);
1675 closid_free(closid
);
1676 list_del(&rdtgrp
->rdtgroup_list
);
1678 mkdir_rdt_prepare_clean(rdtgrp
);
1680 rdtgroup_kn_unlock(prgrp_kn
);
1685 * We allow creating mon groups only with in a directory called "mon_groups"
1686 * which is present in every ctrl_mon group. Check if this is a valid
1687 * "mon_groups" directory.
1689 * 1. The directory should be named "mon_groups".
1690 * 2. The mon group itself should "not" be named "mon_groups".
1691 * This makes sure "mon_groups" directory always has a ctrl_mon group
1694 static bool is_mon_groups(struct kernfs_node
*kn
, const char *name
)
1696 return (!strcmp(kn
->name
, "mon_groups") &&
1697 strcmp(name
, "mon_groups"));
1700 static int rdtgroup_mkdir(struct kernfs_node
*parent_kn
, const char *name
,
1703 /* Do not accept '\n' to avoid unparsable situation. */
1704 if (strchr(name
, '\n'))
1708 * If the parent directory is the root directory and RDT
1709 * allocation is supported, add a control and monitoring
1712 if (rdt_alloc_capable
&& parent_kn
== rdtgroup_default
.kn
)
1713 return rdtgroup_mkdir_ctrl_mon(parent_kn
, parent_kn
, name
, mode
);
1716 * If RDT monitoring is supported and the parent directory is a valid
1717 * "mon_groups" directory, add a monitoring subdirectory.
1719 if (rdt_mon_capable
&& is_mon_groups(parent_kn
, name
))
1720 return rdtgroup_mkdir_mon(parent_kn
, parent_kn
->parent
, name
, mode
);
1725 static int rdtgroup_rmdir_mon(struct kernfs_node
*kn
, struct rdtgroup
*rdtgrp
,
1726 cpumask_var_t tmpmask
)
1728 struct rdtgroup
*prdtgrp
= rdtgrp
->mon
.parent
;
1731 /* Give any tasks back to the parent group */
1732 rdt_move_group_tasks(rdtgrp
, prdtgrp
, tmpmask
);
1734 /* Update per cpu rmid of the moved CPUs first */
1735 for_each_cpu(cpu
, &rdtgrp
->cpu_mask
)
1736 per_cpu(pqr_state
.default_rmid
, cpu
) = prdtgrp
->mon
.rmid
;
1738 * Update the MSR on moved CPUs and CPUs which have moved
1739 * task running on them.
1741 cpumask_or(tmpmask
, tmpmask
, &rdtgrp
->cpu_mask
);
1742 update_closid_rmid(tmpmask
, NULL
);
1744 rdtgrp
->flags
= RDT_DELETED
;
1745 free_rmid(rdtgrp
->mon
.rmid
);
1748 * Remove the rdtgrp from the parent ctrl_mon group's list
1750 WARN_ON(list_empty(&prdtgrp
->mon
.crdtgrp_list
));
1751 list_del(&rdtgrp
->mon
.crdtgrp_list
);
1754 * one extra hold on this, will drop when we kfree(rdtgrp)
1755 * in rdtgroup_kn_unlock()
1758 kernfs_remove(rdtgrp
->kn
);
1763 static int rdtgroup_rmdir_ctrl(struct kernfs_node
*kn
, struct rdtgroup
*rdtgrp
,
1764 cpumask_var_t tmpmask
)
1768 /* Give any tasks back to the default group */
1769 rdt_move_group_tasks(rdtgrp
, &rdtgroup_default
, tmpmask
);
1771 /* Give any CPUs back to the default group */
1772 cpumask_or(&rdtgroup_default
.cpu_mask
,
1773 &rdtgroup_default
.cpu_mask
, &rdtgrp
->cpu_mask
);
1775 /* Update per cpu closid and rmid of the moved CPUs first */
1776 for_each_cpu(cpu
, &rdtgrp
->cpu_mask
) {
1777 per_cpu(pqr_state
.default_closid
, cpu
) = rdtgroup_default
.closid
;
1778 per_cpu(pqr_state
.default_rmid
, cpu
) = rdtgroup_default
.mon
.rmid
;
1782 * Update the MSR on moved CPUs and CPUs which have moved
1783 * task running on them.
1785 cpumask_or(tmpmask
, tmpmask
, &rdtgrp
->cpu_mask
);
1786 update_closid_rmid(tmpmask
, NULL
);
1788 rdtgrp
->flags
= RDT_DELETED
;
1789 closid_free(rdtgrp
->closid
);
1790 free_rmid(rdtgrp
->mon
.rmid
);
1793 * Free all the child monitor group rmids.
1795 free_all_child_rdtgrp(rdtgrp
);
1797 list_del(&rdtgrp
->rdtgroup_list
);
1800 * one extra hold on this, will drop when we kfree(rdtgrp)
1801 * in rdtgroup_kn_unlock()
1804 kernfs_remove(rdtgrp
->kn
);
1809 static int rdtgroup_rmdir(struct kernfs_node
*kn
)
1811 struct kernfs_node
*parent_kn
= kn
->parent
;
1812 struct rdtgroup
*rdtgrp
;
1813 cpumask_var_t tmpmask
;
1816 if (!zalloc_cpumask_var(&tmpmask
, GFP_KERNEL
))
1819 rdtgrp
= rdtgroup_kn_lock_live(kn
);
1826 * If the rdtgroup is a ctrl_mon group and parent directory
1827 * is the root directory, remove the ctrl_mon group.
1829 * If the rdtgroup is a mon group and parent directory
1830 * is a valid "mon_groups" directory, remove the mon group.
1832 if (rdtgrp
->type
== RDTCTRL_GROUP
&& parent_kn
== rdtgroup_default
.kn
)
1833 ret
= rdtgroup_rmdir_ctrl(kn
, rdtgrp
, tmpmask
);
1834 else if (rdtgrp
->type
== RDTMON_GROUP
&&
1835 is_mon_groups(parent_kn
, kn
->name
))
1836 ret
= rdtgroup_rmdir_mon(kn
, rdtgrp
, tmpmask
);
1841 rdtgroup_kn_unlock(kn
);
1842 free_cpumask_var(tmpmask
);
1846 static int rdtgroup_show_options(struct seq_file
*seq
, struct kernfs_root
*kf
)
1848 if (rdt_resources_all
[RDT_RESOURCE_L3DATA
].alloc_enabled
)
1849 seq_puts(seq
, ",cdp");
1853 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops
= {
1854 .mkdir
= rdtgroup_mkdir
,
1855 .rmdir
= rdtgroup_rmdir
,
1856 .show_options
= rdtgroup_show_options
,
1859 static int __init
rdtgroup_setup_root(void)
1863 rdt_root
= kernfs_create_root(&rdtgroup_kf_syscall_ops
,
1864 KERNFS_ROOT_CREATE_DEACTIVATED
,
1866 if (IS_ERR(rdt_root
))
1867 return PTR_ERR(rdt_root
);
1869 mutex_lock(&rdtgroup_mutex
);
1871 rdtgroup_default
.closid
= 0;
1872 rdtgroup_default
.mon
.rmid
= 0;
1873 rdtgroup_default
.type
= RDTCTRL_GROUP
;
1874 INIT_LIST_HEAD(&rdtgroup_default
.mon
.crdtgrp_list
);
1876 list_add(&rdtgroup_default
.rdtgroup_list
, &rdt_all_groups
);
1878 ret
= rdtgroup_add_files(rdt_root
->kn
, RF_CTRL_BASE
);
1880 kernfs_destroy_root(rdt_root
);
1884 rdtgroup_default
.kn
= rdt_root
->kn
;
1885 kernfs_activate(rdtgroup_default
.kn
);
1888 mutex_unlock(&rdtgroup_mutex
);
1894 * rdtgroup_init - rdtgroup initialization
1896 * Setup resctrl file system including set up root, create mount point,
1897 * register rdtgroup filesystem, and initialize files under root directory.
1899 * Return: 0 on success or -errno
1901 int __init
rdtgroup_init(void)
1905 ret
= rdtgroup_setup_root();
1909 ret
= sysfs_create_mount_point(fs_kobj
, "resctrl");
1913 ret
= register_filesystem(&rdt_fs_type
);
1915 goto cleanup_mountpoint
;
1920 sysfs_remove_mount_point(fs_kobj
, "resctrl");
1922 kernfs_destroy_root(rdt_root
);