]>
Commit | Line | Data |
---|---|---|
5ff193fb FY |
1 | /* |
2 | * User interface for Resource Allocation in Resource Director Technology(RDT) | |
3 | * | |
4 | * Copyright (C) 2016 Intel Corporation | |
5 | * | |
6 | * Author: Fenghua Yu <fenghua.yu@intel.com> | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify it | |
9 | * under the terms and conditions of the GNU General Public License, | |
10 | * version 2, as published by the Free Software Foundation. | |
11 | * | |
12 | * This program is distributed in the hope it will be useful, but WITHOUT | |
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | * more details. | |
16 | * | |
17 | * More information about RDT can be found in the Intel (R) x86 Architecture | |
18 | * Software Developer Manual. | |
19 | */ | |
20 | ||
21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
22 | ||
12e0110c | 23 | #include <linux/cpu.h> |
5ff193fb FY |
24 | #include <linux/fs.h> |
25 | #include <linux/sysfs.h> | |
26 | #include <linux/kernfs.h> | |
4e978d06 | 27 | #include <linux/seq_file.h> |
3f07c014 | 28 | #include <linux/sched/signal.h> |
29930025 | 29 | #include <linux/sched/task.h> |
5ff193fb | 30 | #include <linux/slab.h> |
e02737d5 | 31 | #include <linux/task_work.h> |
5ff193fb FY |
32 | |
33 | #include <uapi/linux/magic.h> | |
34 | ||
7db9d979 VS |
35 | #include <asm/intel_rdt_sched.h> |
36 | #include "intel_rdt.h" | |
5ff193fb | 37 | |
26017611 | 38 | DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key); |
0fc5d206 | 39 | static struct kernfs_root *rdt_root; |
5ff193fb FY |
40 | struct rdtgroup rdtgroup_default; |
41 | LIST_HEAD(rdt_all_groups); | |
42 | ||
4e978d06 FY |
43 | /* Kernel fs node for "info" directory under root */ |
44 | static struct kernfs_node *kn_info; | |
45 | ||
60cf5e10 FY |
46 | /* |
47 | * Trivial allocator for CLOSIDs. Since h/w only supports a small number, | |
48 | * we can keep a bitmap of free CLOSIDs in a single integer. | |
49 | * | |
50 | * Using a global CLOSID across all resources has some advantages and | |
51 | * some drawbacks: | |
52 | * + We can simply set "current->closid" to assign a task to a resource | |
53 | * group. | |
54 | * + Context switch code can avoid extra memory references deciding which | |
55 | * CLOSID to load into the PQR_ASSOC MSR | |
56 | * - We give up some options in configuring resource groups across multi-socket | |
57 | * systems. | |
58 | * - Our choices on how to configure each resource become progressively more | |
59 | * limited as the number of resources grows. | |
60 | */ | |
61 | static int closid_free_map; | |
62 | ||
63 | static void closid_init(void) | |
64 | { | |
65 | struct rdt_resource *r; | |
66 | int rdt_min_closid = 32; | |
67 | ||
68 | /* Compute rdt_min_closid across all resources */ | |
26017611 | 69 | for_each_alloc_enabled_rdt_resource(r) |
60cf5e10 FY |
70 | rdt_min_closid = min(rdt_min_closid, r->num_closid); |
71 | ||
72 | closid_free_map = BIT_MASK(rdt_min_closid) - 1; | |
73 | ||
74 | /* CLOSID 0 is always reserved for the default group */ | |
75 | closid_free_map &= ~1; | |
76 | } | |
77 | ||
0fc5d206 | 78 | static int closid_alloc(void) |
60cf5e10 | 79 | { |
703c3837 | 80 | u32 closid = ffs(closid_free_map); |
60cf5e10 FY |
81 | |
82 | if (closid == 0) | |
83 | return -ENOSPC; | |
84 | closid--; | |
85 | closid_free_map &= ~(1 << closid); | |
86 | ||
87 | return closid; | |
88 | } | |
89 | ||
90 | static void closid_free(int closid) | |
91 | { | |
92 | closid_free_map |= 1 << closid; | |
93 | } | |
94 | ||
4e978d06 FY |
95 | /* set uid and gid of rdtgroup dirs and files to that of the creator */ |
96 | static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) | |
97 | { | |
98 | struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, | |
99 | .ia_uid = current_fsuid(), | |
100 | .ia_gid = current_fsgid(), }; | |
101 | ||
102 | if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && | |
103 | gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) | |
104 | return 0; | |
105 | ||
106 | return kernfs_setattr(kn, &iattr); | |
107 | } | |
108 | ||
109 | static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) | |
110 | { | |
111 | struct kernfs_node *kn; | |
112 | int ret; | |
113 | ||
114 | kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, | |
115 | 0, rft->kf_ops, rft, NULL, NULL); | |
116 | if (IS_ERR(kn)) | |
117 | return PTR_ERR(kn); | |
118 | ||
119 | ret = rdtgroup_kn_set_ugid(kn); | |
120 | if (ret) { | |
121 | kernfs_remove(kn); | |
122 | return ret; | |
123 | } | |
124 | ||
125 | return 0; | |
126 | } | |
127 | ||
4e978d06 FY |
128 | static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) |
129 | { | |
130 | struct kernfs_open_file *of = m->private; | |
131 | struct rftype *rft = of->kn->priv; | |
132 | ||
133 | if (rft->seq_show) | |
134 | return rft->seq_show(of, m, arg); | |
135 | return 0; | |
136 | } | |
137 | ||
138 | static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, | |
139 | size_t nbytes, loff_t off) | |
140 | { | |
141 | struct rftype *rft = of->kn->priv; | |
142 | ||
143 | if (rft->write) | |
144 | return rft->write(of, buf, nbytes, off); | |
145 | ||
146 | return -EINVAL; | |
147 | } | |
148 | ||
149 | static struct kernfs_ops rdtgroup_kf_single_ops = { | |
150 | .atomic_write_len = PAGE_SIZE, | |
151 | .write = rdtgroup_file_write, | |
152 | .seq_show = rdtgroup_seqfile_show, | |
153 | }; | |
154 | ||
4ffa3c97 JO |
155 | static bool is_cpu_list(struct kernfs_open_file *of) |
156 | { | |
157 | struct rftype *rft = of->kn->priv; | |
158 | ||
159 | return rft->flags & RFTYPE_FLAGS_CPUS_LIST; | |
160 | } | |
161 | ||
12e0110c TL |
162 | static int rdtgroup_cpus_show(struct kernfs_open_file *of, |
163 | struct seq_file *s, void *v) | |
164 | { | |
165 | struct rdtgroup *rdtgrp; | |
166 | int ret = 0; | |
167 | ||
168 | rdtgrp = rdtgroup_kn_lock_live(of->kn); | |
169 | ||
4ffa3c97 JO |
170 | if (rdtgrp) { |
171 | seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n", | |
172 | cpumask_pr_args(&rdtgrp->cpu_mask)); | |
173 | } else { | |
12e0110c | 174 | ret = -ENOENT; |
4ffa3c97 | 175 | } |
12e0110c TL |
176 | rdtgroup_kn_unlock(of->kn); |
177 | ||
178 | return ret; | |
179 | } | |
180 | ||
f4107702 FY |
181 | /* |
182 | * This is safe against intel_rdt_sched_in() called from __switch_to() | |
183 | * because __switch_to() is executed with interrupts disabled. A local call | |
154334a0 | 184 | * from update_closid() is proteced against __switch_to() because |
f4107702 FY |
185 | * preemption is disabled. |
186 | */ | |
154334a0 | 187 | static void update_cpu_closid(void *info) |
f4107702 | 188 | { |
154334a0 VS |
189 | struct rdtgroup *r = info; |
190 | ||
191 | if (r) | |
192 | this_cpu_write(rdt_cpu_default.closid, r->closid); | |
193 | ||
f4107702 FY |
194 | /* |
195 | * We cannot unconditionally write the MSR because the current | |
196 | * executing task might have its own closid selected. Just reuse | |
197 | * the context switch code. | |
198 | */ | |
199 | intel_rdt_sched_in(); | |
200 | } | |
201 | ||
0efc89be FY |
202 | /* |
203 | * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, | |
204 | * | |
154334a0 | 205 | * Per task closids/rmids must have been set up before calling this function. |
0efc89be FY |
206 | */ |
207 | static void | |
154334a0 | 208 | update_closid(const struct cpumask *cpu_mask, struct rdtgroup *r) |
f4107702 FY |
209 | { |
210 | int cpu = get_cpu(); | |
211 | ||
212 | if (cpumask_test_cpu(cpu, cpu_mask)) | |
154334a0 VS |
213 | update_cpu_closid(r); |
214 | smp_call_function_many(cpu_mask, update_cpu_closid, r, 1); | |
f4107702 FY |
215 | put_cpu(); |
216 | } | |
217 | ||
154334a0 VS |
218 | static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, |
219 | cpumask_var_t tmpmask) | |
220 | { | |
221 | struct rdtgroup *r; | |
222 | ||
223 | /* Check whether cpus are dropped from this group */ | |
224 | cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); | |
225 | if (cpumask_weight(tmpmask)) { | |
226 | /* Can't drop from default group */ | |
227 | if (rdtgrp == &rdtgroup_default) | |
228 | return -EINVAL; | |
229 | ||
230 | /* Give any dropped cpus to rdtgroup_default */ | |
231 | cpumask_or(&rdtgroup_default.cpu_mask, | |
232 | &rdtgroup_default.cpu_mask, tmpmask); | |
233 | update_closid(tmpmask, &rdtgroup_default); | |
234 | } | |
235 | ||
236 | /* | |
237 | * If we added cpus, remove them from previous group that owned them | |
238 | * and update per-cpu closid | |
239 | */ | |
240 | cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); | |
241 | if (cpumask_weight(tmpmask)) { | |
242 | list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { | |
243 | if (r == rdtgrp) | |
244 | continue; | |
245 | cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask); | |
246 | } | |
247 | update_closid(tmpmask, rdtgrp); | |
248 | } | |
249 | ||
250 | /* Done pushing/pulling - update this group with new mask */ | |
251 | cpumask_copy(&rdtgrp->cpu_mask, newmask); | |
252 | ||
253 | return 0; | |
254 | } | |
255 | ||
12e0110c TL |
256 | static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, |
257 | char *buf, size_t nbytes, loff_t off) | |
258 | { | |
259 | cpumask_var_t tmpmask, newmask; | |
154334a0 | 260 | struct rdtgroup *rdtgrp; |
f4107702 | 261 | int ret; |
12e0110c TL |
262 | |
263 | if (!buf) | |
264 | return -EINVAL; | |
265 | ||
266 | if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) | |
267 | return -ENOMEM; | |
268 | if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { | |
269 | free_cpumask_var(tmpmask); | |
270 | return -ENOMEM; | |
271 | } | |
a2584e1d | 272 | |
12e0110c TL |
273 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
274 | if (!rdtgrp) { | |
275 | ret = -ENOENT; | |
276 | goto unlock; | |
277 | } | |
278 | ||
4ffa3c97 JO |
279 | if (is_cpu_list(of)) |
280 | ret = cpulist_parse(buf, newmask); | |
281 | else | |
282 | ret = cpumask_parse(buf, newmask); | |
283 | ||
12e0110c TL |
284 | if (ret) |
285 | goto unlock; | |
286 | ||
12e0110c TL |
287 | /* check that user didn't specify any offline cpus */ |
288 | cpumask_andnot(tmpmask, newmask, cpu_online_mask); | |
289 | if (cpumask_weight(tmpmask)) { | |
290 | ret = -EINVAL; | |
a2584e1d | 291 | goto unlock; |
12e0110c TL |
292 | } |
293 | ||
154334a0 VS |
294 | if (rdtgrp->type == RDTCTRL_GROUP) |
295 | ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask); | |
296 | else | |
297 | ret = -EINVAL; | |
12e0110c | 298 | |
12e0110c TL |
299 | unlock: |
300 | rdtgroup_kn_unlock(of->kn); | |
301 | free_cpumask_var(tmpmask); | |
302 | free_cpumask_var(newmask); | |
303 | ||
304 | return ret ?: nbytes; | |
305 | } | |
306 | ||
e02737d5 FY |
307 | struct task_move_callback { |
308 | struct callback_head work; | |
309 | struct rdtgroup *rdtgrp; | |
310 | }; | |
311 | ||
312 | static void move_myself(struct callback_head *head) | |
313 | { | |
314 | struct task_move_callback *callback; | |
315 | struct rdtgroup *rdtgrp; | |
316 | ||
317 | callback = container_of(head, struct task_move_callback, work); | |
318 | rdtgrp = callback->rdtgrp; | |
319 | ||
320 | /* | |
321 | * If resource group was deleted before this task work callback | |
322 | * was invoked, then assign the task to root group and free the | |
323 | * resource group. | |
324 | */ | |
325 | if (atomic_dec_and_test(&rdtgrp->waitcount) && | |
326 | (rdtgrp->flags & RDT_DELETED)) { | |
327 | current->closid = 0; | |
5ba6b745 | 328 | current->rmid = 0; |
e02737d5 FY |
329 | kfree(rdtgrp); |
330 | } | |
331 | ||
74fcdae1 | 332 | preempt_disable(); |
4f341a5e FY |
333 | /* update PQR_ASSOC MSR to make resource group go into effect */ |
334 | intel_rdt_sched_in(); | |
74fcdae1 | 335 | preempt_enable(); |
4f341a5e | 336 | |
e02737d5 FY |
337 | kfree(callback); |
338 | } | |
339 | ||
340 | static int __rdtgroup_move_task(struct task_struct *tsk, | |
341 | struct rdtgroup *rdtgrp) | |
342 | { | |
343 | struct task_move_callback *callback; | |
344 | int ret; | |
345 | ||
346 | callback = kzalloc(sizeof(*callback), GFP_KERNEL); | |
347 | if (!callback) | |
348 | return -ENOMEM; | |
349 | callback->work.func = move_myself; | |
350 | callback->rdtgrp = rdtgrp; | |
351 | ||
352 | /* | |
353 | * Take a refcount, so rdtgrp cannot be freed before the | |
354 | * callback has been invoked. | |
355 | */ | |
356 | atomic_inc(&rdtgrp->waitcount); | |
357 | ret = task_work_add(tsk, &callback->work, true); | |
358 | if (ret) { | |
359 | /* | |
360 | * Task is exiting. Drop the refcount and free the callback. | |
361 | * No need to check the refcount as the group cannot be | |
362 | * deleted before the write function unlocks rdtgroup_mutex. | |
363 | */ | |
364 | atomic_dec(&rdtgrp->waitcount); | |
365 | kfree(callback); | |
366 | } else { | |
5ba6b745 VS |
367 | /* |
368 | * For ctrl_mon groups move both closid and rmid. | |
369 | * For monitor groups, can move the tasks only from | |
370 | * their parent CTRL group. | |
371 | */ | |
372 | if (rdtgrp->type == RDTCTRL_GROUP) { | |
373 | tsk->closid = rdtgrp->closid; | |
374 | tsk->rmid = rdtgrp->mon.rmid; | |
375 | } else if (rdtgrp->type == RDTMON_GROUP) { | |
376 | if (rdtgrp->mon.parent->closid == tsk->closid) | |
377 | tsk->rmid = rdtgrp->mon.rmid; | |
378 | else | |
379 | ret = -EINVAL; | |
380 | } | |
e02737d5 FY |
381 | } |
382 | return ret; | |
383 | } | |
384 | ||
385 | static int rdtgroup_task_write_permission(struct task_struct *task, | |
386 | struct kernfs_open_file *of) | |
387 | { | |
388 | const struct cred *tcred = get_task_cred(task); | |
389 | const struct cred *cred = current_cred(); | |
390 | int ret = 0; | |
391 | ||
392 | /* | |
393 | * Even if we're attaching all tasks in the thread group, we only | |
394 | * need to check permissions on one of them. | |
395 | */ | |
396 | if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && | |
397 | !uid_eq(cred->euid, tcred->uid) && | |
398 | !uid_eq(cred->euid, tcred->suid)) | |
399 | ret = -EPERM; | |
400 | ||
401 | put_cred(tcred); | |
402 | return ret; | |
403 | } | |
404 | ||
405 | static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, | |
406 | struct kernfs_open_file *of) | |
407 | { | |
408 | struct task_struct *tsk; | |
409 | int ret; | |
410 | ||
411 | rcu_read_lock(); | |
412 | if (pid) { | |
413 | tsk = find_task_by_vpid(pid); | |
414 | if (!tsk) { | |
415 | rcu_read_unlock(); | |
416 | return -ESRCH; | |
417 | } | |
418 | } else { | |
419 | tsk = current; | |
420 | } | |
421 | ||
422 | get_task_struct(tsk); | |
423 | rcu_read_unlock(); | |
424 | ||
425 | ret = rdtgroup_task_write_permission(tsk, of); | |
426 | if (!ret) | |
427 | ret = __rdtgroup_move_task(tsk, rdtgrp); | |
428 | ||
429 | put_task_struct(tsk); | |
430 | return ret; | |
431 | } | |
432 | ||
433 | static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, | |
434 | char *buf, size_t nbytes, loff_t off) | |
435 | { | |
436 | struct rdtgroup *rdtgrp; | |
437 | int ret = 0; | |
438 | pid_t pid; | |
439 | ||
440 | if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) | |
441 | return -EINVAL; | |
442 | rdtgrp = rdtgroup_kn_lock_live(of->kn); | |
443 | ||
444 | if (rdtgrp) | |
445 | ret = rdtgroup_move_task(pid, rdtgrp, of); | |
446 | else | |
447 | ret = -ENOENT; | |
448 | ||
449 | rdtgroup_kn_unlock(of->kn); | |
450 | ||
451 | return ret ?: nbytes; | |
452 | } | |
453 | ||
454 | static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) | |
455 | { | |
456 | struct task_struct *p, *t; | |
457 | ||
458 | rcu_read_lock(); | |
459 | for_each_process_thread(p, t) { | |
5ba6b745 VS |
460 | if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) || |
461 | (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) | |
e02737d5 FY |
462 | seq_printf(s, "%d\n", t->pid); |
463 | } | |
464 | rcu_read_unlock(); | |
465 | } | |
466 | ||
467 | static int rdtgroup_tasks_show(struct kernfs_open_file *of, | |
468 | struct seq_file *s, void *v) | |
469 | { | |
470 | struct rdtgroup *rdtgrp; | |
471 | int ret = 0; | |
472 | ||
473 | rdtgrp = rdtgroup_kn_lock_live(of->kn); | |
474 | if (rdtgrp) | |
475 | show_rdt_tasks(rdtgrp, s); | |
476 | else | |
477 | ret = -ENOENT; | |
478 | rdtgroup_kn_unlock(of->kn); | |
479 | ||
480 | return ret; | |
481 | } | |
482 | ||
4e978d06 FY |
483 | static int rdt_num_closids_show(struct kernfs_open_file *of, |
484 | struct seq_file *seq, void *v) | |
485 | { | |
486 | struct rdt_resource *r = of->kn->parent->priv; | |
487 | ||
488 | seq_printf(seq, "%d\n", r->num_closid); | |
4e978d06 FY |
489 | return 0; |
490 | } | |
491 | ||
2545e9f5 | 492 | static int rdt_default_ctrl_show(struct kernfs_open_file *of, |
4e978d06 FY |
493 | struct seq_file *seq, void *v) |
494 | { | |
495 | struct rdt_resource *r = of->kn->parent->priv; | |
496 | ||
2545e9f5 | 497 | seq_printf(seq, "%x\n", r->default_ctrl); |
4e978d06 FY |
498 | return 0; |
499 | } | |
500 | ||
53a114a6 SL |
501 | static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, |
502 | struct seq_file *seq, void *v) | |
503 | { | |
504 | struct rdt_resource *r = of->kn->parent->priv; | |
505 | ||
d3e11b4d | 506 | seq_printf(seq, "%u\n", r->cache.min_cbm_bits); |
db69ef65 VS |
507 | return 0; |
508 | } | |
509 | ||
510 | static int rdt_min_bw_show(struct kernfs_open_file *of, | |
511 | struct seq_file *seq, void *v) | |
512 | { | |
513 | struct rdt_resource *r = of->kn->parent->priv; | |
53a114a6 | 514 | |
db69ef65 VS |
515 | seq_printf(seq, "%u\n", r->membw.min_bw); |
516 | return 0; | |
517 | } | |
518 | ||
a9a0c771 VS |
519 | static int rdt_num_rmids_show(struct kernfs_open_file *of, |
520 | struct seq_file *seq, void *v) | |
521 | { | |
522 | struct rdt_resource *r = of->kn->parent->priv; | |
523 | ||
524 | seq_printf(seq, "%d\n", r->num_rmid); | |
525 | ||
526 | return 0; | |
527 | } | |
528 | ||
529 | static int rdt_mon_features_show(struct kernfs_open_file *of, | |
530 | struct seq_file *seq, void *v) | |
531 | { | |
532 | struct rdt_resource *r = of->kn->parent->priv; | |
533 | struct mon_evt *mevt; | |
534 | ||
535 | list_for_each_entry(mevt, &r->evt_list, list) | |
536 | seq_printf(seq, "%s\n", mevt->name); | |
537 | ||
538 | return 0; | |
539 | } | |
540 | ||
db69ef65 VS |
541 | static int rdt_bw_gran_show(struct kernfs_open_file *of, |
542 | struct seq_file *seq, void *v) | |
543 | { | |
544 | struct rdt_resource *r = of->kn->parent->priv; | |
545 | ||
546 | seq_printf(seq, "%u\n", r->membw.bw_gran); | |
547 | return 0; | |
548 | } | |
549 | ||
550 | static int rdt_delay_linear_show(struct kernfs_open_file *of, | |
551 | struct seq_file *seq, void *v) | |
552 | { | |
553 | struct rdt_resource *r = of->kn->parent->priv; | |
554 | ||
555 | seq_printf(seq, "%u\n", r->membw.delay_linear); | |
53a114a6 SL |
556 | return 0; |
557 | } | |
558 | ||
a9a0c771 VS |
559 | static int max_threshold_occ_show(struct kernfs_open_file *of, |
560 | struct seq_file *seq, void *v) | |
561 | { | |
562 | struct rdt_resource *r = of->kn->parent->priv; | |
563 | ||
564 | seq_printf(seq, "%u\n", intel_cqm_threshold * r->mon_scale); | |
565 | ||
566 | return 0; | |
567 | } | |
568 | ||
569 | static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, | |
570 | char *buf, size_t nbytes, loff_t off) | |
571 | { | |
572 | struct rdt_resource *r = of->kn->parent->priv; | |
573 | unsigned int bytes; | |
574 | int ret; | |
575 | ||
576 | ret = kstrtouint(buf, 0, &bytes); | |
577 | if (ret) | |
578 | return ret; | |
579 | ||
580 | if (bytes > (boot_cpu_data.x86_cache_size * 1024)) | |
581 | return -EINVAL; | |
582 | ||
583 | intel_cqm_threshold = bytes / r->mon_scale; | |
584 | ||
585 | return ret ?: nbytes; | |
586 | } | |
587 | ||
4e978d06 | 588 | /* rdtgroup information files for one cache resource. */ |
5ae32bbc | 589 | static struct rftype res_common_files[] = { |
4e978d06 FY |
590 | { |
591 | .name = "num_closids", | |
592 | .mode = 0444, | |
593 | .kf_ops = &rdtgroup_kf_single_ops, | |
594 | .seq_show = rdt_num_closids_show, | |
5ae32bbc | 595 | .fflags = RF_CTRL_INFO, |
4e978d06 | 596 | }, |
a9a0c771 VS |
597 | { |
598 | .name = "mon_features", | |
599 | .mode = 0444, | |
600 | .kf_ops = &rdtgroup_kf_single_ops, | |
601 | .seq_show = rdt_mon_features_show, | |
602 | .fflags = RF_MON_INFO, | |
603 | }, | |
604 | { | |
605 | .name = "num_rmids", | |
606 | .mode = 0444, | |
607 | .kf_ops = &rdtgroup_kf_single_ops, | |
608 | .seq_show = rdt_num_rmids_show, | |
609 | .fflags = RF_MON_INFO, | |
610 | }, | |
4e978d06 FY |
611 | { |
612 | .name = "cbm_mask", | |
613 | .mode = 0444, | |
614 | .kf_ops = &rdtgroup_kf_single_ops, | |
2545e9f5 | 615 | .seq_show = rdt_default_ctrl_show, |
5ae32bbc | 616 | .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, |
4e978d06 | 617 | }, |
53a114a6 SL |
618 | { |
619 | .name = "min_cbm_bits", | |
620 | .mode = 0444, | |
621 | .kf_ops = &rdtgroup_kf_single_ops, | |
622 | .seq_show = rdt_min_cbm_bits_show, | |
5ae32bbc | 623 | .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE, |
db69ef65 VS |
624 | }, |
625 | { | |
626 | .name = "min_bandwidth", | |
627 | .mode = 0444, | |
628 | .kf_ops = &rdtgroup_kf_single_ops, | |
629 | .seq_show = rdt_min_bw_show, | |
5ae32bbc | 630 | .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, |
db69ef65 VS |
631 | }, |
632 | { | |
633 | .name = "bandwidth_gran", | |
634 | .mode = 0444, | |
635 | .kf_ops = &rdtgroup_kf_single_ops, | |
636 | .seq_show = rdt_bw_gran_show, | |
5ae32bbc | 637 | .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, |
db69ef65 VS |
638 | }, |
639 | { | |
640 | .name = "delay_linear", | |
641 | .mode = 0444, | |
642 | .kf_ops = &rdtgroup_kf_single_ops, | |
643 | .seq_show = rdt_delay_linear_show, | |
5ae32bbc TL |
644 | .fflags = RF_CTRL_INFO | RFTYPE_RES_MB, |
645 | }, | |
a9a0c771 VS |
646 | { |
647 | .name = "max_threshold_occupancy", | |
648 | .mode = 0644, | |
649 | .kf_ops = &rdtgroup_kf_single_ops, | |
650 | .write = max_threshold_occ_write, | |
651 | .seq_show = max_threshold_occ_show, | |
652 | .fflags = RF_MON_INFO | RFTYPE_RES_CACHE, | |
653 | }, | |
5ae32bbc TL |
654 | { |
655 | .name = "cpus", | |
656 | .mode = 0644, | |
657 | .kf_ops = &rdtgroup_kf_single_ops, | |
658 | .write = rdtgroup_cpus_write, | |
659 | .seq_show = rdtgroup_cpus_show, | |
660 | .fflags = RFTYPE_BASE, | |
661 | }, | |
662 | { | |
663 | .name = "cpus_list", | |
664 | .mode = 0644, | |
665 | .kf_ops = &rdtgroup_kf_single_ops, | |
666 | .write = rdtgroup_cpus_write, | |
667 | .seq_show = rdtgroup_cpus_show, | |
668 | .flags = RFTYPE_FLAGS_CPUS_LIST, | |
669 | .fflags = RFTYPE_BASE, | |
670 | }, | |
671 | { | |
672 | .name = "tasks", | |
673 | .mode = 0644, | |
674 | .kf_ops = &rdtgroup_kf_single_ops, | |
675 | .write = rdtgroup_tasks_write, | |
676 | .seq_show = rdtgroup_tasks_show, | |
677 | .fflags = RFTYPE_BASE, | |
678 | }, | |
679 | { | |
680 | .name = "schemata", | |
681 | .mode = 0644, | |
682 | .kf_ops = &rdtgroup_kf_single_ops, | |
683 | .write = rdtgroup_schemata_write, | |
684 | .seq_show = rdtgroup_schemata_show, | |
685 | .fflags = RF_CTRL_BASE, | |
db69ef65 VS |
686 | }, |
687 | }; | |
688 | ||
5ae32bbc | 689 | static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags) |
db69ef65 | 690 | { |
5ae32bbc TL |
691 | struct rftype *rfts, *rft; |
692 | int ret, len; | |
693 | ||
694 | rfts = res_common_files; | |
695 | len = ARRAY_SIZE(res_common_files); | |
696 | ||
697 | lockdep_assert_held(&rdtgroup_mutex); | |
698 | ||
699 | for (rft = rfts; rft < rfts + len; rft++) { | |
700 | if ((fflags & rft->fflags) == rft->fflags) { | |
701 | ret = rdtgroup_add_file(kn, rft); | |
702 | if (ret) | |
703 | goto error; | |
704 | } | |
705 | } | |
706 | ||
707 | return 0; | |
708 | error: | |
709 | pr_warn("Failed to add %s, err=%d\n", rft->name, ret); | |
710 | while (--rft >= rfts) { | |
711 | if ((fflags & rft->fflags) == rft->fflags) | |
712 | kernfs_remove_by_name(kn, rft->name); | |
713 | } | |
714 | return ret; | |
db69ef65 VS |
715 | } |
716 | ||
5ae32bbc TL |
717 | static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, |
718 | unsigned long fflags) | |
6a507a6a | 719 | { |
5ae32bbc TL |
720 | struct kernfs_node *kn_subdir; |
721 | int ret; | |
722 | ||
723 | kn_subdir = kernfs_create_dir(kn_info, name, | |
724 | kn_info->mode, r); | |
725 | if (IS_ERR(kn_subdir)) | |
726 | return PTR_ERR(kn_subdir); | |
727 | ||
728 | kernfs_get(kn_subdir); | |
729 | ret = rdtgroup_kn_set_ugid(kn_subdir); | |
730 | if (ret) | |
731 | return ret; | |
732 | ||
733 | ret = rdtgroup_add_files(kn_subdir, fflags); | |
734 | if (!ret) | |
735 | kernfs_activate(kn_subdir); | |
736 | ||
737 | return ret; | |
6a507a6a VS |
738 | } |
739 | ||
4e978d06 FY |
740 | static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) |
741 | { | |
4e978d06 | 742 | struct rdt_resource *r; |
5ae32bbc | 743 | unsigned long fflags; |
a9a0c771 | 744 | char name[32]; |
5ae32bbc | 745 | int ret; |
4e978d06 FY |
746 | |
747 | /* create the directory */ | |
748 | kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); | |
749 | if (IS_ERR(kn_info)) | |
750 | return PTR_ERR(kn_info); | |
751 | kernfs_get(kn_info); | |
752 | ||
26017611 | 753 | for_each_alloc_enabled_rdt_resource(r) { |
5ae32bbc TL |
754 | fflags = r->fflags | RF_CTRL_INFO; |
755 | ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); | |
4e978d06 FY |
756 | if (ret) |
757 | goto out_destroy; | |
4e978d06 | 758 | } |
a9a0c771 VS |
759 | |
760 | for_each_mon_enabled_rdt_resource(r) { | |
761 | fflags = r->fflags | RF_MON_INFO; | |
762 | sprintf(name, "%s_MON", r->name); | |
763 | ret = rdtgroup_mkdir_info_resdir(r, name, fflags); | |
764 | if (ret) | |
765 | goto out_destroy; | |
766 | } | |
767 | ||
4e978d06 FY |
768 | /* |
769 | * This extra ref will be put in kernfs_remove() and guarantees | |
770 | * that @rdtgrp->kn is always accessible. | |
771 | */ | |
772 | kernfs_get(kn_info); | |
773 | ||
774 | ret = rdtgroup_kn_set_ugid(kn_info); | |
775 | if (ret) | |
776 | goto out_destroy; | |
777 | ||
778 | kernfs_activate(kn_info); | |
779 | ||
780 | return 0; | |
781 | ||
782 | out_destroy: | |
783 | kernfs_remove(kn_info); | |
784 | return ret; | |
785 | } | |
786 | ||
ccac7180 VS |
787 | static int |
788 | mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, | |
789 | char *name, struct kernfs_node **dest_kn) | |
790 | { | |
791 | struct kernfs_node *kn; | |
792 | int ret; | |
793 | ||
794 | /* create the directory */ | |
795 | kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); | |
796 | if (IS_ERR(kn)) | |
797 | return PTR_ERR(kn); | |
798 | ||
799 | if (dest_kn) | |
800 | *dest_kn = kn; | |
801 | ||
802 | /* | |
803 | * This extra ref will be put in kernfs_remove() and guarantees | |
804 | * that @rdtgrp->kn is always accessible. | |
805 | */ | |
806 | kernfs_get(kn); | |
807 | ||
808 | ret = rdtgroup_kn_set_ugid(kn); | |
809 | if (ret) | |
810 | goto out_destroy; | |
811 | ||
812 | kernfs_activate(kn); | |
813 | ||
814 | return 0; | |
815 | ||
816 | out_destroy: | |
817 | kernfs_remove(kn); | |
818 | return ret; | |
819 | } | |
5ff193fb FY |
820 | static void l3_qos_cfg_update(void *arg) |
821 | { | |
822 | bool *enable = arg; | |
823 | ||
824 | wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); | |
825 | } | |
826 | ||
827 | static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) | |
828 | { | |
829 | cpumask_var_t cpu_mask; | |
830 | struct rdt_domain *d; | |
831 | int cpu; | |
832 | ||
833 | if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) | |
834 | return -ENOMEM; | |
835 | ||
836 | list_for_each_entry(d, &r->domains, list) { | |
837 | /* Pick one CPU from each domain instance to update MSR */ | |
838 | cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); | |
839 | } | |
840 | cpu = get_cpu(); | |
841 | /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ | |
842 | if (cpumask_test_cpu(cpu, cpu_mask)) | |
843 | l3_qos_cfg_update(&enable); | |
844 | /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ | |
845 | smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1); | |
846 | put_cpu(); | |
847 | ||
848 | free_cpumask_var(cpu_mask); | |
849 | ||
850 | return 0; | |
851 | } | |
852 | ||
853 | static int cdp_enable(void) | |
854 | { | |
855 | struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA]; | |
856 | struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE]; | |
857 | struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; | |
858 | int ret; | |
859 | ||
26017611 VS |
860 | if (!r_l3->alloc_capable || !r_l3data->alloc_capable || |
861 | !r_l3code->alloc_capable) | |
5ff193fb FY |
862 | return -EINVAL; |
863 | ||
864 | ret = set_l3_qos_cfg(r_l3, true); | |
865 | if (!ret) { | |
26017611 VS |
866 | r_l3->alloc_enabled = false; |
867 | r_l3data->alloc_enabled = true; | |
868 | r_l3code->alloc_enabled = true; | |
5ff193fb FY |
869 | } |
870 | return ret; | |
871 | } | |
872 | ||
873 | static void cdp_disable(void) | |
874 | { | |
875 | struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; | |
876 | ||
26017611 | 877 | r->alloc_enabled = r->alloc_capable; |
5ff193fb | 878 | |
26017611 VS |
879 | if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) { |
880 | rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false; | |
881 | rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false; | |
5ff193fb FY |
882 | set_l3_qos_cfg(r, false); |
883 | } | |
884 | } | |
885 | ||
886 | static int parse_rdtgroupfs_options(char *data) | |
887 | { | |
888 | char *token, *o = data; | |
889 | int ret = 0; | |
890 | ||
891 | while ((token = strsep(&o, ",")) != NULL) { | |
892 | if (!*token) | |
893 | return -EINVAL; | |
894 | ||
895 | if (!strcmp(token, "cdp")) | |
896 | ret = cdp_enable(); | |
897 | } | |
898 | ||
899 | return ret; | |
900 | } | |
901 | ||
60cf5e10 FY |
902 | /* |
903 | * We don't allow rdtgroup directories to be created anywhere | |
904 | * except the root directory. Thus when looking for the rdtgroup | |
905 | * structure for a kernfs node we are either looking at a directory, | |
906 | * in which case the rdtgroup structure is pointed at by the "priv" | |
907 | * field, otherwise we have a file, and need only look to the parent | |
908 | * to find the rdtgroup. | |
909 | */ | |
910 | static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) | |
911 | { | |
f57b3087 FY |
912 | if (kernfs_type(kn) == KERNFS_DIR) { |
913 | /* | |
914 | * All the resource directories use "kn->priv" | |
915 | * to point to the "struct rdtgroup" for the | |
916 | * resource. "info" and its subdirectories don't | |
917 | * have rdtgroup structures, so return NULL here. | |
918 | */ | |
919 | if (kn == kn_info || kn->parent == kn_info) | |
920 | return NULL; | |
921 | else | |
922 | return kn->priv; | |
923 | } else { | |
60cf5e10 | 924 | return kn->parent->priv; |
f57b3087 | 925 | } |
60cf5e10 FY |
926 | } |
927 | ||
928 | struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) | |
929 | { | |
930 | struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); | |
931 | ||
f57b3087 FY |
932 | if (!rdtgrp) |
933 | return NULL; | |
934 | ||
60cf5e10 FY |
935 | atomic_inc(&rdtgrp->waitcount); |
936 | kernfs_break_active_protection(kn); | |
937 | ||
938 | mutex_lock(&rdtgroup_mutex); | |
939 | ||
940 | /* Was this group deleted while we waited? */ | |
941 | if (rdtgrp->flags & RDT_DELETED) | |
942 | return NULL; | |
943 | ||
944 | return rdtgrp; | |
945 | } | |
946 | ||
947 | void rdtgroup_kn_unlock(struct kernfs_node *kn) | |
948 | { | |
949 | struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); | |
950 | ||
f57b3087 FY |
951 | if (!rdtgrp) |
952 | return; | |
953 | ||
60cf5e10 FY |
954 | mutex_unlock(&rdtgroup_mutex); |
955 | ||
956 | if (atomic_dec_and_test(&rdtgrp->waitcount) && | |
957 | (rdtgrp->flags & RDT_DELETED)) { | |
958 | kernfs_unbreak_active_protection(kn); | |
49ec8f5b | 959 | kernfs_put(rdtgrp->kn); |
60cf5e10 FY |
960 | kfree(rdtgrp); |
961 | } else { | |
962 | kernfs_unbreak_active_protection(kn); | |
963 | } | |
964 | } | |
965 | ||
5ff193fb FY |
966 | static struct dentry *rdt_mount(struct file_system_type *fs_type, |
967 | int flags, const char *unused_dev_name, | |
968 | void *data) | |
969 | { | |
970 | struct dentry *dentry; | |
971 | int ret; | |
972 | ||
973 | mutex_lock(&rdtgroup_mutex); | |
974 | /* | |
975 | * resctrl file system can only be mounted once. | |
976 | */ | |
26017611 | 977 | if (static_branch_unlikely(&rdt_alloc_enable_key)) { |
5ff193fb FY |
978 | dentry = ERR_PTR(-EBUSY); |
979 | goto out; | |
980 | } | |
981 | ||
982 | ret = parse_rdtgroupfs_options(data); | |
983 | if (ret) { | |
984 | dentry = ERR_PTR(ret); | |
985 | goto out_cdp; | |
986 | } | |
987 | ||
60cf5e10 FY |
988 | closid_init(); |
989 | ||
4e978d06 | 990 | ret = rdtgroup_create_info_dir(rdtgroup_default.kn); |
7bff0af5 SL |
991 | if (ret) { |
992 | dentry = ERR_PTR(ret); | |
4e978d06 | 993 | goto out_cdp; |
7bff0af5 | 994 | } |
4e978d06 | 995 | |
5ff193fb FY |
996 | dentry = kernfs_mount(fs_type, flags, rdt_root, |
997 | RDTGROUP_SUPER_MAGIC, NULL); | |
998 | if (IS_ERR(dentry)) | |
79298acc | 999 | goto out_destroy; |
5ff193fb | 1000 | |
26017611 | 1001 | static_branch_enable(&rdt_alloc_enable_key); |
5ff193fb FY |
1002 | goto out; |
1003 | ||
79298acc VS |
1004 | out_destroy: |
1005 | kernfs_remove(kn_info); | |
5ff193fb FY |
1006 | out_cdp: |
1007 | cdp_disable(); | |
1008 | out: | |
1009 | mutex_unlock(&rdtgroup_mutex); | |
1010 | ||
1011 | return dentry; | |
1012 | } | |
1013 | ||
2545e9f5 | 1014 | static int reset_all_ctrls(struct rdt_resource *r) |
5ff193fb FY |
1015 | { |
1016 | struct msr_param msr_param; | |
1017 | cpumask_var_t cpu_mask; | |
1018 | struct rdt_domain *d; | |
1019 | int i, cpu; | |
1020 | ||
1021 | if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) | |
1022 | return -ENOMEM; | |
1023 | ||
1024 | msr_param.res = r; | |
1025 | msr_param.low = 0; | |
1026 | msr_param.high = r->num_closid; | |
1027 | ||
1028 | /* | |
1029 | * Disable resource control for this resource by setting all | |
1030 | * CBMs in all domains to the maximum mask value. Pick one CPU | |
1031 | * from each domain to update the MSRs below. | |
1032 | */ | |
1033 | list_for_each_entry(d, &r->domains, list) { | |
1034 | cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); | |
1035 | ||
1036 | for (i = 0; i < r->num_closid; i++) | |
2545e9f5 | 1037 | d->ctrl_val[i] = r->default_ctrl; |
5ff193fb FY |
1038 | } |
1039 | cpu = get_cpu(); | |
1040 | /* Update CBM on this cpu if it's in cpu_mask. */ | |
1041 | if (cpumask_test_cpu(cpu, cpu_mask)) | |
2545e9f5 | 1042 | rdt_ctrl_update(&msr_param); |
5ff193fb | 1043 | /* Update CBM on all other cpus in cpu_mask. */ |
2545e9f5 | 1044 | smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1); |
5ff193fb FY |
1045 | put_cpu(); |
1046 | ||
1047 | free_cpumask_var(cpu_mask); | |
1048 | ||
1049 | return 0; | |
1050 | } | |
1051 | ||
4e978d06 | 1052 | /* |
0efc89be FY |
1053 | * Move tasks from one to the other group. If @from is NULL, then all tasks |
1054 | * in the systems are moved unconditionally (used for teardown). | |
1055 | * | |
1056 | * If @mask is not NULL the cpus on which moved tasks are running are set | |
1057 | * in that mask so the update smp function call is restricted to affected | |
1058 | * cpus. | |
4e978d06 | 1059 | */ |
0efc89be FY |
1060 | static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, |
1061 | struct cpumask *mask) | |
4e978d06 | 1062 | { |
e02737d5 FY |
1063 | struct task_struct *p, *t; |
1064 | ||
e02737d5 | 1065 | read_lock(&tasklist_lock); |
0efc89be FY |
1066 | for_each_process_thread(p, t) { |
1067 | if (!from || t->closid == from->closid) { | |
1068 | t->closid = to->closid; | |
1069 | #ifdef CONFIG_SMP | |
1070 | /* | |
1071 | * This is safe on x86 w/o barriers as the ordering | |
1072 | * of writing to task_cpu() and t->on_cpu is | |
1073 | * reverse to the reading here. The detection is | |
1074 | * inaccurate as tasks might move or schedule | |
1075 | * before the smp function call takes place. In | |
1076 | * such a case the function call is pointless, but | |
1077 | * there is no other side effect. | |
1078 | */ | |
1079 | if (mask && t->on_cpu) | |
1080 | cpumask_set_cpu(task_cpu(t), mask); | |
1081 | #endif | |
1082 | } | |
1083 | } | |
e02737d5 | 1084 | read_unlock(&tasklist_lock); |
0efc89be FY |
1085 | } |
1086 | ||
1087 | /* | |
1088 | * Forcibly remove all of subdirectories under root. | |
1089 | */ | |
1090 | static void rmdir_all_sub(void) | |
1091 | { | |
1092 | struct rdtgroup *rdtgrp, *tmp; | |
1093 | ||
1094 | /* Move all tasks to the default resource group */ | |
1095 | rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); | |
60cf5e10 | 1096 | |
60cf5e10 FY |
1097 | list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { |
1098 | /* Remove each rdtgroup other than root */ | |
1099 | if (rdtgrp == &rdtgroup_default) | |
1100 | continue; | |
c7cc0cc1 FY |
1101 | |
1102 | /* | |
1103 | * Give any CPUs back to the default group. We cannot copy | |
1104 | * cpu_online_mask because a CPU might have executed the | |
1105 | * offline callback already, but is still marked online. | |
1106 | */ | |
1107 | cpumask_or(&rdtgroup_default.cpu_mask, | |
1108 | &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); | |
1109 | ||
60cf5e10 FY |
1110 | kernfs_remove(rdtgrp->kn); |
1111 | list_del(&rdtgrp->rdtgroup_list); | |
1112 | kfree(rdtgrp); | |
1113 | } | |
0efc89be FY |
1114 | /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ |
1115 | get_online_cpus(); | |
154334a0 | 1116 | update_closid(cpu_online_mask, &rdtgroup_default); |
0efc89be FY |
1117 | put_online_cpus(); |
1118 | ||
4e978d06 FY |
1119 | kernfs_remove(kn_info); |
1120 | } | |
1121 | ||
5ff193fb FY |
1122 | static void rdt_kill_sb(struct super_block *sb) |
1123 | { | |
1124 | struct rdt_resource *r; | |
1125 | ||
1126 | mutex_lock(&rdtgroup_mutex); | |
1127 | ||
1128 | /*Put everything back to default values. */ | |
26017611 | 1129 | for_each_alloc_enabled_rdt_resource(r) |
2545e9f5 | 1130 | reset_all_ctrls(r); |
5ff193fb | 1131 | cdp_disable(); |
4e978d06 | 1132 | rmdir_all_sub(); |
26017611 | 1133 | static_branch_disable(&rdt_alloc_enable_key); |
5ff193fb FY |
1134 | kernfs_kill_sb(sb); |
1135 | mutex_unlock(&rdtgroup_mutex); | |
1136 | } | |
1137 | ||
1138 | static struct file_system_type rdt_fs_type = { | |
1139 | .name = "resctrl", | |
1140 | .mount = rdt_mount, | |
1141 | .kill_sb = rdt_kill_sb, | |
1142 | }; | |
1143 | ||
da68325a VS |
1144 | static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, |
1145 | struct kernfs_node *prgrp_kn, | |
1146 | const char *name, umode_t mode, | |
ccac7180 | 1147 | enum rdt_group_type rtype, struct rdtgroup **r) |
60cf5e10 | 1148 | { |
da68325a | 1149 | struct rdtgroup *prdtgrp, *rdtgrp; |
60cf5e10 | 1150 | struct kernfs_node *kn; |
da68325a VS |
1151 | uint files = 0; |
1152 | int ret; | |
60cf5e10 | 1153 | |
da68325a VS |
1154 | prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); |
1155 | if (!prdtgrp) { | |
60cf5e10 FY |
1156 | ret = -ENODEV; |
1157 | goto out_unlock; | |
1158 | } | |
1159 | ||
60cf5e10 FY |
1160 | /* allocate the rdtgroup. */ |
1161 | rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); | |
1162 | if (!rdtgrp) { | |
1163 | ret = -ENOSPC; | |
da68325a | 1164 | goto out_unlock; |
60cf5e10 | 1165 | } |
da68325a | 1166 | *r = rdtgrp; |
ccac7180 VS |
1167 | rdtgrp->mon.parent = prdtgrp; |
1168 | rdtgrp->type = rtype; | |
1169 | INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list); | |
60cf5e10 FY |
1170 | |
1171 | /* kernfs creates the directory for rdtgrp */ | |
da68325a | 1172 | kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); |
60cf5e10 FY |
1173 | if (IS_ERR(kn)) { |
1174 | ret = PTR_ERR(kn); | |
da68325a | 1175 | goto out_free_rgrp; |
60cf5e10 FY |
1176 | } |
1177 | rdtgrp->kn = kn; | |
1178 | ||
1179 | /* | |
1180 | * kernfs_remove() will drop the reference count on "kn" which | |
1181 | * will free it. But we still need it to stick around for the | |
1182 | * rdtgroup_kn_unlock(kn} call below. Take one extra reference | |
1183 | * here, which will be dropped inside rdtgroup_kn_unlock(). | |
1184 | */ | |
1185 | kernfs_get(kn); | |
1186 | ||
1187 | ret = rdtgroup_kn_set_ugid(kn); | |
1188 | if (ret) | |
1189 | goto out_destroy; | |
1190 | ||
da68325a | 1191 | files = RFTYPE_BASE | RFTYPE_CTRL; |
ccac7180 | 1192 | files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); |
da68325a | 1193 | ret = rdtgroup_add_files(kn, files); |
12e0110c TL |
1194 | if (ret) |
1195 | goto out_destroy; | |
1196 | ||
ccac7180 VS |
1197 | if (rdt_mon_capable) { |
1198 | ret = alloc_rmid(); | |
1199 | if (ret < 0) | |
1200 | goto out_destroy; | |
1201 | rdtgrp->mon.rmid = ret; | |
1202 | } | |
60cf5e10 FY |
1203 | kernfs_activate(kn); |
1204 | ||
da68325a VS |
1205 | /* |
1206 | * The caller unlocks the prgrp_kn upon success. | |
1207 | */ | |
1208 | return 0; | |
60cf5e10 FY |
1209 | |
1210 | out_destroy: | |
1211 | kernfs_remove(rdtgrp->kn); | |
da68325a | 1212 | out_free_rgrp: |
60cf5e10 | 1213 | kfree(rdtgrp); |
60cf5e10 | 1214 | out_unlock: |
da68325a VS |
1215 | rdtgroup_kn_unlock(prgrp_kn); |
1216 | return ret; | |
1217 | } | |
1218 | ||
1219 | static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) | |
1220 | { | |
1221 | kernfs_remove(rgrp->kn); | |
ccac7180 | 1222 | free_rmid(rgrp->mon.rmid); |
da68325a VS |
1223 | kfree(rgrp); |
1224 | } | |
1225 | ||
ccac7180 VS |
1226 | /* |
1227 | * Create a monitor group under "mon_groups" directory of a control | |
1228 | * and monitor group(ctrl_mon). This is a resource group | |
1229 | * to monitor a subset of tasks and cpus in its parent ctrl_mon group. | |
1230 | */ | |
1231 | static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, | |
1232 | struct kernfs_node *prgrp_kn, | |
1233 | const char *name, | |
1234 | umode_t mode) | |
1235 | { | |
1236 | struct rdtgroup *rdtgrp, *prgrp; | |
1237 | int ret; | |
1238 | ||
1239 | ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP, | |
1240 | &rdtgrp); | |
1241 | if (ret) | |
1242 | return ret; | |
1243 | ||
1244 | prgrp = rdtgrp->mon.parent; | |
1245 | rdtgrp->closid = prgrp->closid; | |
1246 | ||
1247 | /* | |
1248 | * Add the rdtgrp to the list of rdtgrps the parent | |
1249 | * ctrl_mon group has to track. | |
1250 | */ | |
1251 | list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); | |
1252 | ||
1253 | rdtgroup_kn_unlock(prgrp_kn); | |
1254 | return ret; | |
1255 | } | |
1256 | ||
da68325a VS |
1257 | /* |
1258 | * These are rdtgroups created under the root directory. Can be used | |
ccac7180 | 1259 | * to allocate and monitor resources. |
da68325a | 1260 | */ |
ccac7180 VS |
1261 | static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, |
1262 | struct kernfs_node *prgrp_kn, | |
1263 | const char *name, umode_t mode) | |
da68325a VS |
1264 | { |
1265 | struct rdtgroup *rdtgrp; | |
1266 | struct kernfs_node *kn; | |
1267 | u32 closid; | |
1268 | int ret; | |
1269 | ||
ccac7180 VS |
1270 | ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP, |
1271 | &rdtgrp); | |
da68325a VS |
1272 | if (ret) |
1273 | return ret; | |
1274 | ||
1275 | kn = rdtgrp->kn; | |
1276 | ret = closid_alloc(); | |
1277 | if (ret < 0) | |
1278 | goto out_common_fail; | |
1279 | closid = ret; | |
1280 | ||
1281 | rdtgrp->closid = closid; | |
1282 | list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); | |
1283 | ||
ccac7180 VS |
1284 | if (rdt_mon_capable) { |
1285 | /* | |
1286 | * Create an empty mon_groups directory to hold the subset | |
1287 | * of tasks and cpus to monitor. | |
1288 | */ | |
1289 | ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); | |
1290 | if (ret) | |
1291 | goto out_id_free; | |
1292 | } | |
1293 | ||
da68325a VS |
1294 | goto out_unlock; |
1295 | ||
ccac7180 VS |
1296 | out_id_free: |
1297 | closid_free(closid); | |
1298 | list_del(&rdtgrp->rdtgroup_list); | |
da68325a VS |
1299 | out_common_fail: |
1300 | mkdir_rdt_prepare_clean(rdtgrp); | |
1301 | out_unlock: | |
1302 | rdtgroup_kn_unlock(prgrp_kn); | |
60cf5e10 FY |
1303 | return ret; |
1304 | } | |
1305 | ||
ccac7180 VS |
1306 | /* |
1307 | * We allow creating mon groups only with in a directory called "mon_groups" | |
1308 | * which is present in every ctrl_mon group. Check if this is a valid | |
1309 | * "mon_groups" directory. | |
1310 | * | |
1311 | * 1. The directory should be named "mon_groups". | |
1312 | * 2. The mon group itself should "not" be named "mon_groups". | |
1313 | * This makes sure "mon_groups" directory always has a ctrl_mon group | |
1314 | * as parent. | |
1315 | */ | |
1316 | static bool is_mon_groups(struct kernfs_node *kn, const char *name) | |
1317 | { | |
1318 | return (!strcmp(kn->name, "mon_groups") && | |
1319 | strcmp(name, "mon_groups")); | |
1320 | } | |
1321 | ||
da68325a VS |
1322 | static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, |
1323 | umode_t mode) | |
1324 | { | |
1325 | /* Do not accept '\n' to avoid unparsable situation. */ | |
1326 | if (strchr(name, '\n')) | |
1327 | return -EINVAL; | |
1328 | ||
1329 | /* | |
1330 | * If the parent directory is the root directory and RDT | |
ccac7180 VS |
1331 | * allocation is supported, add a control and monitoring |
1332 | * subdirectory | |
da68325a VS |
1333 | */ |
1334 | if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn) | |
ccac7180 VS |
1335 | return rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode); |
1336 | ||
1337 | /* | |
1338 | * If RDT monitoring is supported and the parent directory is a valid | |
1339 | * "mon_groups" directory, add a monitoring subdirectory. | |
1340 | */ | |
1341 | if (rdt_mon_capable && is_mon_groups(parent_kn, name)) | |
1342 | return rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode); | |
da68325a VS |
1343 | |
1344 | return -EPERM; | |
1345 | } | |
1346 | ||
60cf5e10 FY |
1347 | static int rdtgroup_rmdir(struct kernfs_node *kn) |
1348 | { | |
0efc89be | 1349 | int ret, cpu, closid = rdtgroup_default.closid; |
60cf5e10 | 1350 | struct rdtgroup *rdtgrp; |
0efc89be FY |
1351 | cpumask_var_t tmpmask; |
1352 | ||
1353 | if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) | |
1354 | return -ENOMEM; | |
60cf5e10 FY |
1355 | |
1356 | rdtgrp = rdtgroup_kn_lock_live(kn); | |
1357 | if (!rdtgrp) { | |
0efc89be FY |
1358 | ret = -EPERM; |
1359 | goto out; | |
60cf5e10 FY |
1360 | } |
1361 | ||
e02737d5 | 1362 | /* Give any tasks back to the default group */ |
0efc89be | 1363 | rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); |
e02737d5 | 1364 | |
12e0110c TL |
1365 | /* Give any CPUs back to the default group */ |
1366 | cpumask_or(&rdtgroup_default.cpu_mask, | |
1367 | &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); | |
0efc89be FY |
1368 | |
1369 | /* Update per cpu closid of the moved CPUs first */ | |
1370 | for_each_cpu(cpu, &rdtgrp->cpu_mask) | |
154334a0 | 1371 | per_cpu(rdt_cpu_default.closid, cpu) = closid; |
0efc89be FY |
1372 | /* |
1373 | * Update the MSR on moved CPUs and CPUs which have moved | |
1374 | * task running on them. | |
1375 | */ | |
1376 | cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); | |
154334a0 | 1377 | update_closid(tmpmask, NULL); |
12e0110c | 1378 | |
60cf5e10 FY |
1379 | rdtgrp->flags = RDT_DELETED; |
1380 | closid_free(rdtgrp->closid); | |
1381 | list_del(&rdtgrp->rdtgroup_list); | |
1382 | ||
1383 | /* | |
1384 | * one extra hold on this, will drop when we kfree(rdtgrp) | |
1385 | * in rdtgroup_kn_unlock() | |
1386 | */ | |
1387 | kernfs_get(kn); | |
1388 | kernfs_remove(rdtgrp->kn); | |
0efc89be FY |
1389 | ret = 0; |
1390 | out: | |
60cf5e10 | 1391 | rdtgroup_kn_unlock(kn); |
0efc89be FY |
1392 | free_cpumask_var(tmpmask); |
1393 | return ret; | |
60cf5e10 FY |
1394 | } |
1395 | ||
76ae054c SL |
1396 | static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) |
1397 | { | |
26017611 | 1398 | if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) |
76ae054c SL |
1399 | seq_puts(seq, ",cdp"); |
1400 | return 0; | |
1401 | } | |
1402 | ||
5ff193fb | 1403 | static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { |
76ae054c SL |
1404 | .mkdir = rdtgroup_mkdir, |
1405 | .rmdir = rdtgroup_rmdir, | |
1406 | .show_options = rdtgroup_show_options, | |
5ff193fb FY |
1407 | }; |
1408 | ||
1409 | static int __init rdtgroup_setup_root(void) | |
1410 | { | |
12e0110c TL |
1411 | int ret; |
1412 | ||
5ff193fb FY |
1413 | rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, |
1414 | KERNFS_ROOT_CREATE_DEACTIVATED, | |
1415 | &rdtgroup_default); | |
1416 | if (IS_ERR(rdt_root)) | |
1417 | return PTR_ERR(rdt_root); | |
1418 | ||
1419 | mutex_lock(&rdtgroup_mutex); | |
1420 | ||
1421 | rdtgroup_default.closid = 0; | |
ccac7180 VS |
1422 | rdtgroup_default.mon.rmid = 0; |
1423 | rdtgroup_default.type = RDTCTRL_GROUP; | |
1424 | INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); | |
1425 | ||
5ff193fb FY |
1426 | list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); |
1427 | ||
5ae32bbc | 1428 | ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE); |
12e0110c TL |
1429 | if (ret) { |
1430 | kernfs_destroy_root(rdt_root); | |
1431 | goto out; | |
1432 | } | |
1433 | ||
5ff193fb FY |
1434 | rdtgroup_default.kn = rdt_root->kn; |
1435 | kernfs_activate(rdtgroup_default.kn); | |
1436 | ||
12e0110c | 1437 | out: |
5ff193fb FY |
1438 | mutex_unlock(&rdtgroup_mutex); |
1439 | ||
12e0110c | 1440 | return ret; |
5ff193fb FY |
1441 | } |
1442 | ||
1443 | /* | |
1444 | * rdtgroup_init - rdtgroup initialization | |
1445 | * | |
1446 | * Setup resctrl file system including set up root, create mount point, | |
1447 | * register rdtgroup filesystem, and initialize files under root directory. | |
1448 | * | |
1449 | * Return: 0 on success or -errno | |
1450 | */ | |
1451 | int __init rdtgroup_init(void) | |
1452 | { | |
1453 | int ret = 0; | |
1454 | ||
1455 | ret = rdtgroup_setup_root(); | |
1456 | if (ret) | |
1457 | return ret; | |
1458 | ||
1459 | ret = sysfs_create_mount_point(fs_kobj, "resctrl"); | |
1460 | if (ret) | |
1461 | goto cleanup_root; | |
1462 | ||
1463 | ret = register_filesystem(&rdt_fs_type); | |
1464 | if (ret) | |
1465 | goto cleanup_mountpoint; | |
1466 | ||
1467 | return 0; | |
1468 | ||
1469 | cleanup_mountpoint: | |
1470 | sysfs_remove_mount_point(fs_kobj, "resctrl"); | |
1471 | cleanup_root: | |
1472 | kernfs_destroy_root(rdt_root); | |
1473 | ||
1474 | return ret; | |
1475 | } |