]>
Commit | Line | Data |
---|---|---|
5ff193fb FY |
1 | /* |
2 | * User interface for Resource Allocation in Resource Director Technology(RDT) | |
3 | * | |
4 | * Copyright (C) 2016 Intel Corporation | |
5 | * | |
6 | * Author: Fenghua Yu <fenghua.yu@intel.com> | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify it | |
9 | * under the terms and conditions of the GNU General Public License, | |
10 | * version 2, as published by the Free Software Foundation. | |
11 | * | |
12 | * This program is distributed in the hope it will be useful, but WITHOUT | |
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
15 | * more details. | |
16 | * | |
17 | * More information about RDT can be found in the Intel (R) x86 Architecture | |
18 | * Software Developer Manual. | |
19 | */ | |
20 | ||
21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
22 | ||
12e0110c | 23 | #include <linux/cpu.h> |
5ff193fb FY |
24 | #include <linux/fs.h> |
25 | #include <linux/sysfs.h> | |
26 | #include <linux/kernfs.h> | |
4e978d06 | 27 | #include <linux/seq_file.h> |
3f07c014 | 28 | #include <linux/sched/signal.h> |
29930025 | 29 | #include <linux/sched/task.h> |
5ff193fb | 30 | #include <linux/slab.h> |
60cf5e10 | 31 | #include <linux/cpu.h> |
e02737d5 | 32 | #include <linux/task_work.h> |
5ff193fb FY |
33 | |
34 | #include <uapi/linux/magic.h> | |
35 | ||
36 | #include <asm/intel_rdt.h> | |
60cf5e10 | 37 | #include <asm/intel_rdt_common.h> |
5ff193fb FY |
38 | |
/* Enabled once the resctrl filesystem is mounted, disabled again on kill_sb */
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
/* kernfs root handed to kernfs_mount() for the resctrl filesystem */
struct kernfs_root *rdt_root;
/* The default resource group; CLOSID 0 is reserved for it */
struct rdtgroup rdtgroup_default;
/* All resource groups, linked through rdtgroup->rdtgroup_list */
LIST_HEAD(rdt_all_groups);
43 | ||
4e978d06 FY |
44 | /* Kernel fs node for "info" directory under root */ |
45 | static struct kernfs_node *kn_info; | |
46 | ||
60cf5e10 FY |
47 | /* |
48 | * Trivial allocator for CLOSIDs. Since h/w only supports a small number, | |
49 | * we can keep a bitmap of free CLOSIDs in a single integer. | |
50 | * | |
51 | * Using a global CLOSID across all resources has some advantages and | |
52 | * some drawbacks: | |
53 | * + We can simply set "current->closid" to assign a task to a resource | |
54 | * group. | |
55 | * + Context switch code can avoid extra memory references deciding which | |
56 | * CLOSID to load into the PQR_ASSOC MSR | |
57 | * - We give up some options in configuring resource groups across multi-socket | |
58 | * systems. | |
59 | * - Our choices on how to configure each resource become progressively more | |
60 | * limited as the number of resources grows. | |
61 | */ | |
62 | static int closid_free_map; | |
63 | ||
64 | static void closid_init(void) | |
65 | { | |
66 | struct rdt_resource *r; | |
67 | int rdt_min_closid = 32; | |
68 | ||
69 | /* Compute rdt_min_closid across all resources */ | |
70 | for_each_enabled_rdt_resource(r) | |
71 | rdt_min_closid = min(rdt_min_closid, r->num_closid); | |
72 | ||
73 | closid_free_map = BIT_MASK(rdt_min_closid) - 1; | |
74 | ||
75 | /* CLOSID 0 is always reserved for the default group */ | |
76 | closid_free_map &= ~1; | |
77 | } | |
78 | ||
79 | int closid_alloc(void) | |
80 | { | |
81 | int closid = ffs(closid_free_map); | |
82 | ||
83 | if (closid == 0) | |
84 | return -ENOSPC; | |
85 | closid--; | |
86 | closid_free_map &= ~(1 << closid); | |
87 | ||
88 | return closid; | |
89 | } | |
90 | ||
91 | static void closid_free(int closid) | |
92 | { | |
93 | closid_free_map |= 1 << closid; | |
94 | } | |
95 | ||
4e978d06 FY |
96 | /* set uid and gid of rdtgroup dirs and files to that of the creator */ |
97 | static int rdtgroup_kn_set_ugid(struct kernfs_node *kn) | |
98 | { | |
99 | struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, | |
100 | .ia_uid = current_fsuid(), | |
101 | .ia_gid = current_fsgid(), }; | |
102 | ||
103 | if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && | |
104 | gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) | |
105 | return 0; | |
106 | ||
107 | return kernfs_setattr(kn, &iattr); | |
108 | } | |
109 | ||
110 | static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft) | |
111 | { | |
112 | struct kernfs_node *kn; | |
113 | int ret; | |
114 | ||
115 | kn = __kernfs_create_file(parent_kn, rft->name, rft->mode, | |
116 | 0, rft->kf_ops, rft, NULL, NULL); | |
117 | if (IS_ERR(kn)) | |
118 | return PTR_ERR(kn); | |
119 | ||
120 | ret = rdtgroup_kn_set_ugid(kn); | |
121 | if (ret) { | |
122 | kernfs_remove(kn); | |
123 | return ret; | |
124 | } | |
125 | ||
126 | return 0; | |
127 | } | |
128 | ||
129 | static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts, | |
130 | int len) | |
131 | { | |
132 | struct rftype *rft; | |
133 | int ret; | |
134 | ||
135 | lockdep_assert_held(&rdtgroup_mutex); | |
136 | ||
137 | for (rft = rfts; rft < rfts + len; rft++) { | |
138 | ret = rdtgroup_add_file(kn, rft); | |
139 | if (ret) | |
140 | goto error; | |
141 | } | |
142 | ||
143 | return 0; | |
144 | error: | |
145 | pr_warn("Failed to add %s, err=%d\n", rft->name, ret); | |
146 | while (--rft >= rfts) | |
147 | kernfs_remove_by_name(kn, rft->name); | |
148 | return ret; | |
149 | } | |
150 | ||
151 | static int rdtgroup_seqfile_show(struct seq_file *m, void *arg) | |
152 | { | |
153 | struct kernfs_open_file *of = m->private; | |
154 | struct rftype *rft = of->kn->priv; | |
155 | ||
156 | if (rft->seq_show) | |
157 | return rft->seq_show(of, m, arg); | |
158 | return 0; | |
159 | } | |
160 | ||
161 | static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, | |
162 | size_t nbytes, loff_t off) | |
163 | { | |
164 | struct rftype *rft = of->kn->priv; | |
165 | ||
166 | if (rft->write) | |
167 | return rft->write(of, buf, nbytes, off); | |
168 | ||
169 | return -EINVAL; | |
170 | } | |
171 | ||
/*
 * kernfs operations shared by the rftype tables in this file. Reads and
 * writes are forwarded to the per-file handlers found in the rftype.
 */
static struct kernfs_ops rdtgroup_kf_single_ops = {
	.atomic_write_len = PAGE_SIZE,
	.write = rdtgroup_file_write,
	.seq_show = rdtgroup_seqfile_show,
};
177 | ||
12e0110c TL |
178 | static int rdtgroup_cpus_show(struct kernfs_open_file *of, |
179 | struct seq_file *s, void *v) | |
180 | { | |
181 | struct rdtgroup *rdtgrp; | |
182 | int ret = 0; | |
183 | ||
184 | rdtgrp = rdtgroup_kn_lock_live(of->kn); | |
185 | ||
186 | if (rdtgrp) | |
187 | seq_printf(s, "%*pb\n", cpumask_pr_args(&rdtgrp->cpu_mask)); | |
188 | else | |
189 | ret = -ENOENT; | |
190 | rdtgroup_kn_unlock(of->kn); | |
191 | ||
192 | return ret; | |
193 | } | |
194 | ||
f4107702 FY |
195 | /* |
196 | * This is safe against intel_rdt_sched_in() called from __switch_to() | |
197 | * because __switch_to() is executed with interrupts disabled. A local call | |
0efc89be | 198 | * from rdt_update_closid() is proteced against __switch_to() because |
f4107702 FY |
199 | * preemption is disabled. |
200 | */ | |
0efc89be | 201 | static void rdt_update_cpu_closid(void *closid) |
f4107702 | 202 | { |
0efc89be FY |
203 | if (closid) |
204 | this_cpu_write(cpu_closid, *(int *)closid); | |
f4107702 FY |
205 | /* |
206 | * We cannot unconditionally write the MSR because the current | |
207 | * executing task might have its own closid selected. Just reuse | |
208 | * the context switch code. | |
209 | */ | |
210 | intel_rdt_sched_in(); | |
211 | } | |
212 | ||
0efc89be FY |
213 | /* |
214 | * Update the PGR_ASSOC MSR on all cpus in @cpu_mask, | |
215 | * | |
216 | * Per task closids must have been set up before calling this function. | |
217 | * | |
218 | * The per cpu closids are updated with the smp function call, when @closid | |
219 | * is not NULL. If @closid is NULL then all affected percpu closids must | |
220 | * have been set up before calling this function. | |
221 | */ | |
222 | static void | |
223 | rdt_update_closid(const struct cpumask *cpu_mask, int *closid) | |
f4107702 FY |
224 | { |
225 | int cpu = get_cpu(); | |
226 | ||
227 | if (cpumask_test_cpu(cpu, cpu_mask)) | |
0efc89be FY |
228 | rdt_update_cpu_closid(closid); |
229 | smp_call_function_many(cpu_mask, rdt_update_cpu_closid, closid, 1); | |
f4107702 FY |
230 | put_cpu(); |
231 | } | |
232 | ||
12e0110c TL |
233 | static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, |
234 | char *buf, size_t nbytes, loff_t off) | |
235 | { | |
236 | cpumask_var_t tmpmask, newmask; | |
237 | struct rdtgroup *rdtgrp, *r; | |
f4107702 | 238 | int ret; |
12e0110c TL |
239 | |
240 | if (!buf) | |
241 | return -EINVAL; | |
242 | ||
243 | if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) | |
244 | return -ENOMEM; | |
245 | if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) { | |
246 | free_cpumask_var(tmpmask); | |
247 | return -ENOMEM; | |
248 | } | |
a2584e1d | 249 | |
12e0110c TL |
250 | rdtgrp = rdtgroup_kn_lock_live(of->kn); |
251 | if (!rdtgrp) { | |
252 | ret = -ENOENT; | |
253 | goto unlock; | |
254 | } | |
255 | ||
256 | ret = cpumask_parse(buf, newmask); | |
257 | if (ret) | |
258 | goto unlock; | |
259 | ||
12e0110c TL |
260 | /* check that user didn't specify any offline cpus */ |
261 | cpumask_andnot(tmpmask, newmask, cpu_online_mask); | |
262 | if (cpumask_weight(tmpmask)) { | |
263 | ret = -EINVAL; | |
a2584e1d | 264 | goto unlock; |
12e0110c TL |
265 | } |
266 | ||
267 | /* Check whether cpus are dropped from this group */ | |
268 | cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); | |
269 | if (cpumask_weight(tmpmask)) { | |
270 | /* Can't drop from default group */ | |
271 | if (rdtgrp == &rdtgroup_default) { | |
272 | ret = -EINVAL; | |
a2584e1d | 273 | goto unlock; |
12e0110c TL |
274 | } |
275 | /* Give any dropped cpus to rdtgroup_default */ | |
276 | cpumask_or(&rdtgroup_default.cpu_mask, | |
277 | &rdtgroup_default.cpu_mask, tmpmask); | |
0efc89be | 278 | rdt_update_closid(tmpmask, &rdtgroup_default.closid); |
12e0110c TL |
279 | } |
280 | ||
281 | /* | |
282 | * If we added cpus, remove them from previous group that owned them | |
283 | * and update per-cpu closid | |
284 | */ | |
285 | cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask); | |
286 | if (cpumask_weight(tmpmask)) { | |
287 | list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) { | |
288 | if (r == rdtgrp) | |
289 | continue; | |
290 | cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask); | |
291 | } | |
0efc89be | 292 | rdt_update_closid(tmpmask, &rdtgrp->closid); |
12e0110c TL |
293 | } |
294 | ||
295 | /* Done pushing/pulling - update this group with new mask */ | |
296 | cpumask_copy(&rdtgrp->cpu_mask, newmask); | |
297 | ||
12e0110c TL |
298 | unlock: |
299 | rdtgroup_kn_unlock(of->kn); | |
300 | free_cpumask_var(tmpmask); | |
301 | free_cpumask_var(newmask); | |
302 | ||
303 | return ret ?: nbytes; | |
304 | } | |
305 | ||
e02737d5 FY |
/* Context for the task work that finishes moving a task into a group */
struct task_move_callback {
	/* Task-work entry; its func is set to move_myself() */
	struct callback_head work;
	/* Destination group; a waitcount reference is held while queued */
	struct rdtgroup *rdtgrp;
};
310 | ||
/*
 * Task work queued by __rdtgroup_move_task(). It drops the waitcount
 * reference taken there, reloads the closid through intel_rdt_sched_in()
 * and frees the callback structure.
 *
 * NOTE(review): operates on "current", so this presumably runs in the
 * context of the task that is being moved - confirm against task_work.
 */
static void move_myself(struct callback_head *head)
{
	struct task_move_callback *callback;
	struct rdtgroup *rdtgrp;

	callback = container_of(head, struct task_move_callback, work);
	rdtgrp = callback->rdtgrp;

	/*
	 * If resource group was deleted before this task work callback
	 * was invoked, then assign the task to root group and free the
	 * resource group.
	 */
	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
	    (rdtgrp->flags & RDT_DELETED)) {
		current->closid = 0;
		kfree(rdtgrp);
	}

	preempt_disable();
	/* update PQR_ASSOC MSR to make resource group go into effect */
	intel_rdt_sched_in();
	preempt_enable();

	kfree(callback);
}
337 | ||
338 | static int __rdtgroup_move_task(struct task_struct *tsk, | |
339 | struct rdtgroup *rdtgrp) | |
340 | { | |
341 | struct task_move_callback *callback; | |
342 | int ret; | |
343 | ||
344 | callback = kzalloc(sizeof(*callback), GFP_KERNEL); | |
345 | if (!callback) | |
346 | return -ENOMEM; | |
347 | callback->work.func = move_myself; | |
348 | callback->rdtgrp = rdtgrp; | |
349 | ||
350 | /* | |
351 | * Take a refcount, so rdtgrp cannot be freed before the | |
352 | * callback has been invoked. | |
353 | */ | |
354 | atomic_inc(&rdtgrp->waitcount); | |
355 | ret = task_work_add(tsk, &callback->work, true); | |
356 | if (ret) { | |
357 | /* | |
358 | * Task is exiting. Drop the refcount and free the callback. | |
359 | * No need to check the refcount as the group cannot be | |
360 | * deleted before the write function unlocks rdtgroup_mutex. | |
361 | */ | |
362 | atomic_dec(&rdtgrp->waitcount); | |
363 | kfree(callback); | |
364 | } else { | |
365 | tsk->closid = rdtgrp->closid; | |
366 | } | |
367 | return ret; | |
368 | } | |
369 | ||
370 | static int rdtgroup_task_write_permission(struct task_struct *task, | |
371 | struct kernfs_open_file *of) | |
372 | { | |
373 | const struct cred *tcred = get_task_cred(task); | |
374 | const struct cred *cred = current_cred(); | |
375 | int ret = 0; | |
376 | ||
377 | /* | |
378 | * Even if we're attaching all tasks in the thread group, we only | |
379 | * need to check permissions on one of them. | |
380 | */ | |
381 | if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && | |
382 | !uid_eq(cred->euid, tcred->uid) && | |
383 | !uid_eq(cred->euid, tcred->suid)) | |
384 | ret = -EPERM; | |
385 | ||
386 | put_cred(tcred); | |
387 | return ret; | |
388 | } | |
389 | ||
390 | static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, | |
391 | struct kernfs_open_file *of) | |
392 | { | |
393 | struct task_struct *tsk; | |
394 | int ret; | |
395 | ||
396 | rcu_read_lock(); | |
397 | if (pid) { | |
398 | tsk = find_task_by_vpid(pid); | |
399 | if (!tsk) { | |
400 | rcu_read_unlock(); | |
401 | return -ESRCH; | |
402 | } | |
403 | } else { | |
404 | tsk = current; | |
405 | } | |
406 | ||
407 | get_task_struct(tsk); | |
408 | rcu_read_unlock(); | |
409 | ||
410 | ret = rdtgroup_task_write_permission(tsk, of); | |
411 | if (!ret) | |
412 | ret = __rdtgroup_move_task(tsk, rdtgrp); | |
413 | ||
414 | put_task_struct(tsk); | |
415 | return ret; | |
416 | } | |
417 | ||
418 | static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, | |
419 | char *buf, size_t nbytes, loff_t off) | |
420 | { | |
421 | struct rdtgroup *rdtgrp; | |
422 | int ret = 0; | |
423 | pid_t pid; | |
424 | ||
425 | if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) | |
426 | return -EINVAL; | |
427 | rdtgrp = rdtgroup_kn_lock_live(of->kn); | |
428 | ||
429 | if (rdtgrp) | |
430 | ret = rdtgroup_move_task(pid, rdtgrp, of); | |
431 | else | |
432 | ret = -ENOENT; | |
433 | ||
434 | rdtgroup_kn_unlock(of->kn); | |
435 | ||
436 | return ret ?: nbytes; | |
437 | } | |
438 | ||
439 | static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) | |
440 | { | |
441 | struct task_struct *p, *t; | |
442 | ||
443 | rcu_read_lock(); | |
444 | for_each_process_thread(p, t) { | |
445 | if (t->closid == r->closid) | |
446 | seq_printf(s, "%d\n", t->pid); | |
447 | } | |
448 | rcu_read_unlock(); | |
449 | } | |
450 | ||
451 | static int rdtgroup_tasks_show(struct kernfs_open_file *of, | |
452 | struct seq_file *s, void *v) | |
453 | { | |
454 | struct rdtgroup *rdtgrp; | |
455 | int ret = 0; | |
456 | ||
457 | rdtgrp = rdtgroup_kn_lock_live(of->kn); | |
458 | if (rdtgrp) | |
459 | show_rdt_tasks(rdtgrp, s); | |
460 | else | |
461 | ret = -ENOENT; | |
462 | rdtgroup_kn_unlock(of->kn); | |
463 | ||
464 | return ret; | |
465 | } | |
466 | ||
12e0110c TL |
/*
 * Files in each rdtgroup. rdtgroup_mkdir() creates them, in this order, in
 * every new group directory.
 */
static struct rftype rdtgroup_base_files[] = {
	/* CPUs assigned to the group */
	{
		.name = "cpus",
		.mode = 0644,
		.kf_ops = &rdtgroup_kf_single_ops,
		.write = rdtgroup_cpus_write,
		.seq_show = rdtgroup_cpus_show,
	},
	/* Tasks assigned to the group */
	{
		.name = "tasks",
		.mode = 0644,
		.kf_ops = &rdtgroup_kf_single_ops,
		.write = rdtgroup_tasks_write,
		.seq_show = rdtgroup_tasks_show,
	},
	/* Handlers for this file are defined outside this part of the file */
	{
		.name = "schemata",
		.mode = 0644,
		.kf_ops = &rdtgroup_kf_single_ops,
		.write = rdtgroup_schemata_write,
		.seq_show = rdtgroup_schemata_show,
	},
};
491 | ||
4e978d06 FY |
492 | static int rdt_num_closids_show(struct kernfs_open_file *of, |
493 | struct seq_file *seq, void *v) | |
494 | { | |
495 | struct rdt_resource *r = of->kn->parent->priv; | |
496 | ||
497 | seq_printf(seq, "%d\n", r->num_closid); | |
498 | ||
499 | return 0; | |
500 | } | |
501 | ||
502 | static int rdt_cbm_mask_show(struct kernfs_open_file *of, | |
503 | struct seq_file *seq, void *v) | |
504 | { | |
505 | struct rdt_resource *r = of->kn->parent->priv; | |
506 | ||
507 | seq_printf(seq, "%x\n", r->max_cbm); | |
508 | ||
509 | return 0; | |
510 | } | |
511 | ||
53a114a6 SL |
512 | static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, |
513 | struct seq_file *seq, void *v) | |
514 | { | |
515 | struct rdt_resource *r = of->kn->parent->priv; | |
516 | ||
517 | seq_printf(seq, "%d\n", r->min_cbm_bits); | |
518 | ||
519 | return 0; | |
520 | } | |
521 | ||
4e978d06 FY |
/*
 * rdtgroup information files for one cache resource. All of them are
 * read-only; rdtgroup_create_info_dir() creates them in each per-resource
 * subdirectory of "info".
 */
static struct rftype res_info_files[] = {
	/* Prints the resource's num_closid */
	{
		.name = "num_closids",
		.mode = 0444,
		.kf_ops = &rdtgroup_kf_single_ops,
		.seq_show = rdt_num_closids_show,
	},
	/* Prints the resource's max_cbm in hex */
	{
		.name = "cbm_mask",
		.mode = 0444,
		.kf_ops = &rdtgroup_kf_single_ops,
		.seq_show = rdt_cbm_mask_show,
	},
	/* Prints the resource's min_cbm_bits */
	{
		.name = "min_cbm_bits",
		.mode = 0444,
		.kf_ops = &rdtgroup_kf_single_ops,
		.seq_show = rdt_min_cbm_bits_show,
	},
};
543 | ||
/*
 * Create the "info" directory below @parent_kn, with one subdirectory per
 * enabled resource that holds the res_info_files entries.
 *
 * The new directory is remembered in kn_info. On any failure after "info"
 * itself was created, the whole "info" subtree is removed again.
 *
 * Returns 0 on success or a negative errno.
 */
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
	struct kernfs_node *kn_subdir;
	struct rdt_resource *r;
	int ret;

	/* create the directory */
	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
	if (IS_ERR(kn_info))
		return PTR_ERR(kn_info);
	kernfs_get(kn_info);

	for_each_enabled_rdt_resource(r) {
		/* The resource becomes the subdirectory's private data */
		kn_subdir = kernfs_create_dir(kn_info, r->name,
					      kn_info->mode, r);
		if (IS_ERR(kn_subdir)) {
			ret = PTR_ERR(kn_subdir);
			goto out_destroy;
		}
		kernfs_get(kn_subdir);
		ret = rdtgroup_kn_set_ugid(kn_subdir);
		if (ret)
			goto out_destroy;
		ret = rdtgroup_add_files(kn_subdir, res_info_files,
					 ARRAY_SIZE(res_info_files));
		if (ret)
			goto out_destroy;
		kernfs_activate(kn_subdir);
	}

	/*
	 * This extra ref will be put in kernfs_remove() and guarantees
	 * that @rdtgrp->kn is always accessible.
	 *
	 * NOTE(review): this is the second kernfs_get() on kn_info in this
	 * function, and kn_subdir gets one as well; the matching puts are
	 * not visible here - confirm the references are balanced.
	 */
	kernfs_get(kn_info);

	ret = rdtgroup_kn_set_ugid(kn_info);
	if (ret)
		goto out_destroy;

	kernfs_activate(kn_info);

	return 0;

out_destroy:
	kernfs_remove(kn_info);
	return ret;
}
592 | ||
5ff193fb FY |
593 | static void l3_qos_cfg_update(void *arg) |
594 | { | |
595 | bool *enable = arg; | |
596 | ||
597 | wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); | |
598 | } | |
599 | ||
600 | static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) | |
601 | { | |
602 | cpumask_var_t cpu_mask; | |
603 | struct rdt_domain *d; | |
604 | int cpu; | |
605 | ||
606 | if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) | |
607 | return -ENOMEM; | |
608 | ||
609 | list_for_each_entry(d, &r->domains, list) { | |
610 | /* Pick one CPU from each domain instance to update MSR */ | |
611 | cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); | |
612 | } | |
613 | cpu = get_cpu(); | |
614 | /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ | |
615 | if (cpumask_test_cpu(cpu, cpu_mask)) | |
616 | l3_qos_cfg_update(&enable); | |
617 | /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ | |
618 | smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1); | |
619 | put_cpu(); | |
620 | ||
621 | free_cpumask_var(cpu_mask); | |
622 | ||
623 | return 0; | |
624 | } | |
625 | ||
626 | static int cdp_enable(void) | |
627 | { | |
628 | struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA]; | |
629 | struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE]; | |
630 | struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; | |
631 | int ret; | |
632 | ||
633 | if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable) | |
634 | return -EINVAL; | |
635 | ||
636 | ret = set_l3_qos_cfg(r_l3, true); | |
637 | if (!ret) { | |
638 | r_l3->enabled = false; | |
639 | r_l3data->enabled = true; | |
640 | r_l3code->enabled = true; | |
641 | } | |
642 | return ret; | |
643 | } | |
644 | ||
645 | static void cdp_disable(void) | |
646 | { | |
647 | struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; | |
648 | ||
649 | r->enabled = r->capable; | |
650 | ||
651 | if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) { | |
652 | rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false; | |
653 | rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false; | |
654 | set_l3_qos_cfg(r, false); | |
655 | } | |
656 | } | |
657 | ||
658 | static int parse_rdtgroupfs_options(char *data) | |
659 | { | |
660 | char *token, *o = data; | |
661 | int ret = 0; | |
662 | ||
663 | while ((token = strsep(&o, ",")) != NULL) { | |
664 | if (!*token) | |
665 | return -EINVAL; | |
666 | ||
667 | if (!strcmp(token, "cdp")) | |
668 | ret = cdp_enable(); | |
669 | } | |
670 | ||
671 | return ret; | |
672 | } | |
673 | ||
60cf5e10 FY |
674 | /* |
675 | * We don't allow rdtgroup directories to be created anywhere | |
676 | * except the root directory. Thus when looking for the rdtgroup | |
677 | * structure for a kernfs node we are either looking at a directory, | |
678 | * in which case the rdtgroup structure is pointed at by the "priv" | |
679 | * field, otherwise we have a file, and need only look to the parent | |
680 | * to find the rdtgroup. | |
681 | */ | |
682 | static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) | |
683 | { | |
f57b3087 FY |
684 | if (kernfs_type(kn) == KERNFS_DIR) { |
685 | /* | |
686 | * All the resource directories use "kn->priv" | |
687 | * to point to the "struct rdtgroup" for the | |
688 | * resource. "info" and its subdirectories don't | |
689 | * have rdtgroup structures, so return NULL here. | |
690 | */ | |
691 | if (kn == kn_info || kn->parent == kn_info) | |
692 | return NULL; | |
693 | else | |
694 | return kn->priv; | |
695 | } else { | |
60cf5e10 | 696 | return kn->parent->priv; |
f57b3087 | 697 | } |
60cf5e10 FY |
698 | } |
699 | ||
/*
 * Look up the rdtgroup behind @kn and take rdtgroup_mutex.
 *
 * Returns the group, or NULL in two different situations:
 *  - @kn has no rdtgroup: nothing was locked or counted.
 *  - the group was flagged RDT_DELETED: waitcount was incremented, active
 *    protection was broken and rdtgroup_mutex IS held.
 * Callers in this file call rdtgroup_kn_unlock(@kn) after either outcome.
 */
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
{
	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

	if (!rdtgrp)
		return NULL;

	/* Count this waiter before dropping kernfs active protection */
	atomic_inc(&rdtgrp->waitcount);
	kernfs_break_active_protection(kn);

	mutex_lock(&rdtgroup_mutex);

	/* Was this group deleted while we waited? */
	if (rdtgrp->flags & RDT_DELETED)
		return NULL;

	return rdtgrp;
}
718 | ||
/*
 * Counterpart of rdtgroup_kn_lock_live(): release rdtgroup_mutex, drop the
 * waitcount reference and restore kernfs active protection on @kn.
 *
 * Does nothing when @kn has no rdtgroup. The last waiter on a group flagged
 * RDT_DELETED also puts a reference on @kn and frees the rdtgroup.
 */
void rdtgroup_kn_unlock(struct kernfs_node *kn)
{
	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

	if (!rdtgrp)
		return;

	mutex_unlock(&rdtgroup_mutex);

	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
	    (rdtgrp->flags & RDT_DELETED)) {
		/* Last waiter of a deleted group: release node and group */
		kernfs_unbreak_active_protection(kn);
		kernfs_put(kn);
		kfree(rdtgrp);
	} else {
		kernfs_unbreak_active_protection(kn);
	}
}
737 | ||
5ff193fb FY |
738 | static struct dentry *rdt_mount(struct file_system_type *fs_type, |
739 | int flags, const char *unused_dev_name, | |
740 | void *data) | |
741 | { | |
742 | struct dentry *dentry; | |
743 | int ret; | |
744 | ||
745 | mutex_lock(&rdtgroup_mutex); | |
746 | /* | |
747 | * resctrl file system can only be mounted once. | |
748 | */ | |
749 | if (static_branch_unlikely(&rdt_enable_key)) { | |
750 | dentry = ERR_PTR(-EBUSY); | |
751 | goto out; | |
752 | } | |
753 | ||
754 | ret = parse_rdtgroupfs_options(data); | |
755 | if (ret) { | |
756 | dentry = ERR_PTR(ret); | |
757 | goto out_cdp; | |
758 | } | |
759 | ||
60cf5e10 FY |
760 | closid_init(); |
761 | ||
4e978d06 | 762 | ret = rdtgroup_create_info_dir(rdtgroup_default.kn); |
7bff0af5 SL |
763 | if (ret) { |
764 | dentry = ERR_PTR(ret); | |
4e978d06 | 765 | goto out_cdp; |
7bff0af5 | 766 | } |
4e978d06 | 767 | |
5ff193fb FY |
768 | dentry = kernfs_mount(fs_type, flags, rdt_root, |
769 | RDTGROUP_SUPER_MAGIC, NULL); | |
770 | if (IS_ERR(dentry)) | |
771 | goto out_cdp; | |
772 | ||
773 | static_branch_enable(&rdt_enable_key); | |
774 | goto out; | |
775 | ||
776 | out_cdp: | |
777 | cdp_disable(); | |
778 | out: | |
779 | mutex_unlock(&rdtgroup_mutex); | |
780 | ||
781 | return dentry; | |
782 | } | |
783 | ||
784 | static int reset_all_cbms(struct rdt_resource *r) | |
785 | { | |
786 | struct msr_param msr_param; | |
787 | cpumask_var_t cpu_mask; | |
788 | struct rdt_domain *d; | |
789 | int i, cpu; | |
790 | ||
791 | if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) | |
792 | return -ENOMEM; | |
793 | ||
794 | msr_param.res = r; | |
795 | msr_param.low = 0; | |
796 | msr_param.high = r->num_closid; | |
797 | ||
798 | /* | |
799 | * Disable resource control for this resource by setting all | |
800 | * CBMs in all domains to the maximum mask value. Pick one CPU | |
801 | * from each domain to update the MSRs below. | |
802 | */ | |
803 | list_for_each_entry(d, &r->domains, list) { | |
804 | cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); | |
805 | ||
806 | for (i = 0; i < r->num_closid; i++) | |
807 | d->cbm[i] = r->max_cbm; | |
808 | } | |
809 | cpu = get_cpu(); | |
810 | /* Update CBM on this cpu if it's in cpu_mask. */ | |
811 | if (cpumask_test_cpu(cpu, cpu_mask)) | |
812 | rdt_cbm_update(&msr_param); | |
813 | /* Update CBM on all other cpus in cpu_mask. */ | |
814 | smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1); | |
815 | put_cpu(); | |
816 | ||
817 | free_cpumask_var(cpu_mask); | |
818 | ||
819 | return 0; | |
820 | } | |
821 | ||
4e978d06 | 822 | /* |
0efc89be FY |
823 | * Move tasks from one to the other group. If @from is NULL, then all tasks |
824 | * in the systems are moved unconditionally (used for teardown). | |
825 | * | |
826 | * If @mask is not NULL the cpus on which moved tasks are running are set | |
827 | * in that mask so the update smp function call is restricted to affected | |
828 | * cpus. | |
4e978d06 | 829 | */ |
0efc89be FY |
830 | static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, |
831 | struct cpumask *mask) | |
4e978d06 | 832 | { |
e02737d5 FY |
833 | struct task_struct *p, *t; |
834 | ||
e02737d5 | 835 | read_lock(&tasklist_lock); |
0efc89be FY |
836 | for_each_process_thread(p, t) { |
837 | if (!from || t->closid == from->closid) { | |
838 | t->closid = to->closid; | |
839 | #ifdef CONFIG_SMP | |
840 | /* | |
841 | * This is safe on x86 w/o barriers as the ordering | |
842 | * of writing to task_cpu() and t->on_cpu is | |
843 | * reverse to the reading here. The detection is | |
844 | * inaccurate as tasks might move or schedule | |
845 | * before the smp function call takes place. In | |
846 | * such a case the function call is pointless, but | |
847 | * there is no other side effect. | |
848 | */ | |
849 | if (mask && t->on_cpu) | |
850 | cpumask_set_cpu(task_cpu(t), mask); | |
851 | #endif | |
852 | } | |
853 | } | |
e02737d5 | 854 | read_unlock(&tasklist_lock); |
0efc89be FY |
855 | } |
856 | ||
857 | /* | |
858 | * Forcibly remove all of subdirectories under root. | |
859 | */ | |
860 | static void rmdir_all_sub(void) | |
861 | { | |
862 | struct rdtgroup *rdtgrp, *tmp; | |
863 | ||
864 | /* Move all tasks to the default resource group */ | |
865 | rdt_move_group_tasks(NULL, &rdtgroup_default, NULL); | |
60cf5e10 | 866 | |
60cf5e10 FY |
867 | list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) { |
868 | /* Remove each rdtgroup other than root */ | |
869 | if (rdtgrp == &rdtgroup_default) | |
870 | continue; | |
c7cc0cc1 FY |
871 | |
872 | /* | |
873 | * Give any CPUs back to the default group. We cannot copy | |
874 | * cpu_online_mask because a CPU might have executed the | |
875 | * offline callback already, but is still marked online. | |
876 | */ | |
877 | cpumask_or(&rdtgroup_default.cpu_mask, | |
878 | &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); | |
879 | ||
60cf5e10 FY |
880 | kernfs_remove(rdtgrp->kn); |
881 | list_del(&rdtgrp->rdtgroup_list); | |
882 | kfree(rdtgrp); | |
883 | } | |
0efc89be FY |
884 | /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ |
885 | get_online_cpus(); | |
886 | rdt_update_closid(cpu_online_mask, &rdtgroup_default.closid); | |
887 | put_online_cpus(); | |
888 | ||
4e978d06 FY |
889 | kernfs_remove(kn_info); |
890 | } | |
891 | ||
5ff193fb FY |
892 | static void rdt_kill_sb(struct super_block *sb) |
893 | { | |
894 | struct rdt_resource *r; | |
895 | ||
896 | mutex_lock(&rdtgroup_mutex); | |
897 | ||
898 | /*Put everything back to default values. */ | |
899 | for_each_enabled_rdt_resource(r) | |
900 | reset_all_cbms(r); | |
901 | cdp_disable(); | |
4e978d06 | 902 | rmdir_all_sub(); |
5ff193fb FY |
903 | static_branch_disable(&rdt_enable_key); |
904 | kernfs_kill_sb(sb); | |
905 | mutex_unlock(&rdtgroup_mutex); | |
906 | } | |
907 | ||
/* Filesystem type "resctrl" with its mount and kill_sb callbacks */
static struct file_system_type rdt_fs_type = {
	.name = "resctrl",
	.mount = rdt_mount,
	.kill_sb = rdt_kill_sb,
};
913 | ||
60cf5e10 FY |
/*
 * Create a new resource group directory @name with @mode below @parent_kn.
 *
 * A free CLOSID is allocated, the rdtgroup is added to rdt_all_groups, and
 * the directory is populated with rdtgroup_base_files before it is
 * activated. Every failure unwinds the steps completed so far.
 *
 * Returns 0 on success or a negative errno: -EPERM outside the root
 * directory, -EINVAL for a name containing '\n', -ENODEV when the parent
 * group is not live, -ENOSPC when no CLOSID is free, or the error of the
 * failing kernfs call.
 */
static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
			  umode_t mode)
{
	struct rdtgroup *parent, *rdtgrp;
	struct kernfs_node *kn;
	int ret, closid;

	/* Only allow mkdir in the root directory */
	if (parent_kn != rdtgroup_default.kn)
		return -EPERM;

	/* Do not accept '\n' to avoid unparsable situation. */
	if (strchr(name, '\n'))
		return -EINVAL;

	parent = rdtgroup_kn_lock_live(parent_kn);
	if (!parent) {
		ret = -ENODEV;
		goto out_unlock;
	}

	ret = closid_alloc();
	if (ret < 0)
		goto out_unlock;
	closid = ret;

	/* allocate the rdtgroup. */
	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
	if (!rdtgrp) {
		/*
		 * NOTE(review): an allocation failure is reported as
		 * -ENOSPC here, not -ENOMEM - confirm this is intended.
		 */
		ret = -ENOSPC;
		goto out_closid_free;
	}
	rdtgrp->closid = closid;
	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);

	/* kernfs creates the directory for rdtgrp */
	kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp);
	if (IS_ERR(kn)) {
		ret = PTR_ERR(kn);
		goto out_cancel_ref;
	}
	rdtgrp->kn = kn;

	/*
	 * kernfs_remove() will drop the reference count on "kn" which
	 * will free it. But we still need it to stick around for the
	 * rdtgroup_kn_unlock(kn) call below. Take one extra reference
	 * here, which will be dropped inside rdtgroup_kn_unlock().
	 */
	kernfs_get(kn);

	ret = rdtgroup_kn_set_ugid(kn);
	if (ret)
		goto out_destroy;

	ret = rdtgroup_add_files(kn, rdtgroup_base_files,
				 ARRAY_SIZE(rdtgroup_base_files));
	if (ret)
		goto out_destroy;

	kernfs_activate(kn);

	ret = 0;
	goto out_unlock;

	/* Unwind in reverse order of setup; each label falls into the next */
out_destroy:
	kernfs_remove(rdtgrp->kn);
out_cancel_ref:
	list_del(&rdtgrp->rdtgroup_list);
	kfree(rdtgrp);
out_closid_free:
	closid_free(closid);
out_unlock:
	rdtgroup_kn_unlock(parent_kn);
	return ret;
}
990 | ||
991 | static int rdtgroup_rmdir(struct kernfs_node *kn) | |
992 | { | |
0efc89be | 993 | int ret, cpu, closid = rdtgroup_default.closid; |
60cf5e10 | 994 | struct rdtgroup *rdtgrp; |
0efc89be FY |
995 | cpumask_var_t tmpmask; |
996 | ||
997 | if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) | |
998 | return -ENOMEM; | |
60cf5e10 FY |
999 | |
1000 | rdtgrp = rdtgroup_kn_lock_live(kn); | |
1001 | if (!rdtgrp) { | |
0efc89be FY |
1002 | ret = -EPERM; |
1003 | goto out; | |
60cf5e10 FY |
1004 | } |
1005 | ||
e02737d5 | 1006 | /* Give any tasks back to the default group */ |
0efc89be | 1007 | rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask); |
e02737d5 | 1008 | |
12e0110c TL |
1009 | /* Give any CPUs back to the default group */ |
1010 | cpumask_or(&rdtgroup_default.cpu_mask, | |
1011 | &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); | |
0efc89be FY |
1012 | |
1013 | /* Update per cpu closid of the moved CPUs first */ | |
1014 | for_each_cpu(cpu, &rdtgrp->cpu_mask) | |
1015 | per_cpu(cpu_closid, cpu) = closid; | |
1016 | /* | |
1017 | * Update the MSR on moved CPUs and CPUs which have moved | |
1018 | * task running on them. | |
1019 | */ | |
1020 | cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask); | |
1021 | rdt_update_closid(tmpmask, NULL); | |
12e0110c | 1022 | |
60cf5e10 FY |
1023 | rdtgrp->flags = RDT_DELETED; |
1024 | closid_free(rdtgrp->closid); | |
1025 | list_del(&rdtgrp->rdtgroup_list); | |
1026 | ||
1027 | /* | |
1028 | * one extra hold on this, will drop when we kfree(rdtgrp) | |
1029 | * in rdtgroup_kn_unlock() | |
1030 | */ | |
1031 | kernfs_get(kn); | |
1032 | kernfs_remove(rdtgrp->kn); | |
0efc89be FY |
1033 | ret = 0; |
1034 | out: | |
60cf5e10 | 1035 | rdtgroup_kn_unlock(kn); |
0efc89be FY |
1036 | free_cpumask_var(tmpmask); |
1037 | return ret; | |
60cf5e10 FY |
1038 | } |
1039 | ||
76ae054c SL |
1040 | static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) |
1041 | { | |
1042 | if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) | |
1043 | seq_puts(seq, ",cdp"); | |
1044 | return 0; | |
1045 | } | |
1046 | ||
5ff193fb | 1047 | static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { |
76ae054c SL |
1048 | .mkdir = rdtgroup_mkdir, |
1049 | .rmdir = rdtgroup_rmdir, | |
1050 | .show_options = rdtgroup_show_options, | |
5ff193fb FY |
1051 | }; |
1052 | ||
1053 | static int __init rdtgroup_setup_root(void) | |
1054 | { | |
12e0110c TL |
1055 | int ret; |
1056 | ||
5ff193fb FY |
1057 | rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops, |
1058 | KERNFS_ROOT_CREATE_DEACTIVATED, | |
1059 | &rdtgroup_default); | |
1060 | if (IS_ERR(rdt_root)) | |
1061 | return PTR_ERR(rdt_root); | |
1062 | ||
1063 | mutex_lock(&rdtgroup_mutex); | |
1064 | ||
1065 | rdtgroup_default.closid = 0; | |
1066 | list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); | |
1067 | ||
12e0110c TL |
1068 | ret = rdtgroup_add_files(rdt_root->kn, rdtgroup_base_files, |
1069 | ARRAY_SIZE(rdtgroup_base_files)); | |
1070 | if (ret) { | |
1071 | kernfs_destroy_root(rdt_root); | |
1072 | goto out; | |
1073 | } | |
1074 | ||
5ff193fb FY |
1075 | rdtgroup_default.kn = rdt_root->kn; |
1076 | kernfs_activate(rdtgroup_default.kn); | |
1077 | ||
12e0110c | 1078 | out: |
5ff193fb FY |
1079 | mutex_unlock(&rdtgroup_mutex); |
1080 | ||
12e0110c | 1081 | return ret; |
5ff193fb FY |
1082 | } |
1083 | ||
1084 | /* | |
1085 | * rdtgroup_init - rdtgroup initialization | |
1086 | * | |
1087 | * Setup resctrl file system including set up root, create mount point, | |
1088 | * register rdtgroup filesystem, and initialize files under root directory. | |
1089 | * | |
1090 | * Return: 0 on success or -errno | |
1091 | */ | |
1092 | int __init rdtgroup_init(void) | |
1093 | { | |
1094 | int ret = 0; | |
1095 | ||
1096 | ret = rdtgroup_setup_root(); | |
1097 | if (ret) | |
1098 | return ret; | |
1099 | ||
1100 | ret = sysfs_create_mount_point(fs_kobj, "resctrl"); | |
1101 | if (ret) | |
1102 | goto cleanup_root; | |
1103 | ||
1104 | ret = register_filesystem(&rdt_fs_type); | |
1105 | if (ret) | |
1106 | goto cleanup_mountpoint; | |
1107 | ||
1108 | return 0; | |
1109 | ||
1110 | cleanup_mountpoint: | |
1111 | sysfs_remove_mount_point(fs_kobj, "resctrl"); | |
1112 | cleanup_root: | |
1113 | kernfs_destroy_root(rdt_root); | |
1114 | ||
1115 | return ret; | |
1116 | } |