/*
 * Resource Director Technology (RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * More information about RDT can be found in the Intel(R) x86 Architecture
 * Software Developer Manual, June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>

#include <asm/intel-family.h>
#include <asm/intel_rdt.h>

#define MAX_MBA_BW	100u
#define MBA_IS_LINEAR	0x4

/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);

DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);

/*
 * The cached intel_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);

/*
 * Used to store the max resource name width and max resource data width
 * to display the schemata in a tabular format.
 */
int max_name_width, max_data_width;

static void
mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);

#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
struct rdt_resource rdt_resources_all[] = {
        {
                .name = "L3",
                .domains = domain_init(RDT_RESOURCE_L3),
                .msr_base = IA32_L3_CBM_BASE,
                .msr_update = cat_wrmsr,
                .cache_level = 3,
                .cache = {
                        .min_cbm_bits = 1,
                        .cbm_idx_mult = 1,
                        .cbm_idx_offset = 0,
                },
                .parse_ctrlval = parse_cbm,
                .format_str = "%d=%0*x",
        },
        {
                .name = "L3DATA",
                .domains = domain_init(RDT_RESOURCE_L3DATA),
                .msr_base = IA32_L3_CBM_BASE,
                .msr_update = cat_wrmsr,
                .cache_level = 3,
                .cache = {
                        .min_cbm_bits = 1,
                        .cbm_idx_mult = 2,
                        .cbm_idx_offset = 0,
                },
                .parse_ctrlval = parse_cbm,
                .format_str = "%d=%0*x",
        },
        {
                .name = "L3CODE",
                .domains = domain_init(RDT_RESOURCE_L3CODE),
                .msr_base = IA32_L3_CBM_BASE,
                .msr_update = cat_wrmsr,
                .cache_level = 3,
                .cache = {
                        .min_cbm_bits = 1,
                        .cbm_idx_mult = 2,
                        .cbm_idx_offset = 1,
                },
                .parse_ctrlval = parse_cbm,
                .format_str = "%d=%0*x",
        },
        {
                .name = "L2",
                .domains = domain_init(RDT_RESOURCE_L2),
                .msr_base = IA32_L2_CBM_BASE,
                .msr_update = cat_wrmsr,
                .cache_level = 2,
                .cache = {
                        .min_cbm_bits = 1,
                        .cbm_idx_mult = 1,
                        .cbm_idx_offset = 0,
                },
                .parse_ctrlval = parse_cbm,
                .format_str = "%d=%0*x",
        },
        {
                .name = "MB",
                .domains = domain_init(RDT_RESOURCE_MBA),
                .msr_base = IA32_MBA_THRTL_BASE,
                .msr_update = mba_wrmsr,
                .cache_level = 3,
                .parse_ctrlval = parse_bw,
                .format_str = "%d=%*d",
        },
};

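/*
 * cbm_idx() maps a CLOSID to the index of its mask MSR relative to
 * msr_base. For plain CAT this is the identity mapping; with CDP the
 * data and code masks of one CLOSID sit in adjacent MSRs, which the
 * L3DATA/L3CODE entries above express as cbm_idx_mult = 2 with
 * offsets 0 and 1 respectively.
 */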
static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
{
        return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset;
}

/*
 * cache_alloc_hsw_probe() - Have to probe for Intel Haswell server CPUs
 * as they do not have CPUID enumeration support for Cache allocation.
 * The check for Vendor/Family/Model is not enough to guarantee that
 * the MSRs won't #GP fault because only the following SKUs support
 * CAT:
 *	Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz
 *	Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz
 *	Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz
 *	Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz
 *
 * Probe by trying to write the first of the L3 cache mask registers
 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
 * is always 20 on hsw server parts. The minimum cache bitmask length
 * allowed for HSW server is always 2 bits. Hardcode all of them.
 */
static inline bool cache_alloc_hsw_probe(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
            boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model == INTEL_FAM6_HASWELL_X) {
                struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
                u32 l, h, max_cbm = BIT_MASK(20) - 1;

                if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0))
                        return false;
                rdmsr(IA32_L3_CBM_BASE, l, h);

                /* If all the bits were set in MSR, return success */
                if (l != max_cbm)
                        return false;

                r->num_closid = 4;
                r->default_ctrl = max_cbm;
                r->cache.cbm_len = 20;
                r->cache.min_cbm_bits = 2;
                r->capable = true;
                r->enabled = true;

                return true;
        }

        return false;
}

/*
 * rdt_get_mb_table() - get a mapping between the bandwidth (b/w) percentage
 * values exposed to the user interface and the h/w understandable delay
 * values.
 *
 * The non-linear delay values have the granularity of power of two
 * and also the h/w does not guarantee a curve for configured delay
 * values vs. actual b/w enforced.
 * Hence we need a mapping that is pre calibrated so the user can
 * express the memory b/w as a percentage value.
 */
static inline bool rdt_get_mb_table(struct rdt_resource *r)
{
        /*
         * There are no Intel SKUs as of now to support non-linear delay.
         */
        pr_info("MBA b/w map not implemented for cpu:%d, model:%d\n",
                boot_cpu_data.x86, boot_cpu_data.x86_model);

        return false;
}

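/*
 * CPUID leaf 0x10, subleaf 3 enumerates MBA: EDX holds the highest
 * supported COS, EAX the maximum throttling delay and ECX bit 2 whether
 * the delay scale is linear. For a linear scale both the minimum
 * bandwidth and the granularity work out to (100 - max_delay) percent,
 * e.g. a maximum delay of 90 gives 10% steps.
 */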
static bool rdt_get_mem_config(struct rdt_resource *r)
{
        union cpuid_0x10_3_eax eax;
        union cpuid_0x10_x_edx edx;
        u32 ebx, ecx;

        cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
        r->num_closid = edx.split.cos_max + 1;
        r->membw.max_delay = eax.split.max_delay + 1;
        r->default_ctrl = MAX_MBA_BW;
        if (ecx & MBA_IS_LINEAR) {
                r->membw.delay_linear = true;
                r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
                r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
        } else {
                if (!rdt_get_mb_table(r))
                        return false;
        }
        r->data_width = 3;
        rdt_get_mba_infofile(r);

        r->capable = true;
        r->enabled = true;

        return true;
}

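/*
 * CPUID leaf 0x10, subleaf 1 (L3) or 2 (L2) enumerates cache allocation:
 * EAX reports the capacity bitmask length minus one and EDX the highest
 * supported COS. The default control value is the all-ones bitmask,
 * i.e. no cache partitioning.
 */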
static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
        union cpuid_0x10_1_eax eax;
        union cpuid_0x10_x_edx edx;
        u32 ebx, ecx;

        cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
        r->num_closid = edx.split.cos_max + 1;
        r->cache.cbm_len = eax.split.cbm_len + 1;
        r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
        r->data_width = (r->cache.cbm_len + 3) / 4;
        r->capable = true;
        r->enabled = true;
}

static void rdt_get_cdp_l3_config(int type)
{
        struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
        struct rdt_resource *r = &rdt_resources_all[type];

        r->num_closid = r_l3->num_closid / 2;
        r->cache.cbm_len = r_l3->cache.cbm_len;
        r->default_ctrl = r_l3->default_ctrl;
        r->data_width = (r->cache.cbm_len + 3) / 4;
        r->capable = true;
        /*
         * By default, CDP is disabled. CDP can be enabled by mount parameter
         * "cdp" during resctrl file system mount time.
         */
        r->enabled = false;
}

static int get_cache_id(int cpu, int level)
{
        struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
        int i;

        for (i = 0; i < ci->num_leaves; i++) {
                if (ci->info_list[i].level == level)
                        return ci->info_list[i].id;
        }

        return -1;
}

/*
 * Map the memory b/w percentage value to delay values
 * that can be written to QOS_MSRs.
 * There are currently no SKUs which support non linear delay values.
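 * For example, on a part with a linear delay scale a request for 70%
 * bandwidth is programmed as a throttle value of 100 - 70 = 30.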
 */
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
        if (r->membw.delay_linear)
                return MAX_MBA_BW - bw;

        pr_warn_once("Non Linear delay-bw map not supported but queried\n");
        return r->default_ctrl;
}

static void
mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
        unsigned int i;

        /* Write the delay values for mba. */
        for (i = m->low; i < m->high; i++)
                wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r));
}

static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
        unsigned int i;

        for (i = m->low; i < m->high; i++)
                wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]);
}

void rdt_ctrl_update(void *arg)
{
        struct msr_param *m = arg;
        struct rdt_resource *r = m->res;
        int cpu = smp_processor_id();
        struct rdt_domain *d;

        list_for_each_entry(d, &r->domains, list) {
                /* Find the domain that contains this CPU */
                if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
                        r->msr_update(d, m, r);
                        return;
                }
        }
        pr_warn_once("cpu %d not found in any domain for resource %s\n",
                     cpu, r->name);
}

/*
 * rdt_find_domain - Find a domain in a resource that matches input resource id
 *
 * Search resource r's domain list to find the resource id. If the resource
 * id is found in a domain, return the domain. Otherwise, if requested by
 * caller, return the first domain whose id is bigger than the input id.
 * The domain list is sorted by id in ascending order.
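 *
 * Return: the matching domain, NULL when no domain with that id exists
 * (with *pos set to the insertion point if requested), or ERR_PTR() when
 * a negative id is passed in.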
 */
static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
                                          struct list_head **pos)
{
        struct rdt_domain *d;
        struct list_head *l;

        if (id < 0)
                return ERR_PTR(id);

        list_for_each(l, &r->domains) {
                d = list_entry(l, struct rdt_domain, list);
                /* When id is found, return its domain. */
                if (id == d->id)
                        return d;
                /* Stop searching when finding id's position in sorted list. */
                if (id < d->id)
                        break;
        }

        if (pos)
                *pos = l;

        return NULL;
}

static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
        struct msr_param m;
        u32 *dc;
        int i;

        dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
        if (!dc)
                return -ENOMEM;

        d->ctrl_val = dc;

        /*
         * Initialize the Control MSRs to having no control.
         * For Cache Allocation: Set all bits in cbm
         * For Memory Allocation: Set b/w requested to 100
         */
        for (i = 0; i < r->num_closid; i++, dc++)
                *dc = r->default_ctrl;

        m.low = 0;
        m.high = r->num_closid;
        r->msr_update(d, &m, r);
        return 0;
}

/*
 * domain_add_cpu - Add a cpu to a resource's domain list.
 *
 * If an existing domain in the resource r's domain list matches the cpu's
 * resource id, add the cpu in the domain.
 *
 * Otherwise, a new domain is allocated and inserted into the right position
 * in the domain list sorted by id in ascending order.
 *
 * The order in the domain list is visible to users when we print entries
 * in the schemata file and schemata input is validated to have the same order
 * as this list.
 */
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
        int id = get_cache_id(cpu, r->cache_level);
        struct list_head *add_pos = NULL;
        struct rdt_domain *d;

        d = rdt_find_domain(r, id, &add_pos);
        if (IS_ERR(d)) {
                pr_warn("Couldn't find cache id for cpu %d\n", cpu);
                return;
        }

        if (d) {
                cpumask_set_cpu(cpu, &d->cpu_mask);
                return;
        }

        d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
        if (!d)
                return;

        d->id = id;

        if (domain_setup_ctrlval(r, d)) {
                kfree(d);
                return;
        }

        cpumask_set_cpu(cpu, &d->cpu_mask);
        list_add_tail(&d->list, add_pos);
}

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
        int id = get_cache_id(cpu, r->cache_level);
        struct rdt_domain *d;

        d = rdt_find_domain(r, id, NULL);
        if (IS_ERR_OR_NULL(d)) {
                pr_warn("Couldn't find cache id for cpu %d\n", cpu);
                return;
        }

        cpumask_clear_cpu(cpu, &d->cpu_mask);
        if (cpumask_empty(&d->cpu_mask)) {
                kfree(d->ctrl_val);
                list_del(&d->list);
                kfree(d);
        }
}

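/*
 * IA32_PQR_ASSOC carries the RMID in its low 32 bits and the CLOSID in
 * its high 32 bits, so writing (rmid, 0) moves the CPU back to CLOSID 0,
 * the default group, without disturbing the monitoring RMID.
 */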
static void clear_closid(int cpu)
{
        struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);

        per_cpu(cpu_closid, cpu) = 0;
        state->closid = 0;
        wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
}

static int intel_rdt_online_cpu(unsigned int cpu)
{
        struct rdt_resource *r;

        mutex_lock(&rdtgroup_mutex);
        for_each_capable_rdt_resource(r)
                domain_add_cpu(cpu, r);
        /* The cpu is set in default rdtgroup after online. */
        cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
        clear_closid(cpu);
        mutex_unlock(&rdtgroup_mutex);

        return 0;
}

static int intel_rdt_offline_cpu(unsigned int cpu)
{
        struct rdtgroup *rdtgrp;
        struct rdt_resource *r;

        mutex_lock(&rdtgroup_mutex);
        for_each_capable_rdt_resource(r)
                domain_remove_cpu(cpu, r);
        list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
                if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask))
                        break;
        }
        clear_closid(cpu);
        mutex_unlock(&rdtgroup_mutex);

        return 0;
}

/*
 * Choose a width for the resource name and resource data based on the
 * resource that has widest name and cbm.
 */
static __init void rdt_init_padding(void)
{
        struct rdt_resource *r;
        int cl;

        for_each_capable_rdt_resource(r) {
                cl = strlen(r->name);
                if (cl > max_name_width)
                        max_name_width = cl;

                if (r->data_width > max_data_width)
                        max_data_width = r->data_width;
        }
}

static __init bool get_rdt_resources(void)
{
        bool ret = false;

        if (cache_alloc_hsw_probe())
                return true;

        if (!boot_cpu_has(X86_FEATURE_RDT_A))
                return false;

        if (boot_cpu_has(X86_FEATURE_CAT_L3)) {
                rdt_get_cache_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
                if (boot_cpu_has(X86_FEATURE_CDP_L3)) {
                        rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
                        rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
                }
                ret = true;
        }
        if (boot_cpu_has(X86_FEATURE_CAT_L2)) {
                /* CPUID 0x10.2 fields are the same format as 0x10.1 */
                rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
                ret = true;
        }

        if (boot_cpu_has(X86_FEATURE_MBA)) {
                if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
                        ret = true;
        }

        return ret;
}

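/*
 * cpuhp_setup_state() with CPUHP_AP_ONLINE_DYN allocates a dynamic
 * hotplug state and returns its id, which is needed to remove the
 * callbacks again if rdtgroup_init() fails.
 */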
static int __init intel_rdt_late_init(void)
{
        struct rdt_resource *r;
        int state, ret;

        if (!get_rdt_resources())
                return -ENODEV;

        rdt_init_padding();

        state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                                  "x86/rdt/cat:online:",
                                  intel_rdt_online_cpu, intel_rdt_offline_cpu);
        if (state < 0)
                return state;

        ret = rdtgroup_init();
        if (ret) {
                cpuhp_remove_state(state);
                return ret;
        }

        for_each_capable_rdt_resource(r)
                pr_info("Intel RDT %s allocation detected\n", r->name);

        return 0;
}

late_initcall(intel_rdt_late_init);