/*
 * Resource Director Technology(RDT)
 * - Cache Allocation code.
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Authors:
 *    Fenghua Yu <fenghua.yu@intel.com>
 *    Tony Luck <tony.luck@intel.com>
 *    Vikas Shivappa <vikas.shivappa@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * More information about RDT can be found in the Intel(R) x86 Architecture
 * Software Developer Manual, June 2016, volume 3, section 17.17.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/err.h>
#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>

#include <asm/intel-family.h>
#include <asm/intel_rdt.h>

#define MAX_MBA_BW	100u
#define MBA_IS_LINEAR	0x4

/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);

DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);

/*
 * The cached intel_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Functions which modify the state
 * are called with interrupts disabled and no preemption, which
 * is sufficient for the protection.
 */
DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);

/*
 * Used to store the max resource name width and max resource data width
 * to display the schemata in a tabular format.
 */
int max_name_width, max_data_width;

static void
mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);

#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)

struct rdt_resource rdt_resources_all[] = {
	{
		.name		= "L3",
		.domains	= domain_init(RDT_RESOURCE_L3),
		.msr_base	= IA32_L3_CBM_BASE,
		.msr_update	= cat_wrmsr,
		.cache_level	= 3,
		.cache = {
			.min_cbm_bits	= 1,
			.cbm_idx_mult	= 1,
			.cbm_idx_offset	= 0,
		},
		.parse_ctrlval	= parse_cbm,
		.format_str	= "%d=%0*x",
	},
	{
		.name		= "L3DATA",
		.domains	= domain_init(RDT_RESOURCE_L3DATA),
		.msr_base	= IA32_L3_CBM_BASE,
		.msr_update	= cat_wrmsr,
		.cache_level	= 3,
		.cache = {
			.min_cbm_bits	= 1,
			.cbm_idx_mult	= 2,
			.cbm_idx_offset	= 0,
		},
		.parse_ctrlval	= parse_cbm,
		.format_str	= "%d=%0*x",
	},
	{
		.name		= "L3CODE",
		.domains	= domain_init(RDT_RESOURCE_L3CODE),
		.msr_base	= IA32_L3_CBM_BASE,
		.msr_update	= cat_wrmsr,
		.cache_level	= 3,
		.cache = {
			.min_cbm_bits	= 1,
			.cbm_idx_mult	= 2,
			.cbm_idx_offset	= 1,
		},
		.parse_ctrlval	= parse_cbm,
		.format_str	= "%d=%0*x",
	},
	{
		.name		= "L2",
		.domains	= domain_init(RDT_RESOURCE_L2),
		.msr_base	= IA32_L2_CBM_BASE,
		.msr_update	= cat_wrmsr,
		.cache_level	= 2,
		.cache = {
			.min_cbm_bits	= 1,
			.cbm_idx_mult	= 1,
			.cbm_idx_offset	= 0,
		},
		.parse_ctrlval	= parse_cbm,
		.format_str	= "%d=%0*x",
	},
	{
		.name		= "MB",
		.domains	= domain_init(RDT_RESOURCE_MBA),
		.msr_base	= IA32_MBA_THRTL_BASE,
		.msr_update	= mba_wrmsr,
		.cache_level	= 3,
		.parse_ctrlval	= parse_bw,
		.format_str	= "%d=%*d",
	},
};

static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
{
	return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset;
}

/*
 * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
 * as they do not have CPUID enumeration support for Cache allocation.
 * The check for Vendor/Family/Model is not enough to guarantee that
 * the MSRs won't #GP fault because only the following SKUs support
 * CAT:
 *	Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz
 *	Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz
 *	Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz
 *	Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz
 *	Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz
 *
 * Probe by trying to write the first of the L3 cache mask registers
 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
 * is always 20 on hsw server parts. The minimum cache bitmask length
 * allowed for HSW server is always 2 bits. Hardcode all of them.
 */
static inline bool cache_alloc_hsw_probe(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    boot_cpu_data.x86 == 6 &&
	    boot_cpu_data.x86_model == INTEL_FAM6_HASWELL_X) {
		struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
		u32 l, h, max_cbm = BIT_MASK(20) - 1;

		if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0))
			return false;
		rdmsr(IA32_L3_CBM_BASE, l, h);

		/* If all the bits were set in MSR, return success */
		if (l != max_cbm)
			return false;

		r->num_closid = 4;
		r->default_ctrl = max_cbm;
		r->cache.cbm_len = 20;
		r->cache.min_cbm_bits = 2;
		r->capable = true;
		r->enabled = true;

		return true;
	}

	return false;
}

/*
 * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
 * exposed to the user interface and the h/w understandable delay values.
 *
 * The non-linear delay values have a power-of-two granularity, and the
 * h/w does not guarantee a curve for configured delay values vs. actual
 * b/w enforced.
 * Hence we need a mapping that is pre-calibrated so the user can
 * express the memory b/w as a percentage value.
 */
static inline bool rdt_get_mb_table(struct rdt_resource *r)
{
	/*
	 * There are no Intel SKUs as of now to support non-linear delay.
	 */
	pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
		boot_cpu_data.x86, boot_cpu_data.x86_model);

	return false;
}

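/*
 * rdt_get_mem_config() - enumerate MBA via CPUID.(EAX=10H, ECX=3):
 * EAX carries the maximum throttle delay, EDX the maximum COS, and
 * ECX bit 2 (MBA_IS_LINEAR) tells whether the delay scale is linear.
 */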
static bool rdt_get_mem_config(struct rdt_resource *r)
{
	union cpuid_0x10_3_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx;

	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	r->num_closid = edx.split.cos_max + 1;
	r->membw.max_delay = eax.split.max_delay + 1;
	r->default_ctrl = MAX_MBA_BW;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
		r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
		r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
	} else {
		if (!rdt_get_mb_table(r))
			return false;
	}
	r->data_width = 3;
	rdt_get_mba_infofile(r);

	r->capable = true;
	r->enabled = true;

	return true;
}

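/*
 * rdt_get_cache_config() - enumerate a CAT resource from CPUID leaf 0x10.
 * @idx is the subleaf (1 for L3, 2 for L2). The hardware reports the
 * capacity bitmask length and the maximum COS in "minus one" form, so
 * both are incremented by one below.
 */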
static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx;

	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
	r->num_closid = edx.split.cos_max + 1;
	r->cache.cbm_len = eax.split.cbm_len + 1;
	r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->data_width = (r->cache.cbm_len + 3) / 4;
	rdt_get_cache_infofile(r);
	r->capable = true;
	r->enabled = true;
}

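/*
 * With CDP the L3 CLOSID space is split in half: each remaining CLOSID
 * owns a pair of masks, one for code and one for data, so L3DATA and
 * L3CODE inherit the L3 cbm_len but only half the CLOSIDs.
 */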
static void rdt_get_cdp_l3_config(int type)
{
	struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
	struct rdt_resource *r = &rdt_resources_all[type];

	r->num_closid = r_l3->num_closid / 2;
	r->cache.cbm_len = r_l3->cache.cbm_len;
	r->default_ctrl = r_l3->default_ctrl;
	r->data_width = (r->cache.cbm_len + 3) / 4;
	r->capable = true;
	/*
	 * By default, CDP is disabled. CDP can be enabled by mount parameter
	 * "cdp" during resctrl file system mount time.
	 */
	r->enabled = false;
}

static int get_cache_id(int cpu, int level)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
	int i;

	for (i = 0; i < ci->num_leaves; i++) {
		if (ci->info_list[i].level == level)
			return ci->info_list[i].id;
	}

	return -1;
}

/*
 * Map the memory b/w percentage value to delay values
 * that can be written to QOS_MSRs.
 * There are currently no SKUs which support non linear delay values.
 */
static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
	if (r->membw.delay_linear)
		return MAX_MBA_BW - bw;

	pr_warn_once("Non Linear delay-bw map not supported but queried\n");
	return r->default_ctrl;
}

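/*
 * mba_wrmsr() programs the throttle MSRs for closids [m->low, m->high),
 * translating each stored b/w percentage through delay_bw_map(). On
 * linear-scale parts, e.g., a schemata value of 80 is written out as a
 * delay of MAX_MBA_BW - 80 = 20.
 */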
static void
mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
	unsigned int i;

	/* Write the delay values for mba. */
	for (i = m->low; i < m->high; i++)
		wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r));
}

static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
	unsigned int i;

	for (i = m->low; i < m->high; i++)
		wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]);
}

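/*
 * rdt_ctrl_update() runs on a CPU inside the domain being updated
 * (typically sent there via an IPI from the schemata write path) and
 * pushes the new control values into the hardware MSRs.
 */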
void rdt_ctrl_update(void *arg)
{
	struct msr_param *m = arg;
	struct rdt_resource *r = m->res;
	int cpu = smp_processor_id();
	struct rdt_domain *d;

	list_for_each_entry(d, &r->domains, list) {
		/* Find the domain that contains this CPU */
		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
			r->msr_update(d, m, r);
			return;
		}
	}
	pr_warn_once("cpu %d not found in any domain for resource %s\n",
		     cpu, r->name);
}

/*
 * rdt_find_domain - Find a domain in a resource that matches input resource id
 *
 * Search resource r's domain list to find the resource id. If the resource
 * id is found in a domain, return the domain. Otherwise, if requested by
 * caller, return the first domain whose id is bigger than the input id.
 * The domain list is sorted by id in ascending order.
 */
static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
					  struct list_head **pos)
{
	struct rdt_domain *d;
	struct list_head *l;

	if (id < 0)
		return ERR_PTR(id);

	list_for_each(l, &r->domains) {
		d = list_entry(l, struct rdt_domain, list);
		/* When id is found, return its domain. */
		if (id == d->id)
			return d;
		/* Stop searching when finding id's position in sorted list. */
		if (id < d->id)
			break;
	}

	if (pos)
		*pos = l;

	return NULL;
}

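/*
 * domain_setup_ctrlval() - allocate the per-domain array of control
 * values (one entry per closid), fill it with the resource's default,
 * and push the defaults into the hardware MSRs.
 */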
static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
	struct msr_param m;
	u32 *dc;
	int i;

	dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
	if (!dc)
		return -ENOMEM;

	d->ctrl_val = dc;

	/*
	 * Initialize the Control MSRs to having no control.
	 * For Cache Allocation: Set all bits in cbm
	 * For Memory Allocation: Set b/w requested to 100
	 */
	for (i = 0; i < r->num_closid; i++, dc++)
		*dc = r->default_ctrl;

	m.low = 0;
	m.high = r->num_closid;
	r->msr_update(d, &m, r);

	return 0;
}

/*
 * domain_add_cpu - Add a cpu to a resource's domain list.
 *
 * If an existing domain in the resource r's domain list matches the cpu's
 * resource id, add the cpu in the domain.
 *
 * Otherwise, a new domain is allocated and inserted into the right position
 * in the domain list sorted by id in ascending order.
 *
 * The order in the domain list is visible to users when we print entries
 * in the schemata file and schemata input is validated to have the same order
 * as this list.
 */
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cache_id(cpu, r->cache_level);
	struct list_head *add_pos = NULL;
	struct rdt_domain *d;

	d = rdt_find_domain(r, id, &add_pos);
	if (IS_ERR(d)) {
		pr_warn("Couldn't find cache id for cpu %d\n", cpu);
		return;
	}

	if (d) {
		cpumask_set_cpu(cpu, &d->cpu_mask);
		return;
	}

	d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
	if (!d)
		return;

	d->id = id;

	if (domain_setup_ctrlval(r, d)) {
		kfree(d);
		return;
	}

	cpumask_set_cpu(cpu, &d->cpu_mask);
	list_add_tail(&d->list, add_pos);
}

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cache_id(cpu, r->cache_level);
	struct rdt_domain *d;

	d = rdt_find_domain(r, id, NULL);
	if (IS_ERR_OR_NULL(d)) {
		pr_warn("Couldn't find cache id for cpu %d\n", cpu);
		return;
	}

	cpumask_clear_cpu(cpu, &d->cpu_mask);
	if (cpumask_empty(&d->cpu_mask)) {
		kfree(d->ctrl_val);
		list_del(&d->list);
		kfree(d);
	}
}

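/*
 * clear_closid() - reset @cpu back to the default closid 0. Only the
 * closid half of MSR_IA32_PQR_ASSOC is rewritten; the cached rmid is
 * preserved.
 */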
static void clear_closid(int cpu)
{
	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);

	per_cpu(cpu_closid, cpu) = 0;
	state->closid = 0;
	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
}

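/*
 * CPU hotplug callbacks: build up or tear down the per-resource domains
 * as CPUs come and go, always under rdtgroup_mutex.
 */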
static int intel_rdt_online_cpu(unsigned int cpu)
{
	struct rdt_resource *r;

	mutex_lock(&rdtgroup_mutex);
	for_each_capable_rdt_resource(r)
		domain_add_cpu(cpu, r);
	/* The cpu is set in default rdtgroup after online. */
	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
	clear_closid(cpu);
	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

static int intel_rdt_offline_cpu(unsigned int cpu)
{
	struct rdtgroup *rdtgrp;
	struct rdt_resource *r;

	mutex_lock(&rdtgroup_mutex);
	for_each_capable_rdt_resource(r)
		domain_remove_cpu(cpu, r);
	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask))
			break;
	}
	clear_closid(cpu);
	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

/*
 * Choose a width for the resource name and resource data based on the
 * resource that has the widest name and cbm.
 */
static __init void rdt_init_padding(void)
{
	struct rdt_resource *r;
	int cl;

	for_each_capable_rdt_resource(r) {
		cl = strlen(r->name);
		if (cl > max_name_width)
			max_name_width = cl;

		if (r->data_width > max_data_width)
			max_data_width = r->data_width;
	}
}

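/*
 * get_rdt_resources() - probe which allocation resources this CPU
 * supports and fill in rdt_resources_all accordingly. Returns true if
 * at least one resource is usable.
 */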
static __init bool get_rdt_resources(void)
{
	bool ret = false;

	if (cache_alloc_hsw_probe())
		return true;

	if (!boot_cpu_has(X86_FEATURE_RDT_A))
		return false;

	if (boot_cpu_has(X86_FEATURE_CAT_L3)) {
		rdt_get_cache_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
		if (boot_cpu_has(X86_FEATURE_CDP_L3)) {
			rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
			rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
		}
		ret = true;
	}
	if (boot_cpu_has(X86_FEATURE_CAT_L2)) {
		/* CPUID 0x10.2 fields are same format as 0x10.1 */
		rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
		ret = true;
	}

	if (boot_cpu_has(X86_FEATURE_MBA)) {
		if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
			ret = true;
	}

	return ret;
}

static int __init intel_rdt_late_init(void)
{
	struct rdt_resource *r;
	int state, ret;

	if (!get_rdt_resources())
		return -ENODEV;

	rdt_init_padding();

	state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				  "x86/rdt/cat:online:",
				  intel_rdt_online_cpu, intel_rdt_offline_cpu);
	if (state < 0)
		return state;

	ret = rdtgroup_init();
	if (ret) {
		cpuhp_remove_state(state);
		return ret;
	}

	for_each_capable_rdt_resource(r)
		pr_info("Intel RDT %s allocation detected\n", r->name);

	return 0;
}

late_initcall(intel_rdt_late_init);