]>
Commit | Line | Data |
---|---|---|
78e99b4a FY |
1 | /* |
2 | * Resource Director Technology(RDT) | |
3 | * - Cache Allocation code. | |
4 | * | |
5 | * Copyright (C) 2016 Intel Corporation | |
6 | * | |
7 | * Authors: | |
8 | * Fenghua Yu <fenghua.yu@intel.com> | |
9 | * Tony Luck <tony.luck@intel.com> | |
10 | * Vikas Shivappa <vikas.shivappa@intel.com> | |
11 | * | |
12 | * This program is free software; you can redistribute it and/or modify it | |
13 | * under the terms and conditions of the GNU General Public License, | |
14 | * version 2, as published by the Free Software Foundation. | |
15 | * | |
16 | * This program is distributed in the hope it will be useful, but WITHOUT | |
17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
19 | * more details. | |
20 | * | |
21 | * More information about RDT can be found in the Intel (R) x86 Architecture | |
22 | * Software Developer Manual June 2016, volume 3, section 17.17. | |
23 | */ | |
24 | ||
25 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
26 | ||
27 | #include <linux/slab.h> | |
28 | #include <linux/err.h> | |
2264d9c7 TL |
29 | #include <linux/cacheinfo.h> |
30 | #include <linux/cpuhotplug.h> | |
78e99b4a | 31 | |
113c6097 | 32 | #include <asm/intel-family.h> |
05830204 VS |
33 | #include <asm/intel_rdt_sched.h> |
34 | #include "intel_rdt.h" | |
113c6097 | 35 | |
05b93417 VS |
36 | #define MAX_MBA_BW 100u |
37 | #define MBA_IS_LINEAR 0x4 | |
38 | ||
2264d9c7 TL |
39 | /* Mutex to protect rdtgroup access. */ |
40 | DEFINE_MUTEX(rdtgroup_mutex); | |
41 | ||
c39a0e2c VS |
42 | /* |
43 | * The cached intel_pqr_state is strictly per CPU and can never be | |
44 | * updated from a remote CPU. Functions which modify the state | |
45 | * are called with interrupts disabled and no preemption, which | |
46 | * is sufficient for the protection. | |
47 | */ | |
48 | DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); | |
49 | ||
de016df8 VS |
50 | /* |
51 | * Used to store the max resource name width and max resource data width | |
52 | * to display the schemata in a tabular format | |
53 | */ | |
54 | int max_name_width, max_data_width; | |
55 | ||
6a445edc VS |
56 | /* |
57 | * Global boolean for rdt_alloc which is true if any | |
58 | * resource allocation is enabled. | |
59 | */ | |
60 | bool rdt_alloc_capable; | |
61 | ||
05b93417 VS |
62 | static void |
63 | mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); | |
0921c547 TG |
64 | static void |
65 | cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); | |
66 | ||
d3e11b4d TG |
67 | #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains) |
68 | ||
c1c7c3f9 | 69 | struct rdt_resource rdt_resources_all[] = { |
dd131853 | 70 | [RDT_RESOURCE_L3] = |
c1c7c3f9 | 71 | { |
d89b7379 | 72 | .rid = RDT_RESOURCE_L3, |
d3e11b4d TG |
73 | .name = "L3", |
74 | .domains = domain_init(RDT_RESOURCE_L3), | |
75 | .msr_base = IA32_L3_CBM_BASE, | |
0921c547 | 76 | .msr_update = cat_wrmsr, |
d3e11b4d TG |
77 | .cache_level = 3, |
78 | .cache = { | |
79 | .min_cbm_bits = 1, | |
80 | .cbm_idx_mult = 1, | |
81 | .cbm_idx_offset = 0, | |
82 | }, | |
c6ea67de VS |
83 | .parse_ctrlval = parse_cbm, |
84 | .format_str = "%d=%0*x", | |
5dc1d5c6 | 85 | .fflags = RFTYPE_RES_CACHE, |
c1c7c3f9 | 86 | }, |
dd131853 | 87 | [RDT_RESOURCE_L3DATA] = |
c1c7c3f9 | 88 | { |
d89b7379 | 89 | .rid = RDT_RESOURCE_L3DATA, |
d3e11b4d TG |
90 | .name = "L3DATA", |
91 | .domains = domain_init(RDT_RESOURCE_L3DATA), | |
92 | .msr_base = IA32_L3_CBM_BASE, | |
0921c547 | 93 | .msr_update = cat_wrmsr, |
d3e11b4d TG |
94 | .cache_level = 3, |
95 | .cache = { | |
96 | .min_cbm_bits = 1, | |
97 | .cbm_idx_mult = 2, | |
98 | .cbm_idx_offset = 0, | |
99 | }, | |
c6ea67de VS |
100 | .parse_ctrlval = parse_cbm, |
101 | .format_str = "%d=%0*x", | |
5dc1d5c6 | 102 | .fflags = RFTYPE_RES_CACHE, |
c1c7c3f9 | 103 | }, |
dd131853 | 104 | [RDT_RESOURCE_L3CODE] = |
c1c7c3f9 | 105 | { |
d89b7379 | 106 | .rid = RDT_RESOURCE_L3CODE, |
d3e11b4d TG |
107 | .name = "L3CODE", |
108 | .domains = domain_init(RDT_RESOURCE_L3CODE), | |
109 | .msr_base = IA32_L3_CBM_BASE, | |
0921c547 | 110 | .msr_update = cat_wrmsr, |
d3e11b4d TG |
111 | .cache_level = 3, |
112 | .cache = { | |
113 | .min_cbm_bits = 1, | |
114 | .cbm_idx_mult = 2, | |
115 | .cbm_idx_offset = 1, | |
116 | }, | |
c6ea67de VS |
117 | .parse_ctrlval = parse_cbm, |
118 | .format_str = "%d=%0*x", | |
5dc1d5c6 | 119 | .fflags = RFTYPE_RES_CACHE, |
c1c7c3f9 | 120 | }, |
dd131853 | 121 | [RDT_RESOURCE_L2] = |
c1c7c3f9 | 122 | { |
d89b7379 | 123 | .rid = RDT_RESOURCE_L2, |
d3e11b4d TG |
124 | .name = "L2", |
125 | .domains = domain_init(RDT_RESOURCE_L2), | |
126 | .msr_base = IA32_L2_CBM_BASE, | |
0921c547 | 127 | .msr_update = cat_wrmsr, |
d3e11b4d TG |
128 | .cache_level = 2, |
129 | .cache = { | |
130 | .min_cbm_bits = 1, | |
131 | .cbm_idx_mult = 1, | |
132 | .cbm_idx_offset = 0, | |
133 | }, | |
c6ea67de VS |
134 | .parse_ctrlval = parse_cbm, |
135 | .format_str = "%d=%0*x", | |
5dc1d5c6 | 136 | .fflags = RFTYPE_RES_CACHE, |
c1c7c3f9 | 137 | }, |
dd131853 | 138 | [RDT_RESOURCE_MBA] = |
05b93417 | 139 | { |
d89b7379 | 140 | .rid = RDT_RESOURCE_MBA, |
05b93417 VS |
141 | .name = "MB", |
142 | .domains = domain_init(RDT_RESOURCE_MBA), | |
143 | .msr_base = IA32_MBA_THRTL_BASE, | |
144 | .msr_update = mba_wrmsr, | |
145 | .cache_level = 3, | |
64e8ed3d VS |
146 | .parse_ctrlval = parse_bw, |
147 | .format_str = "%d=%*d", | |
5dc1d5c6 | 148 | .fflags = RFTYPE_RES_MB, |
05b93417 | 149 | }, |
c1c7c3f9 FY |
150 | }; |
151 | ||
d3e11b4d | 152 | static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid) |
2264d9c7 | 153 | { |
d3e11b4d | 154 | return closid * r->cache.cbm_idx_mult + r->cache.cbm_idx_offset; |
2264d9c7 TL |
155 | } |
156 | ||
113c6097 FY |
157 | /* |
158 | * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs | |
159 | * as they do not have CPUID enumeration support for Cache allocation. | |
160 | * The check for Vendor/Family/Model is not enough to guarantee that | |
161 | * the MSRs won't #GP fault because only the following SKUs support | |
162 | * CAT: | |
163 | * Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz | |
164 | * Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz | |
165 | * Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz | |
166 | * Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz | |
167 | * Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz | |
168 | * Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz | |
169 | * | |
170 | * Probe by trying to write the first of the L3 cache mask registers | |
171 | * and checking that the bits stick. Max CLOSids is always 4 and max cbm length | |
172 | * is always 20 on hsw server parts. The minimum cache bitmask length | |
173 | * allowed for HSW server is always 2 bits. Hardcode all of them. | |
174 | */ | |
0576113a | 175 | static inline void cache_alloc_hsw_probe(void) |
113c6097 | 176 | { |
0576113a TL |
177 | struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; |
178 | u32 l, h, max_cbm = BIT_MASK(20) - 1; | |
113c6097 | 179 | |
0576113a TL |
180 | if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0)) |
181 | return; | |
182 | rdmsr(IA32_L3_CBM_BASE, l, h); | |
c1c7c3f9 | 183 | |
0576113a TL |
184 | /* If all the bits were set in MSR, return success */ |
185 | if (l != max_cbm) | |
186 | return; | |
c1c7c3f9 | 187 | |
0576113a TL |
188 | r->num_closid = 4; |
189 | r->default_ctrl = max_cbm; | |
190 | r->cache.cbm_len = 20; | |
191 | r->cache.shareable_bits = 0xc0000; | |
192 | r->cache.min_cbm_bits = 2; | |
193 | r->alloc_capable = true; | |
194 | r->alloc_enabled = true; | |
113c6097 | 195 | |
0576113a | 196 | rdt_alloc_capable = true; |
113c6097 FY |
197 | } |
198 | ||
05b93417 VS |
199 | /* |
200 | * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values | |
201 | * exposed to user interface and the h/w understandable delay values. | |
202 | * | |
203 | * The non-linear delay values have the granularity of power of two | |
204 | * and also the h/w does not guarantee a curve for configured delay | |
205 | * values vs. actual b/w enforced. | |
206 | * Hence we need a mapping that is pre calibrated so the user can | |
207 | * express the memory b/w as a percentage value. | |
208 | */ | |
209 | static inline bool rdt_get_mb_table(struct rdt_resource *r) | |
210 | { | |
211 | /* | |
212 | * There are no Intel SKUs as of now to support non-linear delay. | |
213 | */ | |
214 | pr_info("MBA b/w map not implemented for cpu:%d, model:%d", | |
215 | boot_cpu_data.x86, boot_cpu_data.x86_model); | |
216 | ||
217 | return false; | |
218 | } | |
219 | ||
220 | static bool rdt_get_mem_config(struct rdt_resource *r) | |
221 | { | |
222 | union cpuid_0x10_3_eax eax; | |
223 | union cpuid_0x10_x_edx edx; | |
224 | u32 ebx, ecx; | |
225 | ||
226 | cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full); | |
227 | r->num_closid = edx.split.cos_max + 1; | |
228 | r->membw.max_delay = eax.split.max_delay + 1; | |
229 | r->default_ctrl = MAX_MBA_BW; | |
230 | if (ecx & MBA_IS_LINEAR) { | |
231 | r->membw.delay_linear = true; | |
232 | r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay; | |
233 | r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay; | |
234 | } else { | |
235 | if (!rdt_get_mb_table(r)) | |
236 | return false; | |
237 | } | |
238 | r->data_width = 3; | |
239 | ||
1b5c0b75 VS |
240 | r->alloc_capable = true; |
241 | r->alloc_enabled = true; | |
05b93417 VS |
242 | |
243 | return true; | |
244 | } | |
245 | ||
6a445edc | 246 | static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) |
c1c7c3f9 FY |
247 | { |
248 | union cpuid_0x10_1_eax eax; | |
2545e9f5 | 249 | union cpuid_0x10_x_edx edx; |
c1c7c3f9 FY |
250 | u32 ebx, ecx; |
251 | ||
252 | cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full); | |
253 | r->num_closid = edx.split.cos_max + 1; | |
d3e11b4d | 254 | r->cache.cbm_len = eax.split.cbm_len + 1; |
2545e9f5 | 255 | r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; |
0dd2d749 | 256 | r->cache.shareable_bits = ebx & r->default_ctrl; |
d3e11b4d | 257 | r->data_width = (r->cache.cbm_len + 3) / 4; |
1b5c0b75 VS |
258 | r->alloc_capable = true; |
259 | r->alloc_enabled = true; | |
c1c7c3f9 FY |
260 | } |
261 | ||
262 | static void rdt_get_cdp_l3_config(int type) | |
263 | { | |
264 | struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; | |
265 | struct rdt_resource *r = &rdt_resources_all[type]; | |
266 | ||
267 | r->num_closid = r_l3->num_closid / 2; | |
d3e11b4d | 268 | r->cache.cbm_len = r_l3->cache.cbm_len; |
2545e9f5 | 269 | r->default_ctrl = r_l3->default_ctrl; |
95953034 | 270 | r->cache.shareable_bits = r_l3->cache.shareable_bits; |
d3e11b4d | 271 | r->data_width = (r->cache.cbm_len + 3) / 4; |
1b5c0b75 | 272 | r->alloc_capable = true; |
c1c7c3f9 FY |
273 | /* |
274 | * By default, CDP is disabled. CDP can be enabled by mount parameter | |
275 | * "cdp" during resctrl file system mount time. | |
276 | */ | |
1b5c0b75 | 277 | r->alloc_enabled = false; |
c1c7c3f9 FY |
278 | } |
279 | ||
2264d9c7 TL |
280 | static int get_cache_id(int cpu, int level) |
281 | { | |
282 | struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); | |
283 | int i; | |
284 | ||
285 | for (i = 0; i < ci->num_leaves; i++) { | |
286 | if (ci->info_list[i].level == level) | |
287 | return ci->info_list[i].id; | |
288 | } | |
289 | ||
290 | return -1; | |
291 | } | |
292 | ||
05b93417 VS |
293 | /* |
294 | * Map the memory b/w percentage value to delay values | |
295 | * that can be written to QOS_MSRs. | |
296 | * There are currently no SKUs which support non linear delay values. | |
297 | */ | |
298 | static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) | |
299 | { | |
300 | if (r->membw.delay_linear) | |
301 | return MAX_MBA_BW - bw; | |
302 | ||
303 | pr_warn_once("Non Linear delay-bw map not supported but queried\n"); | |
304 | return r->default_ctrl; | |
305 | } | |
306 | ||
307 | static void | |
308 | mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) | |
309 | { | |
310 | unsigned int i; | |
311 | ||
312 | /* Write the delay values for mba. */ | |
313 | for (i = m->low; i < m->high; i++) | |
314 | wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r)); | |
315 | } | |
316 | ||
0921c547 TG |
317 | static void |
318 | cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) | |
319 | { | |
320 | unsigned int i; | |
321 | ||
322 | for (i = m->low; i < m->high; i++) | |
323 | wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]); | |
324 | } | |
325 | ||
edf6fa1c VS |
326 | struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) |
327 | { | |
328 | struct rdt_domain *d; | |
329 | ||
330 | list_for_each_entry(d, &r->domains, list) { | |
331 | /* Find the domain that contains this CPU */ | |
332 | if (cpumask_test_cpu(cpu, &d->cpu_mask)) | |
333 | return d; | |
334 | } | |
335 | ||
336 | return NULL; | |
337 | } | |
338 | ||
2545e9f5 | 339 | void rdt_ctrl_update(void *arg) |
2264d9c7 | 340 | { |
0921c547 | 341 | struct msr_param *m = arg; |
2264d9c7 | 342 | struct rdt_resource *r = m->res; |
0921c547 | 343 | int cpu = smp_processor_id(); |
2264d9c7 TL |
344 | struct rdt_domain *d; |
345 | ||
e3302683 VS |
346 | d = get_domain_from_cpu(cpu, r); |
347 | if (d) { | |
348 | r->msr_update(d, m, r); | |
349 | return; | |
2264d9c7 | 350 | } |
0921c547 | 351 | pr_warn_once("cpu %d not found in any domain for resource %s\n", |
2264d9c7 | 352 | cpu, r->name); |
2264d9c7 TL |
353 | } |
354 | ||
355 | /* | |
356 | * rdt_find_domain - Find a domain in a resource that matches input resource id | |
357 | * | |
358 | * Search resource r's domain list to find the resource id. If the resource | |
359 | * id is found in a domain, return the domain. Otherwise return NULL and, if | |
360 | * pos is non-NULL, set *pos to the first domain entry whose id is bigger (or the list head). | |
361 | * The domain list is sorted by id in ascending order. | |
362 | */ | |
d89b7379 VS |
363 | struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, |
364 | struct list_head **pos) | |
2264d9c7 TL |
365 | { |
366 | struct rdt_domain *d; | |
367 | struct list_head *l; | |
368 | ||
369 | if (id < 0) | |
370 | return ERR_PTR(id); | |
371 | ||
372 | list_for_each(l, &r->domains) { | |
373 | d = list_entry(l, struct rdt_domain, list); | |
374 | /* When id is found, return its domain. */ | |
375 | if (id == d->id) | |
376 | return d; | |
377 | /* Stop searching when finding id's position in sorted list. */ | |
378 | if (id < d->id) | |
379 | break; | |
380 | } | |
381 | ||
382 | if (pos) | |
383 | *pos = l; | |
384 | ||
385 | return NULL; | |
386 | } | |
387 | ||
0921c547 TG |
388 | static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) |
389 | { | |
390 | struct msr_param m; | |
391 | u32 *dc; | |
392 | int i; | |
393 | ||
394 | dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL); | |
395 | if (!dc) | |
396 | return -ENOMEM; | |
397 | ||
398 | d->ctrl_val = dc; | |
399 | ||
400 | /* | |
401 | * Initialize the Control MSRs to having no control. | |
402 | * For Cache Allocation: Set all bits in cbm | |
403 | * For Memory Allocation: Set b/w requested to 100 | |
404 | */ | |
405 | for (i = 0; i < r->num_closid; i++, dc++) | |
406 | *dc = r->default_ctrl; | |
407 | ||
408 | m.low = 0; | |
409 | m.high = r->num_closid; | |
410 | r->msr_update(d, &m, r); | |
411 | return 0; | |
412 | } | |
413 | ||
edf6fa1c VS |
414 | static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) |
415 | { | |
9f52425b TL |
416 | size_t tsize; |
417 | ||
edf6fa1c VS |
418 | if (is_llc_occupancy_enabled()) { |
419 | d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid), | |
420 | sizeof(unsigned long), | |
421 | GFP_KERNEL); | |
422 | if (!d->rmid_busy_llc) | |
423 | return -ENOMEM; | |
24247aee | 424 | INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); |
edf6fa1c | 425 | } |
9f52425b TL |
426 | if (is_mbm_total_enabled()) { |
427 | tsize = sizeof(*d->mbm_total); | |
428 | d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL); | |
429 | if (!d->mbm_total) { | |
430 | kfree(d->rmid_busy_llc); | |
431 | return -ENOMEM; | |
432 | } | |
433 | } | |
434 | if (is_mbm_local_enabled()) { | |
435 | tsize = sizeof(*d->mbm_local); | |
436 | d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL); | |
437 | if (!d->mbm_local) { | |
438 | kfree(d->rmid_busy_llc); | |
439 | kfree(d->mbm_total); | |
440 | return -ENOMEM; | |
441 | } | |
442 | } | |
edf6fa1c | 443 | |
e3302683 VS |
444 | if (is_mbm_enabled()) { |
445 | INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); | |
bbc4615e | 446 | mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL); |
e3302683 VS |
447 | } |
448 | ||
edf6fa1c VS |
449 | return 0; |
450 | } | |
451 | ||
2264d9c7 TL |
452 | /* |
453 | * domain_add_cpu - Add a cpu to a resource's domain list. | |
454 | * | |
455 | * If an existing domain in the resource r's domain list matches the cpu's | |
456 | * resource id, add the cpu in the domain. | |
457 | * | |
458 | * Otherwise, a new domain is allocated and inserted into the right position | |
459 | * in the domain list sorted by id in ascending order. | |
460 | * | |
461 | * The order in the domain list is visible to users when we print entries | |
462 | * in the schemata file and schemata input is validated to have the same order | |
463 | * as this list. | |
464 | */ | |
465 | static void domain_add_cpu(int cpu, struct rdt_resource *r) | |
466 | { | |
0921c547 | 467 | int id = get_cache_id(cpu, r->cache_level); |
2264d9c7 TL |
468 | struct list_head *add_pos = NULL; |
469 | struct rdt_domain *d; | |
470 | ||
471 | d = rdt_find_domain(r, id, &add_pos); | |
472 | if (IS_ERR(d)) { | |
473 | pr_warn("Could't find cache id for cpu %d\n", cpu); | |
474 | return; | |
475 | } | |
476 | ||
477 | if (d) { | |
478 | cpumask_set_cpu(cpu, &d->cpu_mask); | |
479 | return; | |
480 | } | |
481 | ||
482 | d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu)); | |
483 | if (!d) | |
484 | return; | |
485 | ||
486 | d->id = id; | |
9f52425b | 487 | cpumask_set_cpu(cpu, &d->cpu_mask); |
2264d9c7 | 488 | |
1b5c0b75 | 489 | if (r->alloc_capable && domain_setup_ctrlval(r, d)) { |
2264d9c7 TL |
490 | kfree(d); |
491 | return; | |
492 | } | |
493 | ||
edf6fa1c VS |
494 | if (r->mon_capable && domain_setup_mon_state(r, d)) { |
495 | kfree(d); | |
496 | return; | |
497 | } | |
498 | ||
2264d9c7 | 499 | list_add_tail(&d->list, add_pos); |
895c663e VS |
500 | |
501 | /* | |
502 | * If resctrl is mounted, add | |
503 | * per domain monitor data directories. | |
504 | */ | |
505 | if (static_branch_unlikely(&rdt_mon_enable_key)) | |
506 | mkdir_mondata_subdir_allrdtgrp(r, d); | |
2264d9c7 TL |
507 | } |
508 | ||
509 | static void domain_remove_cpu(int cpu, struct rdt_resource *r) | |
510 | { | |
511 | int id = get_cache_id(cpu, r->cache_level); | |
512 | struct rdt_domain *d; | |
513 | ||
514 | d = rdt_find_domain(r, id, NULL); | |
515 | if (IS_ERR_OR_NULL(d)) { | |
516 | pr_warn("Could't find cache id for cpu %d\n", cpu); | |
517 | return; | |
518 | } | |
519 | ||
520 | cpumask_clear_cpu(cpu, &d->cpu_mask); | |
521 | if (cpumask_empty(&d->cpu_mask)) { | |
895c663e VS |
522 | /* |
523 | * If resctrl is mounted, remove all the | |
524 | * per domain monitor data directories. | |
525 | */ | |
526 | if (static_branch_unlikely(&rdt_mon_enable_key)) | |
527 | rmdir_mondata_subdir_allrdtgrp(r, d->id); | |
2545e9f5 | 528 | kfree(d->ctrl_val); |
edf6fa1c | 529 | kfree(d->rmid_busy_llc); |
9f52425b TL |
530 | kfree(d->mbm_total); |
531 | kfree(d->mbm_local); | |
2264d9c7 | 532 | list_del(&d->list); |
e3302683 VS |
533 | if (is_mbm_enabled()) |
534 | cancel_delayed_work(&d->mbm_over); | |
24247aee VS |
535 | if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { |
536 | /* | |
537 | * When a package is going down, forcefully | |
538 | * decrement rmid->ebusy. There is no way to know | |
539 | * that the L3 was flushed and hence may lead to | |
540 | * incorrect counts in rare scenarios, but leaving | |
541 | * the RMID as busy creates RMID leaks if the | |
542 | * package never comes back. | |
543 | */ | |
544 | __check_limbo(d, true); | |
545 | cancel_delayed_work(&d->cqm_limbo); | |
546 | } | |
547 | ||
2264d9c7 | 548 | kfree(d); |
24247aee VS |
549 | return; |
550 | } | |
551 | ||
552 | if (r == &rdt_resources_all[RDT_RESOURCE_L3]) { | |
553 | if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { | |
554 | cancel_delayed_work(&d->mbm_over); | |
555 | mbm_setup_overflow_handler(d, 0); | |
556 | } | |
557 | if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && | |
558 | has_busy_rmid(r, d)) { | |
559 | cancel_delayed_work(&d->cqm_limbo); | |
560 | cqm_setup_limbo_handler(d, 0); | |
561 | } | |
2264d9c7 TL |
562 | } |
563 | } | |
564 | ||
895c663e | 565 | static void clear_closid_rmid(int cpu) |
2264d9c7 TL |
566 | { |
567 | struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); | |
12e0110c | 568 | |
a9110b55 VS |
569 | state->default_closid = 0; |
570 | state->default_rmid = 0; | |
571 | state->cur_closid = 0; | |
572 | state->cur_rmid = 0; | |
895c663e | 573 | wrmsr(IA32_PQR_ASSOC, 0, 0); |
12e0110c TL |
574 | } |
575 | ||
576 | static int intel_rdt_online_cpu(unsigned int cpu) | |
577 | { | |
2264d9c7 TL |
578 | struct rdt_resource *r; |
579 | ||
580 | mutex_lock(&rdtgroup_mutex); | |
895c663e | 581 | for_each_capable_rdt_resource(r) |
2264d9c7 | 582 | domain_add_cpu(cpu, r); |
12e0110c TL |
583 | /* The cpu is set in default rdtgroup after online. */ |
584 | cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask); | |
895c663e | 585 | clear_closid_rmid(cpu); |
2264d9c7 TL |
586 | mutex_unlock(&rdtgroup_mutex); |
587 | ||
588 | return 0; | |
589 | } | |
590 | ||
895c663e VS |
591 | static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) |
592 | { | |
593 | struct rdtgroup *cr; | |
594 | ||
595 | list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) { | |
596 | if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) { | |
597 | break; | |
598 | } | |
599 | } | |
600 | } | |
601 | ||
2264d9c7 TL |
602 | static int intel_rdt_offline_cpu(unsigned int cpu) |
603 | { | |
12e0110c | 604 | struct rdtgroup *rdtgrp; |
2264d9c7 TL |
605 | struct rdt_resource *r; |
606 | ||
607 | mutex_lock(&rdtgroup_mutex); | |
895c663e | 608 | for_each_capable_rdt_resource(r) |
2264d9c7 | 609 | domain_remove_cpu(cpu, r); |
12e0110c | 610 | list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { |
895c663e VS |
611 | if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) { |
612 | clear_childcpus(rdtgrp, cpu); | |
12e0110c | 613 | break; |
895c663e | 614 | } |
12e0110c | 615 | } |
895c663e | 616 | clear_closid_rmid(cpu); |
2264d9c7 TL |
617 | mutex_unlock(&rdtgroup_mutex); |
618 | ||
619 | return 0; | |
620 | } | |
621 | ||
70a1ee92 TG |
622 | /* |
623 | * Choose a width for the resource name and resource data based on the | |
624 | * resource that has widest name and cbm. | |
625 | */ | |
626 | static __init void rdt_init_padding(void) | |
627 | { | |
628 | struct rdt_resource *r; | |
629 | int cl; | |
630 | ||
1b5c0b75 | 631 | for_each_alloc_capable_rdt_resource(r) { |
70a1ee92 TG |
632 | cl = strlen(r->name); |
633 | if (cl > max_name_width) | |
634 | max_name_width = cl; | |
635 | ||
636 | if (r->data_width > max_data_width) | |
637 | max_data_width = r->data_width; | |
638 | } | |
639 | } | |
640 | ||
1d9807fc TL |
641 | enum { |
642 | RDT_FLAG_CMT, | |
643 | RDT_FLAG_MBM_TOTAL, | |
644 | RDT_FLAG_MBM_LOCAL, | |
645 | RDT_FLAG_L3_CAT, | |
646 | RDT_FLAG_L3_CDP, | |
647 | RDT_FLAG_L2_CAT, | |
648 | RDT_FLAG_MBA, | |
649 | }; | |
650 | ||
651 | #define RDT_OPT(idx, n, f) \ | |
652 | [idx] = { \ | |
653 | .name = n, \ | |
654 | .flag = f \ | |
655 | } | |
656 | ||
657 | struct rdt_options { | |
658 | char *name; | |
659 | int flag; | |
660 | bool force_off, force_on; | |
661 | }; | |
662 | ||
663 | static struct rdt_options rdt_options[] __initdata = { | |
664 | RDT_OPT(RDT_FLAG_CMT, "cmt", X86_FEATURE_CQM_OCCUP_LLC), | |
665 | RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL), | |
666 | RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL), | |
667 | RDT_OPT(RDT_FLAG_L3_CAT, "l3cat", X86_FEATURE_CAT_L3), | |
668 | RDT_OPT(RDT_FLAG_L3_CDP, "l3cdp", X86_FEATURE_CDP_L3), | |
669 | RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2), | |
670 | RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), | |
671 | }; | |
672 | #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) | |
673 | ||
674 | static int __init set_rdt_options(char *str) | |
675 | { | |
676 | struct rdt_options *o; | |
677 | bool force_off; | |
678 | char *tok; | |
679 | ||
680 | if (*str == '=') | |
681 | str++; | |
682 | while ((tok = strsep(&str, ",")) != NULL) { | |
683 | force_off = *tok == '!'; | |
684 | if (force_off) | |
685 | tok++; | |
686 | for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) { | |
687 | if (strcmp(tok, o->name) == 0) { | |
688 | if (force_off) | |
689 | o->force_off = true; | |
690 | else | |
691 | o->force_on = true; | |
692 | break; | |
693 | } | |
694 | } | |
695 | } | |
696 | return 1; | |
697 | } | |
698 | __setup("rdt", set_rdt_options); | |
699 | ||
700 | static bool __init rdt_cpu_has(int flag) | |
701 | { | |
702 | bool ret = boot_cpu_has(flag); | |
703 | struct rdt_options *o; | |
704 | ||
705 | if (!ret) | |
706 | return ret; | |
707 | ||
708 | for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) { | |
709 | if (flag == o->flag) { | |
710 | if (o->force_off) | |
711 | ret = false; | |
712 | if (o->force_on) | |
713 | ret = true; | |
714 | break; | |
715 | } | |
716 | } | |
717 | return ret; | |
718 | } | |
719 | ||
6a445edc | 720 | static __init bool get_rdt_alloc_resources(void) |
70a1ee92 TG |
721 | { |
722 | bool ret = false; | |
723 | ||
0576113a | 724 | if (rdt_alloc_capable) |
70a1ee92 TG |
725 | return true; |
726 | ||
727 | if (!boot_cpu_has(X86_FEATURE_RDT_A)) | |
728 | return false; | |
729 | ||
1d9807fc | 730 | if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { |
6a445edc | 731 | rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]); |
1d9807fc | 732 | if (rdt_cpu_has(X86_FEATURE_CDP_L3)) { |
70a1ee92 TG |
733 | rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA); |
734 | rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE); | |
735 | } | |
736 | ret = true; | |
737 | } | |
1d9807fc | 738 | if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { |
70a1ee92 | 739 | /* CPUID 0x10.2 fields are same format at 0x10.1 */ |
6a445edc | 740 | rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]); |
70a1ee92 TG |
741 | ret = true; |
742 | } | |
ab66a33b | 743 | |
1d9807fc | 744 | if (rdt_cpu_has(X86_FEATURE_MBA)) { |
05b93417 VS |
745 | if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA])) |
746 | ret = true; | |
747 | } | |
70a1ee92 TG |
748 | return ret; |
749 | } | |
750 | ||
6a445edc VS |
751 | static __init bool get_rdt_mon_resources(void) |
752 | { | |
1d9807fc | 753 | if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) |
6a445edc | 754 | rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID); |
1d9807fc | 755 | if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) |
6a445edc | 756 | rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID); |
1d9807fc | 757 | if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) |
6a445edc VS |
758 | rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID); |
759 | ||
760 | if (!rdt_mon_features) | |
761 | return false; | |
762 | ||
763 | return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]); | |
764 | } | |
765 | ||
0576113a TL |
766 | static __init void rdt_quirks(void) |
767 | { | |
768 | switch (boot_cpu_data.x86_model) { | |
769 | case INTEL_FAM6_HASWELL_X: | |
1d9807fc TL |
770 | if (!rdt_options[RDT_FLAG_L3_CAT].force_off) |
771 | cache_alloc_hsw_probe(); | |
0576113a | 772 | break; |
d56593eb TL |
773 | case INTEL_FAM6_SKYLAKE_X: |
774 | if (boot_cpu_data.x86_mask <= 4) | |
775 | set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); | |
0576113a TL |
776 | } |
777 | } | |
778 | ||
6a445edc VS |
779 | static __init bool get_rdt_resources(void) |
780 | { | |
0576113a | 781 | rdt_quirks(); |
6a445edc VS |
782 | rdt_alloc_capable = get_rdt_alloc_resources(); |
783 | rdt_mon_capable = get_rdt_mon_resources(); | |
784 | ||
785 | return (rdt_mon_capable || rdt_alloc_capable); | |
786 | } | |
787 | ||
78e99b4a FY |
788 | static int __init intel_rdt_late_init(void) |
789 | { | |
c1c7c3f9 | 790 | struct rdt_resource *r; |
5ff193fb | 791 | int state, ret; |
c1c7c3f9 | 792 | |
78e99b4a FY |
793 | if (!get_rdt_resources()) |
794 | return -ENODEV; | |
795 | ||
06b57e45 TG |
796 | rdt_init_padding(); |
797 | ||
2264d9c7 TL |
798 | state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, |
799 | "x86/rdt/cat:online:", | |
800 | intel_rdt_online_cpu, intel_rdt_offline_cpu); | |
801 | if (state < 0) | |
802 | return state; | |
803 | ||
5ff193fb FY |
804 | ret = rdtgroup_init(); |
805 | if (ret) { | |
806 | cpuhp_remove_state(state); | |
807 | return ret; | |
808 | } | |
809 | ||
1b5c0b75 | 810 | for_each_alloc_capable_rdt_resource(r) |
c1c7c3f9 | 811 | pr_info("Intel RDT %s allocation detected\n", r->name); |
78e99b4a | 812 | |
6a445edc VS |
813 | for_each_mon_capable_rdt_resource(r) |
814 | pr_info("Intel RDT %s monitoring detected\n", r->name); | |
815 | ||
78e99b4a FY |
816 | return 0; |
817 | } | |
818 | ||
819 | late_initcall(intel_rdt_late_init); |