// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;

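/*
 * arch_set_freq_scale() updates the per-CPU frequency-invariance factor:
 * freq_scale = cur_freq / max_freq, expressed in SCHED_CAPACITY_SCALE units,
 * so SCHED_CAPACITY_SCALE (1024) means "running at maximum frequency". The
 * scheduler uses it to make task utilization frequency invariant.
 */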
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
			 unsigned long max_freq)
{
	unsigned long scale;
	int i;

	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	for_each_cpu(i, cpus)
		per_cpu(freq_scale, i) = scale;
}

DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
	per_cpu(cpu_scale, cpu) = capacity;
}

static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);

	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

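/*
 * Expose each CPU's capacity through sysfs as a read-only "cpu_capacity"
 * attribute on the CPU device (/sys/devices/system/cpu/cpuN/cpu_capacity).
 */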
static int register_cpu_capacity_sysctl(void)
{
	int i;
	struct device *cpu;

	for_each_possible_cpu(i) {
		cpu = get_cpu_device(i);
		if (!cpu) {
			pr_err("%s: too early to get CPU%d device!\n",
			       __func__, i);
			continue;
		}
		device_create_file(cpu, &dev_attr_cpu_capacity);
	}

	return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

static int update_topology;

int topology_update_cpu_topology(void)
{
	return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;
	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}

static u32 capacity_scale;
static u32 *raw_capacity;

static int free_raw_capacity(void)
{
	kfree(raw_capacity);
	raw_capacity = NULL;

	return 0;
}

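/*
 * Normalize the raw capacities so that the biggest CPU ends up at
 * SCHED_CAPACITY_SCALE (1024): each CPU gets
 * raw_capacity[cpu] * SCHED_CAPACITY_SCALE / capacity_scale, where
 * capacity_scale is the largest raw capacity seen while parsing.
 */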
void topology_normalize_cpu_scale(void)
{
	u64 capacity;
	int cpu;

	if (!raw_capacity)
		return;

	pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
	for_each_possible_cpu(cpu) {
		pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
			 cpu, raw_capacity[cpu]);
		capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
			/ capacity_scale;
		topology_set_cpu_scale(cpu, capacity);
		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
			 cpu, topology_get_cpu_scale(cpu));
	}
}

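/*
 * Read the "capacity-dmips-mhz" property of a CPU's device-tree node and
 * record it in raw_capacity[]. Returns true if a value was parsed; if any
 * CPU lacks the property, parsing is abandoned and all CPUs fall back to
 * the default capacity of 1024.
 */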
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
	static bool cap_parsing_failed;
	int ret;
	u32 cpu_capacity;

	if (cap_parsing_failed)
		return false;

	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
				   &cpu_capacity);
	if (!ret) {
		if (!raw_capacity) {
			raw_capacity = kcalloc(num_possible_cpus(),
					       sizeof(*raw_capacity),
					       GFP_KERNEL);
			if (!raw_capacity) {
				cap_parsing_failed = true;
				return false;
			}
		}
		capacity_scale = max(cpu_capacity, capacity_scale);
		raw_capacity[cpu] = cpu_capacity;
		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
			 cpu_node, raw_capacity[cpu]);
	} else {
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %pOF raw capacity\n",
			       cpu_node);
			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		}
		cap_parsing_failed = true;
		free_raw_capacity();
	}

	return !ret;
}

#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

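/*
 * cpufreq policy notifier: when a policy is created, convert the DT
 * capacity-dmips-mhz values into capacities by multiplying with each
 * policy's maximum frequency. Once every possible CPU has been visited,
 * normalize the result, drop the raw table and schedule a sched_domain
 * rebuild so the updated topology flags are picked up.
 */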
static int
init_cpu_capacity_callback(struct notifier_block *nb,
			   unsigned long val,
			   void *data)
{
	struct cpufreq_policy *policy = data;
	int cpu;

	if (!raw_capacity)
		return 0;

	if (val != CPUFREQ_CREATE_POLICY)
		return 0;

	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
		 cpumask_pr_args(policy->related_cpus),
		 cpumask_pr_args(cpus_to_visit));

	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

	for_each_cpu(cpu, policy->related_cpus) {
		raw_capacity[cpu] = topology_get_cpu_scale(cpu) *
				    policy->cpuinfo.max_freq / 1000UL;
		capacity_scale = max(raw_capacity[cpu], capacity_scale);
	}

	if (cpumask_empty(cpus_to_visit)) {
		topology_normalize_cpu_scale();
		schedule_work(&update_topology_flags_work);
		free_raw_capacity();
		pr_debug("cpu_capacity: parsing done\n");
		schedule_work(&parsing_done_work);
	}

	return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
	int ret;

	/*
	 * On ACPI-based systems we need to use the default cpu capacity
	 * until we have the necessary code to parse the cpu capacity, so
	 * skip registering the cpufreq notifier.
	 */
	if (!acpi_disabled || !raw_capacity)
		return -EINVAL;

	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(cpus_to_visit, cpu_possible_mask);

	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
					CPUFREQ_POLICY_NOTIFIER);

	if (ret)
		free_cpumask_var(cpus_to_visit);

	return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
				    CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical cpu number of the node.
 * There are basically three kinds of return values:
 * (1) logical cpu number, which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT but
 *     there is no possible logical CPU in the kernel to match. This happens
 *     when CONFIG_NR_CPUS is configured to be smaller than the number of
 *     CPU nodes in the DT. We just ignore this case.
 * (3) -1 if the node does not exist in the device tree.
 */
static int __init get_cpu_for_node(struct device_node *node)
{
	struct device_node *cpu_node;
	int cpu;

	cpu_node = of_parse_phandle(node, "cpu", 0);
	if (!cpu_node)
		return -1;

	cpu = of_cpu_node_to_id(cpu_node);
	if (cpu >= 0)
		topology_parse_cpu_capacity(cpu_node, cpu);
	else
		pr_info("CPU node for %pOF exists but the possible cpu range is: %*pbl\n",
			cpu_node, cpumask_pr_args(cpu_possible_mask));

	of_node_put(cpu_node);
	return cpu;
}

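/*
 * Parse a "core" node of the cpu-map: walk its "thread<N>" children (SMT)
 * or, for a leaf core, the core's own "cpu" phandle, and fill in the
 * package/core/thread ids in cpu_topology[].
 */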
static int __init parse_core(struct device_node *core, int package_id,
			     int core_id)
{
	char name[10];
	bool leaf = true;
	int i = 0;
	int cpu;
	struct device_node *t;

	do {
		snprintf(name, sizeof(name), "thread%d", i);
		t = of_get_child_by_name(core, name);
		if (t) {
			leaf = false;
			cpu = get_cpu_for_node(t);
			if (cpu >= 0) {
				cpu_topology[cpu].package_id = package_id;
				cpu_topology[cpu].core_id = core_id;
				cpu_topology[cpu].thread_id = i;
			} else if (cpu != -ENODEV) {
				pr_err("%pOF: Can't get CPU for thread\n", t);
				of_node_put(t);
				return -EINVAL;
			}
			of_node_put(t);
		}
		i++;
	} while (t);

	cpu = get_cpu_for_node(core);
	if (cpu >= 0) {
		if (!leaf) {
			pr_err("%pOF: Core has both threads and CPU\n",
			       core);
			return -EINVAL;
		}

		cpu_topology[cpu].package_id = package_id;
		cpu_topology[cpu].core_id = core_id;
	} else if (leaf && cpu != -ENODEV) {
		pr_err("%pOF: Can't get CPU for leaf core\n", core);
		return -EINVAL;
	}

	return 0;
}

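/*
 * Parse a "cluster" node: recurse into "cluster<N>" children first, then
 * handle "core<N>" children of leaf clusters. Each leaf cluster gets its
 * own package_id, so nested clusters are flattened for the scheduler.
 */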
static int __init parse_cluster(struct device_node *cluster, int depth)
{
	char name[10];
	bool leaf = true;
	bool has_cores = false;
	struct device_node *c;
	static int package_id __initdata;
	int core_id = 0;
	int i, ret;

	/*
	 * First check for child clusters; we currently ignore any
	 * information about the nesting of clusters and present the
	 * scheduler with a flat list of them.
	 */
	i = 0;
	do {
		snprintf(name, sizeof(name), "cluster%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			leaf = false;
			ret = parse_cluster(c, depth + 1);
			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	/* Now check for cores */
	i = 0;
	do {
		snprintf(name, sizeof(name), "core%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			has_cores = true;

			if (depth == 0) {
				pr_err("%pOF: cpu-map children should be clusters\n",
				       c);
				of_node_put(c);
				return -EINVAL;
			}

			if (leaf) {
				ret = parse_core(c, package_id, core_id++);
			} else {
				pr_err("%pOF: Non-leaf cluster with core %s\n",
				       cluster, name);
				ret = -EINVAL;
			}

			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	if (leaf && !has_cores)
		pr_warn("%pOF: empty cluster\n", cluster);

	if (leaf)
		package_id++;

	return 0;
}

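/*
 * Walk the /cpus/cpu-map device-tree node (if present), build cpu_topology[]
 * and normalize the parsed capacities. Fails if any possible CPU is missing
 * from the map, so partial topologies are discarded by the caller.
 */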
static int __init parse_dt_topology(void)
{
	struct device_node *cn, *map;
	int ret = 0;
	int cpu;

	cn = of_find_node_by_path("/cpus");
	if (!cn) {
		pr_err("No CPU information found in DT\n");
		return 0;
	}

	/*
	 * When topology is provided cpu-map is essentially a root
	 * cluster with restricted subnodes.
	 */
	map = of_get_child_by_name(cn, "cpu-map");
	if (!map)
		goto out;

	ret = parse_cluster(map, 0);
	if (ret != 0)
		goto out_map;

	topology_normalize_cpu_scale();

	/*
	 * Check that all cores are in the topology; the SMP code will
	 * only mark cores described in the DT as possible.
	 */
	for_each_possible_cpu(cpu)
		if (cpu_topology[cpu].package_id == -1)
			ret = -EINVAL;

out_map:
	of_node_put(map);
out:
	of_node_put(cn);
	return ret;
}
#endif

/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

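/*
 * Return the narrowest of the NUMA node mask, the package siblings and the
 * last-level-cache siblings for @cpu; the scheduler uses this mask when
 * building its core-group (MC) domain level.
 */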
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

	/* Find the smaller of NUMA, core or LLC siblings */
	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
		/* no NUMA boundary in the package, let's use the package siblings */
		core_mask = &cpu_topology[cpu].core_sibling;
	}
	if (cpu_topology[cpu].llc_id != -1) {
		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
			core_mask = &cpu_topology[cpu].llc_sibling;
	}

	return core_mask;
}

void update_siblings_masks(unsigned int cpuid)
{
	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
	int cpu;

	/* update core and thread sibling masks */
	for_each_online_cpu(cpu) {
		cpu_topo = &cpu_topology[cpu];

		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
		}

		if (cpuid_topo->package_id != cpu_topo->package_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

		if (cpuid_topo->core_id != cpu_topo->core_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
	}
}

static void clear_cpu_topology(int cpu)
{
	struct cpu_topology *cpu_topo = &cpu_topology[cpu];

	cpumask_clear(&cpu_topo->llc_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

	cpumask_clear(&cpu_topo->core_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
	cpumask_clear(&cpu_topo->thread_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

void __init reset_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_topology *cpu_topo = &cpu_topology[cpu];

		cpu_topo->thread_id = -1;
		cpu_topo->core_id = -1;
		cpu_topo->package_id = -1;
		cpu_topo->llc_id = -1;

		clear_cpu_topology(cpu);
	}
}

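/*
 * Drop @cpu from the sibling masks of every CPU it is currently paired with
 * (core, thread and LLC siblings), then reset its own masks. Used when a
 * CPU goes away, e.g. on hot-unplug.
 */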
void remove_cpu_topology(unsigned int cpu)
{
	int sibling;

	for_each_cpu(sibling, topology_core_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, topology_llc_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

	clear_cpu_topology(cpu);
}

__weak int __init parse_acpi_topology(void)
{
	return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
	reset_cpu_topology();

	/*
	 * Discard anything that was parsed if we hit an error so we
	 * don't use partial information.
	 */
	if (parse_acpi_topology())
		reset_cpu_topology();
	else if (of_have_populated_dt() && parse_dt_topology())
		reset_cpu_topology();
}
#endif