// SPDX-License-Identifier: GPL-2.0
/*
 * Arch specific cpu topology information
 *
 * Copyright (C) 2016, ARM Ltd.
 * Written by: Juri Lelli, ARM Ltd.
 */

#include <linux/acpi.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched/topology.h>
#include <linux/cpuset.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;

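/*
 * arch_set_freq_scale() updates the per-CPU frequency-invariance factor:
 * freq_scale = cur_freq / max_freq, expressed in SCHED_CAPACITY_SCALE units,
 * so SCHED_CAPACITY_SCALE (1024) means "running at maximum frequency". The
 * scheduler uses it to make task utilization frequency invariant.
 */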
void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
			 unsigned long max_freq)
{
	unsigned long scale;
	int i;

	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	for_each_cpu(i, cpus)
		per_cpu(freq_scale, i) = scale;
}

DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;

void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
{
	per_cpu(cpu_scale, cpu) = capacity;
}

static ssize_t cpu_capacity_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct cpu *cpu = container_of(dev, struct cpu, dev);

	return sprintf(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
}

static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);

static DEVICE_ATTR_RO(cpu_capacity);

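/*
 * Expose each CPU's capacity through sysfs as a read-only "cpu_capacity"
 * attribute on the CPU device (/sys/devices/system/cpu/cpuN/cpu_capacity).
 */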
static int register_cpu_capacity_sysctl(void)
{
	int i;
	struct device *cpu;

	for_each_possible_cpu(i) {
		cpu = get_cpu_device(i);
		if (!cpu) {
			pr_err("%s: too early to get CPU%d device!\n",
			       __func__, i);
			continue;
		}
		device_create_file(cpu, &dev_attr_cpu_capacity);
	}

	return 0;
}
subsys_initcall(register_cpu_capacity_sysctl);

static int update_topology;

int topology_update_cpu_topology(void)
{
	return update_topology;
}

/*
 * Updating the sched_domains can't be done directly from cpufreq callbacks
 * due to locking, so queue the work for later.
 */
static void update_topology_flags_workfn(struct work_struct *work)
{
	update_topology = 1;
	rebuild_sched_domains();
	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
	update_topology = 0;
}

static u32 capacity_scale;
static u32 *raw_capacity;

static int free_raw_capacity(void)
{
	kfree(raw_capacity);
	raw_capacity = NULL;

	return 0;
}

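/*
 * Normalize the raw capacities so that the biggest CPU ends up at
 * SCHED_CAPACITY_SCALE (1024): each CPU gets
 * raw_capacity[cpu] * SCHED_CAPACITY_SCALE / capacity_scale, where
 * capacity_scale is the largest raw capacity seen while parsing.
 */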
void topology_normalize_cpu_scale(void)
{
	u64 capacity;
	int cpu;

	if (!raw_capacity)
		return;

	pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
	for_each_possible_cpu(cpu) {
		pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
			 cpu, raw_capacity[cpu]);
		capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
			/ capacity_scale;
		topology_set_cpu_scale(cpu, capacity);
		pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
			 cpu, topology_get_cpu_scale(cpu));
	}
}

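/*
 * Read the "capacity-dmips-mhz" property of a CPU's device-tree node and
 * record it in raw_capacity[]. Returns true if a value was parsed; if any
 * CPU lacks the property, parsing is abandoned and all CPUs fall back to
 * the default capacity of 1024.
 */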
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
{
	static bool cap_parsing_failed;
	int ret;
	u32 cpu_capacity;

	if (cap_parsing_failed)
		return false;

	ret = of_property_read_u32(cpu_node, "capacity-dmips-mhz",
				   &cpu_capacity);
	if (!ret) {
		if (!raw_capacity) {
			raw_capacity = kcalloc(num_possible_cpus(),
					       sizeof(*raw_capacity),
					       GFP_KERNEL);
			if (!raw_capacity) {
				cap_parsing_failed = true;
				return false;
			}
		}
		capacity_scale = max(cpu_capacity, capacity_scale);
		raw_capacity[cpu] = cpu_capacity;
		pr_debug("cpu_capacity: %pOF cpu_capacity=%u (raw)\n",
			 cpu_node, raw_capacity[cpu]);
	} else {
		if (raw_capacity) {
			pr_err("cpu_capacity: missing %pOF raw capacity\n",
			       cpu_node);
			pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
		}
		cap_parsing_failed = true;
		free_raw_capacity();
	}

	return !ret;
}

#ifdef CONFIG_CPU_FREQ
static cpumask_var_t cpus_to_visit;
static void parsing_done_workfn(struct work_struct *work);
static DECLARE_WORK(parsing_done_work, parsing_done_workfn);

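/*
 * cpufreq policy notifier: when a policy is created, convert the DT
 * capacity-dmips-mhz values into capacities by multiplying with each
 * policy's maximum frequency. Once every possible CPU has been visited,
 * normalize the result, drop the raw table and schedule a sched_domain
 * rebuild so the updated topology flags are picked up.
 */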
static int
init_cpu_capacity_callback(struct notifier_block *nb,
			   unsigned long val,
			   void *data)
{
	struct cpufreq_policy *policy = data;
	int cpu;

	if (!raw_capacity)
		return 0;

	if (val != CPUFREQ_CREATE_POLICY)
		return 0;

	pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
		 cpumask_pr_args(policy->related_cpus),
		 cpumask_pr_args(cpus_to_visit));

	cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);

	for_each_cpu(cpu, policy->related_cpus) {
		raw_capacity[cpu] = topology_get_cpu_scale(cpu) *
				    policy->cpuinfo.max_freq / 1000UL;
		capacity_scale = max(raw_capacity[cpu], capacity_scale);
	}

	if (cpumask_empty(cpus_to_visit)) {
		topology_normalize_cpu_scale();
		schedule_work(&update_topology_flags_work);
		free_raw_capacity();
		pr_debug("cpu_capacity: parsing done\n");
		schedule_work(&parsing_done_work);
	}

	return 0;
}

static struct notifier_block init_cpu_capacity_notifier = {
	.notifier_call = init_cpu_capacity_callback,
};

static int __init register_cpufreq_notifier(void)
{
	int ret;

	/*
	 * On ACPI-based systems we need to use the default cpu capacity
	 * until we have the necessary code to parse the cpu capacity, so
	 * skip registering the cpufreq notifier.
	 */
	if (!acpi_disabled || !raw_capacity)
		return -EINVAL;

	if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(cpus_to_visit, cpu_possible_mask);

	ret = cpufreq_register_notifier(&init_cpu_capacity_notifier,
					CPUFREQ_POLICY_NOTIFIER);

	if (ret)
		free_cpumask_var(cpus_to_visit);

	return ret;
}
core_initcall(register_cpufreq_notifier);

static void parsing_done_workfn(struct work_struct *work)
{
	cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
				    CPUFREQ_POLICY_NOTIFIER);
	free_cpumask_var(cpus_to_visit);
}

#else
core_initcall(free_raw_capacity);
#endif

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
/*
 * This function returns the logical cpu number of the node.
 * There are basically three kinds of return values:
 * (1) logical cpu number, which is >= 0.
 * (2) -ENODEV when the device tree (DT) node is valid and found in the DT but
 *     there is no possible logical CPU in the kernel to match. This happens
 *     when CONFIG_NR_CPUS is configured to be smaller than the number of
 *     CPU nodes in the DT. We just ignore this case.
 * (3) -1 if the node does not exist in the device tree.
 */
static int __init get_cpu_for_node(struct device_node *node)
{
	struct device_node *cpu_node;
	int cpu;

	cpu_node = of_parse_phandle(node, "cpu", 0);
	if (!cpu_node)
		return -1;

	cpu = of_cpu_node_to_id(cpu_node);
	if (cpu >= 0)
		topology_parse_cpu_capacity(cpu_node, cpu);
	else
		pr_info("CPU node for %pOF exists but the possible cpu range is: %*pbl\n",
			cpu_node, cpumask_pr_args(cpu_possible_mask));

	of_node_put(cpu_node);
	return cpu;
}

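/*
 * Parse a "core" node of the cpu-map: walk its "thread<N>" children (SMT)
 * or, for a leaf core, the core's own "cpu" phandle, and fill in the
 * package/core/thread ids in cpu_topology[].
 */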
static int __init parse_core(struct device_node *core, int package_id,
			     int core_id)
{
	char name[10];
	bool leaf = true;
	int i = 0;
	int cpu;
	struct device_node *t;

	do {
		snprintf(name, sizeof(name), "thread%d", i);
		t = of_get_child_by_name(core, name);
		if (t) {
			leaf = false;
			cpu = get_cpu_for_node(t);
			if (cpu >= 0) {
				cpu_topology[cpu].package_id = package_id;
				cpu_topology[cpu].core_id = core_id;
				cpu_topology[cpu].thread_id = i;
			} else if (cpu != -ENODEV) {
				pr_err("%pOF: Can't get CPU for thread\n", t);
				of_node_put(t);
				return -EINVAL;
			}
			of_node_put(t);
		}
		i++;
	} while (t);

	cpu = get_cpu_for_node(core);
	if (cpu >= 0) {
		if (!leaf) {
			pr_err("%pOF: Core has both threads and CPU\n",
			       core);
			return -EINVAL;
		}

		cpu_topology[cpu].package_id = package_id;
		cpu_topology[cpu].core_id = core_id;
	} else if (leaf && cpu != -ENODEV) {
		pr_err("%pOF: Can't get CPU for leaf core\n", core);
		return -EINVAL;
	}

	return 0;
}

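/*
 * Parse a "cluster" node: recurse into "cluster<N>" children first, then
 * handle "core<N>" children of leaf clusters. Each leaf cluster gets its
 * own package_id, so nested clusters are flattened for the scheduler.
 */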
static int __init parse_cluster(struct device_node *cluster, int depth)
{
	char name[10];
	bool leaf = true;
	bool has_cores = false;
	struct device_node *c;
	static int package_id __initdata;
	int core_id = 0;
	int i, ret;

	/*
	 * First check for child clusters; we currently ignore any
	 * information about the nesting of clusters and present the
	 * scheduler with a flat list of them.
	 */
	i = 0;
	do {
		snprintf(name, sizeof(name), "cluster%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			leaf = false;
			ret = parse_cluster(c, depth + 1);
			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	/* Now check for cores */
	i = 0;
	do {
		snprintf(name, sizeof(name), "core%d", i);
		c = of_get_child_by_name(cluster, name);
		if (c) {
			has_cores = true;

			if (depth == 0) {
				pr_err("%pOF: cpu-map children should be clusters\n",
				       c);
				of_node_put(c);
				return -EINVAL;
			}

			if (leaf) {
				ret = parse_core(c, package_id, core_id++);
			} else {
				pr_err("%pOF: Non-leaf cluster with core %s\n",
				       cluster, name);
				ret = -EINVAL;
			}

			of_node_put(c);
			if (ret != 0)
				return ret;
		}
		i++;
	} while (c);

	if (leaf && !has_cores)
		pr_warn("%pOF: empty cluster\n", cluster);

	if (leaf)
		package_id++;

	return 0;
}

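/*
 * Walk the /cpus/cpu-map device-tree node (if present), build cpu_topology[]
 * and normalize the parsed capacities. Fails if any possible CPU is missing
 * from the map, so partial topologies are discarded by the caller.
 */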
static int __init parse_dt_topology(void)
{
	struct device_node *cn, *map;
	int ret = 0;
	int cpu;

	cn = of_find_node_by_path("/cpus");
	if (!cn) {
		pr_err("No CPU information found in DT\n");
		return 0;
	}

	/*
	 * When topology is provided cpu-map is essentially a root
	 * cluster with restricted subnodes.
	 */
	map = of_get_child_by_name(cn, "cpu-map");
	if (!map)
		goto out;

	ret = parse_cluster(map, 0);
	if (ret != 0)
		goto out_map;

	topology_normalize_cpu_scale();

	/*
	 * Check that all cores are in the topology; the SMP code will
	 * only mark cores described in the DT as possible.
	 */
	for_each_possible_cpu(cpu)
		if (cpu_topology[cpu].package_id == -1)
			ret = -EINVAL;

out_map:
	of_node_put(map);
out:
	of_node_put(cn);
	return ret;
}
#endif

/*
 * cpu topology table
 */
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);

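/*
 * Return the narrowest of the NUMA node mask, the package siblings and the
 * last-level-cache siblings for @cpu; the scheduler uses this mask when
 * building its core-group (MC) domain level.
 */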
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));

	/* Find the smaller of NUMA, core or LLC siblings */
	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
		/* no NUMA boundary in the package, let's use the package siblings */
		core_mask = &cpu_topology[cpu].core_sibling;
	}
	if (cpu_topology[cpu].llc_id != -1) {
		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
			core_mask = &cpu_topology[cpu].llc_sibling;
	}

	return core_mask;
}

void update_siblings_masks(unsigned int cpuid)
{
	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
	int cpu;

	/* update core and thread sibling masks */
	for_each_online_cpu(cpu) {
		cpu_topo = &cpu_topology[cpu];

		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
		}

		if (cpuid_topo->package_id != cpu_topo->package_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);

		if (cpuid_topo->core_id != cpu_topo->core_id)
			continue;

		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
	}
}

static void clear_cpu_topology(int cpu)
{
	struct cpu_topology *cpu_topo = &cpu_topology[cpu];

	cpumask_clear(&cpu_topo->llc_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);

	cpumask_clear(&cpu_topo->core_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
	cpumask_clear(&cpu_topo->thread_sibling);
	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
}

void __init reset_cpu_topology(void)
{
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		struct cpu_topology *cpu_topo = &cpu_topology[cpu];

		cpu_topo->thread_id = -1;
		cpu_topo->core_id = -1;
		cpu_topo->package_id = -1;
		cpu_topo->llc_id = -1;

		clear_cpu_topology(cpu);
	}
}

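/*
 * Drop @cpu from the sibling masks of every CPU it is currently paired with
 * (core, thread and LLC siblings), then reset its own masks. Used when a
 * CPU goes away, e.g. on hot-unplug.
 */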
void remove_cpu_topology(unsigned int cpu)
{
	int sibling;

	for_each_cpu(sibling, topology_core_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, topology_llc_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));

	clear_cpu_topology(cpu);
}

__weak int __init parse_acpi_topology(void)
{
	return 0;
}

#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
	reset_cpu_topology();

	/*
	 * Discard anything that was parsed if we hit an error so we
	 * don't use partial information.
	 */
	if (parse_acpi_topology())
		reset_cpu_topology();
	else if (of_have_populated_dt() && parse_dt_topology())
		reset_cpu_topology();
}
#endif