]>
Commit | Line | Data |
---|---|---|
1 | // SPDX-License-Identifier: GPL-2.0+ | |
2 | /* | |
3 | * Hygon Processor Support for Linux | |
4 | * | |
5 | * Copyright (C) 2018 Chengdu Haiguang IC Design Co., Ltd. | |
6 | * | |
7 | * Author: Pu Wen <puwen@hygon.cn> | |
8 | */ | |
9 | #include <linux/io.h> | |
10 | ||
11 | #include <asm/cpu.h> | |
12 | #include <asm/smp.h> | |
13 | #include <asm/numa.h> | |
14 | #include <asm/cacheinfo.h> | |
15 | #include <asm/spec-ctrl.h> | |
16 | #include <asm/delay.h> | |
17 | ||
18 | #include "cpu.h" | |
19 | ||
20 | #define APICID_SOCKET_ID_BIT 6 | |
21 | ||
22 | /* | |
23 | * nodes_per_socket: Stores the number of nodes per socket. | |
24 | * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8] | |
25 | */ | |
26 | static u32 nodes_per_socket = 1; | |
27 | ||
#ifdef CONFIG_NUMA
/*
 * Workaround for a broken NUMA configuration; see the comment in
 * srat_detect_node().
 *
 * Starting next to @apicid, walk __apicid_to_node[] downwards first and
 * then upwards, returning the first mapped node that is online. If
 * neither walk finds one, fall back to the first node of node_online_map.
 */
static int nearby_node(int apicid)
{
	int idx;

	/* Lower APIC IDs are tried first. */
	for (idx = apicid - 1; idx >= 0; idx--) {
		int nid = __apicid_to_node[idx];

		if (nid == NUMA_NO_NODE || !node_online(nid))
			continue;
		return nid;
	}

	/* Then the higher ones. */
	for (idx = apicid + 1; idx < MAX_LOCAL_APIC; idx++) {
		int nid = __apicid_to_node[idx];

		if (nid == NUMA_NO_NODE || !node_online(nid))
			continue;
		return nid;
	}

	/* Shouldn't happen */
	return first_node(node_online_map);
}
#endif
50 | ||
51 | static void hygon_get_topology_early(struct cpuinfo_x86 *c) | |
52 | { | |
53 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) | |
54 | smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; | |
55 | } | |
56 | ||
57 | /* | |
58 | * Fixup core topology information for | |
59 | * (1) Hygon multi-node processors | |
60 | * Assumption: Number of cores in each internal node is the same. | |
61 | * (2) Hygon processors supporting compute units | |
62 | */ | |
63 | static void hygon_get_topology(struct cpuinfo_x86 *c) | |
64 | { | |
65 | int cpu = smp_processor_id(); | |
66 | ||
67 | /* get information required for multi-node processors */ | |
68 | if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { | |
69 | int err; | |
70 | u32 eax, ebx, ecx, edx; | |
71 | ||
72 | cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); | |
73 | ||
74 | c->cpu_die_id = ecx & 0xff; | |
75 | ||
76 | c->cpu_core_id = ebx & 0xff; | |
77 | ||
78 | if (smp_num_siblings > 1) | |
79 | c->x86_max_cores /= smp_num_siblings; | |
80 | ||
81 | /* | |
82 | * In case leaf B is available, use it to derive | |
83 | * topology information. | |
84 | */ | |
85 | err = detect_extended_topology(c); | |
86 | if (!err) | |
87 | c->x86_coreid_bits = get_count_order(c->x86_max_cores); | |
88 | ||
89 | /* Socket ID is ApicId[6] for these processors. */ | |
90 | c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; | |
91 | ||
92 | cacheinfo_hygon_init_llc_id(c, cpu); | |
93 | } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { | |
94 | u64 value; | |
95 | ||
96 | rdmsrl(MSR_FAM10H_NODE_ID, value); | |
97 | c->cpu_die_id = value & 7; | |
98 | ||
99 | per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; | |
100 | } else | |
101 | return; | |
102 | ||
103 | if (nodes_per_socket > 1) | |
104 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | |
105 | } | |
106 | ||
107 | /* | |
108 | * On Hygon setup the lower bits of the APIC id distinguish the cores. | |
109 | * Assumes number of cores is a power of two. | |
110 | */ | |
111 | static void hygon_detect_cmp(struct cpuinfo_x86 *c) | |
112 | { | |
113 | unsigned int bits; | |
114 | int cpu = smp_processor_id(); | |
115 | ||
116 | bits = c->x86_coreid_bits; | |
117 | /* Low order bits define the core id (index of core in socket) */ | |
118 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | |
119 | /* Convert the initial APIC ID into the socket ID */ | |
120 | c->phys_proc_id = c->initial_apicid >> bits; | |
121 | /* use socket ID also for last level cache */ | |
122 | per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; | |
123 | } | |
124 | ||
/*
 * Pick the NUMA node for the current CPU and record it with
 * numa_set_node(). Compiles to an empty function without CONFIG_NUMA.
 *
 * The node reported by numa_cpu_node() is preferred; if there is none,
 * the CPU's last level cache ID is used as the starting guess. An
 * offline result is then repaired as described below.
 */
static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
	int cpu = smp_processor_id();
	unsigned int apicid = c->apicid;
	int node = numa_cpu_node(cpu);

	if (node == NUMA_NO_NODE)
		node = per_cpu(cpu_llc_id, cpu);

	/*
	 * Multi-fabric platforms (e.g. Numascale NumaChip) install a
	 * platform-specific handler that has to fix up some of the CPU's
	 * IDs.
	 */
	if (x86_cpuinit.fixup_cpu_id)
		x86_cpuinit.fixup_cpu_id(c, node);

	if (!node_online(node)) {
		/*
		 * Two possible causes:
		 *
		 * - The CPU has no memory and therefore no node was created
		 *   for it. Borrow the node of a nearby CPU.
		 *
		 * - The APIC IDs differ from the HyperTransport node IDs.
		 *   Assume they are all shifted by a constant offset but
		 *   keep the order of the HT node IDs. If that does not
		 *   yield a usable node, handle it like the first cause.
		 *
		 * The workaround relies on a particular relationship between
		 * APIC ID, HT node ID and NUMA topology, so it reads
		 * __apicid_to_node[] directly; going through the CPU mapping
		 * could change the outcome.
		 */
		int ht_nodeid = c->initial_apicid;
		int mapped = __apicid_to_node[ht_nodeid];

		if (mapped != NUMA_NO_NODE)
			node = mapped;

		/* Still offline: pick a nearby node. */
		if (!node_online(node))
			node = nearby_node(apicid);
	}

	numa_set_node(cpu, node);
#endif
}
174 | ||
/*
 * Read the core count and the core ID width from CPUID 0x80000008 ECX
 * into c->x86_max_cores and c->x86_coreid_bits. Does nothing without
 * CONFIG_SMP or when that CPUID leaf is not available.
 */
static void early_init_hygon_mc(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	unsigned int ecx, shift;

	/* Leaf 0x80000008 is needed to detect multiple cores. */
	if (c->extended_cpuid_level < 0x80000008)
		return;

	ecx = cpuid_ecx(0x80000008);

	/* ECX[7:0] holds the core count minus one. */
	c->x86_max_cores = (ecx & 0xff) + 1;

	/* ECX[15:12] may report the core ID width directly ... */
	shift = (ecx >> 12) & 0xF;

	/* ... if not, use the smallest width that covers every core. */
	if (!shift) {
		for (; (1 << shift) < c->x86_max_cores; shift++)
			;
	}

	c->x86_coreid_bits = shift;
#endif
}
200 | ||
201 | static void bsp_init_hygon(struct cpuinfo_x86 *c) | |
202 | { | |
203 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { | |
204 | u64 val; | |
205 | ||
206 | rdmsrl(MSR_K7_HWCR, val); | |
207 | if (!(val & BIT(24))) | |
208 | pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n"); | |
209 | } | |
210 | ||
211 | if (cpu_has(c, X86_FEATURE_MWAITX)) | |
212 | use_mwaitx_delay(); | |
213 | ||
214 | if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { | |
215 | u32 ecx; | |
216 | ||
217 | ecx = cpuid_ecx(0x8000001e); | |
218 | __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; | |
219 | } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { | |
220 | u64 value; | |
221 | ||
222 | rdmsrl(MSR_FAM10H_NODE_ID, value); | |
223 | __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; | |
224 | } | |
225 | ||
226 | if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && | |
227 | !boot_cpu_has(X86_FEATURE_VIRT_SSBD)) { | |
228 | /* | |
229 | * Try to cache the base value so further operations can | |
230 | * avoid RMW. If that faults, do not enable SSBD. | |
231 | */ | |
232 | if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { | |
233 | setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); | |
234 | setup_force_cpu_cap(X86_FEATURE_SSBD); | |
235 | x86_amd_ls_cfg_ssbd_mask = 1ULL << 10; | |
236 | } | |
237 | } | |
238 | } | |
239 | ||
240 | static void early_init_hygon(struct cpuinfo_x86 *c) | |
241 | { | |
242 | u32 dummy; | |
243 | ||
244 | early_init_hygon_mc(c); | |
245 | ||
246 | set_cpu_cap(c, X86_FEATURE_K8); | |
247 | ||
248 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); | |
249 | ||
250 | /* | |
251 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate | |
252 | * with P/T states and does not stop in deep C-states | |
253 | */ | |
254 | if (c->x86_power & (1 << 8)) { | |
255 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | |
256 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | |
257 | } | |
258 | ||
259 | /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ | |
260 | if (c->x86_power & BIT(12)) | |
261 | set_cpu_cap(c, X86_FEATURE_ACC_POWER); | |
262 | ||
263 | /* Bit 14 indicates the Runtime Average Power Limit interface. */ | |
264 | if (c->x86_power & BIT(14)) | |
265 | set_cpu_cap(c, X86_FEATURE_RAPL); | |
266 | ||
267 | #ifdef CONFIG_X86_64 | |
268 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | |
269 | #endif | |
270 | ||
271 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) | |
272 | /* | |
273 | * ApicID can always be treated as an 8-bit value for Hygon APIC So, we | |
274 | * can safely set X86_FEATURE_EXTD_APICID unconditionally. | |
275 | */ | |
276 | if (boot_cpu_has(X86_FEATURE_APIC)) | |
277 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | |
278 | #endif | |
279 | ||
280 | /* | |
281 | * This is only needed to tell the kernel whether to use VMCALL | |
282 | * and VMMCALL. VMMCALL is never executed except under virt, so | |
283 | * we can set it unconditionally. | |
284 | */ | |
285 | set_cpu_cap(c, X86_FEATURE_VMMCALL); | |
286 | ||
287 | hygon_get_topology_early(c); | |
288 | } | |
289 | ||
290 | static void init_hygon(struct cpuinfo_x86 *c) | |
291 | { | |
292 | early_init_hygon(c); | |
293 | ||
294 | /* | |
295 | * Bit 31 in normal CPUID used for nonstandard 3DNow ID; | |
296 | * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway | |
297 | */ | |
298 | clear_cpu_cap(c, 0*32+31); | |
299 | ||
300 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | |
301 | ||
302 | /* get apicid instead of initial apic id from cpuid */ | |
303 | c->apicid = hard_smp_processor_id(); | |
304 | ||
305 | set_cpu_cap(c, X86_FEATURE_ZEN); | |
306 | set_cpu_cap(c, X86_FEATURE_CPB); | |
307 | ||
308 | cpu_detect_cache_sizes(c); | |
309 | ||
310 | hygon_detect_cmp(c); | |
311 | hygon_get_topology(c); | |
312 | srat_detect_node(c); | |
313 | ||
314 | init_hygon_cacheinfo(c); | |
315 | ||
316 | if (cpu_has(c, X86_FEATURE_XMM2)) { | |
317 | /* | |
318 | * Use LFENCE for execution serialization. On families which | |
319 | * don't have that MSR, LFENCE is already serializing. | |
320 | * msr_set_bit() uses the safe accessors, too, even if the MSR | |
321 | * is not present. | |
322 | */ | |
323 | msr_set_bit(MSR_F10H_DECFG, | |
324 | MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); | |
325 | ||
326 | /* A serializing LFENCE stops RDTSC speculation */ | |
327 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | |
328 | } | |
329 | ||
330 | /* | |
331 | * Hygon processors have APIC timer running in deep C states. | |
332 | */ | |
333 | set_cpu_cap(c, X86_FEATURE_ARAT); | |
334 | ||
335 | /* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */ | |
336 | if (!cpu_has(c, X86_FEATURE_XENPV)) | |
337 | set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); | |
338 | ||
339 | check_null_seg_clears_base(c); | |
340 | } | |
341 | ||
342 | static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c) | |
343 | { | |
344 | u32 ebx, eax, ecx, edx; | |
345 | u16 mask = 0xfff; | |
346 | ||
347 | if (c->extended_cpuid_level < 0x80000006) | |
348 | return; | |
349 | ||
350 | cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | |
351 | ||
352 | tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; | |
353 | tlb_lli_4k[ENTRIES] = ebx & mask; | |
354 | ||
355 | /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ | |
356 | if (!((eax >> 16) & mask)) | |
357 | tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff; | |
358 | else | |
359 | tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; | |
360 | ||
361 | /* a 4M entry uses two 2M entries */ | |
362 | tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; | |
363 | ||
364 | /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ | |
365 | if (!(eax & mask)) { | |
366 | cpuid(0x80000005, &eax, &ebx, &ecx, &edx); | |
367 | tlb_lli_2m[ENTRIES] = eax & 0xff; | |
368 | } else | |
369 | tlb_lli_2m[ENTRIES] = eax & mask; | |
370 | ||
371 | tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; | |
372 | } | |
373 | ||
374 | static const struct cpu_dev hygon_cpu_dev = { | |
375 | .c_vendor = "Hygon", | |
376 | .c_ident = { "HygonGenuine" }, | |
377 | .c_early_init = early_init_hygon, | |
378 | .c_detect_tlb = cpu_detect_tlb_hygon, | |
379 | .c_bsp_init = bsp_init_hygon, | |
380 | .c_init = init_hygon, | |
381 | .c_x86_vendor = X86_VENDOR_HYGON, | |
382 | }; | |
383 | ||
384 | cpu_dev_register(hygon_cpu_dev); |