]>
Commit | Line | Data |
---|---|---|
55190f88 BH |
1 | /* |
2 | * PowerNV setup code. | |
3 | * | |
4 | * Copyright 2011 IBM Corp. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #undef DEBUG | |
13 | ||
14 | #include <linux/cpu.h> | |
15 | #include <linux/errno.h> | |
16 | #include <linux/sched.h> | |
17 | #include <linux/kernel.h> | |
18 | #include <linux/tty.h> | |
19 | #include <linux/reboot.h> | |
20 | #include <linux/init.h> | |
21 | #include <linux/console.h> | |
22 | #include <linux/delay.h> | |
23 | #include <linux/irq.h> | |
24 | #include <linux/seq_file.h> | |
25 | #include <linux/of.h> | |
26a2056e | 26 | #include <linux/of_fdt.h> |
55190f88 BH |
27 | #include <linux/interrupt.h> |
28 | #include <linux/bug.h> | |
cd15b048 | 29 | #include <linux/pci.h> |
fb5153d0 | 30 | #include <linux/cpufreq.h> |
55190f88 BH |
31 | |
32 | #include <asm/machdep.h> | |
33 | #include <asm/firmware.h> | |
34 | #include <asm/xics.h> | |
243e2511 | 35 | #include <asm/xive.h> |
daea1175 | 36 | #include <asm/opal.h> |
13906db6 | 37 | #include <asm/kexec.h> |
b2a80878 | 38 | #include <asm/smp.h> |
ad7b3c45 | 39 | #include <asm/setup.h> |
c508a5c5 | 40 | #include <asm/security_features.h> |
55190f88 BH |
41 | |
42 | #include "powernv.h" | |
43 | ||
c508a5c5 ME |
44 | |
45 | static bool fw_feature_is(const char *state, const char *name, | |
46 | struct device_node *fw_features) | |
47 | { | |
48 | struct device_node *np; | |
49 | bool rc = false; | |
50 | ||
51 | np = of_get_child_by_name(fw_features, name); | |
52 | if (np) { | |
53 | rc = of_property_read_bool(np, state); | |
54 | of_node_put(np); | |
55 | } | |
56 | ||
57 | return rc; | |
58 | } | |
59 | ||
60 | static void init_fw_feat_flags(struct device_node *np) | |
61 | { | |
62 | if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) | |
63 | security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); | |
64 | ||
65 | if (fw_feature_is("enabled", "fw-bcctrl-serialized", np)) | |
66 | security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); | |
67 | ||
68 | if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) | |
69 | security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); | |
70 | ||
71 | if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np)) | |
72 | security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); | |
73 | ||
74 | if (fw_feature_is("enabled", "fw-l1d-thread-split", np)) | |
75 | security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); | |
76 | ||
77 | if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) | |
78 | security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); | |
79 | ||
80 | /* | |
81 | * The features below are enabled by default, so we instead look to see | |
82 | * if firmware has *disabled* them, and clear them if so. | |
83 | */ | |
84 | if (fw_feature_is("disabled", "speculation-policy-favor-security", np)) | |
85 | security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); | |
86 | ||
87 | if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np)) | |
88 | security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); | |
89 | ||
90 | if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np)) | |
91 | security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); | |
92 | ||
93 | if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np)) | |
94 | security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); | |
95 | } | |
96 | ||
ad7b3c45 AM |
97 | static void pnv_setup_rfi_flush(void) |
98 | { | |
99 | struct device_node *np, *fw_features; | |
100 | enum l1d_flush_type type; | |
81f2291b | 101 | int enable; |
ad7b3c45 AM |
102 | |
103 | /* Default to fallback in case fw-features are not available */ | |
104 | type = L1D_FLUSH_FALLBACK; | |
81f2291b | 105 | enable = 1; |
ad7b3c45 AM |
106 | |
107 | np = of_find_node_by_name(NULL, "ibm,opal"); | |
108 | fw_features = of_get_child_by_name(np, "fw-features"); | |
109 | of_node_put(np); | |
110 | ||
111 | if (fw_features) { | |
c508a5c5 ME |
112 | init_fw_feat_flags(fw_features); |
113 | ||
ad7b3c45 AM |
114 | np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); |
115 | if (np && of_property_read_bool(np, "enabled")) | |
116 | type = L1D_FLUSH_MTTRIG; | |
117 | ||
118 | of_node_put(np); | |
119 | ||
120 | np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); | |
121 | if (np && of_property_read_bool(np, "enabled")) | |
122 | type = L1D_FLUSH_ORI; | |
123 | ||
124 | of_node_put(np); | |
125 | ||
81f2291b ME |
126 | /* Enable unless firmware says NOT to */ |
127 | enable = 2; | |
ad7b3c45 | 128 | np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); |
81f2291b ME |
129 | if (np && of_property_read_bool(np, "disabled")) |
130 | enable--; | |
ad7b3c45 AM |
131 | |
132 | of_node_put(np); | |
133 | ||
134 | np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); | |
81f2291b ME |
135 | if (np && of_property_read_bool(np, "disabled")) |
136 | enable--; | |
ad7b3c45 | 137 | |
d5f5451a ME |
138 | np = of_get_child_by_name(fw_features, "speculation-policy-favor-security"); |
139 | if (np && of_property_read_bool(np, "disabled")) | |
140 | enable = 0; | |
141 | ||
ad7b3c45 AM |
142 | of_node_put(np); |
143 | of_node_put(fw_features); | |
144 | } | |
145 | ||
81f2291b | 146 | setup_rfi_flush(type, enable > 0); |
ad7b3c45 AM |
147 | } |
148 | ||
55190f88 BH |
149 | static void __init pnv_setup_arch(void) |
150 | { | |
4817fc32 AB |
151 | set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); |
152 | ||
ad7b3c45 AM |
153 | pnv_setup_rfi_flush(); |
154 | ||
55190f88 BH |
155 | /* Initialize SMP */ |
156 | pnv_smp_init(); | |
157 | ||
61305a96 BH |
158 | /* Setup PCI */ |
159 | pnv_pci_init(); | |
55190f88 | 160 | |
628daa8d BH |
161 | /* Setup RTC and NVRAM callbacks */ |
162 | if (firmware_has_feature(FW_FEATURE_OPAL)) | |
163 | opal_nvram_init(); | |
55190f88 BH |
164 | |
165 | /* Enable NAP mode */ | |
166 | powersave_nap = 1; | |
167 | ||
168 | /* XXX PMCS */ | |
169 | } | |
170 | ||
f2d57694 | 171 | static void __init pnv_init(void) |
55190f88 | 172 | { |
3fafe9c2 BH |
173 | /* |
174 | * Initialize the LPC bus now so that legacy serial | |
175 | * ports can be found on it | |
176 | */ | |
177 | opal_lpc_init(); | |
178 | ||
daea1175 BH |
179 | #ifdef CONFIG_HVC_OPAL |
180 | if (firmware_has_feature(FW_FEATURE_OPAL)) | |
181 | hvc_opal_init_early(); | |
182 | else | |
183 | #endif | |
184 | add_preferred_console("hvc", 0, NULL); | |
55190f88 BH |
185 | } |
186 | ||
187 | static void __init pnv_init_IRQ(void) | |
188 | { | |
243e2511 BH |
189 | /* Try using a XIVE if available, otherwise use a XICS */ |
190 | if (!xive_native_init()) | |
191 | xics_init(); | |
55190f88 BH |
192 | |
193 | WARN_ON(!ppc_md.get_irq); | |
194 | } | |
195 | ||
196 | static void pnv_show_cpuinfo(struct seq_file *m) | |
197 | { | |
198 | struct device_node *root; | |
199 | const char *model = ""; | |
200 | ||
201 | root = of_find_node_by_path("/"); | |
202 | if (root) | |
203 | model = of_get_property(root, "model", NULL); | |
204 | seq_printf(m, "machine\t\t: PowerNV %s\n", model); | |
e4d54f71 SS |
205 | if (firmware_has_feature(FW_FEATURE_OPAL)) |
206 | seq_printf(m, "firmware\t: OPAL\n"); | |
14a43e69 BH |
207 | else |
208 | seq_printf(m, "firmware\t: BML\n"); | |
55190f88 | 209 | of_node_put(root); |
3a4c2601 AK |
210 | if (radix_enabled()) |
211 | seq_printf(m, "MMU\t\t: Radix\n"); | |
212 | else | |
213 | seq_printf(m, "MMU\t\t: Hash\n"); | |
55190f88 BH |
214 | } |
215 | ||
2196c6f1 VH |
/*
 * Common preparation shared by pnv_restart() and pnv_power_off(). The three
 * steps are order-sensitive and run exactly as listed.
 */
static void pnv_prepare_going_down(void)
{
	/*
	 * Stop all notifiers coming from OPAL; interrupts cannot be
	 * serviced from here on anyway.
	 */
	opal_event_shutdown();

	/* Soft-disable interrupts */
	local_irq_disable();

	/*
	 * Hand the secondary CPUs back to firmware if a flash update is
	 * pending, otherwise we get all sorts of error messages about
	 * CPUs being stuck and so on. As a side effect this also
	 * hard-disables interrupts, so beyond this point the kernel is
	 * effectively dead.
	 */
	opal_flash_term_callback();
}
236 | ||
ec27329f | 237 | static void __noreturn pnv_restart(char *cmd) |
55190f88 | 238 | { |
ec27329f BH |
239 | long rc = OPAL_BUSY; |
240 | ||
2196c6f1 | 241 | pnv_prepare_going_down(); |
e8e71fa4 | 242 | |
ec27329f BH |
243 | while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { |
244 | rc = opal_cec_reboot(); | |
245 | if (rc == OPAL_BUSY_EVENT) | |
246 | opal_poll_events(NULL); | |
247 | else | |
248 | mdelay(10); | |
249 | } | |
250 | for (;;) | |
251 | opal_poll_events(NULL); | |
55190f88 BH |
252 | } |
253 | ||
ec27329f | 254 | static void __noreturn pnv_power_off(void) |
55190f88 | 255 | { |
ec27329f BH |
256 | long rc = OPAL_BUSY; |
257 | ||
2196c6f1 | 258 | pnv_prepare_going_down(); |
e8e71fa4 | 259 | |
ec27329f BH |
260 | while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { |
261 | rc = opal_cec_power_down(0); | |
262 | if (rc == OPAL_BUSY_EVENT) | |
263 | opal_poll_events(NULL); | |
264 | else | |
265 | mdelay(10); | |
266 | } | |
267 | for (;;) | |
268 | opal_poll_events(NULL); | |
55190f88 BH |
269 | } |
270 | ||
ec27329f | 271 | static void __noreturn pnv_halt(void) |
55190f88 | 272 | { |
ec27329f | 273 | pnv_power_off(); |
55190f88 BH |
274 | } |
275 | ||
/*
 * Progress hook, installed as .progress in the powernv machine description.
 * Intentionally does nothing; both arguments are ignored.
 */
static void pnv_progress(char *s, unsigned short hex)
{
	/* no-op */
}
279 | ||
73ed148a BH |
/*
 * Machine shutdown hook, installed as .machine_shutdown in the powernv
 * machine description.
 */
static void pnv_shutdown(void)
{
	/* First let the PCI code clear up the IODA tables */
	pnv_pci_shutdown();

	/*
	 * Then quiesce OPAL: unregister every OPAL interrupt so none fires
	 * while we kexec, and make sure any operation that may still be
	 * doing DMA (dump retrieval, for instance) has completed.
	 */
	opal_shutdown();
}
292 | ||
da665885 | 293 | #ifdef CONFIG_KEXEC_CORE |
298b34d7 BH |
294 | static void pnv_kexec_wait_secondaries_down(void) |
295 | { | |
296 | int my_cpu, i, notified = -1; | |
297 | ||
298 | my_cpu = get_cpu(); | |
299 | ||
300 | for_each_online_cpu(i) { | |
301 | uint8_t status; | |
1b70386c | 302 | int64_t rc, timeout = 1000; |
298b34d7 BH |
303 | |
304 | if (i == my_cpu) | |
305 | continue; | |
306 | ||
307 | for (;;) { | |
308 | rc = opal_query_cpu_status(get_hard_smp_processor_id(i), | |
309 | &status); | |
310 | if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) | |
311 | break; | |
312 | barrier(); | |
313 | if (i != notified) { | |
314 | printk(KERN_INFO "kexec: waiting for cpu %d " | |
315 | "(physical %d) to enter OPAL\n", | |
316 | i, paca[i].hw_cpu_id); | |
317 | notified = i; | |
318 | } | |
1b70386c SMJ |
319 | |
320 | /* | |
321 | * On crash secondaries might be unreachable or hung, | |
322 | * so timeout if we've waited too long | |
323 | * */ | |
324 | mdelay(1); | |
325 | if (timeout-- == 0) { | |
326 | printk(KERN_ERR "kexec: timed out waiting for " | |
327 | "cpu %d (physical %d) to enter OPAL\n", | |
328 | i, paca[i].hw_cpu_id); | |
329 | break; | |
330 | } | |
298b34d7 BH |
331 | } |
332 | } | |
333 | } | |
334 | ||
628daa8d | 335 | static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) |
55190f88 | 336 | { |
1c0eaf0f BH |
337 | u64 reinit_flags; |
338 | ||
243e2511 BH |
339 | if (xive_enabled()) |
340 | xive_kexec_teardown_cpu(secondary); | |
341 | else | |
342 | xics_kexec_teardown_cpu(secondary); | |
13906db6 | 343 | |
e4d54f71 | 344 | /* On OPAL, we return all CPUs to firmware */ |
e4d54f71 | 345 | if (!firmware_has_feature(FW_FEATURE_OPAL)) |
298b34d7 BH |
346 | return; |
347 | ||
348 | if (secondary) { | |
349 | /* Return secondary CPUs to firmware on OPAL v3 */ | |
13906db6 BH |
350 | mb(); |
351 | get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; | |
352 | mb(); | |
353 | ||
354 | /* Return the CPU to OPAL */ | |
355 | opal_return_cpu(); | |
298b34d7 BH |
356 | } else { |
357 | /* Primary waits for the secondaries to have reached OPAL */ | |
358 | pnv_kexec_wait_secondaries_down(); | |
e72bb8a5 | 359 | |
243e2511 BH |
360 | /* Switch XIVE back to emulation mode */ |
361 | if (xive_enabled()) | |
362 | xive_shutdown(); | |
363 | ||
e72bb8a5 SMJ |
364 | /* |
365 | * We might be running as little-endian - now that interrupts | |
366 | * are disabled, reset the HILE bit to big-endian so we don't | |
367 | * take interrupts in the wrong endian later | |
1c0eaf0f BH |
368 | * |
369 | * We reinit to enable both radix and hash on P9 to ensure | |
370 | * the mode used by the next kernel is always supported. | |
e72bb8a5 | 371 | */ |
1c0eaf0f BH |
372 | reinit_flags = OPAL_REINIT_CPUS_HILE_BE; |
373 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | |
374 | reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX | | |
375 | OPAL_REINIT_CPUS_MMU_HASH; | |
376 | opal_reinit_cpus(reinit_flags); | |
13906db6 | 377 | } |
55190f88 | 378 | } |
da665885 | 379 | #endif /* CONFIG_KEXEC_CORE */ |
55190f88 | 380 | |
6d97d7a2 AB |
381 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
/*
 * Memory block size in bytes for memory hotplug: 1GB when radix is enabled,
 * 256MB otherwise.
 */
static unsigned long pnv_memory_block_size(void)
{
	/*
	 * On radix the kernel linear region is mapped with 1GB large pages,
	 * so for memory hot unplug to work the block size must be at least
	 * that big.
	 */
	if (radix_enabled())
		return 1UL << 30;

	return 256UL << 20;
}
394 | #endif | |
395 | ||
628daa8d | 396 | static void __init pnv_setup_machdep_opal(void) |
55190f88 | 397 | { |
628daa8d | 398 | ppc_md.get_boot_time = opal_get_boot_time; |
628daa8d | 399 | ppc_md.restart = pnv_restart; |
9178ba29 | 400 | pm_power_off = pnv_power_off; |
628daa8d | 401 | ppc_md.halt = pnv_halt; |
ed79ba9e | 402 | ppc_md.machine_check_exception = opal_machine_check; |
55672ecf | 403 | ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; |
0869b6fd MS |
404 | ppc_md.hmi_exception_early = opal_hmi_exception_early; |
405 | ppc_md.handle_hmi_exception = opal_handle_hmi_exception; | |
55190f88 BH |
406 | } |
407 | ||
55190f88 BH |
408 | static int __init pnv_probe(void) |
409 | { | |
406b0b6a | 410 | if (!of_machine_is_compatible("ibm,powernv")) |
55190f88 BH |
411 | return 0; |
412 | ||
628daa8d BH |
413 | if (firmware_has_feature(FW_FEATURE_OPAL)) |
414 | pnv_setup_machdep_opal(); | |
628daa8d | 415 | |
55190f88 BH |
416 | pr_debug("PowerNV detected !\n"); |
417 | ||
f2d57694 BH |
418 | pnv_init(); |
419 | ||
55190f88 BH |
420 | return 1; |
421 | } | |
422 | ||
fb5153d0 GS |
423 | /* |
424 | * Returns the cpu frequency for 'cpu' in Hz. This is used by | |
425 | * /proc/cpuinfo | |
426 | */ | |
e51df2c1 | 427 | static unsigned long pnv_get_proc_freq(unsigned int cpu) |
fb5153d0 GS |
428 | { |
429 | unsigned long ret_freq; | |
430 | ||
431 | ret_freq = cpufreq_quick_get(cpu) * 1000ul; | |
432 | ||
433 | /* | |
434 | * If the backend cpufreq driver does not exist, | |
435 | * then fallback to old way of reporting the clockrate. | |
436 | */ | |
437 | if (!ret_freq) | |
438 | ret_freq = ppc_proc_freq; | |
439 | return ret_freq; | |
440 | } | |
441 | ||
55190f88 BH |
442 | define_machine(powernv) { |
443 | .name = "PowerNV", | |
444 | .probe = pnv_probe, | |
628daa8d | 445 | .setup_arch = pnv_setup_arch, |
55190f88 BH |
446 | .init_IRQ = pnv_init_IRQ, |
447 | .show_cpuinfo = pnv_show_cpuinfo, | |
fb5153d0 | 448 | .get_proc_freq = pnv_get_proc_freq, |
55190f88 | 449 | .progress = pnv_progress, |
73ed148a | 450 | .machine_shutdown = pnv_shutdown, |
5593e303 | 451 | .power_save = NULL, |
55190f88 | 452 | .calibrate_decr = generic_calibrate_decr, |
da665885 | 453 | #ifdef CONFIG_KEXEC_CORE |
55190f88 BH |
454 | .kexec_cpu_down = pnv_kexec_cpu_down, |
455 | #endif | |
6d97d7a2 AB |
456 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
457 | .memory_block_size = pnv_memory_block_size, | |
458 | #endif | |
55190f88 | 459 | }; |