]> git.proxmox.com Git - mirror_ubuntu-eoan-kernel.git/blob - arch/powerpc/platforms/powernv/idle.c
Merge branch 'topic/paca' into next
[mirror_ubuntu-eoan-kernel.git] / arch / powerpc / platforms / powernv / idle.c
1 /*
2 * PowerNV cpuidle code
3 *
4 * Copyright 2015 IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12 #include <linux/types.h>
13 #include <linux/mm.h>
14 #include <linux/slab.h>
15 #include <linux/of.h>
16 #include <linux/device.h>
17 #include <linux/cpu.h>
18
19 #include <asm/firmware.h>
20 #include <asm/machdep.h>
21 #include <asm/opal.h>
22 #include <asm/cputhreads.h>
23 #include <asm/cpuidle.h>
24 #include <asm/code-patching.h>
25 #include <asm/smp.h>
26 #include <asm/runlatch.h>
27 #include <asm/dbell.h>
28
29 #include "powernv.h"
30 #include "subcore.h"
31
32 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */
33 #define MAX_STOP_STATE 0xF
34
35 #define P9_STOP_SPR_MSR 2000
36 #define P9_STOP_SPR_PSSCR 855
37
38 static u32 supported_cpuidle_states;
39
40 /*
41 * The default stop state that will be used by ppc_md.power_save
42 * function on platforms that support stop instruction.
43 */
44 static u64 pnv_default_stop_val;
45 static u64 pnv_default_stop_mask;
46 static bool default_stop_found;
47
48 /*
49 * First deep stop state. Used to figure out when to save/restore
50 * hypervisor context.
51 */
52 u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
53
54 /*
55 * psscr value and mask of the deepest stop idle state.
56 * Used when a cpu is offlined.
57 */
58 static u64 pnv_deepest_stop_psscr_val;
59 static u64 pnv_deepest_stop_psscr_mask;
60 static u64 pnv_deepest_stop_flag;
61 static bool deepest_stop_found;
62
63 static int pnv_save_sprs_for_deep_states(void)
64 {
65 int cpu;
66 int rc;
67
68 /*
69 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
70 * all cpus at boot. Get these reg values of current cpu and use the
71 * same across all cpus.
72 */
73 uint64_t lpcr_val = mfspr(SPRN_LPCR);
74 uint64_t hid0_val = mfspr(SPRN_HID0);
75 uint64_t hid1_val = mfspr(SPRN_HID1);
76 uint64_t hid4_val = mfspr(SPRN_HID4);
77 uint64_t hid5_val = mfspr(SPRN_HID5);
78 uint64_t hmeer_val = mfspr(SPRN_HMEER);
79 uint64_t msr_val = MSR_IDLE;
80 uint64_t psscr_val = pnv_deepest_stop_psscr_val;
81
82 for_each_possible_cpu(cpu) {
83 uint64_t pir = get_hard_smp_processor_id(cpu);
84 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
85
86 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
87 if (rc != 0)
88 return rc;
89
90 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
91 if (rc != 0)
92 return rc;
93
94 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
95 rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
96 if (rc)
97 return rc;
98
99 rc = opal_slw_set_reg(pir,
100 P9_STOP_SPR_PSSCR, psscr_val);
101
102 if (rc)
103 return rc;
104 }
105
106 /* HIDs are per core registers */
107 if (cpu_thread_in_core(cpu) == 0) {
108
109 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
110 if (rc != 0)
111 return rc;
112
113 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
114 if (rc != 0)
115 return rc;
116
117 /* Only p8 needs to set extra HID regiters */
118 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
119
120 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
121 if (rc != 0)
122 return rc;
123
124 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
125 if (rc != 0)
126 return rc;
127
128 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
129 if (rc != 0)
130 return rc;
131 }
132 }
133 }
134
135 return 0;
136 }
137
138 static void pnv_alloc_idle_core_states(void)
139 {
140 int i, j;
141 int nr_cores = cpu_nr_cores();
142 u32 *core_idle_state;
143
144 /*
145 * core_idle_state - The lower 8 bits track the idle state of
146 * each thread of the core.
147 *
148 * The most significant bit is the lock bit.
149 *
150 * Initially all the bits corresponding to threads_per_core
151 * are set. They are cleared when the thread enters deep idle
152 * state like sleep and winkle/stop.
153 *
154 * Initially the lock bit is cleared. The lock bit has 2
155 * purposes:
156 * a. While the first thread in the core waking up from
157 * idle is restoring core state, it prevents other
158 * threads in the core from switching to process
159 * context.
160 * b. While the last thread in the core is saving the
161 * core state, it prevents a different thread from
162 * waking up.
163 */
164 for (i = 0; i < nr_cores; i++) {
165 int first_cpu = i * threads_per_core;
166 int node = cpu_to_node(first_cpu);
167 size_t paca_ptr_array_size;
168
169 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
170 *core_idle_state = (1 << threads_per_core) - 1;
171 paca_ptr_array_size = (threads_per_core *
172 sizeof(struct paca_struct *));
173
174 for (j = 0; j < threads_per_core; j++) {
175 int cpu = first_cpu + j;
176
177 paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
178 paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
179 paca_ptrs[cpu]->thread_mask = 1 << j;
180 if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
181 continue;
182 paca_ptrs[cpu]->thread_sibling_pacas =
183 kmalloc_node(paca_ptr_array_size,
184 GFP_KERNEL, node);
185 }
186 }
187
188 update_subcore_sibling_mask();
189
190 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
191 int rc = pnv_save_sprs_for_deep_states();
192
193 if (likely(!rc))
194 return;
195
196 /*
197 * The stop-api is unable to restore hypervisor
198 * resources on wakeup from platform idle states which
199 * lose full context. So disable such states.
200 */
201 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
202 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
203 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
204
205 if (cpu_has_feature(CPU_FTR_ARCH_300) &&
206 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
207 /*
208 * Use the default stop state for CPU-Hotplug
209 * if available.
210 */
211 if (default_stop_found) {
212 pnv_deepest_stop_psscr_val =
213 pnv_default_stop_val;
214 pnv_deepest_stop_psscr_mask =
215 pnv_default_stop_mask;
216 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
217 pnv_deepest_stop_psscr_val);
218 } else { /* Fallback to snooze loop for CPU-Hotplug */
219 deepest_stop_found = false;
220 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
221 }
222 }
223 }
224 }
225
226 u32 pnv_get_supported_cpuidle_states(void)
227 {
228 return supported_cpuidle_states;
229 }
230 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
231
232 static void pnv_fastsleep_workaround_apply(void *info)
233
234 {
235 int rc;
236 int *err = info;
237
238 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
239 OPAL_CONFIG_IDLE_APPLY);
240 if (rc)
241 *err = 1;
242 }
243
244 /*
245 * Used to store fastsleep workaround state
246 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
247 * 1 - Workaround applied once, never undone.
248 */
249 static u8 fastsleep_workaround_applyonce;
250
251 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
252 struct device_attribute *attr, char *buf)
253 {
254 return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
255 }
256
257 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
258 struct device_attribute *attr, const char *buf,
259 size_t count)
260 {
261 cpumask_t primary_thread_mask;
262 int err;
263 u8 val;
264
265 if (kstrtou8(buf, 0, &val) || val != 1)
266 return -EINVAL;
267
268 if (fastsleep_workaround_applyonce == 1)
269 return count;
270
271 /*
272 * fastsleep_workaround_applyonce = 1 implies
273 * fastsleep workaround needs to be left in 'applied' state on all
274 * the cores. Do this by-
275 * 1. Patching out the call to 'undo' workaround in fastsleep exit path
276 * 2. Sending ipi to all the cores which have at least one online thread
277 * 3. Patching out the call to 'apply' workaround in fastsleep entry
278 * path
279 * There is no need to send ipi to cores which have all threads
280 * offlined, as last thread of the core entering fastsleep or deeper
281 * state would have applied workaround.
282 */
283 err = patch_instruction(
284 (unsigned int *)pnv_fastsleep_workaround_at_exit,
285 PPC_INST_NOP);
286 if (err) {
287 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
288 goto fail;
289 }
290
291 get_online_cpus();
292 primary_thread_mask = cpu_online_cores_map();
293 on_each_cpu_mask(&primary_thread_mask,
294 pnv_fastsleep_workaround_apply,
295 &err, 1);
296 put_online_cpus();
297 if (err) {
298 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
299 goto fail;
300 }
301
302 err = patch_instruction(
303 (unsigned int *)pnv_fastsleep_workaround_at_entry,
304 PPC_INST_NOP);
305 if (err) {
306 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
307 goto fail;
308 }
309
310 fastsleep_workaround_applyonce = 1;
311
312 return count;
313 fail:
314 return -EIO;
315 }
316
317 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
318 show_fastsleep_workaround_applyonce,
319 store_fastsleep_workaround_applyonce);
320
/*
 * Enter the idle state @type through power7_idle_insn(), with the run
 * latch turned off for the duration of the idle period.
 *
 * Returns the value produced by power7_idle_insn(), or 0 without
 * idling when prep_irq_for_idle_irqsoff() refuses.
 */
static unsigned long __power7_idle_type(unsigned long type)
{
	unsigned long wake_srr1 = 0;

	if (prep_irq_for_idle_irqsoff()) {
		__ppc64_runlatch_off();
		wake_srr1 = power7_idle_insn(type);
		__ppc64_runlatch_on();

		fini_irq_for_idle_irqsoff();
	}

	return wake_srr1;
}
336
/*
 * Enter the idle state @type and hand the resulting SRR1 value to
 * irq_set_pending_from_srr1().
 */
void power7_idle_type(unsigned long type)
{
	irq_set_pending_from_srr1(__power7_idle_type(type));
}
344
345 void power7_idle(void)
346 {
347 if (!powersave_nap)
348 return;
349
350 power7_idle_type(PNV_THREAD_NAP);
351 }
352
353 static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
354 unsigned long stop_psscr_mask)
355 {
356 unsigned long psscr;
357 unsigned long srr1;
358
359 if (!prep_irq_for_idle_irqsoff())
360 return 0;
361
362 psscr = mfspr(SPRN_PSSCR);
363 psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
364
365 __ppc64_runlatch_off();
366 srr1 = power9_idle_stop(psscr);
367 __ppc64_runlatch_on();
368
369 fini_irq_for_idle_irqsoff();
370
371 return srr1;
372 }
373
/*
 * Enter the stop state described by @stop_psscr_val/@stop_psscr_mask and
 * hand the resulting SRR1 value to irq_set_pending_from_srr1().
 */
void power9_idle_type(unsigned long stop_psscr_val,
		      unsigned long stop_psscr_mask)
{
	irq_set_pending_from_srr1(__power9_idle_type(stop_psscr_val,
						     stop_psscr_mask));
}
382
383 /*
384 * Used for ppc_md.power_save which needs a function with no parameters
385 */
386 void power9_idle(void)
387 {
388 power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
389 }
390
391 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
392 /*
393 * This is used in working around bugs in thread reconfiguration
394 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
395 * memory and the way that XER[SO] is checkpointed.
396 * This function forces the core into SMT4 in order by asking
397 * all other threads not to stop, and sending a message to any
398 * that are in a stop state.
399 * Must be called with preemption disabled.
400 *
401 * DO NOT call this unless cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG) is
402 * true; otherwise this function will hang the system, due to the
403 * optimization in power9_idle_stop.
404 */
405 void pnv_power9_force_smt4_catch(void)
406 {
407 int cpu, cpu0, thr;
408 int awake_threads = 1; /* this thread is awake */
409 int poke_threads = 0;
410 int need_awake = threads_per_core;
411
412 cpu = smp_processor_id();
413 cpu0 = cpu & ~(threads_per_core - 1);
414 for (thr = 0; thr < threads_per_core; ++thr) {
415 if (cpu != cpu0 + thr)
416 atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
417 }
418 /* order setting dont_stop vs testing requested_psscr */
419 mb();
420 for (thr = 0; thr < threads_per_core; ++thr) {
421 if (!paca_ptrs[cpu0+thr]->requested_psscr)
422 ++awake_threads;
423 else
424 poke_threads |= (1 << thr);
425 }
426
427 /* If at least 3 threads are awake, the core is in SMT4 already */
428 if (awake_threads < need_awake) {
429 /* We have to wake some threads; we'll use msgsnd */
430 for (thr = 0; thr < threads_per_core; ++thr) {
431 if (poke_threads & (1 << thr)) {
432 ppc_msgsnd_sync();
433 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
434 paca_ptrs[cpu0+thr]->hw_cpu_id);
435 }
436 }
437 /* now spin until at least 3 threads are awake */
438 do {
439 for (thr = 0; thr < threads_per_core; ++thr) {
440 if ((poke_threads & (1 << thr)) &&
441 !paca_ptrs[cpu0+thr]->requested_psscr) {
442 ++awake_threads;
443 poke_threads &= ~(1 << thr);
444 }
445 }
446 } while (awake_threads < need_awake);
447 }
448 }
449 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
450
451 void pnv_power9_force_smt4_release(void)
452 {
453 int cpu, cpu0, thr;
454
455 cpu = smp_processor_id();
456 cpu0 = cpu & ~(threads_per_core - 1);
457
458 /* clear all the dont_stop flags */
459 for (thr = 0; thr < threads_per_core; ++thr) {
460 if (cpu != cpu0 + thr)
461 atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
462 }
463 }
464 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
465 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
466
467 #ifdef CONFIG_HOTPLUG_CPU
468 static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
469 {
470 u64 pir = get_hard_smp_processor_id(cpu);
471
472 mtspr(SPRN_LPCR, lpcr_val);
473
474 /*
475 * Program the LPCR via stop-api only if the deepest stop state
476 * can lose hypervisor context.
477 */
478 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
479 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
480 }
481
482 /*
483 * pnv_cpu_offline: A function that puts the CPU into the deepest
484 * available platform idle state on a CPU-Offline.
485 * interrupts hard disabled and no lazy irq pending.
486 */
487 unsigned long pnv_cpu_offline(unsigned int cpu)
488 {
489 unsigned long srr1;
490 u32 idle_states = pnv_get_supported_cpuidle_states();
491 u64 lpcr_val;
492
493 /*
494 * We don't want to take decrementer interrupts while we are
495 * offline, so clear LPCR:PECE1. We keep PECE2 (and
496 * LPCR_PECE_HVEE on P9) enabled as to let IPIs in.
497 *
498 * If the CPU gets woken up by a special wakeup, ensure that
499 * the SLW engine sets LPCR with decrementer bit cleared, else
500 * the CPU will come back to the kernel due to a spurious
501 * wakeup.
502 */
503 lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
504 pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
505
506 __ppc64_runlatch_off();
507
508 if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
509 unsigned long psscr;
510
511 psscr = mfspr(SPRN_PSSCR);
512 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
513 pnv_deepest_stop_psscr_val;
514 srr1 = power9_idle_stop(psscr);
515
516 } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
517 (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
518 srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
519 } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
520 (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
521 srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
522 } else if (idle_states & OPAL_PM_NAP_ENABLED) {
523 srr1 = power7_idle_insn(PNV_THREAD_NAP);
524 } else {
525 /* This is the fallback method. We emulate snooze */
526 while (!generic_check_cpu_restart(cpu)) {
527 HMT_low();
528 HMT_very_low();
529 }
530 srr1 = 0;
531 HMT_medium();
532 }
533
534 __ppc64_runlatch_on();
535
536 /*
537 * Re-enable decrementer interrupts in LPCR.
538 *
539 * Further, we want stop states to be woken up by decrementer
540 * for non-hotplug cases. So program the LPCR via stop api as
541 * well.
542 */
543 lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
544 pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
545
546 return srr1;
547 }
548 #endif
549
550 /*
551 * Power ISA 3.0 idle initialization.
552 *
553 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
554 * Register (PSSCR) to control idle behavior.
555 *
556 * PSSCR layout:
557 * ----------------------------------------------------------
558 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
559 * ----------------------------------------------------------
560 * 0 4 41 42 43 44 48 54 56 60
561 *
562 * PSSCR key fields:
563 * Bits 0:3 - Power-Saving Level Status (PLS). This field indicates the
564 * lowest power-saving state the thread entered since stop instruction was
565 * last executed.
566 *
567 * Bit 41 - Status Disable(SD)
568 * 0 - Shows PLS entries
569 * 1 - PLS entries are all 0
570 *
571 * Bit 42 - Enable State Loss
572 * 0 - No state is lost irrespective of other fields
573 * 1 - Allows state loss
574 *
575 * Bit 43 - Exit Criterion
576 * 0 - Exit from power-save mode on any interrupt
577 * 1 - Exit from power-save mode controlled by LPCR's PECE bits
578 *
579 * Bits 44:47 - Power-Saving Level Limit
580 * This limits the power-saving level that can be entered into.
581 *
582 * Bits 60:63 - Requested Level
583 * Used to specify which power-saving level must be entered on executing
584 * stop instruction
585 */
586
587 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
588 {
589 int err = 0;
590
591 /*
592 * psscr_mask == 0xf indicates an older firmware.
593 * Set remaining fields of psscr to the default values.
594 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
595 */
596 if (*psscr_mask == 0xf) {
597 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
598 *psscr_mask = PSSCR_HV_DEFAULT_MASK;
599 return err;
600 }
601
602 /*
603 * New firmware is expected to set the psscr_val bits correctly.
604 * Validate that the following invariants are correctly maintained by
605 * the new firmware.
606 * - ESL bit value matches the EC bit value.
607 * - ESL bit is set for all the deep stop states.
608 */
609 if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
610 err = ERR_EC_ESL_MISMATCH;
611 } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
612 GET_PSSCR_ESL(*psscr_val) == 0) {
613 err = ERR_DEEP_STATE_ESL_MISMATCH;
614 }
615
616 return err;
617 }
618
619 /*
620 * pnv_arch300_idle_init: Initializes the default idle state, first
621 * deep idle state and deepest idle state on
622 * ISA 3.0 CPUs.
623 *
624 * @np: /ibm,opal/power-mgt device node
625 * @flags: cpu-idle-state-flags array
626 * @dt_idle_states: Number of idle state entries
627 * Returns 0 on success
628 */
629 static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
630 int dt_idle_states)
631 {
632 u64 *psscr_val = NULL;
633 u64 *psscr_mask = NULL;
634 u32 *residency_ns = NULL;
635 u64 max_residency_ns = 0;
636 int rc = 0, i;
637
638 psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
639 psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
640 residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
641 GFP_KERNEL);
642
643 if (!psscr_val || !psscr_mask || !residency_ns) {
644 rc = -1;
645 goto out;
646 }
647
648 if (of_property_read_u64_array(np,
649 "ibm,cpu-idle-state-psscr",
650 psscr_val, dt_idle_states)) {
651 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
652 rc = -1;
653 goto out;
654 }
655
656 if (of_property_read_u64_array(np,
657 "ibm,cpu-idle-state-psscr-mask",
658 psscr_mask, dt_idle_states)) {
659 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
660 rc = -1;
661 goto out;
662 }
663
664 if (of_property_read_u32_array(np,
665 "ibm,cpu-idle-state-residency-ns",
666 residency_ns, dt_idle_states)) {
667 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
668 rc = -1;
669 goto out;
670 }
671
672 /*
673 * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
674 * and the pnv_default_stop_{val,mask}.
675 *
676 * pnv_first_deep_stop_state should be set to the first stop
677 * level to cause hypervisor state loss.
678 *
679 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
680 * the deepest stop state.
681 *
682 * pnv_default_stop_{val,mask} should be set to values corresponding to
683 * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
684 */
685 pnv_first_deep_stop_state = MAX_STOP_STATE;
686 for (i = 0; i < dt_idle_states; i++) {
687 int err;
688 u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
689
690 if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
691 (pnv_first_deep_stop_state > psscr_rl))
692 pnv_first_deep_stop_state = psscr_rl;
693
694 err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
695 flags[i]);
696 if (err) {
697 report_invalid_psscr_val(psscr_val[i], err);
698 continue;
699 }
700
701 if (max_residency_ns < residency_ns[i]) {
702 max_residency_ns = residency_ns[i];
703 pnv_deepest_stop_psscr_val = psscr_val[i];
704 pnv_deepest_stop_psscr_mask = psscr_mask[i];
705 pnv_deepest_stop_flag = flags[i];
706 deepest_stop_found = true;
707 }
708
709 if (!default_stop_found &&
710 (flags[i] & OPAL_PM_STOP_INST_FAST)) {
711 pnv_default_stop_val = psscr_val[i];
712 pnv_default_stop_mask = psscr_mask[i];
713 default_stop_found = true;
714 }
715 }
716
717 if (unlikely(!default_stop_found)) {
718 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
719 } else {
720 ppc_md.power_save = power9_idle;
721 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
722 pnv_default_stop_val, pnv_default_stop_mask);
723 }
724
725 if (unlikely(!deepest_stop_found)) {
726 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
727 } else {
728 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
729 pnv_deepest_stop_psscr_val,
730 pnv_deepest_stop_psscr_mask);
731 }
732
733 pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
734 pnv_first_deep_stop_state);
735 out:
736 kfree(psscr_val);
737 kfree(psscr_mask);
738 kfree(residency_ns);
739 return rc;
740 }
741
742 /*
743 * Probe device tree for supported idle states
744 */
745 static void __init pnv_probe_idle_states(void)
746 {
747 struct device_node *np;
748 int dt_idle_states;
749 u32 *flags = NULL;
750 int i;
751
752 np = of_find_node_by_path("/ibm,opal/power-mgt");
753 if (!np) {
754 pr_warn("opal: PowerMgmt Node not found\n");
755 goto out;
756 }
757 dt_idle_states = of_property_count_u32_elems(np,
758 "ibm,cpu-idle-state-flags");
759 if (dt_idle_states < 0) {
760 pr_warn("cpuidle-powernv: no idle states found in the DT\n");
761 goto out;
762 }
763
764 flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL);
765
766 if (of_property_read_u32_array(np,
767 "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
768 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
769 goto out;
770 }
771
772 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
773 if (pnv_power9_idle_init(np, flags, dt_idle_states))
774 goto out;
775 }
776
777 for (i = 0; i < dt_idle_states; i++)
778 supported_cpuidle_states |= flags[i];
779
780 out:
781 kfree(flags);
782 }
783 static int __init pnv_init_idle_states(void)
784 {
785
786 supported_cpuidle_states = 0;
787
788 if (cpuidle_disable != IDLE_NO_OVERRIDE)
789 goto out;
790
791 pnv_probe_idle_states();
792
793 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
794 patch_instruction(
795 (unsigned int *)pnv_fastsleep_workaround_at_entry,
796 PPC_INST_NOP);
797 patch_instruction(
798 (unsigned int *)pnv_fastsleep_workaround_at_exit,
799 PPC_INST_NOP);
800 } else {
801 /*
802 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
803 * workaround is needed to use fastsleep. Provide sysfs
804 * control to choose how this workaround has to be applied.
805 */
806 device_create_file(cpu_subsys.dev_root,
807 &dev_attr_fastsleep_workaround_applyonce);
808 }
809
810 pnv_alloc_idle_core_states();
811
812 /*
813 * For each CPU, record its PACA address in each of it's
814 * sibling thread's PACA at the slot corresponding to this
815 * CPU's index in the core.
816 */
817 if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
818 int cpu;
819
820 pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
821 for_each_possible_cpu(cpu) {
822 int base_cpu = cpu_first_thread_sibling(cpu);
823 int idx = cpu_thread_in_core(cpu);
824 int i;
825
826 for (i = 0; i < threads_per_core; i++) {
827 int j = base_cpu + i;
828
829 paca_ptrs[j]->thread_sibling_pacas[idx] =
830 paca_ptrs[cpu];
831 }
832 }
833 }
834
835 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
836 ppc_md.power_save = power7_idle;
837
838 out:
839 return 0;
840 }
841 machine_subsys_initcall(powernv, pnv_init_idle_states);