2 * This file contains idle entry/exit functions for POWER7,
3 * POWER8 and POWER9 CPUs.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
11 #include <linux/threads.h>
12 #include <asm/processor.h>
14 #include <asm/cputable.h>
15 #include <asm/thread_info.h>
16 #include <asm/ppc_asm.h>
17 #include <asm/asm-offsets.h>
18 #include <asm/ppc-opcode.h>
19 #include <asm/hw_irq.h>
20 #include <asm/kvm_book3s_asm.h>
22 #include <asm/cpuidle.h>
23 #include <asm/book3s/64/mmu-hash.h>
29 * Use unused space in the interrupt stack to save and restore
30 * registers for winkle support.
/*
 * PSSCR EC and ESL bits, shifted right by 16 so the mask can be used as
 * the immediate operand of andis. The whole expression is parenthesized
 * so the macro expands safely inside any larger expression.
 */
44 #define PSSCR_EC_ESL_MASK_SHIFTED ((PSSCR_EC | PSSCR_ESL) >> 16)
49 * Used by threads before entering deep idle states. Saves SPRs
50 * in interrupt stack frame
54 * Note that all registers, whether per-core, per-subcore or per-thread,
55 * are saved here since any thread in the core might wake up first.
61 * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
67 ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
88 * Used by threads when the lock bit of core_idle_state is set.
89 * Threads will spin in HMT_LOW until the lock bit is cleared.
90 * r14 - pointer to core_idle_state
91 * r15 - used to load contents of core_idle_state
92 * r9 - used as a temporary variable
98 andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT
102 andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
103 bne core_idle_lock_held
107 * Pass requested state in r3:
108 * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
109 * - Requested STOP state in POWER9
111 * To check IRQ_HAPPENED in r4
115 * Address to 'rfid' to, in r5
117 _GLOBAL(pnv_powersave_common)
118 /* Use r3 to pass state nap/sleep/winkle */
119 /* NAP is a state loss, we create a regs frame on the
120 * stack, fill it up with the state we care about and
121 * stick a pointer to it in PACAR1. We really only
122 * need to save PC, some CR bits and the NV GPRs,
123 * but for now an interrupt frame will do.
127 stdu r1,-INT_FRAME_SIZE(r1)
131 /* Hard disable interrupts */
135 mtmsrd r9,1 /* hard-disable interrupts */
137 /* Check if something happened while soft-disabled */
138 lbz r0,PACAIRQHAPPENED(r13)
139 andi. r0,r0,~PACA_IRQ_HARD_DIS@l
143 addi r1,r1,INT_FRAME_SIZE
145 li r3,0 /* Return 0 (no nap) */
149 1: /* We mark irqs hard disabled as this is the state we'll
150 * be in when returning and we need to tell arch_local_irq_restore()
153 li r0,PACA_IRQ_HARD_DIS
154 stb r0,PACAIRQHAPPENED(r13)
156 /* We haven't lost state ... yet */
158 stb r0,PACA_NAPSTATELOST(r13)
160 /* Continue saving state */
169 * Go to real mode to do the nap, as required by the architecture.
170 * Also, we need to be in real mode before setting hwthread_state,
171 * because as soon as we do that, another thread can switch
172 * the MMU context to the guest.
174 LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
177 mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
182 .globl pnv_enter_arch207_idle_mode
183 pnv_enter_arch207_idle_mode:
184 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
185 /* Tell KVM we're entering idle */
186 li r4,KVM_HWTHREAD_IN_IDLE
187 /******************************************************/
188 /* N O T E W E L L ! ! ! N O T E W E L L */
189 /* The following store to HSTATE_HWTHREAD_STATE(r13) */
190 /* MUST occur in real mode, i.e. with the MMU off, */
191 /* and the MMU must stay off until we clear this flag */
192 /* and test HSTATE_HWTHREAD_REQ(r13) in the system */
193 /* reset interrupt vector in exceptions-64s.S. */
194 /* The reason is that another thread can switch the */
195 /* MMU to a guest context whenever this flag is set */
196 /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
197 /* that would potentially cause this thread to start */
198 /* executing instructions from guest memory in */
199 /* hypervisor mode, leading to a host crash or data */
200 /* corruption, or worse. */
201 /******************************************************/
202 stb r4,HSTATE_HWTHREAD_STATE(r13)
204 stb r3,PACA_THREAD_IDLE_STATE(r13)
205 cmpwi cr3,r3,PNV_THREAD_SLEEP
207 IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
210 /* Sleep or winkle */
211 lbz r7,PACA_THREAD_MASK(r13)
212 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
216 andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
217 bnel core_idle_lock_held
219 andc r15,r15,r7 /* Clear thread bit */
221 andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
224 * If cr0 = 0, then current thread is the last thread of the core entering
225 * sleep. Last thread needs to execute the hardware bug workaround code if
226 * required by the platform.
227 * Make the workaround call unconditionally here. The below branch call is
228 * patched out when the idle states are discovered if the platform does not
231 .global pnv_fastsleep_workaround_at_entry
232 pnv_fastsleep_workaround_at_entry:
233 beq fastsleep_workaround_at_entry
239 common_enter: /* common code for all the threads entering sleep or winkle */
241 IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
243 fastsleep_workaround_at_entry:
244 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
249 /* Fast sleep workaround */
252 bl opal_config_cpu_idle_state
261 bl save_sprs_to_stack
263 IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
266 * r3 - PSSCR value corresponding to the requested stop state.
269 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
270 /* Tell KVM we're entering idle */
271 li r4,KVM_HWTHREAD_IN_IDLE
272 /* DO THIS IN REAL MODE! See comment above. */
273 stb r4,HSTATE_HWTHREAD_STATE(r13)
276 * Check if we are executing the lite variant with ESL=EC=0
278 andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
279 clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
280 bne .Lhandle_esl_ec_set
281 IDLE_STATE_ENTER_SEQ(PPC_STOP)
282 li r3,0 /* Since we didn't lose state, return 0 */
287 * POWER9 DD2 can incorrectly set PMAO when waking up after a
288 * state-loss idle. Saving and restoring MMCR0 over idle is a
295 * Check if the requested state is a deep idle state.
297 LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
298 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
300 bge .Lhandle_deep_stop
301 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
304 * Entering deep idle state.
305 * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
306 * stack and enter stop
308 lbz r7,PACA_THREAD_MASK(r13)
309 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
313 andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
314 bnel core_idle_lock_held
315 andc r15,r15,r7 /* Clear thread bit */
321 bl save_sprs_to_stack
323 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
326 /* Now check if user or arch enabled NAP mode */
327 LOAD_REG_ADDRBASE(r3,powersave_nap)
328 lwz r4,ADDROFF(powersave_nap)(r3)
337 LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
338 b pnv_powersave_common
/*
 * power7_sleep: request the sleep idle state.
 * Sets up the arguments expected by pnv_powersave_common (requested
 * state in r3, address to 'rfid' to in r5) and branches to it.
 */
341 _GLOBAL(power7_sleep)
342 li r3,PNV_THREAD_SLEEP /* r3 = requested idle state */
344 LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) /* r5 = routine the common code continues at */
345 b pnv_powersave_common /* shared idle-entry path */
/*
 * power7_winkle: request the winkle idle state.
 * Sets up the arguments expected by pnv_powersave_common (requested
 * state in r3, address to 'rfid' to in r5) and branches to it.
 */
348 _GLOBAL(power7_winkle)
349 li r3,PNV_THREAD_WINKLE /* r3 = requested idle state */
351 LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) /* r5 = routine the common code continues at */
352 b pnv_powersave_common /* shared idle-entry path */
355 #define CHECK_HMI_INTERRUPT \
356 mfspr r0,SPRN_SRR1; \
357 BEGIN_FTR_SECTION_NESTED(66); \
358 rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \
359 FTR_SECTION_ELSE_NESTED(66); \
360 rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \
361 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
362 cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
364 /* Invoke opal call to handle hmi */ \
365 ld r2,PACATOC(r13); \
367 std r3,ORIG_GPR3(r1); /* Save original r3 */ \
368 li r3,0; /* NULL argument */ \
369 bl hmi_exception_realmode; \
371 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
375 * r3 - The PSSCR value corresponding to the stop state.
376 * r4 - The PSSCR mask corresponding to the stop state.
378 _GLOBAL(power9_idle_stop)
383 LOAD_REG_ADDR(r5,power_enter_stop) /* r5 = address pnv_powersave_common will 'rfid' to */
385 b pnv_powersave_common /* shared idle-entry path */
390 * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
391 * HSPRG0 will be set to the HSPRG0 value of one of the
392 * threads in this core. Thus the value we have in r13
393 * may not be this thread's paca pointer.
395 * Fortunately, the TIR remains invariant. Since this thread's
396 * paca pointer is recorded in all its sibling's paca, we can
397 * correctly recover this thread's paca pointer if we
398 * know the index of this thread in the core.
400 * This index can be obtained from the TIR.
402 * i.e, thread's position in the core = TIR.
403 * If this value is i, then this thread's paca is
404 * paca->thread_sibling_pacas[i].
406 power9_dd1_recover_paca:
409 * Since each entry in thread_sibling_pacas is 8 bytes
410 * we need to left-shift by 3 bits. Thus r4 = i * 8
413 /* Get &paca->thread_sibling_pacas[0] in r5 */
414 ld r5, PACA_SIBLING_PACA_PTRS(r13)
415 /* Load paca->thread_sibling_pacas[i] into r13 */
420 * Indicate that we have lost NVGPR state
421 * which needs to be restored from the stack.
424 stb r0,PACA_NAPSTATELOST(r13)
428 * Called from reset vector. Check whether we have woken up with
429 * hypervisor state loss. If yes, restore hypervisor state and return
430 * back to reset vector.
432 * r13 - Contents of HSPRG0
433 * cr3 - set to gt if waking up with partial/complete hypervisor state loss
435 _GLOBAL(pnv_restore_hyp_resource)
437 BEGIN_FTR_SECTION_NESTED(70)
439 bl power9_dd1_recover_paca
441 FTR_SECTION_ELSE_NESTED(70)
443 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 70)
445 * Workaround for POWER9, if we lost resources, the ERAT
446 * might have been mixed up and needs flushing. We also need
447 * to reload MMCR0 (see comment above).
456 * POWER ISA 3. Use PSSCR to determine if we
457 * are waking up from deep idle state
459 LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
460 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
464 * 0-3 bits correspond to Power-Saving Level Status
465 * which indicates the idle state we are waking up from
469 bge cr4,pnv_wakeup_tb_loss
471 * Waking up without hypervisor state loss. Return to
476 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
479 * POWER ISA 2.07 or less.
480 * Check if last bit of HSPRG0 is set. This indicates whether we are
481 * waking up from winkle.
486 /* Now that we are sure r13 is corrected, load TOC */
489 mtspr SPRN_HSPRG0,r13
491 lbz r0,PACA_THREAD_IDLE_STATE(r13)
492 cmpwi cr2,r0,PNV_THREAD_NAP
493 bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
496 * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
497 * up from nap. At this stage CR3 shouldn't contain 'gt' since that
498 * indicates we are waking with hypervisor state loss from nap.
502 blr /* Return back to System Reset vector from where
503 pnv_restore_hyp_resource was invoked */
506 * Called if waking up from idle state which can cause either partial or
507 * complete hyp state loss.
508 * In POWER8, called if waking up from fastsleep or winkle
509 * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
512 * cr3 - gt if waking up with partial/complete hypervisor state loss
513 * cr4 - gt or eq if waking up from complete hypervisor state loss.
515 _GLOBAL(pnv_wakeup_tb_loss)
518 * Before entering any idle state, the NVGPRs are saved in the stack.
519 * If there was a state loss, or PACA_NAPSTATELOST was set, then the
520 * NVGPRs are restored. If we are here, it is likely that state is lost,
521 * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
522 * here are the same as the test to restore NVGPRS:
523 * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
524 * and SRR1 test for restoring NVGPRs.
526 * We are about to clobber NVGPRs now, so set NAPSTATELOST to
527 * guarantee they will always be restored. This might be tightened
528 * with careful reading of specs (particularly for ISA300) but this
529 * is already a slow wakeup path and it's simpler to be safe.
532 stb r0,PACA_NAPSTATELOST(r13)
536 * Save SRR1 and LR in NVGPRs as they might be clobbered in
537 * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
538 * to determine the wakeup reason if we branch to kvm_start_guest. LR
539 * is required to return back to reset vector after hypervisor state
540 * restore is complete.
546 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
548 lbz r7,PACA_THREAD_MASK(r13)
549 ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
552 andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
554 * Lock bit is set in one of the 2 cases-
555 * a. In the sleep/winkle enter path, the last thread is executing
556 * fastsleep workaround code.
557 * b. In the wake up path, another thread is executing fastsleep
558 * workaround undo code or resyncing timebase or restoring context
559 * In either case loop until the lock bit is cleared.
561 bnel core_idle_lock_held
567 * cr2 - eq if first thread to wakeup in core
568 * cr3- gt if waking up with partial/complete hypervisor state loss
569 * cr4 - gt or eq if waking up from complete hypervisor state loss.
572 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
578 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
580 cmpwi r4,0 /* Check if first in subcore */
582 or r15,r15,r7 /* Set thread bit */
583 beq first_thread_in_subcore
584 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
586 or r15,r15,r7 /* Set thread bit */
587 beq cr2,first_thread_in_core
589 /* Not first thread in core or subcore to wake up */
592 first_thread_in_subcore:
594 * If waking up from sleep, subcore state is not lost. Hence
595 * skip subcore state restore
597 blt cr4,subcore_state_restored
599 /* Restore per-subcore state */
608 subcore_state_restored:
610 * Check if the thread is also the first thread in the core. If not,
611 * skip to clear_lock.
615 first_thread_in_core:
618 * First thread in the core waking up from any state which can cause
619 * partial or complete hypervisor state loss. It needs to
620 * call the fastsleep workaround code if the platform requires it.
621 * Call it unconditionally here. The below branch instruction will
622 * be patched out if the platform does not have fastsleep or does not
623 * require the workaround. Patching will be performed during the
624 * discovery of idle-states.
626 .global pnv_fastsleep_workaround_at_exit
627 pnv_fastsleep_workaround_at_exit:
628 b fastsleep_workaround_at_exit
632 * Use cr3 which indicates that we are waking up with at least partial
633 * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
636 /* Time base re-sync */
637 bl opal_resync_timebase;
639 * If waking up from sleep, per core state is not lost, skip to
645 * First thread in the core to wake up and it is waking up with
646 * complete hypervisor state loss. Restore per core hypervisor
654 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
662 andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
668 * Common to all threads.
670 * If waking up from sleep, hypervisor state is not lost. Hence
671 * skip hypervisor state restore.
673 blt cr4,hypervisor_state_restored
675 /* Waking up from winkle */
677 BEGIN_MMU_FTR_SECTION
679 END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
680 /* Restore SLB from PACA */
681 ld r8,PACA_SLBSHADOWPTR(r13)
684 li r3, SLBSHADOW_SAVEAREA
688 andis. r7,r5,SLB_ESID_V@h
695 /* Restore per thread state */
706 /* Call cur_cpu_spec->cpu_restore() */
707 LOAD_REG_ADDR(r4, cur_cpu_spec)
709 ld r12,CPU_SPEC_RESTORE(r4)
710 #ifdef PPC64_ELF_ABI_v1
716 hypervisor_state_restored:
720 blr /* Return back to System Reset vector from where
721 pnv_restore_hyp_resource was invoked */
723 fastsleep_workaround_at_exit:
726 bl opal_config_cpu_idle_state
730 * R3 here contains the value that will be returned to the caller
733 _GLOBAL(pnv_wakeup_loss)
737 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
743 addi r1,r1,INT_FRAME_SIZE
750 * R3 here contains the value that will be returned to the caller
753 _GLOBAL(pnv_wakeup_noloss)
754 lbz r0,PACA_NAPSTATELOST(r13)
759 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
764 addi r1,r1,INT_FRAME_SIZE