powerpc/syscall: Rename syscall_64.c into interrupt.c
author    Christophe Leroy <christophe.leroy@csgroup.eu>
          Mon, 8 Feb 2021 15:10:27 +0000 (15:10 +0000)
committer Michael Ellerman <mpe@ellerman.id.au>
          Thu, 11 Feb 2021 12:35:10 +0000 (23:35 +1100)
syscall_64.c will be reused almost as is for PPC32.

As this file also contains functions to handle other types
of interrupts, rename it to interrupt.c

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/cddc2deaa8f049d3ec419738e69804934919b935.1612796617.git.christophe.leroy@csgroup.eu
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/interrupt.c [new file with mode: 0644]
arch/powerpc/kernel/syscall_64.c [deleted file]

index fe2ef598e2ead0a5f429cd30a90e0edd48e22664..b7a82276d569753714dcc884b616fd33771a654c 100644 (file)
@@ -49,7 +49,7 @@ obj-y                         := cputable.o syscalls.o \
                                   hw_breakpoint_constraints.o
 obj-y                          += ptrace/
 obj-$(CONFIG_PPC64)            += setup_64.o \
-                                  paca.o nvram_64.o note.o syscall_64.o
+                                  paca.o nvram_64.o note.o interrupt.o
 obj-$(CONFIG_COMPAT)           += sys_ppc32.o signal_32.o
 obj-$(CONFIG_VDSO32)           += vdso32/
 obj-$(CONFIG_PPC_WATCHDOG)     += watchdog.o
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
new file mode 100644 (file)
index 0000000..d6be4f9
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/context_tracking.h>
+#include <linux/err.h>
+#include <asm/asm-prototypes.h>
+#include <asm/kup.h>
+#include <asm/cputime.h>
+#include <asm/interrupt.h>
+#include <asm/hw_irq.h>
+#include <asm/kprobes.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/unistd.h>
+
+typedef long (*syscall_fn)(long, long, long, long, long, long);
+
+/* Has to run notrace because it is entered not completely "reconciled" */
+notrace long system_call_exception(long r3, long r4, long r5,
+                                  long r6, long r7, long r8,
+                                  unsigned long r0, struct pt_regs *regs)
+{
+       syscall_fn f;
+
+       if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+               BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+       CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
+       user_exit_irqoff();
+
+       trace_hardirqs_off(); /* finish reconciling */
+
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+               BUG_ON(!(regs->msr & MSR_RI));
+       BUG_ON(!(regs->msr & MSR_PR));
+       BUG_ON(!FULL_REGS(regs));
+       BUG_ON(regs->softe != IRQS_ENABLED);
+
+#ifdef CONFIG_PPC_PKEY
+       if (mmu_has_feature(MMU_FTR_PKEY)) {
+               unsigned long amr, iamr;
+               bool flush_needed = false;
+               /*
+                * When entering from userspace we mostly have the AMR/IAMR
+                * different from kernel default values. Hence don't compare.
+                */
+               amr = mfspr(SPRN_AMR);
+               iamr = mfspr(SPRN_IAMR);
+               regs->amr  = amr;
+               regs->iamr = iamr;
+               if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
+                       mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+                       flush_needed = true;
+               }
+               if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
+                       mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+                       flush_needed = true;
+               }
+               if (flush_needed)
+                       isync();
+       } else
+#endif
+               kuap_check_amr();
+
+       account_cpu_user_entry();
+
+       account_stolen_time();
+
+       /*
+        * This is not required for the syscall exit path, but makes the
+        * stack frame look nicer. If this was initialised in the first stack
+        * frame, or if the unwinder was taught the first stack frame always
+        * returns to user with IRQS_ENABLED, this store could be avoided!
+        */
+       regs->softe = IRQS_ENABLED;
+
+       local_irq_enable();
+
+       if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
+               if (unlikely(regs->trap == 0x7ff0)) {
+                       /* Unsupported scv vector */
+                       _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+                       return regs->gpr[3];
+               }
+               /*
+                * We use the return value of do_syscall_trace_enter() as the
+                * syscall number. If the syscall was rejected for any reason
+                * do_syscall_trace_enter() returns an invalid syscall number
+                * and the test against NR_syscalls will fail and the return
+                * value to be used is in regs->gpr[3].
+                */
+               r0 = do_syscall_trace_enter(regs);
+               if (unlikely(r0 >= NR_syscalls))
+                       return regs->gpr[3];
+               r3 = regs->gpr[3];
+               r4 = regs->gpr[4];
+               r5 = regs->gpr[5];
+               r6 = regs->gpr[6];
+               r7 = regs->gpr[7];
+               r8 = regs->gpr[8];
+
+       } else if (unlikely(r0 >= NR_syscalls)) {
+               if (unlikely(regs->trap == 0x7ff0)) {
+                       /* Unsupported scv vector */
+                       _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+                       return regs->gpr[3];
+               }
+               return -ENOSYS;
+       }
+
+       /* May be faster to do array_index_nospec? */
+       barrier_nospec();
+
+       if (unlikely(is_32bit_task())) {
+               f = (void *)compat_sys_call_table[r0];
+
+               r3 &= 0x00000000ffffffffULL;
+               r4 &= 0x00000000ffffffffULL;
+               r5 &= 0x00000000ffffffffULL;
+               r6 &= 0x00000000ffffffffULL;
+               r7 &= 0x00000000ffffffffULL;
+               r8 &= 0x00000000ffffffffULL;
+
+       } else {
+               f = (void *)sys_call_table[r0];
+       }
+
+       return f(r3, r4, r5, r6, r7, r8);
+}
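
For readers tracing the dispatch above: a minimal user-space sketch of the same convention, where the syscall number indexes a table of handlers that all take six long arguments whether they use them or not. The table and the sys_nop/sys_add handlers are hypothetical stand-ins for sys_call_table, not kernel code, and the bound check mirrors the NR_syscalls test (minus the barrier_nospec() speculation hardening, which has no user-space analogue here).

#include <stdio.h>

typedef long (*syscall_fn)(long, long, long, long, long, long);

/* hypothetical handlers; unused arguments mirror the fixed 6-arg convention */
static long sys_nop(long a, long b, long c, long d, long e, long f)
{
	(void)a; (void)b; (void)c; (void)d; (void)e; (void)f;
	return 0;
}

static long sys_add(long a, long b, long c, long d, long e, long f)
{
	(void)c; (void)d; (void)e; (void)f;
	return a + b;
}

/* stand-in for sys_call_table */
static const syscall_fn table[] = { sys_nop, sys_add };

int main(void)
{
	unsigned long r0 = 1;	/* the syscall number arrives in GPR r0 */

	if (r0 >= sizeof(table) / sizeof(table[0]))
		return 1;	/* models the NR_syscalls bound check */

	printf("%ld\n", table[r0](2, 3, 0, 0, 0, 0));	/* prints 5 */
	return 0;
}
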
+
+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ *
+ * This should be called with local irqs disabled, but if they were previously
+ * enabled when the interrupt handler returns (indicating a process-context /
+ * synchronous interrupt) then irqs_enabled should be true.
+ */
+static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
+{
+       /* This must be done with RI=1 because tracing may touch vmaps */
+       trace_hardirqs_on();
+
+       /* This pattern matches prep_irq_for_idle */
+       if (clear_ri)
+               __hard_EE_RI_disable();
+       else
+               __hard_irq_disable();
+       if (unlikely(lazy_irq_pending_nocheck())) {
+               /* Took an interrupt, may have more exit work to do. */
+               if (clear_ri)
+                       __hard_RI_enable();
+               trace_hardirqs_off();
+               local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+               return false;
+       }
+       local_paca->irq_happened = 0;
+       irq_soft_mask_set(IRQS_ENABLED);
+
+       return true;
+}
+
+static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
+{
+       if (__prep_irq_for_enabled_exit(clear_ri))
+               return true;
+
+       /*
+        * Must replay pending soft-masked interrupts now. Don't just
+        * local_irq_enable(); local_irq_disable(); because if we are
+        * returning from an asynchronous interrupt here, another one
+        * might hit after irqs are enabled, and it would exit via this
+        * same path allowing another to fire, and so on unbounded.
+        *
+        * If interrupts were enabled when this interrupt exited,
+        * indicating a process context (synchronous) interrupt,
+        * local_irq_enable/disable can be used, which will enable
+        * interrupts rather than keeping them masked (unclear how
+        * much benefit this is over just replaying for all cases,
+        * because we immediately disable again, so all we're really
+        * doing is allowing hard interrupts to execute directly for
+        * a very small time, rather than being masked and replayed).
+        */
+       if (irqs_enabled) {
+               local_irq_enable();
+               local_irq_disable();
+       } else {
+               replay_soft_interrupts();
+       }
+
+       return false;
+}
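
The comment above is the heart of this helper: a failed exit must replay pending work and try again, because simply re-enabling interrupts could let exits nest without bound. A toy model of that retry contract, with a plain counter standing in for lazy_irq_pending_nocheck() and the replay:

#include <stdbool.h>
#include <stdio.h>

static int pending = 2;		/* pretend two interrupts arrive during exit */

/* models __prep_irq_for_enabled_exit(): the exit fails while work is pending */
static bool try_exit(void)
{
	if (pending) {
		pending--;	/* models replaying one soft-masked interrupt */
		return false;
	}
	return true;
}

int main(void)
{
	int replays = 0;

	/* models the caller's obligation: on failure, replay and try again */
	while (!try_exit())
		replays++;

	printf("exited cleanly after %d replays\n", replays);
	return 0;
}
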
+
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+                                          struct pt_regs *regs,
+                                          long scv)
+{
+       unsigned long *ti_flagsp = &current_thread_info()->flags;
+       unsigned long ti_flags;
+       unsigned long ret = 0;
+
+       CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+       kuap_check_amr();
+
+       regs->result = r3;
+
+       /* Check whether the syscall is issued inside a restartable sequence */
+       rseq_syscall(regs);
+
+       ti_flags = *ti_flagsp;
+
+       if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
+               if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+                       r3 = -r3;
+                       regs->ccr |= 0x10000000; /* Set SO bit in CR */
+               }
+       }
+
+       if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+               if (ti_flags & _TIF_RESTOREALL)
+                       ret = _TIF_RESTOREALL;
+               else
+                       regs->gpr[3] = r3;
+               clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+       } else {
+               regs->gpr[3] = r3;
+       }
+
+       if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+               do_syscall_trace_leave(regs);
+               ret |= _TIF_RESTOREALL;
+       }
+
+       local_irq_disable();
+
+again:
+       ti_flags = READ_ONCE(*ti_flagsp);
+       while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+               local_irq_enable();
+               if (ti_flags & _TIF_NEED_RESCHED) {
+                       schedule();
+               } else {
+                       /*
+                        * SIGPENDING must restore signal handler function
+                        * argument GPRs, and some non-volatiles (e.g., r1).
+                        * Restore all for now. This could be made lighter.
+                        */
+                       if (ti_flags & _TIF_SIGPENDING)
+                               ret |= _TIF_RESTOREALL;
+                       do_notify_resume(regs, ti_flags);
+               }
+               local_irq_disable();
+               ti_flags = READ_ONCE(*ti_flagsp);
+       }
+
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+               if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+                               unlikely((ti_flags & _TIF_RESTORE_TM))) {
+                       restore_tm_state(regs);
+               } else {
+                       unsigned long mathflags = MSR_FP;
+
+                       if (cpu_has_feature(CPU_FTR_VSX))
+                               mathflags |= MSR_VEC | MSR_VSX;
+                       else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+                               mathflags |= MSR_VEC;
+
+                       /*
+                        * If userspace MSR has all available FP bits set,
+                        * then they are live and no need to restore. If not,
+                        * it means the regs were given up and restore_math
+                        * may decide to restore them (to avoid taking an FP
+                        * fault).
+                        */
+                       if ((regs->msr & mathflags) != mathflags)
+                               restore_math(regs);
+               }
+       }
+
+       user_enter_irqoff();
+
+       /* scv need not set RI=0 because SRRs are not used */
+       if (unlikely(!__prep_irq_for_enabled_exit(!scv))) {
+               user_exit_irqoff();
+               local_irq_enable();
+               local_irq_disable();
+               goto again;
+       }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       local_paca->tm_scratch = regs->msr;
+#endif
+
+       account_cpu_user_exit();
+
+#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
+       /*
+        * We do this at the end so that we do context switch with KERNEL AMR
+        */
+       kuap_user_restore(regs);
+#endif
+       return ret;
+}
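
Two user-visible return conventions meet in this function: plain sc reports errors by setting CR0.SO and negating r3 into a positive errno, while scv skips that block and returns Linux-style negative errno directly. A sketch of how caller-side wrappers might fold both back to one convention (the decode_* helpers are illustrative, not libc code):

#include <errno.h>
#include <stdio.h>

#define CR0_SO 0x10000000UL	/* the same bit the code above sets in regs->ccr */

/* models a wrapper around plain "sc": SO flags an error and r3 holds a
 * positive errno, so fold back to the usual negative-errno convention */
static long decode_sc(unsigned long cr, long r3)
{
	return (cr & CR0_SO) ? -r3 : r3;
}

/* models "scv": negative errno comes back directly, no CR bit involved */
static long decode_scv(long r3)
{
	return r3;
}

int main(void)
{
	/* pretend the kernel reported ENOSYS through each convention */
	printf("sc : %ld\n", decode_sc(CR0_SO, ENOSYS));	/* -ENOSYS */
	printf("scv: %ld\n", decode_scv(-ENOSYS));		/* -ENOSYS */
	return 0;
}
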
+
+#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+{
+#ifdef CONFIG_PPC_BOOK3E
+       struct thread_struct *ts = &current->thread;
+#endif
+       unsigned long *ti_flagsp = &current_thread_info()->flags;
+       unsigned long ti_flags;
+       unsigned long flags;
+       unsigned long ret = 0;
+
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S))
+               BUG_ON(!(regs->msr & MSR_RI));
+       BUG_ON(!(regs->msr & MSR_PR));
+       BUG_ON(!FULL_REGS(regs));
+       BUG_ON(regs->softe != IRQS_ENABLED);
+       CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+       /*
+        * We don't need to restore AMR on the way back to userspace for KUAP.
+        * AMR can only have been unlocked if we interrupted the kernel.
+        */
+       kuap_check_amr();
+
+       local_irq_save(flags);
+
+again:
+       ti_flags = READ_ONCE(*ti_flagsp);
+       while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+               local_irq_enable(); /* returning to user: may enable */
+               if (ti_flags & _TIF_NEED_RESCHED) {
+                       schedule();
+               } else {
+                       if (ti_flags & _TIF_SIGPENDING)
+                               ret |= _TIF_RESTOREALL;
+                       do_notify_resume(regs, ti_flags);
+               }
+               local_irq_disable();
+               ti_flags = READ_ONCE(*ti_flagsp);
+       }
+
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+               if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+                               unlikely((ti_flags & _TIF_RESTORE_TM))) {
+                       restore_tm_state(regs);
+               } else {
+                       unsigned long mathflags = MSR_FP;
+
+                       if (cpu_has_feature(CPU_FTR_VSX))
+                               mathflags |= MSR_VEC | MSR_VSX;
+                       else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+                               mathflags |= MSR_VEC;
+
+                       /* See above restore_math comment */
+                       if ((regs->msr & mathflags) != mathflags)
+                               restore_math(regs);
+               }
+       }
+
+       user_enter_irqoff();
+
+       if (unlikely(!__prep_irq_for_enabled_exit(true))) {
+               user_exit_irqoff();
+               local_irq_enable();
+               local_irq_disable();
+               goto again;
+       }
+
+#ifdef CONFIG_PPC_BOOK3E
+       if (unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
+               /*
+                * Check to see if the dbcr0 register is set up to debug.
+                * Use the internal debug mode bit to do this.
+                */
+               mtmsr(mfmsr() & ~MSR_DE);
+               mtspr(SPRN_DBCR0, ts->debug.dbcr0);
+               mtspr(SPRN_DBSR, -1);
+       }
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       local_paca->tm_scratch = regs->msr;
+#endif
+
+       account_cpu_user_exit();
+
+       /*
+        * We do this at the end so that we do context switch with KERNEL AMR
+        */
+       kuap_user_restore(regs);
+       return ret;
+}
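
Both user-exit paths share the loop pattern above: re-read the TIF flags with interrupts disabled and keep working until none remain, since schedule() or signal delivery can raise new flags mid-exit. A self-contained model with stand-in flag bits and stub handlers:

#include <stdio.h>

#define TIF_NEED_RESCHED	(1UL << 0)	/* stand-ins for the _TIF_* bits */
#define TIF_SIGPENDING		(1UL << 1)

static unsigned long ti_flags = TIF_NEED_RESCHED | TIF_SIGPENDING;

static void schedule_stub(void)      { ti_flags &= ~TIF_NEED_RESCHED; }
static void notify_resume_stub(void) { ti_flags &= ~TIF_SIGPENDING; }

int main(void)
{
	/* models the loop above: handle one kind of work per pass and
	 * re-read the flags, because a handler may have raised new work */
	while (ti_flags & (TIF_NEED_RESCHED | TIF_SIGPENDING)) {
		/* interrupts would be enabled here so the work can sleep */
		if (ti_flags & TIF_NEED_RESCHED)
			schedule_stub();
		else
			notify_resume_stub();
		/* ...and disabled again before the next flag check */
	}
	printf("no user work pending, safe to return\n");
	return 0;
}
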
+
+void unrecoverable_exception(struct pt_regs *regs);
+void preempt_schedule_irq(void);
+
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+{
+       unsigned long *ti_flagsp = &current_thread_info()->flags;
+       unsigned long flags;
+       unsigned long ret = 0;
+       unsigned long amr;
+
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S) && unlikely(!(regs->msr & MSR_RI)))
+               unrecoverable_exception(regs);
+       BUG_ON(regs->msr & MSR_PR);
+       BUG_ON(!FULL_REGS(regs));
+       /*
+        * CT_WARN_ON comes here via program_check_exception,
+        * so avoid recursion.
+        */
+       if (TRAP(regs) != 0x700)
+               CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+       amr = kuap_get_and_check_amr();
+
+       if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
+               clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
+               ret = 1;
+       }
+
+       local_irq_save(flags);
+
+       if (regs->softe == IRQS_ENABLED) {
+               /* Returning to a kernel context with local irqs enabled. */
+               WARN_ON_ONCE(!(regs->msr & MSR_EE));
+again:
+               if (IS_ENABLED(CONFIG_PREEMPT)) {
+                       /* Return to preemptible kernel context */
+                       if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
+                               if (preempt_count() == 0)
+                                       preempt_schedule_irq();
+                       }
+               }
+
+               if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
+                       goto again;
+       } else {
+               /* Returning to a kernel context with local irqs disabled. */
+               __hard_EE_RI_disable();
+               if (regs->msr & MSR_EE)
+                       local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+       }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       local_paca->tm_scratch = regs->msr;
+#endif
+
+       /*
+        * Don't want to mfspr(SPRN_AMR) here, because this comes after mtmsr,
+        * which would cause Read-After-Write stalls. Hence, we take the AMR
+        * value from the check above.
+        */
+       kuap_kernel_restore(regs, amr);
+
+       return ret;
+}
+#endif
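
In the kernel-exit path, the CONFIG_PREEMPT branch makes the preemption gate explicit: reschedule only if _TIF_NEED_RESCHED is set and the interrupted context was preemptible (preempt_count() == 0). A stand-alone model of that two-condition gate, with stand-ins for both inputs:

#include <stdbool.h>
#include <stdio.h>

#define TIF_NEED_RESCHED (1UL << 0)	/* stand-in for the real flag bit */

static unsigned long ti_flags = TIF_NEED_RESCHED;
static int preempt_count_stub = 1;	/* non-zero: inside a critical section */

/* models the gate above: both conditions must hold before rescheduling */
static bool should_preempt(void)
{
	return (ti_flags & TIF_NEED_RESCHED) && preempt_count_stub == 0;
}

int main(void)
{
	printf("preemptible with count=1: %d\n", should_preempt());	/* 0 */
	preempt_count_stub = 0;
	printf("preemptible with count=0: %d\n", should_preempt());	/* 1 */
	return 0;
}
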
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
deleted file mode 100644 (file)
index d6be4f9..0000000
--- a/arch/powerpc/kernel/syscall_64.c
+++ /dev/null
@@ -1,479 +0,0 @@