git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
xtensa: support coprocessors on SMP
authorMax Filippov <jcmvbkbc@gmail.com>
Fri, 15 Apr 2022 10:05:31 +0000 (03:05 -0700)
committerMax Filippov <jcmvbkbc@gmail.com>
Mon, 2 May 2022 02:51:23 +0000 (19:51 -0700)
Current coprocessor support on xtensa only works correctly on
uniprocessor configurations. Make it work on SMP too and keep it lazy.

Make coprocessor_owner array per-CPU and move it to struct exc_table for
easy access from the fast_coprocessor exception handler. Allow task to
have live coprocessors only on single CPU, record this CPU number in the
struct thread_info::cp_owner_cpu. Change struct thread_info::cpenable
meaning to be 'coprocessors live on cp_owner_cpu'.
Introduce a C-level coprocessor exception handler that flushes and
releases live coprocessors of the task taking the 'coprocessor disabled'
exception, and call it from the fast_coprocessor handler when the task
has live coprocessors on another CPU.
Make coprocessor_flush_all and coprocessor_release_all work correctly
when called from any CPU by sending an IPI to the cp_owner_cpu. Add
function coprocessor_flush_release_all to do flush followed by release
atomically. Add function local_coprocessors_flush_release_all to flush
and release all coprocessors on the local CPU and use it to flush
coprocessor contexts from the CPU that goes offline.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
arch/xtensa/include/asm/coprocessor.h
arch/xtensa/include/asm/thread_info.h
arch/xtensa/include/asm/traps.h
arch/xtensa/kernel/asm-offsets.c
arch/xtensa/kernel/coprocessor.S
arch/xtensa/kernel/entry.S
arch/xtensa/kernel/process.c
arch/xtensa/kernel/ptrace.c
arch/xtensa/kernel/signal.c
arch/xtensa/kernel/smp.c
arch/xtensa/kernel/traps.c

index a360efced7e7df6e0a161ff5b1f39972b714c6c5..3b1a0d5d2169dccc8c22b4baa0958f1fb85ca6db 100644 (file)
@@ -142,10 +142,12 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
 typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
        __attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
 
-extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
+struct thread_info;
 void coprocessor_flush(struct thread_info *ti, int cp_index);
 void coprocessor_release_all(struct thread_info *ti);
 void coprocessor_flush_all(struct thread_info *ti);
+void coprocessor_flush_release_all(struct thread_info *ti);
+void local_coprocessors_flush_release_all(void);
 
 #endif /* XTENSA_HAVE_COPROCESSORS */
 
index f6fcbba1d02fcd3e725d169fa1fc8391cab7f67c..52974317a6b6f5d6423aca9fb25d80a7873158c5 100644 (file)
@@ -52,12 +52,17 @@ struct thread_info {
        __u32                   cpu;            /* current CPU */
        __s32                   preempt_count;  /* 0 => preemptable,< 0 => BUG*/
 
-       unsigned long           cpenable;
 #if XCHAL_HAVE_EXCLUSIVE
        /* result of the most recent exclusive store */
        unsigned long           atomctl8;
 #endif
 
+       /*
+        * If i-th bit is set then coprocessor state is loaded into the
+        * coprocessor i on CPU cp_owner_cpu.
+        */
+       unsigned long           cpenable;
+       u32                     cp_owner_cpu;
        /* Allocate storage for extra user states and coprocessor states. */
 #if XTENSA_HAVE_COPROCESSORS
        xtregs_coprocessor_t    xtregs_cp;
index 514376eff58cfbaa2ebc7add7a680bb521a9fa50..6f74ccc0c7eadc6a184132f643ea8f380ae29742 100644 (file)
@@ -27,6 +27,10 @@ struct exc_table {
        void *fixup;
        /* For passing a parameter to fixup */
        void *fixup_param;
+#if XTENSA_HAVE_COPROCESSORS
+       /* Pointers to owner struct thread_info */
+       struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
+#endif
        /* Fast user exception handlers */
        void *fast_user_handler[EXCCAUSE_N];
        /* Fast kernel exception handlers */
@@ -35,6 +39,8 @@ struct exc_table {
        xtensa_exception_handler *default_handler[EXCCAUSE_N];
 };
 
+DECLARE_PER_CPU(struct exc_table, exc_table);
+
 xtensa_exception_handler *
 __init trap_set_handler(int cause, xtensa_exception_handler *handler);
 
index 37278e2785fb0e0344e22fb0ecfdc6fbe6700f1d..e3b9cf4c22899ea31f93682298574a74912ec5f7 100644 (file)
@@ -91,10 +91,12 @@ int main(void)
        /* struct thread_info (offset from start_struct) */
        DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
        DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
-       DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
 #if XCHAL_HAVE_EXCLUSIVE
        DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
 #endif
+       DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
+       DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
+       DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
 #if XTENSA_HAVE_COPROCESSORS
        DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
        DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@@ -137,6 +139,10 @@ int main(void)
        DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
        DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
        DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
+#if XTENSA_HAVE_COPROCESSORS
+       DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
+              offsetof(struct exc_table, coprocessor_owner));
+#endif
        DEFINE(EXC_TABLE_FAST_USER,
               offsetof(struct exc_table, fast_user_handler));
        DEFINE(EXC_TABLE_FAST_KERNEL,
index 95412409c49eb88ba3eda38d597ee26d7aa800a1..ef33e76e07d835bc527b24f33fabfd5fe4abb03c 100644 (file)
 #include <asm/current.h>
 #include <asm/regs.h>
 
+/*
+ * Rules for coprocessor state manipulation on SMP:
+ *
+ * - a task may have live coprocessors only on one CPU.
+ *
+ * - whether coprocessor context of task T is live on some CPU is
+ *   denoted by T's thread_info->cpenable.
+ *
+ * - non-zero thread_info->cpenable means that thread_info->cp_owner_cpu
+ *   is valid in the T's thread_info. Zero thread_info->cpenable means that
+ *   coprocessor context is valid in the T's thread_info.
+ *
+ * - if a coprocessor context of task T is live on CPU X, only CPU X changes
+ *   T's thread_info->cpenable, cp_owner_cpu and coprocessor save area.
+ *   This is done by making sure that for the task T with live coprocessor
+ *   on CPU X cpenable SR is 0 when T runs on any other CPU Y.
+ *   When fast_coprocessor exception is taken on CPU Y it goes to the
+ *   C-level do_coprocessor that uses IPI to make CPU X flush T's coprocessors.
+ */
+
 #if XTENSA_HAVE_COPROCESSORS
 
 /*
 
 ENTRY(fast_coprocessor)
 
+       s32i    a3, a2, PT_AREG3
+
+#ifdef CONFIG_SMP
+       /*
+        * Check if any coprocessor context is live on another CPU
+        * and if so go through the C-level coprocessor exception handler
+        * to flush it to memory.
+        */
+       GET_THREAD_INFO (a0, a2)
+       l32i    a3, a0, THREAD_CPENABLE
+       beqz    a3, .Lload_local
+
+       /*
+        * Pairs with smp_wmb in local_coprocessor_release_all
+        * and with both memws below.
+        */
+       memw
+       l32i    a3, a0, THREAD_CPU
+       l32i    a0, a0, THREAD_CP_OWNER_CPU
+       beq     a0, a3, .Lload_local
+
+       rsr     a0, ps
+       l32i    a3, a2, PT_AREG3
+       bbci.l  a0, PS_UM_BIT, 1f
+       call0   user_exception
+1:     call0   kernel_exception
+#endif
+
        /* Save remaining registers a1-a3 and SAR */
 
-       s32i    a3, a2, PT_AREG3
+.Lload_local:
        rsr     a3, sar
        s32i    a1, a2, PT_AREG1
        s32i    a3, a2, PT_SAR
@@ -117,6 +165,9 @@ ENTRY(fast_coprocessor)
        s32i    a5, a1, PT_AREG5
        s32i    a6, a1, PT_AREG6
        s32i    a7, a1, PT_AREG7
+       s32i    a8, a1, PT_AREG8
+       s32i    a9, a1, PT_AREG9
+       s32i    a10, a1, PT_AREG10
 
        /* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */
 
@@ -139,51 +190,66 @@ ENTRY(fast_coprocessor)
        addx8   a7, a3, a7
        addx4   a7, a3, a7
 
-       /* Retrieve previous owner. (a3 still holds CP number) */
+       /* Retrieve previous owner (a8). */
 
-       movi    a0, coprocessor_owner   # list of owners
+       rsr     a0, excsave1            # exc_table
        addx4   a0, a3, a0              # entry for CP
-       l32i    a4, a0, 0
+       l32i    a8, a0, EXC_TABLE_COPROCESSOR_OWNER
 
-       beqz    a4, 1f                  # skip 'save' if no previous owner
+       /* Set new owner (a9). */
 
-       /* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
+       GET_THREAD_INFO (a9, a1)
+       l32i    a4, a9, THREAD_CPU
+       s32i    a9, a0, EXC_TABLE_COPROCESSOR_OWNER
+       s32i    a4, a9, THREAD_CP_OWNER_CPU
 
-       l32i    a5, a4, THREAD_CPENABLE
-       xor     a5, a5, a2              # (1 << cp-id) still in a2
-       s32i    a5, a4, THREAD_CPENABLE
+       /*
+        * Enable coprocessor for the new owner. (a2 = 1 << CP number)
+        * This can be done before loading context into the coprocessor.
+        */
+       l32i    a4, a9, THREAD_CPENABLE
+       or      a4, a4, a2
 
        /*
-        * Get context save area and call save routine.
-        * (a4 still holds previous owner (thread_info), a3 CP number)
+        * Make sure THREAD_CP_OWNER_CPU is in memory before updating
+        * THREAD_CPENABLE
         */
+       memw                            # (2)
+       s32i    a4, a9, THREAD_CPENABLE
 
-       l32i    a2, a7, CP_REGS_TAB_OFFSET
-       l32i    a3, a7, CP_REGS_TAB_SAVE
-       add     a2, a2, a4
-       callx0  a3
+       beqz    a8, 1f                  # skip 'save' if no previous owner
 
-       /* Note that only a0 and a1 were preserved. */
+       /* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
 
-       rsr     a3, exccause
-       addi    a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
-       movi    a0, coprocessor_owner
-       addx4   a0, a3, a0
+       l32i    a10, a8, THREAD_CPENABLE
+       xor     a10, a10, a2
 
-       /* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */
+       /* Get context save area and call save routine. */
 
-1:     GET_THREAD_INFO (a4, a1)
-       s32i    a4, a0, 0
+       l32i    a2, a7, CP_REGS_TAB_OFFSET
+       l32i    a3, a7, CP_REGS_TAB_SAVE
+       add     a2, a2, a8
+       callx0  a3
 
+       /*
+        * Make sure coprocessor context and THREAD_CP_OWNER_CPU are in memory
+        * before updating THREAD_CPENABLE
+        */
+       memw                            # (3)
+       s32i    a10, a8, THREAD_CPENABLE
+1:
        /* Get context save area and call load routine. */
 
        l32i    a2, a7, CP_REGS_TAB_OFFSET
        l32i    a3, a7, CP_REGS_TAB_LOAD
-       add     a2, a2, a4
+       add     a2, a2, a9
        callx0  a3
 
        /* Restore all registers and return from exception handler. */
 
+       l32i    a10, a1, PT_AREG10
+       l32i    a9, a1, PT_AREG9
+       l32i    a8, a1, PT_AREG8
        l32i    a7, a1, PT_AREG7
        l32i    a6, a1, PT_AREG6
        l32i    a5, a1, PT_AREG5
@@ -233,12 +299,4 @@ ENTRY(coprocessor_flush)
 
 ENDPROC(coprocessor_flush)
 
-       .data
-
-ENTRY(coprocessor_owner)
-
-       .fill XCHAL_CP_MAX, 4, 0
-
-END(coprocessor_owner)
-
 #endif /* XTENSA_HAVE_COPROCESSORS */
index f2c789a5a92a7fe8fe8e964d2b98d0d891e17cdd..3255d4f6184415300e441780c4da4ff18a376e20 100644 (file)
@@ -2071,8 +2071,16 @@ ENTRY(_switch_to)
 
 #if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
        l32i    a3, a5, THREAD_CPENABLE
-       xsr     a3, cpenable
-       s32i    a3, a4, THREAD_CPENABLE
+#ifdef CONFIG_SMP
+       beqz    a3, 1f
+       memw                    # pairs with memw (2) in fast_coprocessor
+       l32i    a6, a5, THREAD_CP_OWNER_CPU
+       l32i    a7, a5, THREAD_CPU
+       beq     a6, a7, 1f      # load 0 into CPENABLE if current CPU is not the owner
+       movi    a3, 0
+1:
+#endif
+       wsr     a3, cpenable
 #endif
 
 #if XCHAL_HAVE_EXCLUSIVE
index e8bfbca5f0014faa0910728aa39707d14fb201af..7e38292dd07abff7d016c7249715c73f827e15c0 100644 (file)
@@ -47,6 +47,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/regs.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/traps.h>
 
 extern void ret_from_fork(void);
 extern void ret_from_kernel_thread(void);
@@ -63,52 +64,114 @@ EXPORT_SYMBOL(__stack_chk_guard);
 
 #if XTENSA_HAVE_COPROCESSORS
 
-void coprocessor_release_all(struct thread_info *ti)
+void local_coprocessors_flush_release_all(void)
 {
-       unsigned long cpenable;
-       int i;
+       struct thread_info **coprocessor_owner;
+       struct thread_info *unique_owner[XCHAL_CP_MAX];
+       int n = 0;
+       int i, j;
 
-       /* Make sure we don't switch tasks during this operation. */
+       coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+       xtensa_set_sr(XCHAL_CP_MASK, cpenable);
 
-       preempt_disable();
+       for (i = 0; i < XCHAL_CP_MAX; i++) {
+               struct thread_info *ti = coprocessor_owner[i];
 
-       /* Walk through all cp owners and release it for the requested one. */
+               if (ti) {
+                       coprocessor_flush(ti, i);
 
-       cpenable = ti->cpenable;
+                       for (j = 0; j < n; j++)
+                               if (unique_owner[j] == ti)
+                                       break;
+                       if (j == n)
+                               unique_owner[n++] = ti;
 
-       for (i = 0; i < XCHAL_CP_MAX; i++) {
-               if (coprocessor_owner[i] == ti) {
-                       coprocessor_owner[i] = 0;
-                       cpenable &= ~(1 << i);
+                       coprocessor_owner[i] = NULL;
                }
        }
+       for (i = 0; i < n; i++) {
+               /* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+               smp_wmb();
+               unique_owner[i]->cpenable = 0;
+       }
+       xtensa_set_sr(0, cpenable);
+}
 
-       ti->cpenable = cpenable;
+static void local_coprocessor_release_all(void *info)
+{
+       struct thread_info *ti = info;
+       struct thread_info **coprocessor_owner;
+       int i;
+
+       coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+
+       /* Walk through all cp owners and release it for the requested one. */
+
+       for (i = 0; i < XCHAL_CP_MAX; i++) {
+               if (coprocessor_owner[i] == ti)
+                       coprocessor_owner[i] = NULL;
+       }
+       /* pairs with memw (1) in fast_coprocessor and memw in switch_to */
+       smp_wmb();
+       ti->cpenable = 0;
        if (ti == current_thread_info())
                xtensa_set_sr(0, cpenable);
+}
 
-       preempt_enable();
+void coprocessor_release_all(struct thread_info *ti)
+{
+       if (ti->cpenable) {
+               /* pairs with memw (2) in fast_coprocessor */
+               smp_rmb();
+               smp_call_function_single(ti->cp_owner_cpu,
+                                        local_coprocessor_release_all,
+                                        ti, true);
+       }
 }
 
-void coprocessor_flush_all(struct thread_info *ti)
+static void local_coprocessor_flush_all(void *info)
 {
-       unsigned long cpenable, old_cpenable;
+       struct thread_info *ti = info;
+       struct thread_info **coprocessor_owner;
+       unsigned long old_cpenable;
        int i;
 
-       preempt_disable();
-
-       old_cpenable = xtensa_get_sr(cpenable);
-       cpenable = ti->cpenable;
-       xtensa_set_sr(cpenable, cpenable);
+       coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
+       old_cpenable = xtensa_xsr(ti->cpenable, cpenable);
 
        for (i = 0; i < XCHAL_CP_MAX; i++) {
-               if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
+               if (coprocessor_owner[i] == ti)
                        coprocessor_flush(ti, i);
-               cpenable >>= 1;
        }
        xtensa_set_sr(old_cpenable, cpenable);
+}
 
-       preempt_enable();
+void coprocessor_flush_all(struct thread_info *ti)
+{
+       if (ti->cpenable) {
+               /* pairs with memw (2) in fast_coprocessor */
+               smp_rmb();
+               smp_call_function_single(ti->cp_owner_cpu,
+                                        local_coprocessor_flush_all,
+                                        ti, true);
+       }
+}
+
+static void local_coprocessor_flush_release_all(void *info)
+{
+       local_coprocessor_flush_all(info);
+       local_coprocessor_release_all(info);
+}
+
+void coprocessor_flush_release_all(struct thread_info *ti)
+{
+       if (ti->cpenable) {
+               /* pairs with memw (2) in fast_coprocessor */
+               smp_rmb();
+               smp_call_function_single(ti->cp_owner_cpu,
+                                        local_coprocessor_flush_release_all,
+                                        ti, true);
+       }
 }
 
 #endif
@@ -140,8 +203,7 @@ void flush_thread(void)
 {
 #if XTENSA_HAVE_COPROCESSORS
        struct thread_info *ti = current_thread_info();
-       coprocessor_flush_all(ti);
-       coprocessor_release_all(ti);
+       coprocessor_flush_release_all(ti);
 #endif
        flush_ptrace_hw_breakpoint(current);
 }
index 323c678a691ff6dc5dfd97bb340daadd0856b6c2..22cdaa6729d3e281815efc3e76c6b93ac671131c 100644 (file)
@@ -171,8 +171,7 @@ static int tie_set(struct task_struct *target,
 
 #if XTENSA_HAVE_COPROCESSORS
        /* Flush all coprocessors before we overwrite them. */
-       coprocessor_flush_all(ti);
-       coprocessor_release_all(ti);
+       coprocessor_flush_release_all(ti);
        ti->xtregs_cp.cp0 = newregs->cp0;
        ti->xtregs_cp.cp1 = newregs->cp1;
        ti->xtregs_cp.cp2 = newregs->cp2;
index 6f68649e86ba5af13bc130a03ba5c79e1b7b561e..c9ffd42db873638bb634619c66dc9711eeaebb23 100644 (file)
@@ -162,8 +162,7 @@ setup_sigcontext(struct rt_sigframe __user *frame, struct pt_regs *regs)
                return err;
 
 #if XTENSA_HAVE_COPROCESSORS
-       coprocessor_flush_all(ti);
-       coprocessor_release_all(ti);
+       coprocessor_flush_release_all(ti);
        err |= __copy_to_user(&frame->xtregs.cp, &ti->xtregs_cp,
                              sizeof (frame->xtregs.cp));
 #endif
index 1254da07ead1f42712918b90c54b45ebf83cb5bb..4dc109dd6214e27c47241ac1fcc081b3923dcfbd 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/thread_info.h>
 
 #include <asm/cacheflush.h>
+#include <asm/coprocessor.h>
 #include <asm/kdebug.h>
 #include <asm/mmu_context.h>
 #include <asm/mxregs.h>
@@ -272,6 +273,12 @@ int __cpu_disable(void)
         */
        set_cpu_online(cpu, false);
 
+#if XTENSA_HAVE_COPROCESSORS
+       /*
+        * Flush coprocessor contexts that are active on the current CPU.
+        */
+       local_coprocessors_flush_release_all();
+#endif
        /*
         * OK - migrate IRQs away from this CPU
         */
index 62c497605128459b35016d403ec63febefb65fce..138a86fbe9d70e9d99e2cc6b204f591feeab88b6 100644 (file)
@@ -57,6 +57,9 @@ static void do_nmi(struct pt_regs *regs);
 static void do_unaligned_user(struct pt_regs *regs);
 #endif
 static void do_multihit(struct pt_regs *regs);
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs);
+#endif
 static void do_debug(struct pt_regs *regs);
 
 /*
@@ -69,7 +72,8 @@ static void do_debug(struct pt_regs *regs);
 #define USER           0x02
 
 #define COPROCESSOR(x)                                                 \
-{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor }
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor },\
+{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, 0, do_coprocessor }
 
 typedef struct {
        int cause;
@@ -327,6 +331,13 @@ static void do_unaligned_user(struct pt_regs *regs)
 }
 #endif
 
+#if XTENSA_HAVE_COPROCESSORS
+static void do_coprocessor(struct pt_regs *regs)
+{
+       coprocessor_flush_release_all(current_thread_info());
+}
+#endif
+
 /* Handle debug events.
  * When CONFIG_HAVE_HW_BREAKPOINT is on this handler is called with
  * preemption disabled to avoid rescheduling and keep mapping of hardware