arch/metag/kernel/smp.c

   1 /*
   2  *  Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
   3  *
   4  *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 as
   8  * published by the Free Software Foundation.
   9  */
  10 #include <linux/atomic.h>
  11 #include <linux/completion.h>
  12 #include <linux/delay.h>
  13 #include <linux/init.h>
  14 #include <linux/spinlock.h>
  15 #include <linux/sched/mm.h>
  16 #include <linux/sched/hotplug.h>
  17 #include <linux/sched/task_stack.h>
  18 #include <linux/interrupt.h>
  19 #include <linux/cache.h>
  20 #include <linux/profile.h>
  21 #include <linux/errno.h>
  22 #include <linux/mm.h>
  23 #include <linux/err.h>
  24 #include <linux/cpu.h>
  25 #include <linux/smp.h>
  26 #include <linux/seq_file.h>
  27 #include <linux/irq.h>
  28 #include <linux/bootmem.h>
  29
  30 #include <asm/cacheflush.h>
  31 #include <asm/cachepart.h>
  32 #include <asm/core_reg.h>
  33 #include <asm/cpu.h>
  34 #include <asm/global_lock.h>
  35 #include <asm/metag_mem.h>
  36 #include <asm/mmu_context.h>
  37 #include <asm/pgtable.h>
  38 #include <asm/pgalloc.h>
  39 #include <asm/processor.h>
  40 #include <asm/setup.h>
  41 #include <asm/tlbflush.h>
  42 #include <asm/hwthread.h>
  43 #include <asm/traps.h>
  44
  45 #define SYSC_DCPART(n)  (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
  46 #define SYSC_ICPART(n)  (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
  47
  48 DECLARE_PER_CPU(PTBI, pTBI);
  49
  50 void *secondary_data_stack;
  51
  52 /*
  53  * structures for inter-processor calls
  54  * - A collection of single bit ipi messages.
  55  */
  56 struct ipi_data {
  57         spinlock_t lock;
  58         unsigned long ipi_count;
  59         unsigned long bits;
  60 };
  61
  62 static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
  63         .lock   = __SPIN_LOCK_UNLOCKED(ipi_data.lock),
  64 };
  65
  66 static DEFINE_SPINLOCK(boot_lock);
  67
  68 static DECLARE_COMPLETION(cpu_running);
  69
  70 /*
  71  * "thread" is assumed to be a valid Meta hardware thread ID.
  72  */
  73 static int boot_secondary(unsigned int thread, struct task_struct *idle)
  74 {
  75         u32 val;
  76
  77         /*
  78          * set synchronisation state between this boot processor
  79          * and the secondary one
  80          */
  81         spin_lock(&boot_lock);
  82
  83         core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
  84         core_reg_write(TXUPC_ID, 1, thread, 0);
  85
  86         /*
  87          * Give the thread privilege (PSTAT) and clear potentially problematic
  88          * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
  89          */
  90         core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);
  91
  92         /* Clear the minim enable bit. */
  93         val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
  94         core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);
  95
  96         /*
  97          * set the ThreadEnable bit (0x1) in the TXENABLE register
  98          * for the specified thread - off it goes!
  99          */
 100         val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
 101         core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);
 102
 103         /*
 104          * now the secondary core is starting up let it run its
 105          * calibrations, then wait for it to finish
 106          */
 107         spin_unlock(&boot_lock);
 108
 109         return 0;
 110 }
 111
 112 /**
 113  * describe_cachepart_change: describe a change to cache partitions.
 114  * @thread:     Hardware thread number.
 115  * @label:      Label of cache type, e.g. "dcache" or "icache".
 116  * @sz:         Total size of the cache.
 117  * @old:        Old cache partition configuration (*CPART* register).
 118  * @new:        New cache partition configuration (*CPART* register).
 119  *
 120  * If the cache partition has changed, prints a message to the log describing
 121  * those changes.
 122  */
 123 static void describe_cachepart_change(unsigned int thread, const char *label,
 124                                       unsigned int sz, unsigned int old,
 125                                       unsigned int new)
 126 {
 127         unsigned int lor1, land1, gor1, gand1;
 128         unsigned int lor2, land2, gor2, gand2;
 129         unsigned int diff = old ^ new;
 130
 131         if (!diff)
 132                 return;
 133
 134         pr_info("Thread %d: %s partition changed:", thread, label);
 135         if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) {
 136                 lor1   = (old & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
 137                 lor2   = (new & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
 138                 land1  = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
 139                 land2  = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
 140                 pr_cont(" L:%#x+%#x->%#x+%#x",
 141                         (lor1 * sz) >> 4,
 142                         ((land1 + 1) * sz) >> 4,
 143                         (lor2 * sz) >> 4,
 144                         ((land2 + 1) * sz) >> 4);
 145         }
 146         if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) {
 147                 gor1   = (old & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
 148                 gor2   = (new & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
 149                 gand1  = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
 150                 gand2  = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
 151                 pr_cont(" G:%#x+%#x->%#x+%#x",
 152                         (gor1 * sz) >> 4,
 153                         ((gand1 + 1) * sz) >> 4,
 154                         (gor2 * sz) >> 4,
 155                         ((gand2 + 1) * sz) >> 4);
 156         }
 157         if (diff & SYSC_CWRMODE_BIT)
 158                 pr_cont(" %sWR",
 159                         (new & SYSC_CWRMODE_BIT) ? "+" : "-");
 160         if (diff & SYSC_DCPART_GCON_BIT)
 161                 pr_cont(" %sGCOn",
 162                         (new & SYSC_DCPART_GCON_BIT) ? "+" : "-");
 163         pr_cont("\n");
 164 }
 165
 166 /**
 167  * setup_smp_cache: ensure cache coherency for new SMP thread.
 168  * @thread:     New hardware thread number.
 169  *
 170  * Ensures that coherency is enabled and that the threads share the same cache
 171  * partitions.
 172  */
 173 static void setup_smp_cache(unsigned int thread)
 174 {
 175         unsigned int this_thread, lflags;
 176         unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
 177         unsigned int icsz, icpart_old, icpart_new;
 178
 179         /*
 180          * Copy over the current thread's cache partition configuration to the
 181          * new thread so that they share cache partitions.
 182          */
 183         __global_lock2(lflags);
 184         this_thread = hard_processor_id();
 185         /* Share dcache partition */
 186         dcpart_this = metag_in32(SYSC_DCPART(this_thread));
 187         dcpart_old = metag_in32(SYSC_DCPART(thread));
 188         dcpart_new = dcpart_this;
 189 #if PAGE_OFFSET < LINGLOBAL_BASE
 190         /*
 191          * For the local data cache to be coherent the threads must also have
 192          * GCOn enabled.
 193          */
 194         dcpart_new |= SYSC_DCPART_GCON_BIT;
 195         metag_out32(dcpart_new, SYSC_DCPART(this_thread));
 196 #endif
 197         metag_out32(dcpart_new, SYSC_DCPART(thread));
 198         /* Share icache partition too */
 199         icpart_new = metag_in32(SYSC_ICPART(this_thread));
 200         icpart_old = metag_in32(SYSC_ICPART(thread));
 201         metag_out32(icpart_new, SYSC_ICPART(thread));
 202         __global_unlock2(lflags);
 203
 204         /*
 205          * Log if the cache partitions were altered so the user is aware of any
 206          * potential unintentional cache wastage.
 207          */
 208         dcsz = get_dcache_size();
 209         icsz = get_dcache_size();
 210         describe_cachepart_change(this_thread, "dcache", dcsz,
 211                                   dcpart_this, dcpart_new);
 212         describe_cachepart_change(thread, "dcache", dcsz,
 213                                   dcpart_old, dcpart_new);
 214         describe_cachepart_change(thread, "icache", icsz,
 215                                   icpart_old, icpart_new);
 216 }
 217
 218 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 219 {
 220         unsigned int thread = cpu_2_hwthread_id[cpu];
 221         int ret;
 222
 223         load_pgd(swapper_pg_dir, thread);
 224
 225         flush_tlb_all();
 226
 227         setup_smp_cache(thread);
 228
 229         /*
 230          * Tell the secondary CPU where to find its idle thread's stack.
 231          */
 232         secondary_data_stack = task_stack_page(idle);
 233
 234         wmb();
 235
 236         /*
 237          * Now bring the CPU into our world.
 238          */
 239         ret = boot_secondary(thread, idle);
 240         if (ret == 0) {
 241                 /*
 242                  * CPU was successfully started, wait for it
 243                  * to come online or time out.
 244                  */
 245                 wait_for_completion_timeout(&cpu_running,
 246                                             msecs_to_jiffies(1000));
 247
 248                 if (!cpu_online(cpu))
 249                         ret = -EIO;
 250         }
 251
 252         secondary_data_stack = NULL;
 253
 254         if (ret) {
 255                 pr_crit("CPU%u: processor failed to boot\n", cpu);
 256
 257                 /*
 258                  * FIXME: We need to clean up the new idle thread. --rmk
 259                  */
 260         }
 261
 262         return ret;
 263 }
 264
 265 #ifdef CONFIG_HOTPLUG_CPU
 266
 267 /*
 268  * __cpu_disable runs on the processor to be shutdown.
 269  */
 270 int __cpu_disable(void)
 271 {
 272         unsigned int cpu = smp_processor_id();
 273
 274         /*
 275          * Take this CPU offline.  Once we clear this, we can't return,
 276          * and we must not schedule until we're ready to give up the cpu.
 277          */
 278         set_cpu_online(cpu, false);
 279
 280         /*
 281          * OK - migrate IRQs away from this CPU
 282          */
 283         migrate_irqs();
 284
 285         /*
 286          * Flush user cache and TLB mappings, and then remove this CPU
 287          * from the vm mask set of all processes.
 288          */
 289         flush_cache_all();
 290         local_flush_tlb_all();
 291
 292         clear_tasks_mm_cpumask(cpu);
 293
 294         return 0;
 295 }
 296
 297 /*
 298  * called on the thread which is asking for a CPU to be shutdown -
 299  * waits until shutdown has completed, or it is timed out.
 300  */
 301 void __cpu_die(unsigned int cpu)
 302 {
 303         if (!cpu_wait_death(cpu, 1))
 304                 pr_err("CPU%u: unable to kill\n", cpu);
 305 }
 306
 307 /*
 308  * Called from the idle thread for the CPU which has been shutdown.
 309  *
 310  * Note that we do not return from this function. If this cpu is
 311  * brought online again it will need to run secondary_startup().
 312  */
 313 void cpu_die(void)
 314 {
 315         local_irq_disable();
 316         idle_task_exit();
 317         irq_ctx_exit(smp_processor_id());
 318
 319         (void)cpu_report_death();
 320
 321         asm ("XOR       TXENABLE, D0Re0,D0Re0\n");
 322 }
 323 #endif /* CONFIG_HOTPLUG_CPU */
 324
 325 /*
 326  * Called by both boot and secondaries to move global data into
 327  * per-processor storage.
 328  */
 329 void smp_store_cpu_info(unsigned int cpuid)
 330 {
 331         struct cpuinfo_metag *cpu_info = &per_cpu(cpu_data, cpuid);
 332
 333         cpu_info->loops_per_jiffy = loops_per_jiffy;
 334 }
 335
 336 /*
 337  * This is the secondary CPU boot entry.  We're using this CPUs
 338  * idle thread stack and the global page tables.
 339  */
 340 asmlinkage void secondary_start_kernel(void)
 341 {
 342         struct mm_struct *mm = &init_mm;
 343         unsigned int cpu = smp_processor_id();
 344
 345         /*
 346          * All kernel threads share the same mm context; grab a
 347          * reference and switch to it.
 348          */
 349         mmget(mm);
 350         mmgrab(mm);
 351         current->active_mm = mm;
 352         cpumask_set_cpu(cpu, mm_cpumask(mm));
 353         enter_lazy_tlb(mm, current);
 354         local_flush_tlb_all();
 355
 356         /*
 357          * TODO: Some day it might be useful for each Linux CPU to
 358          * have its own TBI structure. That would allow each Linux CPU
 359          * to run different interrupt handlers for the same IRQ
 360          * number.
 361          *
 362          * For now, simply copying the pointer to the boot CPU's TBI
 363          * structure is sufficient because we always want to run the
 364          * same interrupt handler whatever CPU takes the interrupt.
 365          */
 366         per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
 367
 368         if (!per_cpu(pTBI, cpu))
 369                 panic("No TBI found!");
 370
 371         per_cpu_trap_init(cpu);
 372         irq_ctx_init(cpu);
 373
 374         preempt_disable();
 375
 376         setup_priv();
 377
 378         notify_cpu_starting(cpu);
 379
 380         pr_info("CPU%u (thread %u): Booted secondary processor\n",
 381                 cpu, cpu_2_hwthread_id[cpu]);
 382
 383         calibrate_delay();
 384         smp_store_cpu_info(cpu);
 385
 386         /*
 387          * OK, now it's safe to let the boot CPU continue
 388          */
 389         set_cpu_online(cpu, true);
 390         complete(&cpu_running);
 391
 392         /*
 393          * Enable local interrupts.
 394          */
 395         tbi_startup_interrupt(TBID_SIGNUM_TRT);
 396         local_irq_enable();
 397
 398         /*
 399          * OK, it's off to the idle thread for us
 400          */
 401         cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 402 }
 403
 404 void __init smp_cpus_done(unsigned int max_cpus)
 405 {
 406         int cpu;
 407         unsigned long bogosum = 0;
 408
 409         for_each_online_cpu(cpu)
 410                 bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy;
 411
 412         pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 413                 num_online_cpus(),
 414                 bogosum / (500000/HZ),
 415                 (bogosum / (5000/HZ)) % 100);
 416 }
 417
 418 void __init smp_prepare_cpus(unsigned int max_cpus)
 419 {
 420         unsigned int cpu = smp_processor_id();
 421
 422         init_new_context(current, &init_mm);
 423         current_thread_info()->cpu = cpu;
 424
 425         smp_store_cpu_info(cpu);
 426         init_cpu_present(cpu_possible_mask);
 427 }
 428
 429 void __init smp_prepare_boot_cpu(void)
 430 {
 431         unsigned int cpu = smp_processor_id();
 432
 433         per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
 434
 435         if (!per_cpu(pTBI, cpu))
 436                 panic("No TBI found!");
 437 }
 438
 439 static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);
 440
 441 static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
 442 {
 443         unsigned long flags;
 444         unsigned int cpu;
 445         cpumask_t map;
 446
 447         cpumask_clear(&map);
 448         local_irq_save(flags);
 449
 450         for_each_cpu(cpu, mask) {
 451                 struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
 452
 453                 spin_lock(&ipi->lock);
 454
 455                 /*
 456                  * KICK interrupts are queued in hardware so we'll get
 457                  * multiple interrupts if we call smp_cross_call()
 458                  * multiple times for one msg. The problem is that we
 459                  * only have one bit for each message - we can't queue
 460                  * them in software.
 461                  *
 462                  * The first time through ipi_handler() we'll clear
 463                  * the msg bit, having done all the work. But when we
 464                  * return we'll get _another_ interrupt (and another,
 465                  * and another until we've handled all the queued
 466                  * KICKs). Running ipi_handler() when there's no work
 467                  * to do is bad because that's how kick handler
 468                  * chaining detects who the KICK was intended for.
 469                  * See arch/metag/kernel/kick.c for more details.
 470                  *
 471                  * So only add 'cpu' to 'map' if we haven't already
 472                  * queued a KICK interrupt for 'msg'.
 473                  */
 474                 if (!(ipi->bits & (1 << msg))) {
 475                         ipi->bits |= 1 << msg;
 476                         cpumask_set_cpu(cpu, &map);
 477                 }
 478
 479                 spin_unlock(&ipi->lock);
 480         }
 481
 482         /*
 483          * Call the platform specific cross-CPU call function.
 484          */
 485         smp_cross_call(map, msg);
 486
 487         local_irq_restore(flags);
 488 }
 489
 490 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 491 {
 492         send_ipi_message(mask, IPI_CALL_FUNC);
 493 }
 494
 495 void arch_send_call_function_single_ipi(int cpu)
 496 {
 497         send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
 498 }
 499
 500 void show_ipi_list(struct seq_file *p)
 501 {
 502         unsigned int cpu;
 503
 504         seq_puts(p, "IPI:");
 505
 506         for_each_present_cpu(cpu)
 507                 seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);
 508
 509         seq_putc(p, '\n');
 510 }
 511
 512 static DEFINE_SPINLOCK(stop_lock);
 513
 514 /*
 515  * Main handler for inter-processor interrupts
 516  *
 517  * For Meta, the ipimask now only identifies a single
 518  * category of IPI (Bit 1 IPIs have been replaced by a
 519  * different mechanism):
 520  *
 521  *  Bit 0 - Inter-processor function call
 522  */
 523 static int do_IPI(void)
 524 {
 525         unsigned int cpu = smp_processor_id();
 526         struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
 527         unsigned long msgs, nextmsg;
 528         int handled = 0;
 529
 530         ipi->ipi_count++;
 531
 532         spin_lock(&ipi->lock);
 533         msgs = ipi->bits;
 534         nextmsg = msgs & -msgs;
 535         ipi->bits &= ~nextmsg;
 536         spin_unlock(&ipi->lock);
 537
 538         if (nextmsg) {
 539                 handled = 1;
 540
 541                 nextmsg = ffz(~nextmsg);
 542                 switch (nextmsg) {
 543                 case IPI_RESCHEDULE:
 544                         scheduler_ipi();
 545                         break;
 546
 547                 case IPI_CALL_FUNC:
 548                         generic_smp_call_function_interrupt();
 549                         break;
 550
 551                 default:
 552                         pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
 553                                 cpu, nextmsg);
 554                         break;
 555                 }
 556         }
 557
 558         return handled;
 559 }
 560
 561 void smp_send_reschedule(int cpu)
 562 {
 563         send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 564 }
 565
 566 static void stop_this_cpu(void *data)
 567 {
 568         unsigned int cpu = smp_processor_id();
 569
 570         if (system_state == SYSTEM_BOOTING ||
 571             system_state == SYSTEM_RUNNING) {
 572                 spin_lock(&stop_lock);
 573                 pr_crit("CPU%u: stopping\n", cpu);
 574                 dump_stack();
 575                 spin_unlock(&stop_lock);
 576         }
 577
 578         set_cpu_online(cpu, false);
 579
 580         local_irq_disable();
 581
 582         hard_processor_halt(HALT_OK);
 583 }
 584
 585 void smp_send_stop(void)
 586 {
 587         smp_call_function(stop_this_cpu, NULL, 0);
 588 }
 589
 590 /*
 591  * not supported here
 592  */
 593 int setup_profiling_timer(unsigned int multiplier)
 594 {
 595         return -EINVAL;
 596 }
 597
 598 /*
 599  * We use KICKs for inter-processor interrupts.
 600  *
 601  * For every CPU in "callmap" the IPI data must already have been
 602  * stored in that CPU's "ipi_data" member prior to calling this
 603  * function.
 604  */
 605 static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
 606 {
 607         int cpu;
 608
 609         for_each_cpu(cpu, &callmap) {
 610                 unsigned int thread;
 611
 612                 thread = cpu_2_hwthread_id[cpu];
 613
 614                 BUG_ON(thread == BAD_HWTHREAD_ID);
 615
 616                 metag_out32(1, T0KICKI + (thread * TnXKICK_STRIDE));
 617         }
 618 }
 619
 620 static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
 621                    int Inst, PTBI pTBI, int *handled)
 622 {
 623         *handled = do_IPI();
 624
 625         return State;
 626 }
 627
 628 static struct kick_irq_handler ipi_irq = {
 629         .func = ipi_handler,
 630 };
 631
 632 static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
 633 {
 634         kick_raise_softirq(callmap, 1);
 635 }
 636
 637 static inline unsigned int get_core_count(void)
 638 {
 639         int i;
 640         unsigned int ret = 0;
 641
 642         for (i = 0; i < CONFIG_NR_CPUS; i++) {
 643                 if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, i))
 644                         ret++;
 645         }
 646
 647         return ret;
 648 }
 649
 650 /*
 651  * Initialise the CPU possible map early - this describes the CPUs
 652  * which may be present or become present in the system.
 653  */
 654 void __init smp_init_cpus(void)
 655 {
 656         unsigned int i, ncores = get_core_count();
 657
 658         /* If no hwthread_map early param was set use default mapping */
 659         for (i = 0; i < NR_CPUS; i++)
 660                 if (cpu_2_hwthread_id[i] == BAD_HWTHREAD_ID) {
 661                         cpu_2_hwthread_id[i] = i;
 662                         hwthread_id_2_cpu[i] = i;
 663                 }
 664
 665         for (i = 0; i < ncores; i++)
 666                 set_cpu_possible(i, true);
 667
 668         kick_register_func(&ipi_irq);
 669 }