arch/powerpc/mm/mmu_context_nohash.c

   1 /*
   2  * This file contains the routines for handling the MMU on those
   3  * PowerPC implementations where the MMU is not using the hash
   4  * table, such as 8xx, 4xx, BookE's etc...
   5  *
   6  * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
   7  *                IBM Corp.
   8  *
   9  *  Derived from previous arch/powerpc/mm/mmu_context.c
  10  *  and arch/powerpc/include/asm/mmu_context.h
  11  *
  12  *  This program is free software; you can redistribute it and/or
  13  *  modify it under the terms of the GNU General Public License
  14  *  as published by the Free Software Foundation; either version
  15  *  2 of the License, or (at your option) any later version.
  16  *
  17  * TODO:
  18  *
  19  *   - The global context lock will not scale very well
  20  *   - The maps should be dynamically allocated to allow for processors
  21  *     that support more PID bits at runtime
  22  *   - Implement flush_tlb_mm() by making the context stale and picking
  23  *     a new one
  24  *   - More aggressively clear stale map bits and maybe find some way to
  25  *     also clear mm->cpu_vm_mask bits when processes are migrated
  26  */
  27
  28 //#define DEBUG_MAP_CONSISTENCY
  29 //#define DEBUG_CLAMP_LAST_CONTEXT   31
  30 //#define DEBUG_HARDER
  31
  32 /* We don't use DEBUG because it tends to be compiled in always nowadays
  33  * and this would generate way too much output
  34  */
  35 #ifdef DEBUG_HARDER
  36 #define pr_hard(args...)        printk(KERN_DEBUG args)
  37 #define pr_hardcont(args...)    printk(KERN_CONT args)
  38 #else
  39 #define pr_hard(args...)        do { } while(0)
  40 #define pr_hardcont(args...)    do { } while(0)
  41 #endif
  42
  43 #include <linux/kernel.h>
  44 #include <linux/mm.h>
  45 #include <linux/init.h>
  46 #include <linux/spinlock.h>
  47 #include <linux/memblock.h>
  48 #include <linux/notifier.h>
  49 #include <linux/cpu.h>
  50 #include <linux/slab.h>
  51
  52 #include <asm/mmu_context.h>
  53 #include <asm/tlbflush.h>
  54
  55 #include "mmu_decl.h"
  56
  57 /*
  58  * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
  59  * A better way would be to keep track of tasks that own contexts, and implement
  60  * an LRU usage. That way very active tasks don't always have to pay the TLB
  61  * reload overhead. The kernel pages are mapped shared, so the kernel can run on
  62  * behalf of any task that makes a kernel entry. Shared does not mean they are
  63  * not protected, just that the ASID comparison is not performed. -- Dan
  64  *
  65  * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
  66  * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
  67  * is disabled, so we can use a TID of zero to represent all kernel pages as
  68  * shared among all contexts. -- Dan
  69  *
  70  * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
  71  * normally never have to steal though the facility is present if needed.
  72  * -- BenH
  73  */
  74 #define FIRST_CONTEXT 1
  75 #ifdef DEBUG_CLAMP_LAST_CONTEXT
  76 #define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT
  77 #elif defined(CONFIG_PPC_8xx)
  78 #define LAST_CONTEXT 16
  79 #elif defined(CONFIG_PPC_47x)
  80 #define LAST_CONTEXT 65535
  81 #else
  82 #define LAST_CONTEXT 255
  83 #endif
  84
  85 static unsigned int next_context, nr_free_contexts;
  86 static unsigned long *context_map;
  87 #ifdef CONFIG_SMP
  88 static unsigned long *stale_map[NR_CPUS];
  89 #endif
  90 static struct mm_struct **context_mm;
  91 static DEFINE_RAW_SPINLOCK(context_lock);
  92
  93 #define CTX_MAP_SIZE    \
  94         (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
  95
  96
  97 /* Steal a context from a task that has one at the moment.
  98  *
  99  * This is used when we are running out of available PID numbers
 100  * on the processors.
 101  *
 102  * This isn't an LRU system, it just frees up each context in
 103  * turn (sort-of pseudo-random replacement :).  This would be the
 104  * place to implement an LRU scheme if anyone was motivated to do it.
 105  *  -- paulus
 106  *
 107  * For context stealing, we use a slightly different approach for
 108  * SMP and UP. Basically, the UP one is simpler and doesn't use
 109  * the stale map as we can just flush the local CPU
 110  *  -- benh
 111  */
 112 #ifdef CONFIG_SMP
 113 static unsigned int steal_context_smp(unsigned int id)
 114 {
 115         struct mm_struct *mm;
 116         unsigned int cpu, max, i;
 117
 118         max = LAST_CONTEXT - FIRST_CONTEXT;
 119
 120         /* Attempt to free next_context first and then loop until we manage */
 121         while (max--) {
 122                 /* Pick up the victim mm */
 123                 mm = context_mm[id];
 124
 125                 /* We have a candidate victim, check if it's active, on SMP
 126                  * we cannot steal active contexts
 127                  */
 128                 if (mm->context.active) {
 129                         id++;
 130                         if (id > LAST_CONTEXT)
 131                                 id = FIRST_CONTEXT;
 132                         continue;
 133                 }
 134                 pr_hardcont(" | steal %d from 0x%p", id, mm);
 135
 136                 /* Mark this mm has having no context anymore */
 137                 mm->context.id = MMU_NO_CONTEXT;
 138
 139                 /* Mark it stale on all CPUs that used this mm. For threaded
 140                  * implementations, we set it on all threads on each core
 141                  * represented in the mask. A future implementation will use
 142                  * a core map instead but this will do for now.
 143                  */
 144                 for_each_cpu(cpu, mm_cpumask(mm)) {
 145                         for (i = cpu_first_thread_sibling(cpu);
 146                              i <= cpu_last_thread_sibling(cpu); i++) {
 147                                 if (stale_map[i])
 148                                         __set_bit(id, stale_map[i]);
 149                         }
 150                         cpu = i - 1;
 151                 }
 152                 return id;
 153         }
 154
 155         /* This will happen if you have more CPUs than available contexts,
 156          * all we can do here is wait a bit and try again
 157          */
 158         raw_spin_unlock(&context_lock);
 159         cpu_relax();
 160         raw_spin_lock(&context_lock);
 161
 162         /* This will cause the caller to try again */
 163         return MMU_NO_CONTEXT;
 164 }
 165 #endif  /* CONFIG_SMP */
 166
 167 static unsigned int steal_all_contexts(void)
 168 {
 169         struct mm_struct *mm;
 170 #ifdef CONFIG_SMP
 171         int cpu = smp_processor_id();
 172 #endif
 173         unsigned int id;
 174
 175         for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
 176                 /* Pick up the victim mm */
 177                 mm = context_mm[id];
 178
 179                 pr_hardcont(" | steal %d from 0x%p", id, mm);
 180
 181                 /* Mark this mm as having no context anymore */
 182                 mm->context.id = MMU_NO_CONTEXT;
 183                 if (id != FIRST_CONTEXT) {
 184                         context_mm[id] = NULL;
 185                         __clear_bit(id, context_map);
 186 #ifdef DEBUG_MAP_CONSISTENCY
 187                         mm->context.active = 0;
 188 #endif
 189                 }
 190 #ifdef CONFIG_SMP
 191                 __clear_bit(id, stale_map[cpu]);
 192 #endif
 193         }
 194
 195         /* Flush the TLB for all contexts (not to be used on SMP) */
 196         _tlbil_all();
 197
 198         nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;
 199
 200         return FIRST_CONTEXT;
 201 }
 202
 203 /* Note that this will also be called on SMP if all other CPUs are
 204  * offlined, which means that it may be called for cpu != 0. For
 205  * this to work, we somewhat assume that CPUs that are onlined
 206  * come up with a fully clean TLB (or are cleaned when offlined)
 207  */
 208 static unsigned int steal_context_up(unsigned int id)
 209 {
 210         struct mm_struct *mm;
 211 #ifdef CONFIG_SMP
 212         int cpu = smp_processor_id();
 213 #endif
 214
 215         /* Pick up the victim mm */
 216         mm = context_mm[id];
 217
 218         pr_hardcont(" | steal %d from 0x%p", id, mm);
 219
 220         /* Flush the TLB for that context */
 221         local_flush_tlb_mm(mm);
 222
 223         /* Mark this mm has having no context anymore */
 224         mm->context.id = MMU_NO_CONTEXT;
 225
 226         /* XXX This clear should ultimately be part of local_flush_tlb_mm */
 227 #ifdef CONFIG_SMP
 228         __clear_bit(id, stale_map[cpu]);
 229 #endif
 230
 231         return id;
 232 }
 233
 234 #ifdef DEBUG_MAP_CONSISTENCY
 235 static void context_check_map(void)
 236 {
 237         unsigned int id, nrf, nact;
 238
 239         nrf = nact = 0;
 240         for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
 241                 int used = test_bit(id, context_map);
 242                 if (!used)
 243                         nrf++;
 244                 if (used != (context_mm[id] != NULL))
 245                         pr_err("MMU: Context %d is %s and MM is %p !\n",
 246                                id, used ? "used" : "free", context_mm[id]);
 247                 if (context_mm[id] != NULL)
 248                         nact += context_mm[id]->context.active;
 249         }
 250         if (nrf != nr_free_contexts) {
 251                 pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
 252                        nr_free_contexts, nrf);
 253                 nr_free_contexts = nrf;
 254         }
 255         if (nact > num_online_cpus())
 256                 pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
 257                        nact, num_online_cpus());
 258         if (FIRST_CONTEXT > 0 && !test_bit(0, context_map))
 259                 pr_err("MMU: Context 0 has been freed !!!\n");
 260 }
 261 #else
 262 static void context_check_map(void) { }
 263 #endif
 264
 265 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 266                         struct task_struct *tsk)
 267 {
 268         unsigned int id;
 269 #ifdef CONFIG_SMP
 270         unsigned int i, cpu = smp_processor_id();
 271 #endif
 272         unsigned long *map;
 273
 274         /* No lockless fast path .. yet */
 275         raw_spin_lock(&context_lock);
 276
 277         pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
 278                 cpu, next, next->context.active, next->context.id);
 279
 280 #ifdef CONFIG_SMP
 281         /* Mark us active and the previous one not anymore */
 282         next->context.active++;
 283         if (prev) {
 284                 pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
 285                 WARN_ON(prev->context.active < 1);
 286                 prev->context.active--;
 287         }
 288
 289  again:
 290 #endif /* CONFIG_SMP */
 291
 292         /* If we already have a valid assigned context, skip all that */
 293         id = next->context.id;
 294         if (likely(id != MMU_NO_CONTEXT)) {
 295 #ifdef DEBUG_MAP_CONSISTENCY
 296                 if (context_mm[id] != next)
 297                         pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
 298                                next, id, id, context_mm[id]);
 299 #endif
 300                 goto ctxt_ok;
 301         }
 302
 303         /* We really don't have a context, let's try to acquire one */
 304         id = next_context;
 305         if (id > LAST_CONTEXT)
 306                 id = FIRST_CONTEXT;
 307         map = context_map;
 308
 309         /* No more free contexts, let's try to steal one */
 310         if (nr_free_contexts == 0) {
 311 #ifdef CONFIG_SMP
 312                 if (num_online_cpus() > 1) {
 313                         id = steal_context_smp(id);
 314                         if (id == MMU_NO_CONTEXT)
 315                                 goto again;
 316                         goto stolen;
 317                 }
 318 #endif /* CONFIG_SMP */
 319                 if (IS_ENABLED(CONFIG_PPC_8xx))
 320                         id = steal_all_contexts();
 321                 else
 322                         id = steal_context_up(id);
 323                 goto stolen;
 324         }
 325         nr_free_contexts--;
 326
 327         /* We know there's at least one free context, try to find it */
 328         while (__test_and_set_bit(id, map)) {
 329                 id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
 330                 if (id > LAST_CONTEXT)
 331                         id = FIRST_CONTEXT;
 332         }
 333  stolen:
 334         next_context = id + 1;
 335         context_mm[id] = next;
 336         next->context.id = id;
 337         pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
 338
 339         context_check_map();
 340  ctxt_ok:
 341
 342         /* If that context got marked stale on this CPU, then flush the
 343          * local TLB for it and unmark it before we use it
 344          */
 345 #ifdef CONFIG_SMP
 346         if (test_bit(id, stale_map[cpu])) {
 347                 pr_hardcont(" | stale flush %d [%d..%d]",
 348                             id, cpu_first_thread_sibling(cpu),
 349                             cpu_last_thread_sibling(cpu));
 350
 351                 local_flush_tlb_mm(next);
 352
 353                 /* XXX This clear should ultimately be part of local_flush_tlb_mm */
 354                 for (i = cpu_first_thread_sibling(cpu);
 355                      i <= cpu_last_thread_sibling(cpu); i++) {
 356                         if (stale_map[i])
 357                                 __clear_bit(id, stale_map[i]);
 358                 }
 359         }
 360 #endif
 361
 362         /* Flick the MMU and release lock */
 363         pr_hardcont(" -> %d\n", id);
 364         set_context(id, next->pgd);
 365         raw_spin_unlock(&context_lock);
 366 }
 367
 368 /*
 369  * Set up the context for a new address space.
 370  */
 371 int init_new_context(struct task_struct *t, struct mm_struct *mm)
 372 {
 373         pr_hard("initing context for mm @%p\n", mm);
 374
 375         /*
 376          * We have MMU_NO_CONTEXT set to be ~0. Hence check
 377          * explicitly against context.id == 0. This ensures that we properly
 378          * initialize context slice details for newly allocated mm's (which will
 379          * have id == 0) and don't alter context slice inherited via fork (which
 380          * will have id != 0).
 381          */
 382         if (mm->context.id == 0)
 383                 slice_init_new_context_exec(mm);
 384         mm->context.id = MMU_NO_CONTEXT;
 385         mm->context.active = 0;
 386         pte_frag_set(&mm->context, NULL);
 387         return 0;
 388 }
 389
 390 /*
 391  * We're finished using the context for an address space.
 392  */
 393 void destroy_context(struct mm_struct *mm)
 394 {
 395         unsigned long flags;
 396         unsigned int id;
 397
 398         if (mm->context.id == MMU_NO_CONTEXT)
 399                 return;
 400
 401         WARN_ON(mm->context.active != 0);
 402
 403         raw_spin_lock_irqsave(&context_lock, flags);
 404         id = mm->context.id;
 405         if (id != MMU_NO_CONTEXT) {
 406                 __clear_bit(id, context_map);
 407                 mm->context.id = MMU_NO_CONTEXT;
 408 #ifdef DEBUG_MAP_CONSISTENCY
 409                 mm->context.active = 0;
 410 #endif
 411                 context_mm[id] = NULL;
 412                 nr_free_contexts++;
 413         }
 414         raw_spin_unlock_irqrestore(&context_lock, flags);
 415 }
 416
 417 #ifdef CONFIG_SMP
 418 static int mmu_ctx_cpu_prepare(unsigned int cpu)
 419 {
 420         /* We don't touch CPU 0 map, it's allocated at aboot and kept
 421          * around forever
 422          */
 423         if (cpu == boot_cpuid)
 424                 return 0;
 425
 426         pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
 427         stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
 428         return 0;
 429 }
 430
 431 static int mmu_ctx_cpu_dead(unsigned int cpu)
 432 {
 433 #ifdef CONFIG_HOTPLUG_CPU
 434         if (cpu == boot_cpuid)
 435                 return 0;
 436
 437         pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
 438         kfree(stale_map[cpu]);
 439         stale_map[cpu] = NULL;
 440
 441         /* We also clear the cpu_vm_mask bits of CPUs going away */
 442         clear_tasks_mm_cpumask(cpu);
 443 #endif
 444         return 0;
 445 }
 446
 447 #endif /* CONFIG_SMP */
 448
 449 /*
 450  * Initialize the context management stuff.
 451  */
 452 void __init mmu_context_init(void)
 453 {
 454         /* Mark init_mm as being active on all possible CPUs since
 455          * we'll get called with prev == init_mm the first time
 456          * we schedule on a given CPU
 457          */
 458         init_mm.context.active = NR_CPUS;
 459
 460         /*
 461          * Allocate the maps used by context management
 462          */
 463         context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
 464         context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
 465                                     SMP_CACHE_BYTES);
 466 #ifdef CONFIG_SMP
 467         stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
 468
 469         cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
 470                                   "powerpc/mmu/ctx:prepare",
 471                                   mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
 472 #endif
 473
 474         printk(KERN_INFO
 475                "MMU: Allocated %zu bytes of context maps for %d contexts\n",
 476                2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
 477                LAST_CONTEXT - FIRST_CONTEXT + 1);
 478
 479         /*
 480          * Some processors have too few contexts to reserve one for
 481          * init_mm, and require using context 0 for a normal task.
 482          * Other processors reserve the use of context zero for the kernel.
 483          * This code assumes FIRST_CONTEXT < 32.
 484          */
 485         context_map[0] = (1 << FIRST_CONTEXT) - 1;
 486         next_context = FIRST_CONTEXT;
 487         nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
 488 }