arch/powerpc/mm/mmu_context_nohash.c

   1 /*
   2  * This file contains the routines for handling the MMU on those
   3  * PowerPC implementations where the MMU is not using the hash
   4  * table, such as 8xx, 4xx, BookE's etc...
   5  *
   6  * Copyright 2008 Ben Herrenschmidt <benh@kernel.crashing.org>
   7  *                IBM Corp.
   8  *
   9  *  Derived from previous arch/powerpc/mm/mmu_context.c
  10  *  and arch/powerpc/include/asm/mmu_context.h
  11  *
  12  *  This program is free software; you can redistribute it and/or
  13  *  modify it under the terms of the GNU General Public License
  14  *  as published by the Free Software Foundation; either version
  15  *  2 of the License, or (at your option) any later version.
  16  *
  17  * TODO:
  18  *
  19  *   - The global context lock will not scale very well
  20  *   - The maps should be dynamically allocated to allow for processors
  21  *     that support more PID bits at runtime
  22  *   - Implement flush_tlb_mm() by making the context stale and picking
  23  *     a new one
  24  *   - More aggressively clear stale map bits and maybe find some way to
  25  *     also clear mm->cpu_vm_mask bits when processes are migrated
  26  */
  27
  28 #undef DEBUG
  29 #define DEBUG_STEAL_ONLY
  30 #undef DEBUG_MAP_CONSISTENCY
  31
  32 #include <linux/kernel.h>
  33 #include <linux/mm.h>
  34 #include <linux/init.h>
  35
  36 #include <asm/mmu_context.h>
  37 #include <asm/tlbflush.h>
  38 #include <linux/spinlock.h>
  39
  40 /*
  41  *   The MPC8xx has only 16 contexts.  We rotate through them on each
  42  * task switch.  A better way would be to keep track of tasks that
  43  * own contexts, and implement an LRU usage.  That way very active
  44  * tasks don't always have to pay the TLB reload overhead.  The
  45  * kernel pages are mapped shared, so the kernel can run on behalf
  46  * of any task that makes a kernel entry.  Shared does not mean they
  47  * are not protected, just that the ASID comparison is not performed.
  48  *      -- Dan
  49  *
  50  * The IBM4xx has 256 contexts, so we can just rotate through these
  51  * as a way of "switching" contexts.  If the TID of the TLB is zero,
  52  * the PID/TID comparison is disabled, so we can use a TID of zero
  53  * to represent all kernel pages as shared among all contexts.
  54  *      -- Dan
  55  */
  56
  57 #ifdef CONFIG_8xx
  58 #define LAST_CONTEXT            15
  59 #define FIRST_CONTEXT           0
  60
  61 #elif defined(CONFIG_4xx)
  62 #define LAST_CONTEXT            255
  63 #define FIRST_CONTEXT           1
  64
  65 #elif defined(CONFIG_E200) || defined(CONFIG_E500)
  66 #define LAST_CONTEXT            255
  67 #define FIRST_CONTEXT           1
  68
  69 #else
  70 #error Unsupported processor type
  71 #endif
  72
  73 static unsigned int next_context, nr_free_contexts;
  74 static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
  75 static unsigned long stale_map[NR_CPUS][LAST_CONTEXT / BITS_PER_LONG + 1];
  76 static struct mm_struct *context_mm[LAST_CONTEXT+1];
  77 static spinlock_t context_lock = SPIN_LOCK_UNLOCKED;
  78
  79 /* Steal a context from a task that has one at the moment.
  80  *
  81  * This is used when we are running out of available PID numbers
  82  * on the processors.
  83  *
  84  * This isn't an LRU system, it just frees up each context in
  85  * turn (sort-of pseudo-random replacement :).  This would be the
  86  * place to implement an LRU scheme if anyone was motivated to do it.
  87  *  -- paulus
  88  *
  89  * For context stealing, we use a slightly different approach for
  90  * SMP and UP. Basically, the UP one is simpler and doesn't use
  91  * the stale map as we can just flush the local CPU
  92  *  -- benh
  93  */
  94 #ifdef CONFIG_SMP
  95 static unsigned int steal_context_smp(unsigned int id)
  96 {
  97         struct mm_struct *mm;
  98         unsigned int cpu, max;
  99
 100  again:
 101         max = LAST_CONTEXT - FIRST_CONTEXT;
 102
 103         /* Attempt to free next_context first and then loop until we manage */
 104         while (max--) {
 105                 /* Pick up the victim mm */
 106                 mm = context_mm[id];
 107
 108                 /* We have a candidate victim, check if it's active, on SMP
 109                  * we cannot steal active contexts
 110                  */
 111                 if (mm->context.active) {
 112                         id++;
 113                         if (id > LAST_CONTEXT)
 114                                 id = FIRST_CONTEXT;
 115                         continue;
 116                 }
 117                 pr_debug("[%d] steal context %d from mm @%p\n",
 118                          smp_processor_id(), id, mm);
 119
 120                 /* Mark this mm has having no context anymore */
 121                 mm->context.id = MMU_NO_CONTEXT;
 122
 123                 /* Mark it stale on all CPUs that used this mm */
 124                 for_each_cpu_mask_nr(cpu, mm->cpu_vm_mask)
 125                         __set_bit(id, stale_map[cpu]);
 126                 return id;
 127         }
 128
 129         /* This will happen if you have more CPUs than available contexts,
 130          * all we can do here is wait a bit and try again
 131          */
 132         spin_unlock(&context_lock);
 133         cpu_relax();
 134         spin_lock(&context_lock);
 135         goto again;
 136 }
 137 #endif  /* CONFIG_SMP */
 138
 139 /* Note that this will also be called on SMP if all other CPUs are
 140  * offlined, which means that it may be called for cpu != 0. For
 141  * this to work, we somewhat assume that CPUs that are onlined
 142  * come up with a fully clean TLB (or are cleaned when offlined)
 143  */
 144 static unsigned int steal_context_up(unsigned int id)
 145 {
 146         struct mm_struct *mm;
 147         int cpu = smp_processor_id();
 148
 149         /* Pick up the victim mm */
 150         mm = context_mm[id];
 151
 152         pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm);
 153
 154         /* Mark this mm has having no context anymore */
 155         mm->context.id = MMU_NO_CONTEXT;
 156
 157         /* Flush the TLB for that context */
 158         local_flush_tlb_mm(mm);
 159
 160         /* XXX This clear should ultimately be part of local_flush_tlb_mm */
 161         __clear_bit(id, stale_map[cpu]);
 162
 163         return id;
 164 }
 165
 166 #ifdef DEBUG_MAP_CONSISTENCY
 167 static void context_check_map(void)
 168 {
 169         unsigned int id, nrf, nact;
 170
 171         nrf = nact = 0;
 172         for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
 173                 int used = test_bit(id, context_map);
 174                 if (!used)
 175                         nrf++;
 176                 if (used != (context_mm[id] != NULL))
 177                         pr_err("MMU: Context %d is %s and MM is %p !\n",
 178                                id, used ? "used" : "free", context_mm[id]);
 179                 if (context_mm[id] != NULL)
 180                         nact += context_mm[id]->context.active;
 181         }
 182         if (nrf != nr_free_contexts) {
 183                 pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
 184                        nr_free_contexts, nrf);
 185                 nr_free_contexts = nrf;
 186         }
 187         if (nact > num_online_cpus())
 188                 pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
 189                        nact, num_online_cpus());
 190 }
 191 #else
 192 static void context_check_map(void) { }
 193 #endif
 194
 195 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 196 {
 197         unsigned int id, cpu = smp_processor_id();
 198         unsigned long *map;
 199
 200         /* No lockless fast path .. yet */
 201         spin_lock(&context_lock);
 202
 203 #ifndef DEBUG_STEAL_ONLY
 204         pr_debug("[%d] activating context for mm @%p, active=%d, id=%d\n",
 205                  cpu, next, next->context.active, next->context.id);
 206 #endif
 207
 208 #ifdef CONFIG_SMP
 209         /* Mark us active and the previous one not anymore */
 210         next->context.active++;
 211         if (prev) {
 212                 WARN_ON(prev->context.active < 1);
 213                 prev->context.active--;
 214         }
 215 #endif /* CONFIG_SMP */
 216
 217         /* If we already have a valid assigned context, skip all that */
 218         id = next->context.id;
 219         if (likely(id != MMU_NO_CONTEXT))
 220                 goto ctxt_ok;
 221
 222         /* We really don't have a context, let's try to acquire one */
 223         id = next_context;
 224         if (id > LAST_CONTEXT)
 225                 id = FIRST_CONTEXT;
 226         map = context_map;
 227
 228         /* No more free contexts, let's try to steal one */
 229         if (nr_free_contexts == 0) {
 230 #ifdef CONFIG_SMP
 231                 if (num_online_cpus() > 1) {
 232                         id = steal_context_smp(id);
 233                         goto stolen;
 234                 }
 235 #endif /* CONFIG_SMP */
 236                 id = steal_context_up(id);
 237                 goto stolen;
 238         }
 239         nr_free_contexts--;
 240
 241         /* We know there's at least one free context, try to find it */
 242         while (__test_and_set_bit(id, map)) {
 243                 id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
 244                 if (id > LAST_CONTEXT)
 245                         id = FIRST_CONTEXT;
 246         }
 247  stolen:
 248         next_context = id + 1;
 249         context_mm[id] = next;
 250         next->context.id = id;
 251
 252 #ifndef DEBUG_STEAL_ONLY
 253         pr_debug("[%d] picked up new id %d, nrf is now %d\n",
 254                  cpu, id, nr_free_contexts);
 255 #endif
 256
 257         context_check_map();
 258  ctxt_ok:
 259
 260         /* If that context got marked stale on this CPU, then flush the
 261          * local TLB for it and unmark it before we use it
 262          */
 263         if (test_bit(id, stale_map[cpu])) {
 264                 pr_debug("[%d] flushing stale context %d for mm @%p !\n",
 265                          cpu, id, next);
 266                 local_flush_tlb_mm(next);
 267
 268                 /* XXX This clear should ultimately be part of local_flush_tlb_mm */
 269                 __clear_bit(id, stale_map[cpu]);
 270         }
 271
 272         /* Flick the MMU and release lock */
 273         set_context(id, next->pgd);
 274         spin_unlock(&context_lock);
 275 }
 276
 277 /*
 278  * Set up the context for a new address space.
 279  */
 280 int init_new_context(struct task_struct *t, struct mm_struct *mm)
 281 {
 282         mm->context.id = MMU_NO_CONTEXT;
 283         mm->context.active = 0;
 284
 285         return 0;
 286 }
 287
 288 /*
 289  * We're finished using the context for an address space.
 290  */
 291 void destroy_context(struct mm_struct *mm)
 292 {
 293         unsigned int id;
 294
 295         if (mm->context.id == MMU_NO_CONTEXT)
 296                 return;
 297
 298         WARN_ON(mm->context.active != 0);
 299
 300         spin_lock(&context_lock);
 301         id = mm->context.id;
 302         if (id != MMU_NO_CONTEXT) {
 303                 __clear_bit(id, context_map);
 304                 mm->context.id = MMU_NO_CONTEXT;
 305 #ifdef DEBUG_MAP_CONSISTENCY
 306                 mm->context.active = 0;
 307                 context_mm[id] = NULL;
 308 #endif
 309                 nr_free_contexts++;
 310         }
 311         spin_unlock(&context_lock);
 312 }
 313
 314
 315 /*
 316  * Initialize the context management stuff.
 317  */
 318 void __init mmu_context_init(void)
 319 {
 320         /* Mark init_mm as being active on all possible CPUs since
 321          * we'll get called with prev == init_mm the first time
 322          * we schedule on a given CPU
 323          */
 324         init_mm.context.active = NR_CPUS;
 325
 326         /*
 327          * Some processors have too few contexts to reserve one for
 328          * init_mm, and require using context 0 for a normal task.
 329          * Other processors reserve the use of context zero for the kernel.
 330          * This code assumes FIRST_CONTEXT < 32.
 331          */
 332         context_map[0] = (1 << FIRST_CONTEXT) - 1;
 333         next_context = FIRST_CONTEXT;
 334         nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
 335 }
 336