arch/x86/include/asm/tlbflush.h

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 #ifndef _ASM_X86_TLBFLUSH_H
   3 #define _ASM_X86_TLBFLUSH_H
   4
   5 #include <linux/mm.h>
   6 #include <linux/sched.h>
   7
   8 #include <asm/processor.h>
   9 #include <asm/cpufeature.h>
  10 #include <asm/special_insns.h>
  11 #include <asm/smp.h>
  12
  13 static inline void __invpcid(unsigned long pcid, unsigned long addr,
  14                              unsigned long type)
  15 {
  16         struct { u64 d[2]; } desc = { { pcid, addr } };
  17
  18         /*
  19          * The memory clobber is because the whole point is to invalidate
  20          * stale TLB entries and, especially if we're flushing global
  21          * mappings, we don't want the compiler to reorder any subsequent
  22          * memory accesses before the TLB flush.
  23          *
  24          * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
  25          * invpcid (%rcx), %rax in long mode.
  26          */
  27         asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
  28                       : : "m" (desc), "a" (type), "c" (&desc) : "memory");
  29 }
  30
/*
 * Invalidation types handed to __invpcid() as its "type" argument.
 *
 *  INDIV_ADDR      - one address within one PCID, globals excluded
 *  SINGLE_CTXT     - everything for one PCID, globals excluded
 *  ALL_INCL_GLOBAL - everything for all PCIDs, globals included
 *  ALL_NON_GLOBAL  - everything for all PCIDs, globals excluded
 */
#define INVPCID_TYPE_INDIV_ADDR         0
#define INVPCID_TYPE_SINGLE_CTXT        1
#define INVPCID_TYPE_ALL_INCL_GLOBAL    2
#define INVPCID_TYPE_ALL_NON_GLOBAL     3
  35
  36 /* Flush all mappings for a given pcid and addr, not including globals. */
  37 static inline void invpcid_flush_one(unsigned long pcid,
  38                                      unsigned long addr)
  39 {
  40         __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
  41 }
  42
  43 /* Flush all mappings for a given PCID, not including globals. */
  44 static inline void invpcid_flush_single_context(unsigned long pcid)
  45 {
  46         __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
  47 }
  48
  49 /* Flush all mappings, including globals, for all PCIDs. */
  50 static inline void invpcid_flush_all(void)
  51 {
  52         __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
  53 }
  54
  55 /* Flush all mappings for all PCIDs except globals. */
  56 static inline void invpcid_flush_all_nonglobals(void)
  57 {
  58         __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
  59 }
  60
  61 static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
  62 {
  63         /*
  64          * Bump the generation count.  This also serves as a full barrier
  65          * that synchronizes with switch_mm(): callers are required to order
  66          * their read of mm_cpumask after their writes to the paging
  67          * structures.
  68          */
  69         return atomic64_inc_return(&mm->context.tlb_gen);
  70 }
  71
/*
 * Select the low-level flush primitives.  Without CONFIG_PARAVIRT the
 * __flush_tlb*() names map directly onto the native implementations.
 * With CONFIG_PARAVIRT, <asm/paravirt.h> is included instead and
 * presumably supplies these names itself -- confirm against that header.
 */
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define __flush_tlb() __native_flush_tlb()
#define __flush_tlb_global() __native_flush_tlb_global()
#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
#endif
  79
  80 static inline bool tlb_defer_switch_to_init_mm(void)
  81 {
  82         /*
  83          * If we have PCID, then switching to init_mm is reasonably
  84          * fast.  If we don't have PCID, then switching to init_mm is
  85          * quite slow, so we try to defer it in the hopes that we can
  86          * avoid it entirely.  The latter approach runs the risk of
  87          * receiving otherwise unnecessary IPIs.
  88          *
  89          * This choice is just a heuristic.  The tlb code can handle this
  90          * function returning true or false regardless of whether we have
  91          * PCID.
  92          */
  93         return !static_cpu_has(X86_FEATURE_PCID);
  94 }
  95
/*
 * Number of entries in tlb_state.ctxs[], one per ASID that we use.
 *
 * 6 because 6 should be plenty and struct tlb_state will fit in
 * two cache lines.
 */
#define TLB_NR_DYN_ASIDS 6
 101
/*
 * One context that might exist in the TLB.  struct tlb_state holds an
 * array of these, indexed by ASID.
 */
struct tlb_context {
        u64 ctx_id;     /* which mm the TLB's user entries came from */
        u64 tlb_gen;    /* mm generation the TLB is at least as new as */
};
 106
/*
 * TLB bookkeeping state; one instance exists per CPU as cpu_tlbstate.
 */
struct tlb_state {
        /*
         * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
         * are on.  This means that it may not match current->active_mm,
         * which will contain the previous user mm when we're in lazy TLB
         * mode even if we've already switched back to swapper_pg_dir.
         */
        struct mm_struct *loaded_mm;
        /*
         * NOTE(review): presumably the ASID in use for loaded_mm and the
         * next ASID to hand out -- confirm against the users of these
         * fields, which are not in this header.
         */
        u16 loaded_mm_asid;
        u16 next_asid;

        /*
         * We can be in one of several states:
         *
         *  - Actively using an mm.  Our CPU's bit will be set in
         *    mm_cpumask(loaded_mm) and is_lazy == false;
         *
         *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
         *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
         *
         *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
         *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
         *    We're heuristically guessing that the CR3 load we
         *    skipped more than makes up for the overhead added by
         *    lazy mode.
         */
        bool is_lazy;

        /*
         * Access to this CR4 shadow and to H/W CR4 is protected by
         * disabling interrupts when modifying either one.
         */
        unsigned long cr4;

        /*
         * This is a list of all contexts that might exist in the TLB.
         * There is one per ASID that we use, and the ASID (what the
         * CPU calls PCID) is the index into ctxs.
         *
         * For each context, ctx_id indicates which mm the TLB's user
         * entries came from.  As an invariant, the TLB will never
         * contain entries that are out-of-date as of when that mm
         * reached the tlb_gen in the list.
         *
         * To be clear, this means that it's legal for the TLB code to
         * flush the TLB without updating tlb_gen.  This can happen
         * (for now, at least) due to paravirt remote flushes.
         *
         * NB: context 0 is a bit special, since it's also used by
         * various bits of init code.  This is fine -- code that
         * isn't aware of PCID will end up harmlessly flushing
         * context 0.
         */
        struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
 163
 164 /* Initialize cr4 shadow for this CPU. */
 165 static inline void cr4_init_shadow(void)
 166 {
 167         this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
 168 }
 169
 170 /* Set in this cpu's CR4. */
 171 static inline void cr4_set_bits(unsigned long mask)
 172 {
 173         unsigned long cr4;
 174
 175         cr4 = this_cpu_read(cpu_tlbstate.cr4);
 176         if ((cr4 | mask) != cr4) {
 177                 cr4 |= mask;
 178                 this_cpu_write(cpu_tlbstate.cr4, cr4);
 179                 __write_cr4(cr4);
 180         }
 181 }
 182
 183 /* Clear in this cpu's CR4. */
 184 static inline void cr4_clear_bits(unsigned long mask)
 185 {
 186         unsigned long cr4;
 187
 188         cr4 = this_cpu_read(cpu_tlbstate.cr4);
 189         if ((cr4 & ~mask) != cr4) {
 190                 cr4 &= ~mask;
 191                 this_cpu_write(cpu_tlbstate.cr4, cr4);
 192                 __write_cr4(cr4);
 193         }
 194 }
 195
 196 static inline void cr4_toggle_bits(unsigned long mask)
 197 {
 198         unsigned long cr4;
 199
 200         cr4 = this_cpu_read(cpu_tlbstate.cr4);
 201         cr4 ^= mask;
 202         this_cpu_write(cpu_tlbstate.cr4, cr4);
 203         __write_cr4(cr4);
 204 }
 205
 206 /* Read the CR4 shadow. */
 207 static inline unsigned long cr4_read_shadow(void)
 208 {
 209         return this_cpu_read(cpu_tlbstate.cr4);
 210 }
 211
/*
 * Save some of the cr4 feature set we're using (e.g. Pentium 4MB
 * enable and PPro Global page enable), so that any CPUs that boot
 * up after us can get the correct flags.  This should only be used
 * during boot on the boot cpu.
 *
 * trampoline_cr4_features may be NULL; cr4_set_bits_and_update_boot()
 * checks it before mirroring mmu_cr4_features through it.
 */
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;
 220
 221 static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 222 {
 223         mmu_cr4_features |= mask;
 224         if (trampoline_cr4_features)
 225                 *trampoline_cr4_features = mmu_cr4_features;
 226         cr4_set_bits(mask);
 227 }
 228
/*
 * Defined outside this header.
 * NOTE(review): going by the name, this sets up this CPU's cpu_tlbstate
 * and flushes the TLB -- confirm against the definition.
 */
extern void initialize_tlbstate_and_flush(void);
 230
 231 /*
 232  * flush the entire current user mapping
 233  */
 234 static inline void __native_flush_tlb(void)
 235 {
 236         /*
 237          * If current->mm == NULL then we borrow a mm which may change during a
 238          * task switch and therefore we must not be preempted while we write CR3
 239          * back:
 240          */
 241         preempt_disable();
 242         native_write_cr3(__native_read_cr3());
 243         preempt_enable();
 244 }
 245
 246 /*
 247  * flush everything
 248  */
 249 static inline void __native_flush_tlb_global(void)
 250 {
 251         unsigned long cr4, flags;
 252
 253         if (static_cpu_has(X86_FEATURE_INVPCID)) {
 254                 /*
 255                  * Using INVPCID is considerably faster than a pair of writes
 256                  * to CR4 sandwiched inside an IRQ flag save/restore.
 257                  */
 258                 invpcid_flush_all();
 259                 return;
 260         }
 261
 262         /*
 263          * Read-modify-write to CR4 - protect it from preemption and
 264          * from interrupts. (Use the raw variant because this code can
 265          * be called from deep inside debugging code.)
 266          */
 267         raw_local_irq_save(flags);
 268
 269         cr4 = this_cpu_read(cpu_tlbstate.cr4);
 270         /* toggle PGE */
 271         native_write_cr4(cr4 ^ X86_CR4_PGE);
 272         /* write old PGE again and flush TLBs */
 273         native_write_cr4(cr4);
 274
 275         raw_local_irq_restore(flags);
 276 }
 277
 278 /*
 279  * flush one page in the user mapping
 280  */
 281 static inline void __native_flush_tlb_single(unsigned long addr)
 282 {
 283         asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 284 }
 285
 286 /*
 287  * flush everything
 288  */
 289 static inline void __flush_tlb_all(void)
 290 {
 291         if (boot_cpu_has(X86_FEATURE_PGE)) {
 292                 __flush_tlb_global();
 293         } else {
 294                 /*
 295                  * !PGE -> !PCID (setup_pcid()), thus every flush is total.
 296                  */
 297                 __flush_tlb();
 298         }
 299
 300         /*
 301          * Note: if we somehow had PCID but not PGE, then this wouldn't work --
 302          * we'd end up flushing kernel translations for the current ASID but
 303          * we might fail to flush kernel translations for other cached ASIDs.
 304          *
 305          * To avoid this issue, we force PCID off if PGE is off.
 306          */
 307 }
 308
/*
 * flush one page in the kernel mapping
 *
 * Accounts the flush as an NR_TLB_LOCAL_FLUSH_ONE event and then
 * invalidates @addr through __flush_tlb_single().
 */
static inline void __flush_tlb_one(unsigned long addr)
{
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
        __flush_tlb_single(addr);
}
 317
 318 #define TLB_FLUSH_ALL   -1UL
 319
/*
 * TLB flushing:
 *
 *  - flush_tlb_all() flushes all processes' TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context's TLB entries
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 *  - flush_tlb_others(cpumask, info) flushes TLBs on other CPUs
 *
 * ...but the i386 has somewhat limited TLB flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
 */
/*
 * Description of one flush request, as passed to flush_tlb_others() /
 * native_flush_tlb_others().
 */
struct flush_tlb_info {
        /*
         * We support several kinds of flushes.
         *
         * - Fully flush a single mm.  .mm will be set, .end will be
         *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
         *   which the IPI sender is trying to catch us up.
         *
         * - Partially flush a single mm.  .mm will be set, .start and
         *   .end will indicate the range, and .new_tlb_gen will be set
         *   such that the changes between generation .new_tlb_gen-1 and
         *   .new_tlb_gen are entirely contained in the indicated range.
         *
         * - Fully flush all mms whose tlb_gens have been updated.  .mm
         *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
         *   will be zero.
         */
        struct mm_struct        *mm;            /* target mm, or NULL for "all mms" */
        unsigned long           start;          /* start of range (partial flush) */
        unsigned long           end;            /* end of range, or TLB_FLUSH_ALL */
        u64                     new_tlb_gen;    /* generation to catch up to, or 0 */
};
 355
/* Alias for __flush_tlb(). */
#define local_flush_tlb() __flush_tlb()

/*
 * Fully flush @mm: a flush_tlb_mm_range() call with start 0, end
 * TLB_FLUSH_ALL and no VM flags.
 */
#define flush_tlb_mm(mm)        flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
 359
 360 #define flush_tlb_range(vma, start, end)        \
 361                 flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
 362
/* Flush all processes' TLBs. */
extern void flush_tlb_all(void);
/*
 * Flush the range @start..@end of @mm.  In this header, flush_tlb_mm()
 * passes end == TLB_FLUSH_ALL for a whole-mm flush, and @vmflag is the
 * owning VMA's vm_flags (flush_tlb_range()), VM_NONE (flush_tlb_page())
 * or 0 (flush_tlb_mm()).
 */
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                                unsigned long end, unsigned long vmflag);
/* Flush a range of kernel pages. */
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 367
 368 static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 369 {
 370         flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
 371 }
 372
/*
 * Native implementation of flush_tlb_others(): flushes TLBs on the other
 * CPUs named in @cpumask, with @info describing what to flush.
 */
void native_flush_tlb_others(const struct cpumask *cpumask,
                             const struct flush_tlb_info *info);
 375
/*
 * Add @mm to the pending batch @batch: bump the mm's TLB generation and
 * merge mm_cpumask(mm) into the batch's CPU mask.
 */
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
                                        struct mm_struct *mm)
{
        /*
         * Keep this first: the generation bump doubles as the full barrier
         * that orders the mm_cpumask read below after the caller's writes
         * to the paging structures (see inc_mm_tlb_gen()).
         */
        inc_mm_tlb_gen(mm);
        cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}
 382
/*
 * Defined outside this header.
 * NOTE(review): presumably performs the flush for the CPUs accumulated
 * in batch->cpumask by arch_tlbbatch_add_mm() -- confirm against the
 * definition.
 */
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 384
/*
 * Without CONFIG_PARAVIRT, flush_tlb_others() maps straight onto the
 * native implementation.  With it, this header leaves the name
 * undefined (presumably <asm/paravirt.h> provides it -- confirm).
 */
#ifndef CONFIG_PARAVIRT
#define flush_tlb_others(mask, info)    \
        native_flush_tlb_others(mask, info)
#endif
 389
 390 #endif /* _ASM_X86_TLBFLUSH_H */