x86/mm/tlb: Make lazy TLB mode lazier

author Rik van Riel <riel@surriel.com>

Wed, 26 Sep 2018 03:58:44 +0000 (23:58 -0400)

committer Peter Zijlstra <peterz@infradead.org>

Tue, 9 Oct 2018 14:51:12 +0000 (16:51 +0200)
author Rik van Riel <riel@surriel.com>
Wed, 26 Sep 2018 03:58:44 +0000 (23:58 -0400)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 9 Oct 2018 14:51:12 +0000 (16:51 +0200)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c

index 92e46f4c058c64a0bf68f52212a70e5401752acf..7d68489cfdb15ff0838aba5beb6207c36b27cd77 100644 (file)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -185,6 +185,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
  {
         struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
         u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+       bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
         unsigned cpu = smp_processor_id();
         u64 next_tlb_gen;
         bool need_flush;
@@ -242,17 +243,40 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                            next->context.ctx_id);
  
                 /*
-                * We don't currently support having a real mm loaded without
-                * our cpu set in mm_cpumask().  We have all the bookkeeping
-                * in place to figure out whether we would need to flush
-                * if our cpu were cleared in mm_cpumask(), but we don't
-                * currently use it.
+                * Even in lazy TLB mode, the CPU should stay set in the
+                * mm_cpumask. The TLB shootdown code can figure out from
+                * cpu_tlbstate.is_lazy whether or not to send an IPI.
                  */
                 if (WARN_ON_ONCE(real_prev != &init_mm &&
                                  !cpumask_test_cpu(cpu, mm_cpumask(next))))
                         cpumask_set_cpu(cpu, mm_cpumask(next));
  
-               return;
+               /*
+                * If the CPU is not in lazy TLB mode, we are just switching
+                * from one thread in a process to another thread in the same
+                * process. No TLB flush required.
+                */
+               if (!was_lazy)
+                       return;
+
+               /*
+                * Read the tlb_gen to check whether a flush is needed.
+                * If the TLB is up to date, just use it.
+                * The barrier synchronizes with the tlb_gen increment in
+                * the TLB shootdown code.
+                */
+               smp_mb();
+               next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+               if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
+                               next_tlb_gen)
+                       return;
+
+               /*
+                * TLB contents went out of date while we were in lazy
+                * mode. Fall through to the TLB switching code below.
+                */
+               new_asid = prev_asid;
+               need_flush = true;
         } else {
                 u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
  
@@ -346,8 +370,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         this_cpu_write(cpu_tlbstate.loaded_mm, next);
         this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
  
-       load_mm_cr4(next);
-       switch_ldt(real_prev, next);
+       if (next != real_prev) {
+               load_mm_cr4(next);
+               switch_ldt(real_prev, next);
+       }
  }
  
  /*
@@ -455,6 +481,9 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
                  * paging-structure cache to avoid speculatively reading
                  * garbage into our TLB.  Since switching to init_mm is barely
                  * slower than a minimal flush, just switch to init_mm.
+                *
+                * This should be rare, with native_flush_tlb_others skipping
+                * IPIs to lazy TLB mode CPUs.
                  */
                 switch_mm_irqs_off(NULL, &init_mm, NULL);
                 return;
@@ -557,6 +586,11 @@ static void flush_tlb_func_remote(void *info)
         flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
  }
  
+static bool tlb_is_not_lazy(int cpu, void *data) /* cond callback for on_each_cpu_cond_mask(); @data is unused */
+{
+       return !per_cpu(cpu_tlbstate.is_lazy, cpu); /* true when @cpu's is_lazy flag is clear */
+}
+
  void native_flush_tlb_others(const struct cpumask *cpumask,
                              const struct flush_tlb_info *info)
  {
@@ -592,8 +626,23 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
                                                (void *)info, 1);
                 return;
         }
-       smp_call_function_many(cpumask, flush_tlb_func_remote,
+
+       /*
+        * If no page tables were freed, we can skip sending IPIs to
+        * CPUs in lazy TLB mode. They will flush the TLB themselves
+        * at the next context switch.
+        *
+        * However, if page tables are getting freed, we need to send the
+        * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
+        * up on the new contents of what used to be page tables, while
+        * doing a speculative memory access.
+        */
+       if (info->freed_tables)
+               smp_call_function_many(cpumask, flush_tlb_func_remote,
                                (void *)info, 1);
+       else
+               on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
+                               (void *)info, 1, GFP_ATOMIC, cpumask);
  }
  
  /*
author	Rik van Riel <riel@surriel.com>
	Wed, 26 Sep 2018 03:58:44 +0000 (23:58 -0400)
committer	Peter Zijlstra <peterz@infradead.org>
	Tue, 9 Oct 2018 14:51:12 +0000 (16:51 +0200)