Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 5 Aug 2014 00:15:45 +0000 (17:15 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 5 Aug 2014 00:15:45 +0000 (17:15 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 5 Aug 2014 00:15:45 +0000 (17:15 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 5 Aug 2014 00:15:45 +0000 (17:15 -0700)
diff --git a/Documentation/x86/tlb.txt b/Documentation/x86/tlb.txt

new file mode 100644 (file)

index 0000000..2b3a82e
--- /dev/null
+++ b/Documentation/x86/tlb.txt
@@ -0,0 +1,75 @@
+When the kernel unmaps or modifies the attributes of a range of
+memory, it has two choices:
+ 1. Flush the entire TLB with a two-instruction sequence.  This is
+    a quick operation, but it causes collateral damage: TLB entries
+    from areas other than the one we are trying to flush will be
+    destroyed and must be refilled later, at some cost.
+ 2. Use the invlpg instruction to invalidate a single page at a
+    time.  This could potentially cost many more instructions, but
+    it is a much more precise operation, causing no collateral
+    damage to other TLB entries.
+
+Which method to use depends on a few things:
+ 1. The size of the flush being performed.  A flush of the entire
+    address space is obviously better performed by flushing the
+    entire TLB than doing 2^48/PAGE_SIZE individual flushes.
+ 2. The contents of the TLB.  If the TLB is empty, then there will
+    be no collateral damage caused by doing the global flush, and
+    all of the individual flushes will have ended up being wasted
+    work.
+ 3. The size of the TLB.  The larger the TLB, the more collateral
+    damage we do with a full flush.  So, the larger the TLB, the
+    more attractive an individual flush looks.  Data and
+    instructions have separate TLBs, as do different page sizes.
+ 4. The microarchitecture.  The TLB has become a multi-level
+    cache on modern CPUs, and the global flushes have become more
+    expensive relative to single-page flushes.
+
+There is obviously no way the kernel can know all these things,
+especially the contents of the TLB during a given flush.  The
+sizes of the flush will vary greatly depending on the workload as
+well.  There is essentially no "right" point to choose.
+
+You may be doing too many individual invalidations if you see the
+invlpg instruction (or instructions _near_ it) show up high in
+profiles.  If you believe that individual invalidations are being
+called too often, you can lower the tunable:
+
+       /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
+
+This will cause us to do the global flush for more cases.
+Lowering it to 0 will disable the use of the individual flushes.
+Setting it to 1 is a very conservative setting and it should
+never need to be 0 under normal circumstances.
+
+Despite the fact that a single individual flush on x86 is
+guaranteed to flush a full 2MB [1], hugetlbfs always uses the full
+flushes.  THP is treated exactly the same as normal memory.
+
+You might see invlpg inside of flush_tlb_mm_range() show up in
+profiles, or you can use the trace_tlb_flush() tracepoints to
+determine how long the flush operations are taking.
+
+Essentially, you are balancing the cycles you spend doing invlpg
+with the cycles that you spend refilling the TLB later.
+
+You can measure how expensive TLB refills are by using
+performance counters and 'perf stat', like this:
+
+perf stat -e
+       cpu/event=0x8,umask=0x84,name=dtlb_load_misses_walk_duration/,
+       cpu/event=0x8,umask=0x82,name=dtlb_load_misses_walk_completed/,
+       cpu/event=0x49,umask=0x4,name=dtlb_store_misses_walk_duration/,
+       cpu/event=0x49,umask=0x2,name=dtlb_store_misses_walk_completed/,
+       cpu/event=0x85,umask=0x4,name=itlb_misses_walk_duration/,
+       cpu/event=0x85,umask=0x2,name=itlb_misses_walk_completed/
+
+That works on an IvyBridge-era CPU (i5-3320M).  Different CPUs
+may have differently-named counters, but they should at least
+be there in some form.  You can use pmu-tools 'ocperf list'
+(https://github.com/andikleen/pmu-tools) to find the right
+counters for a given CPU.
+
+1. A footnote in Intel's SDM "4.10.4.2 Recommended Invalidation"
+   says: "One execution of INVLPG is sufficient even for a page
+   with size greater than 4 KBytes."
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h

index be12c534fd592e84fd81ca0b3b6c5182ecb2f6fe..166af2a8e865b370c605cecf6777b79882e6dcb2 100644 (file)
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -3,6 +3,10 @@
  
  #include <asm/desc.h>
  #include <linux/atomic.h>
+#include <linux/mm_types.h>
+
+#include <trace/events/tlb.h>
+
  #include <asm/pgalloc.h>
  #include <asm/tlbflush.h>
  #include <asm/paravirt.h>
@@ -44,6 +48,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  
                 /* Re-load page tables */
                 load_cr3(next->pgd);
+               trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
  
                 /* Stop flush ipis for the previous mm */
                 cpumask_clear_cpu(cpu, mm_cpumask(prev));
@@ -71,6 +76,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                          * to make sure to use no freed page tables.
                          */
                         load_cr3(next->pgd);
+                       trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                         load_LDT_nolock(&next->context);
                 }
         }
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h

index 32cc237f8e2062edf2eb8cc26d0a738dd1cd8822..ee30b9f0b91c9d36f04bb4b13200a5d675e8a873 100644 (file)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -72,7 +72,6 @@ extern u16 __read_mostly tlb_lld_4k[NR_INFO];
  extern u16 __read_mostly tlb_lld_2m[NR_INFO];
  extern u16 __read_mostly tlb_lld_4m[NR_INFO];
  extern u16 __read_mostly tlb_lld_1g[NR_INFO];
-extern s8  __read_mostly tlb_flushall_shift;
  
  /*
   *  CPU type and hardware bug flags. Kept separately for each CPU.
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c

index bc360d3df60e7675fac1142d4571b59f8294572b..60e5497681f5ecc5083f6a31c1e2067301c58bb2 100644 (file)
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -724,11 +724,6 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
  }
  #endif
  
-static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
-{
-       tlb_flushall_shift = 6;
-}
-
  static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
  {
         u32 ebx, eax, ecx, edx;
@@ -776,8 +771,6 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
                 tlb_lli_2m[ENTRIES] = eax & mask;
  
         tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
-
-       cpu_set_tlb_flushall_shift(c);
  }
  
  static const struct cpu_dev amd_cpu_dev = {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

index 188a8c5cc09425180ea38d251b2d1a2556223ee7..333fd5209336d3df51e8ed676e4aa4b937a48b6b 100644 (file)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -481,26 +481,17 @@ u16 __read_mostly tlb_lld_2m[NR_INFO];
  u16 __read_mostly tlb_lld_4m[NR_INFO];
  u16 __read_mostly tlb_lld_1g[NR_INFO];
  
-/*
- * tlb_flushall_shift shows the balance point in replacing cr3 write
- * with multiple 'invlpg'. It will do this replacement when
- *   flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
- * If tlb_flushall_shift is -1, means the replacement will be disabled.
- */
-s8  __read_mostly tlb_flushall_shift = -1;
-
  void cpu_detect_tlb(struct cpuinfo_x86 *c)
  {
         if (this_cpu->c_detect_tlb)
                 this_cpu->c_detect_tlb(c);
  
         printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
-               "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n"
-               "tlb_flushall_shift: %d\n",
+               "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
                 tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
                 tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
                 tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
-               tlb_lld_1g[ENTRIES], tlb_flushall_shift);
+               tlb_lld_1g[ENTRIES]);
  }
  
  void detect_ht(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c

index 9483ee5b3991035c2b0f0e8366015049fb4747ed..74e804ddc5c7567ec8b875fbca0e6f69d5922763 100644 (file)
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -634,31 +634,6 @@ static void intel_tlb_lookup(const unsigned char desc)
         }
  }
  
-static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
-{
-       switch ((c->x86 << 8) + c->x86_model) {
-       case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
-       case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
-       case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
-       case 0x61d: /* six-core 45 nm xeon "Dunnington" */
-               tlb_flushall_shift = -1;
-               break;
-       case 0x63a: /* Ivybridge */
-               tlb_flushall_shift = 2;
-               break;
-       case 0x61a: /* 45 nm nehalem, "Bloomfield" */
-       case 0x61e: /* 45 nm nehalem, "Lynnfield" */
-       case 0x625: /* 32 nm nehalem, "Clarkdale" */
-       case 0x62c: /* 32 nm nehalem, "Gulftown" */
-       case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
-       case 0x62f: /* 32 nm Xeon E7 */
-       case 0x62a: /* SandyBridge */
-       case 0x62d: /* SandyBridge, "Romely-EP" */
-       default:
-               tlb_flushall_shift = 6;
-       }
-}
-
  static void intel_detect_tlb(struct cpuinfo_x86 *c)
  {
         int i, j, n;
@@ -683,7 +658,6 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c)
                 for (j = 1 ; j < 16 ; j++)
                         intel_tlb_lookup(desc[j]);
         }
-       intel_tlb_flushall_shift_set(c);
  }
  
  static const struct cpu_dev intel_cpu_dev = {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c

index 36642793e315fc8bb4b8682e5bf8f4dbe8f4ab7c..1dbade870f90dd8b27140e10275df6d0610506be 100644 (file)
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -577,6 +577,8 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
  
  static const char nx_warning[] = KERN_CRIT
  "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
+static const char smep_warning[] = KERN_CRIT
+"unable to execute userspace code (SMEP?) (uid: %d)\n";
  
  static void
  show_fault_oops(struct pt_regs *regs, unsigned long error_code,
@@ -597,6 +599,10 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
  
                 if (pte && pte_present(*pte) && !pte_exec(*pte))
                         printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
+               if (pte && pte_present(*pte) && pte_exec(*pte) &&
+                               (pgd_flags(*pgd) & _PAGE_USER) &&
+                               (read_cr4() & X86_CR4_SMEP))
+                       printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
         }
  
         printk(KERN_ALERT "BUG: unable to handle kernel ");
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c

index f9713061811332100ee6560ec736b0c18c509e45..66dba36f2343571532f09e1c7767a7a85ea28f4f 100644 (file)
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,6 +18,13 @@
  #include <asm/dma.h>           /* for MAX_DMA_PFN */
  #include <asm/microcode.h>
  
+/*
+ * We need to define the tracepoints somewhere, and tlb.c
+ * is only compiled when SMP=y.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/tlb.h>
+
  #include "mm_internal.h"
  
  static unsigned long __initdata pgt_buf_start;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c

index dd8dda167a242621515c901a3a5d62b4fcadf37b..1fe33987de027f73c2997960198978a557bb1027 100644 (file)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -49,6 +49,7 @@ void leave_mm(int cpu)
         if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
                 cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
                 load_cr3(swapper_pg_dir);
+               trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
         }
  }
  EXPORT_SYMBOL_GPL(leave_mm);
@@ -102,20 +103,24 @@ static void flush_tlb_func(void *info)
  
         if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
                 return;
+       if (!f->flush_end)
+               f->flush_end = f->flush_start + PAGE_SIZE;
  
         count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
         if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-               if (f->flush_end == TLB_FLUSH_ALL)
+               if (f->flush_end == TLB_FLUSH_ALL) {
                         local_flush_tlb();
-               else if (!f->flush_end)
-                       __flush_tlb_single(f->flush_start);
-               else {
+                       trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL);
+               } else {
                         unsigned long addr;
+                       unsigned long nr_pages = /* range size in pages */
+                               (f->flush_end - f->flush_start) / PAGE_SIZE;
                         addr = f->flush_start;
                         while (addr < f->flush_end) {
                                 __flush_tlb_single(addr);
                                 addr += PAGE_SIZE;
                         }
+                       trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages);
                 }
         } else
                 leave_mm(smp_processor_id());
@@ -153,46 +158,45 @@ void flush_tlb_current_task(void)
  
         count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
         local_flush_tlb();
+       trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
         if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                 flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
         preempt_enable();
  }
  
+/*
+ * See Documentation/x86/tlb.txt for details.  We choose 33
+ * because it is large enough to cover the vast majority (at
+ * least 95%) of allocations, and is small enough that we are
+ * confident it will not cause too much overhead.  Each single
+ * flush is about 100 ns, so this caps the maximum overhead at
+ * _about_ 3,000 ns.
+ *
+ * This is in units of pages.
+ */
+unsigned long tlb_single_page_flush_ceiling = 33;
+
  void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                                 unsigned long end, unsigned long vmflag)
  {
         unsigned long addr;
-       unsigned act_entries, tlb_entries = 0;
-       unsigned long nr_base_pages;
+       /* do a global flush by default */
+       unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
  
         preempt_disable();
         if (current->active_mm != mm)
-               goto flush_all;
+               goto out;
  
         if (!current->mm) {
                 leave_mm(smp_processor_id());
-               goto flush_all;
+               goto out;
         }
  
-       if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
-                                       || vmflag & VM_HUGETLB) {
-               local_flush_tlb();
-               goto flush_all;
-       }
-
-       /* In modern CPU, last level tlb used for both data/ins */
-       if (vmflag & VM_EXEC)
-               tlb_entries = tlb_lli_4k[ENTRIES];
-       else
-               tlb_entries = tlb_lld_4k[ENTRIES];
+       if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+               base_pages_to_flush = (end - start) >> PAGE_SHIFT;
  
-       /* Assume all of TLB entries was occupied by this task */
-       act_entries = tlb_entries >> tlb_flushall_shift;
-       act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
-       nr_base_pages = (end - start) >> PAGE_SHIFT;
-
-       /* tlb_flushall_shift is on balance point, details in commit log */
-       if (nr_base_pages > act_entries) {
+       if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+               base_pages_to_flush = TLB_FLUSH_ALL;
                 count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
                 local_flush_tlb();
         } else {
@@ -201,17 +205,15 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                         count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
                         __flush_tlb_single(addr);
                 }
-
-               if (cpumask_any_but(mm_cpumask(mm),
-                               smp_processor_id()) < nr_cpu_ids)
-                       flush_tlb_others(mm_cpumask(mm), mm, start, end);
-               preempt_enable();
-               return;
         }
-
-flush_all:
+       trace_tlb_flush(TLB_LOCAL_MM_SHOOTDOWN, base_pages_to_flush);
+out:
+       if (base_pages_to_flush == TLB_FLUSH_ALL) {
+               start = 0UL;
+               end = TLB_FLUSH_ALL;
+       }
         if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-               flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+               flush_tlb_others(mm_cpumask(mm), mm, start, end);
         preempt_enable();
  }
  
@@ -260,32 +262,26 @@ static void do_kernel_range_flush(void *info)
  
  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
  {
-       unsigned act_entries;
-       struct flush_tlb_info info;
-
-       /* In modern CPU, last level tlb used for both data/ins */
-       act_entries = tlb_lld_4k[ENTRIES];
  
         /* Balance as user space task's flush, a bit conservative */
-       if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 ||
-               (end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
-
+       if (end == TLB_FLUSH_ALL ||
+           (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
                 on_each_cpu(do_flush_tlb_all, NULL, 1);
-       else {
+       } else {
+               struct flush_tlb_info info;
                 info.flush_start = start;
                 info.flush_end = end;
                 on_each_cpu(do_kernel_range_flush, &info, 1);
         }
  }
  
-#ifdef CONFIG_DEBUG_TLBFLUSH
  static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
                              size_t count, loff_t *ppos)
  {
         char buf[32];
         unsigned int len;
  
-       len = sprintf(buf, "%hd\n", tlb_flushall_shift);
+       len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
         return simple_read_from_buffer(user_buf, count, ppos, buf, len);
  }
  
@@ -294,20 +290,20 @@ static ssize_t tlbflush_write_file(struct file *file,
  {
         char buf[32];
         ssize_t len;
-       s8 shift;
+       int ceiling;
  
         len = min(count, sizeof(buf) - 1);
         if (copy_from_user(buf, user_buf, len))
                 return -EFAULT;
  
         buf[len] = '\0';
-       if (kstrtos8(buf, 0, &shift))
+       if (kstrtoint(buf, 0, &ceiling))
                 return -EINVAL;
  
-       if (shift < -1 || shift >= BITS_PER_LONG)
+       if (ceiling < 0)
                 return -EINVAL;
  
-       tlb_flushall_shift = shift;
+       tlb_single_page_flush_ceiling = ceiling;
         return count;
  }
  
@@ -317,11 +313,10 @@ static const struct file_operations fops_tlbflush = {
         .llseek = default_llseek,
  };
  
-static int __init create_tlb_flushall_shift(void)
+static int __init create_tlb_single_page_flush_ceiling(void)
  {
-       debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
+       debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
                             arch_debugfs_dir, NULL, &fops_tlbflush);
         return 0;
  }
-late_initcall(create_tlb_flushall_shift);
-#endif
+late_initcall(create_tlb_single_page_flush_ceiling);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 96c5750e3110e7bfd0b58b464738e25aa6bac8ab..796deac19fcfb4bb21d9e8ba9e22c7d4e15df156 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -516,4 +516,12 @@ struct vm_special_mapping
         struct page **pages;
  };
  
+enum tlb_flush_reason {
+       TLB_FLUSH_ON_TASK_SWITCH,
+       TLB_REMOTE_SHOOTDOWN,
+       TLB_LOCAL_SHOOTDOWN,
+       TLB_LOCAL_MM_SHOOTDOWN,
+       NR_TLB_FLUSH_REASONS,
+};
+
  #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h

new file mode 100644 (file)

index 0000000..13391d2
--- /dev/null
+++ b/include/trace/events/tlb.h
@@ -0,0 +1,40 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tlb
+
+#if !defined(_TRACE_TLB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TLB_H
+
+#include <linux/mm_types.h>
+#include <linux/tracepoint.h>
+
+#define TLB_FLUSH_REASON       \
+       { TLB_FLUSH_ON_TASK_SWITCH,     "flush on task switch" },       \
+       { TLB_REMOTE_SHOOTDOWN,         "remote shootdown" },           \
+       { TLB_LOCAL_SHOOTDOWN,          "local shootdown" },            \
+       { TLB_LOCAL_MM_SHOOTDOWN,       "local mm shootdown" }
+
+TRACE_EVENT(tlb_flush,
+
+       TP_PROTO(int reason, unsigned long pages),
+       TP_ARGS(reason, pages),
+
+       TP_STRUCT__entry(
+               __field(          int, reason)
+               __field(unsigned long,  pages)
+       ),
+
+       TP_fast_assign(
+               __entry->reason = reason;
+               __entry->pages  = pages;
+       ),
+
+       TP_printk("pages:%ld reason:%s (%d)",
+               __entry->pages,
+               __print_symbolic(__entry->reason, TLB_FLUSH_REASON),
+               __entry->reason)
+);
+
+#endif /* _TRACE_TLB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 5 Aug 2014 00:15:45 +0000 (17:15 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 5 Aug 2014 00:15:45 +0000 (17:15 -0700)
Documentation/x86/tlb.txt	[new file with mode: 0644]	patch \| blob
arch/x86/include/asm/mmu_context.h		patch \| blob \| blame \| history
arch/x86/include/asm/processor.h		patch \| blob \| blame \| history
arch/x86/kernel/cpu/amd.c		patch \| blob \| blame \| history
arch/x86/kernel/cpu/common.c		patch \| blob \| blame \| history
arch/x86/kernel/cpu/intel.c		patch \| blob \| blame \| history
arch/x86/mm/fault.c		patch \| blob \| blame \| history
arch/x86/mm/init.c		patch \| blob \| blame \| history
arch/x86/mm/tlb.c		patch \| blob \| blame \| history
include/linux/mm_types.h		patch \| blob \| blame \| history
include/trace/events/tlb.h	[new file with mode: 0644]	patch \| blob