Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author     Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 15 Mar 2016 00:58:53 +0000 (17:58 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 15 Mar 2016 00:58:53 +0000 (17:58 -0700)
Pull perf updates from Ingo Molnar:
 "Main kernel side changes:

   - Big reorganization of the x86 perf support code.  The old code grew
     organically deep inside arch/x86/kernel/cpu/perf* and its naming
     became somewhat messy.

     The new location is under arch/x86/events/, using the following
     cleaner hierarchy of source code files:

       perf/x86: Move perf_event.c .................. => x86/events/core.c
       perf/x86: Move perf_event_amd.c .............. => x86/events/amd/core.c
       perf/x86: Move perf_event_amd_ibs.c .......... => x86/events/amd/ibs.c
       perf/x86: Move perf_event_amd_iommu.[ch] ..... => x86/events/amd/iommu.[ch]
       perf/x86: Move perf_event_amd_uncore.c ....... => x86/events/amd/uncore.c
       perf/x86: Move perf_event_intel_bts.c ........ => x86/events/intel/bts.c
       perf/x86: Move perf_event_intel.c ............ => x86/events/intel/core.c
       perf/x86: Move perf_event_intel_cqm.c ........ => x86/events/intel/cqm.c
       perf/x86: Move perf_event_intel_cstate.c ..... => x86/events/intel/cstate.c
       perf/x86: Move perf_event_intel_ds.c ......... => x86/events/intel/ds.c
       perf/x86: Move perf_event_intel_lbr.c ........ => x86/events/intel/lbr.c
       perf/x86: Move perf_event_intel_pt.[ch] ...... => x86/events/intel/pt.[ch]
       perf/x86: Move perf_event_intel_rapl.c ....... => x86/events/intel/rapl.c
       perf/x86: Move perf_event_intel_uncore.[ch] .. => x86/events/intel/uncore.[ch]
       perf/x86: Move perf_event_intel_uncore_nhmex.c => x86/events/intel/uncore_nhmex.c
       perf/x86: Move perf_event_intel_uncore_snb.c   => x86/events/intel/uncore_snb.c
       perf/x86: Move perf_event_intel_uncore_snbep.c => x86/events/intel/uncore_snbep.c
       perf/x86: Move perf_event_knc.c .............. => x86/events/intel/knc.c
       perf/x86: Move perf_event_p4.c ............... => x86/events/intel/p4.c
       perf/x86: Move perf_event_p6.c ............... => x86/events/intel/p6.c
       perf/x86: Move perf_event_msr.c .............. => x86/events/msr.c

     (Borislav Petkov)

   - Update various x86 PMU constraint and hw support details (Stephane
     Eranian)

   - Optimize kprobes for BPF execution (Martin KaFai Lau)

   - Rewrite, refactor and fix the Intel uncore PMU driver code (Thomas
     Gleixner)

   - Rewrite, refactor and fix the Intel RAPL PMU code (Thomas Gleixner)

   - Various fixes and smaller cleanups.

  There are lots of perf tooling updates as well.  A few highlights:

  perf report/top:

     - Hierarchy histogram mode for 'perf top' and 'perf report',
       showing multiple levels, one per --sort entry: (Namhyung Kim)

       On a mostly idle system:

         # perf top --hierarchy -s comm,dso

       Then expand some levels and use 'P' to take a snapshot:

         # cat perf.hist.0
         -  92.32%         perf
               58.20%         perf
               22.29%         libc-2.22.so
                5.97%         [kernel]
                4.18%         libelf-0.165.so
                1.69%         [unknown]
         -   4.71%         qemu-system-x86
                3.10%         [kernel]
                1.60%         qemu-system-x86_64 (deleted)
         +   2.97%         swapper
         #

     - Add 'L' hotkey to dynamically set the percent threshold for
       histogram entries and callchains, i.e. dynamically do what the
       --percent-limit command line option to 'top' and 'report' does.
       (Namhyung Kim)
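
       For example, setting the same threshold non-interactively, with
       an illustrative value of 5%:

          # perf report --percent-limit 5
          # perf top --percent-limit 5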

  perf mem:

     - Allow specifying events via -e in 'perf mem record', also listing
       what events can be specified via 'perf mem record -e list' (Jiri
       Olsa)
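
       A minimal sketch of the new usage ('./some-workload' is a
       placeholder; the available event names, e.g. 'ldlat-loads', are
       the ones printed by the list subcommand):

          # perf mem record -e list
          # perf mem record -e ldlat-loads ./some-workload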

  perf record:

     - Add 'perf record' --all-user/--all-kernel options, so that one
       can request that all the events on the command line be restricted
       to the user or kernel levels (Jiri Olsa), i.e.:

         perf record -e cycles:u,instructions:u

       is equivalent to:

         perf record --all-user -e cycles,instructions

     - Make 'perf record' collect CPU cache info in the perf.data file header:

         $ perf record usleep 1
         [ perf record: Woken up 1 times to write data ]
         [ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
         $ perf report --header-only -I | tail -10 | head -8
         # CPU cache info:
         #  L1 Data                 32K [0-1]
         #  L1 Instruction          32K [0-1]
         #  L1 Data                 32K [2-3]
         #  L1 Instruction          32K [2-3]
         #  L2 Unified             256K [0-1]
         #  L2 Unified             256K [2-3]
         #  L3 Unified            4096K [0-3]

       This will be used in 'perf c2c' and eventually in 'perf diff' to
       allow, for instance, running the same workload on multiple
       machines and then showing the hardware differences when using
       'diff'.  (Jiri Olsa)

     - Improved support for Java, using the JVMTI agent library to
       produce jitdumps that are then turned, via 'perf inject', into
       synthesized PERF_RECORD_MMAP2 events pointing to synthesized ELF
       files stored in ~/.debug and keyed with build-ids, allowing
       symbol resolution and even annotation with source line info; see
       the changeset comments for how to use it (Stephane Eranian)
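
       A rough sketch of that workflow (the agent library path and
       'MyApp' are placeholders; the changeset comments are the
       authoritative reference):

          # perf record -k monotonic java -agentpath:/path/to/libperf-jvmti.so MyApp
          # perf inject --jit -i perf.data -o perf.data.jitted
          # perf report -i perf.data.jitted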

  perf script/trace:

     - Decode data_src values (e.g.  perf.data files generated by 'perf
       mem record') in 'perf script': (Jiri Olsa)

         # perf script
           perf 693 [1] 4.088652: 1 cpu/mem-loads,ldlat=30/P: ffff88007d0b0f40 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No <SNIP>
                                                                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
     - Improve support for the 'data_src', 'weight' and 'addr' fields
       in 'perf script' (Jiri Olsa)

     - Handle empty print fmts in 'perf script -s', i.e. when running
       Python or Perl scripts (Taeung Song)

  perf stat:

     - 'perf stat' now shows shadow metrics (insn per cycle, etc) in
       interval mode too.  E.g:

         # perf stat -I 1000 -e instructions,cycles sleep 1
         #         time   counts unit events
            1.000215928  519,620      instructions     #  0.69 insn per cycle
            1.000215928  752,003      cycles
         <SNIP>

     - Port 'perf kvm stat' to PowerPC (Hemant Kumar)

     - Implement CSV metrics output in 'perf stat' (Andi Kleen)
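
       A quick way to try the CSV output and the related --metric-only
       mode from the shortlog below (output omitted here; the CSV
       format is now documented in the perf-stat manpage):

          # perf stat -x, -a sleep 1
          # perf stat --metric-only -a sleep 1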

  perf BPF support:

     - Support converting data from bpf events in 'perf data' (Wang Nan)

     - Print bpf-output events in 'perf script': (Wang Nan).

         # perf record -e bpf-output/no-inherit,name=evt/ -e ./test_bpf_output_3.c/map:channel.event=evt/ usleep 1000
         # perf script
            usleep  4882 21384.532523:   evt:  ffffffff810e97d1 sys_nanosleep ([kernel.kallsyms])
             BPF output: 0000: 52 61 69 73 65 20 61 20  Raise a
                         0008: 42 50 46 20 65 76 65 6e  BPF even
                         0010: 74 21 00 00              t!..
             BPF string: "Raise a BPF event!"
         #

     - Add API to set values of map entries in a BPF object, be it
       individual map slots or ranges (Wang Nan)

     - Introduce support for the 'bpf-output' event (Wang Nan)

     - Add glue to read perf events in a BPF program (Wang Nan)

     - Improve support for bpf-output events in 'perf trace' (Wang Nan)

  ... and tons of other changes as well - see the shortlog and git log
  for details!"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (342 commits)
  perf stat: Add --metric-only support for -A
  perf stat: Implement --metric-only mode
  perf stat: Document CSV format in manpage
  perf hists browser: Check sort keys before hot key actions
  perf hists browser: Allow thread filtering for comm sort key
  perf tools: Add sort__has_comm variable
  perf tools: Recalc total periods using top-level entries in hierarchy
  perf tools: Remove nr_sort_keys field
  perf hists browser: Cleanup hist_browser__fprintf_hierarchy_entry()
  perf tools: Remove hist_entry->fmt field
  perf tools: Fix command line filters in hierarchy mode
  perf tools: Add more sort entry check functions
  perf tools: Fix hist_entry__filter() for hierarchy
  perf jitdump: Build only on supported archs
  tools lib traceevent: Add '~' operation within arg_num_eval()
  perf tools: Omit unnecessary cast in perf_pmu__parse_scale
  perf tools: Pass perf_hpp_list all the way through setup_sort_list
  perf tools: Fix perf script python database export crash
  perf jitdump: DWARF is also needed
  perf bench mem: Prepare the x86-64 build for upstream memcpy_mcsafe() changes
  ...

244 files changed:
Documentation/sysctl/kernel.txt
MAINTAINERS
arch/x86/Kbuild
arch/x86/events/Makefile [new file with mode: 0644]
arch/x86/events/amd/core.c [new file with mode: 0644]
arch/x86/events/amd/ibs.c [new file with mode: 0644]
arch/x86/events/amd/iommu.c [new file with mode: 0644]
arch/x86/events/amd/iommu.h [new file with mode: 0644]
arch/x86/events/amd/uncore.c [new file with mode: 0644]
arch/x86/events/core.c [new file with mode: 0644]
arch/x86/events/intel/bts.c [new file with mode: 0644]
arch/x86/events/intel/core.c [new file with mode: 0644]
arch/x86/events/intel/cqm.c [new file with mode: 0644]
arch/x86/events/intel/cstate.c [new file with mode: 0644]
arch/x86/events/intel/ds.c [new file with mode: 0644]
arch/x86/events/intel/knc.c [new file with mode: 0644]
arch/x86/events/intel/lbr.c [new file with mode: 0644]
arch/x86/events/intel/p4.c [new file with mode: 0644]
arch/x86/events/intel/p6.c [new file with mode: 0644]
arch/x86/events/intel/pt.c [new file with mode: 0644]
arch/x86/events/intel/pt.h [new file with mode: 0644]
arch/x86/events/intel/rapl.c [new file with mode: 0644]
arch/x86/events/intel/uncore.c [new file with mode: 0644]
arch/x86/events/intel/uncore.h [new file with mode: 0644]
arch/x86/events/intel/uncore_nhmex.c [new file with mode: 0644]
arch/x86/events/intel/uncore_snb.c [new file with mode: 0644]
arch/x86/events/intel/uncore_snbep.c [new file with mode: 0644]
arch/x86/events/msr.c [new file with mode: 0644]
arch/x86/events/perf_event.h [new file with mode: 0644]
arch/x86/include/asm/elf.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/topology.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/Makefile
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs_64.c
arch/x86/kernel/cpu/centaur.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cyrix.c
arch/x86/kernel/cpu/hypervisor.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/intel_pt.h [deleted file]
arch/x86/kernel/cpu/mcheck/mce-inject.c
arch/x86/kernel/cpu/mcheck/p5.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/mcheck/threshold.c
arch/x86/kernel/cpu/mcheck/winchip.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/cpu/mtrr/centaur.c
arch/x86/kernel/cpu/mtrr/cleanup.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/perf_event.c [deleted file]
arch/x86/kernel/cpu/perf_event.h [deleted file]
arch/x86/kernel/cpu/perf_event_amd.c [deleted file]
arch/x86/kernel/cpu/perf_event_amd_ibs.c [deleted file]
arch/x86/kernel/cpu/perf_event_amd_iommu.c [deleted file]
arch/x86/kernel/cpu/perf_event_amd_iommu.h [deleted file]
arch/x86/kernel/cpu/perf_event_amd_uncore.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_bts.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_cqm.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_cstate.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_ds.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_lbr.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_pt.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_rapl.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore.h [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c [deleted file]
arch/x86/kernel/cpu/perf_event_knc.c [deleted file]
arch/x86/kernel/cpu/perf_event_msr.c [deleted file]
arch/x86/kernel/cpu/perf_event_p4.c [deleted file]
arch/x86/kernel/cpu/perf_event_p6.c [deleted file]
arch/x86/kernel/cpu/rdrand.c
arch/x86/kernel/cpu/topology.c
arch/x86/kernel/cpu/transmeta.c
arch/x86/kernel/cpu/vmware.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/nmi.c
arch/x86/kernel/smpboot.c
arch/x86/lguest/boot.c
arch/x86/xen/enlighten.c
arch/x86/xen/pmu.c
include/linux/perf_event.h
kernel/events/core.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_syscalls.c
lib/cpumask.c
tools/build/Makefile.build
tools/build/Makefile.feature
tools/build/feature/Makefile
tools/build/feature/test-all.c
tools/build/feature/test-compile.c
tools/build/feature/test-libcrypto.c [new file with mode: 0644]
tools/lib/api/Build
tools/lib/api/Makefile
tools/lib/api/debug-internal.h [new file with mode: 0644]
tools/lib/api/debug.c [new file with mode: 0644]
tools/lib/api/debug.h [new file with mode: 0644]
tools/lib/api/fs/fs.c
tools/lib/api/fs/fs.h
tools/lib/bpf/libbpf.c
tools/lib/traceevent/event-parse.c
tools/lib/traceevent/event-parse.h
tools/perf/Documentation/perf-config.txt
tools/perf/Documentation/perf-inject.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perfconfig.example
tools/perf/Documentation/tips.txt
tools/perf/Makefile
tools/perf/Makefile.perf
tools/perf/arch/arm/Makefile
tools/perf/arch/arm64/Makefile
tools/perf/arch/powerpc/Makefile
tools/perf/arch/powerpc/util/Build
tools/perf/arch/powerpc/util/book3s_hcalls.h [new file with mode: 0644]
tools/perf/arch/powerpc/util/book3s_hv_exits.h [new file with mode: 0644]
tools/perf/arch/powerpc/util/kvm-stat.c [new file with mode: 0644]
tools/perf/arch/s390/util/kvm-stat.c
tools/perf/arch/x86/Makefile
tools/perf/arch/x86/tests/rdpmc.c
tools/perf/arch/x86/util/intel-bts.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/arch/x86/util/kvm-stat.c
tools/perf/bench/mem-memcpy-x86-64-asm.S
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-cache.c
tools/perf/builtin-config.c
tools/perf/builtin-diff.c
tools/perf/builtin-help.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-kvm.c
tools/perf/builtin-mem.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/config/Makefile
tools/perf/jvmti/Makefile [new file with mode: 0644]
tools/perf/jvmti/jvmti_agent.c [new file with mode: 0644]
tools/perf/jvmti/jvmti_agent.h [new file with mode: 0644]
tools/perf/jvmti/libjvmti.c [new file with mode: 0644]
tools/perf/perf.c
tools/perf/perf.h
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
tools/perf/tests/.gitignore
tools/perf/tests/Build
tools/perf/tests/bp_signal.c
tools/perf/tests/bpf-script-test-relocation.c [new file with mode: 0644]
tools/perf/tests/bpf.c
tools/perf/tests/code-reading.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_output.c
tools/perf/tests/llvm.c
tools/perf/tests/llvm.h
tools/perf/tests/make
tools/perf/tests/parse-events.c
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/ui/browser.c
tools/perf/ui/browser.h
tools/perf/ui/browsers/annotate.c
tools/perf/ui/browsers/hists.c
tools/perf/ui/gtk/hists.c
tools/perf/ui/hist.c
tools/perf/ui/stdio/hist.c
tools/perf/util/Build
tools/perf/util/auxtrace.c
tools/perf/util/auxtrace.h
tools/perf/util/bpf-loader.c
tools/perf/util/bpf-loader.h
tools/perf/util/build-id.c
tools/perf/util/build-id.h
tools/perf/util/cache.h
tools/perf/util/callchain.c
tools/perf/util/color.c
tools/perf/util/config.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/ctype.c
tools/perf/util/data-convert-bt.c
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/demangle-java.c [new file with mode: 0644]
tools/perf/util/demangle-java.h [new file with mode: 0644]
tools/perf/util/dso.c
tools/perf/util/env.c
tools/perf/util/env.h
tools/perf/util/event.c
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/genelf.c [new file with mode: 0644]
tools/perf/util/genelf.h [new file with mode: 0644]
tools/perf/util/genelf_debug.c [new file with mode: 0644]
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/help-unknown-cmd.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/jit.h [new file with mode: 0644]
tools/perf/util/jitdump.c [new file with mode: 0644]
tools/perf/util/jitdump.h [new file with mode: 0644]
tools/perf/util/kvm-stat.h
tools/perf/util/machine.h
tools/perf/util/mem-events.c [new file with mode: 0644]
tools/perf/util/mem-events.h [new file with mode: 0644]
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.l
tools/perf/util/parse-events.y
tools/perf/util/pmu.c
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/setup.py
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/stat-shadow.c
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/strbuf.c
tools/perf/util/strbuf.h
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/trace-event.c
tools/perf/util/tsc.c
tools/perf/util/util.c
tools/perf/util/util.h
tools/power/x86/turbostat/turbostat.c

index a93b414672a71ac6fa9bac1e848215804bde139c..f886fbb1ad05d78ecda7c8a3289528ce9613665a 100644 (file)
@@ -58,6 +58,8 @@ show up in /proc/sys/kernel:
 - panic_on_stackoverflow
 - panic_on_unrecovered_nmi
 - panic_on_warn
+- perf_cpu_time_max_percent
+- perf_event_paranoid
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -639,6 +641,17 @@ allowed to execute.
 
 ==============================================================
 
+perf_event_paranoid:
+
+Controls use of the performance events system by unprivileged
+users (without CAP_SYS_ADMIN).  The default value is 1.
+
+ -1: Allow use of (almost) all events by all users
+>=0: Disallow raw tracepoint access by users without CAP_SYS_ADMIN
+>=1: Disallow CPU event access by users without CAP_SYS_ADMIN
+>=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+
+==============================================================
 
 pid_max:
 
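For reference, a quick way to inspect and (as root) change the setting at
runtime, using the values documented above:

  # cat /proc/sys/kernel/perf_event_paranoid
  # echo 2 > /proc/sys/kernel/perf_event_paranoid
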
index 6ee06ea47be4d1cb2f865587f5e175e3f0235c32..2061ea77667c36260a72f92ba12c3ecdb625d3c6 100644 (file)
@@ -8475,6 +8475,7 @@ PERFORMANCE EVENTS SUBSYSTEM
 M:     Peter Zijlstra <peterz@infradead.org>
 M:     Ingo Molnar <mingo@redhat.com>
 M:     Arnaldo Carvalho de Melo <acme@kernel.org>
+R:     Alexander Shishkin <alexander.shishkin@linux.intel.com>
 L:     linux-kernel@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 S:     Supported
index 1538562cc720e78132d0eb31c38116b029d56ce0..eb3abf8ac44eb33f333ed29727dfd49ba6bfbf38 100644 (file)
@@ -1,6 +1,7 @@
-
 obj-y += entry/
 
+obj-$(CONFIG_PERF_EVENTS) += events/
+
 obj-$(CONFIG_KVM) += kvm/
 
 # Xen paravirtualization support
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
new file mode 100644 (file)
index 0000000..fdfea15
--- /dev/null
@@ -0,0 +1,13 @@
+obj-y                  += core.o
+
+obj-$(CONFIG_CPU_SUP_AMD)               += amd/core.o amd/uncore.o
+obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
+endif
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/core.o intel/bts.o intel/cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/cstate.o intel/ds.o intel/knc.o 
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/rapl.o msr.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
new file mode 100644 (file)
index 0000000..049ada8
--- /dev/null
@@ -0,0 +1,731 @@
+#include <linux/perf_event.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/apicdef.h>
+
+#include "../perf_event.h"
+
+static __initconst const u64 amd_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+               [ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+               [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
+               [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+               [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+               [ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
+               [ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
+               [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
+               [ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const u64 amd_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]                   = 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]                 = 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]             = 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]                 = 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]          = 0x00c2,
+  [PERF_COUNT_HW_BRANCH_MISSES]                        = 0x00c3,
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]      = 0x00d0, /* "Decoder empty" event */
+  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]       = 0x00d1, /* "Dispatch stalls" event */
+};
+
+static u64 amd_pmu_event_map(int hw_event)
+{
+       return amd_perfmon_event_map[hw_event];
+}
+
+/*
+ * Previously calculated offsets
+ */
+static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
+
+/*
+ * Legacy CPUs:
+ *   4 counters starting at 0xc0010000 each offset by 1
+ *
+ * CPUs with core performance counter extensions:
+ *   6 counters starting at 0xc0010200 each offset by 2
+ */
+static inline int amd_pmu_addr_offset(int index, bool eventsel)
+{
+       int offset;
+
+       if (!index)
+               return index;
+
+       if (eventsel)
+               offset = event_offsets[index];
+       else
+               offset = count_offsets[index];
+
+       if (offset)
+               return offset;
+
+       if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+               offset = index;
+       else
+               offset = index << 1;
+
+       if (eventsel)
+               event_offsets[index] = offset;
+       else
+               count_offsets[index] = offset;
+
+       return offset;
+}
+
+static int amd_core_hw_config(struct perf_event *event)
+{
+       if (event->attr.exclude_host && event->attr.exclude_guest)
+               /*
+                * When HO == GO == 1 the hardware treats that as GO == HO == 0
+                * and will count in both modes. We don't want to count in that
+                * case so we emulate no-counting by setting US = OS = 0.
+                */
+               event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+                                     ARCH_PERFMON_EVENTSEL_OS);
+       else if (event->attr.exclude_host)
+               event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
+       else if (event->attr.exclude_guest)
+               event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
+
+       return 0;
+}
+
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+       return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+       return (hwc->config & 0xe0) == 0xe0;
+}
+
+static inline int amd_has_nb(struct cpu_hw_events *cpuc)
+{
+       struct amd_nb *nb = cpuc->amd_nb;
+
+       return nb && nb->nb_id != -1;
+}
+
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+       int ret;
+
+       /* pass precise event sampling to ibs: */
+       if (event->attr.precise_ip && get_ibs_caps())
+               return -ENOENT;
+
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
+       ret = x86_pmu_hw_config(event);
+       if (ret)
+               return ret;
+
+       if (event->attr.type == PERF_TYPE_RAW)
+               event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+       return amd_core_hw_config(event);
+}
+
+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
+                                          struct perf_event *event)
+{
+       struct amd_nb *nb = cpuc->amd_nb;
+       int i;
+
+       /*
+        * need to scan whole list because event may not have
+        * been assigned during scheduling
+        *
+        * no race condition possible because event can only
+        * be removed on one CPU at a time AND PMU is disabled
+        * when we come here
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               if (cmpxchg(nb->owners + i, event, NULL) == event)
+                       break;
+       }
+}
+
+ /*
+  * AMD64 NorthBridge events need special treatment because
+  * counter access needs to be synchronized across all cores
+  * of a package. Refer to BKDG section 3.12
+  *
+  * NB events are events measuring L3 cache, Hypertransport
+  * traffic. They are identified by an event code >= 0xe00.
+  * They measure events on the NorthBridge which is shared
+  * by all cores on a package. NB events are counted on a
+  * shared set of counters. When a NB event is programmed
+  * in a counter, the data actually comes from a shared
+  * counter. Thus, access to those counters needs to be
+  * synchronized.
+  *
+  * We implement the synchronization such that no two cores
+  * can be measuring NB events using the same counters. Thus,
+  * we maintain a per-NB allocation table. The available slot
+  * is propagated using the event_constraint structure.
+  *
+  * We provide only one choice for each NB event based on
+  * the fact that only NB events have restrictions. Consequently,
+  * if a counter is available, there is a guarantee the NB event
+  * will be assigned to it. If no slot is available, an empty
+  * constraint is returned and scheduling will eventually fail
+  * for this event.
+  *
+  * Note that all cores attached to the same NB compete for the same
+  * counters to host NB events, this is why we use atomic ops. Some
+  * multi-chip CPUs may have more than one NB.
+  *
+  * Given that resources are allocated (cmpxchg), they must be
+  * eventually freed for others to use. This is accomplished by
+  * calling __amd_put_nb_event_constraints()
+  *
+  * Non NB events are not impacted by this restriction.
+  */
+static struct event_constraint *
+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+                              struct event_constraint *c)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct amd_nb *nb = cpuc->amd_nb;
+       struct perf_event *old;
+       int idx, new = -1;
+
+       if (!c)
+               c = &unconstrained;
+
+       if (cpuc->is_fake)
+               return c;
+
+       /*
+        * detect if already present, if so reuse
+        *
+        * cannot merge with actual allocation
+        * because of possible holes
+        *
+        * event can already be present yet not assigned (in hwc->idx)
+        * because of successive calls to x86_schedule_events() from
+        * hw_perf_group_sched_in() without hw_perf_enable()
+        */
+       for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+               if (new == -1 || hwc->idx == idx)
+                       /* assign free slot, prefer hwc->idx */
+                       old = cmpxchg(nb->owners + idx, NULL, event);
+               else if (nb->owners[idx] == event)
+                       /* event already present */
+                       old = event;
+               else
+                       continue;
+
+               if (old && old != event)
+                       continue;
+
+               /* reassign to this slot */
+               if (new != -1)
+                       cmpxchg(nb->owners + new, event, NULL);
+               new = idx;
+
+               /* already present, reuse */
+               if (old == event)
+                       break;
+       }
+
+       if (new == -1)
+               return &emptyconstraint;
+
+       return &nb->event_constraints[new];
+}
+
+static struct amd_nb *amd_alloc_nb(int cpu)
+{
+       struct amd_nb *nb;
+       int i;
+
+       nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
+       if (!nb)
+               return NULL;
+
+       nb->nb_id = -1;
+
+       /*
+        * initialize all possible NB constraints
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               __set_bit(i, nb->event_constraints[i].idxmsk);
+               nb->event_constraints[i].weight = 1;
+       }
+       return nb;
+}
+
+static int amd_pmu_cpu_prepare(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+       WARN_ON_ONCE(cpuc->amd_nb);
+
+       if (boot_cpu_data.x86_max_cores < 2)
+               return NOTIFY_OK;
+
+       cpuc->amd_nb = amd_alloc_nb(cpu);
+       if (!cpuc->amd_nb)
+               return NOTIFY_BAD;
+
+       return NOTIFY_OK;
+}
+
+static void amd_pmu_cpu_starting(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
+       struct amd_nb *nb;
+       int i, nb_id;
+
+       cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+
+       if (boot_cpu_data.x86_max_cores < 2)
+               return;
+
+       nb_id = amd_get_nb_id(cpu);
+       WARN_ON_ONCE(nb_id == BAD_APICID);
+
+       for_each_online_cpu(i) {
+               nb = per_cpu(cpu_hw_events, i).amd_nb;
+               if (WARN_ON_ONCE(!nb))
+                       continue;
+
+               if (nb->nb_id == nb_id) {
+                       *onln = cpuc->amd_nb;
+                       cpuc->amd_nb = nb;
+                       break;
+               }
+       }
+
+       cpuc->amd_nb->nb_id = nb_id;
+       cpuc->amd_nb->refcnt++;
+}
+
+static void amd_pmu_cpu_dead(int cpu)
+{
+       struct cpu_hw_events *cpuhw;
+
+       if (boot_cpu_data.x86_max_cores < 2)
+               return;
+
+       cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+       if (cpuhw->amd_nb) {
+               struct amd_nb *nb = cpuhw->amd_nb;
+
+               if (nb->nb_id == -1 || --nb->refcnt == 0)
+                       kfree(nb);
+
+               cpuhw->amd_nb = NULL;
+       }
+}
+
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       /*
+        * if not NB event or no NB, then no constraints
+        */
+       if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
+               return &unconstrained;
+
+       return __amd_get_nb_event_constraints(cpuc, event, NULL);
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+                                     struct perf_event *event)
+{
+       if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+               __amd_put_nb_event_constraints(cpuc, event);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15"   );
+PMU_FORMAT_ATTR(edge,  "config:18"     );
+PMU_FORMAT_ATTR(inv,   "config:23"     );
+PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+
+static struct attribute *amd_format_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       NULL,
+};
+
+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK    0x000000F0ULL
+
+#define AMD_EVENT_FP           0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS           0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC           0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU           0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE                0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS                0x000000C0ULL
+#define AMD_EVENT_DE           0x000000D0ULL
+#define AMD_EVENT_NB           0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000       FP      PERF_CTL[5:3]
+ * 0x010       FP      PERF_CTL[5:3]
+ * 0x020       LS      PERF_CTL[5:0]
+ * 0x030       LS      PERF_CTL[5:0]
+ * 0x040       DC      PERF_CTL[5:0]
+ * 0x050       DC      PERF_CTL[5:0]
+ * 0x060       CU      PERF_CTL[2:0]
+ * 0x070       CU      PERF_CTL[2:0]
+ * 0x080       IC/DE   PERF_CTL[2:0]
+ * 0x090       IC/DE   PERF_CTL[2:0]
+ * 0x0A0       ---
+ * 0x0B0       ---
+ * 0x0C0       EX/LS   PERF_CTL[5:0]
+ * 0x0D0       DE      PERF_CTL[2:0]
+ * 0x0E0       NB      NB_PERF_CTL[3:0]
+ * 0x0F0       NB      NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x000       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
+ * 0x003       FP      PERF_CTL[3]
+ * 0x004       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
+ * 0x00B       FP      PERF_CTL[3]
+ * 0x00D       FP      PERF_CTL[3]
+ * 0x023       DE      PERF_CTL[2:0]
+ * 0x02D       LS      PERF_CTL[3]
+ * 0x02E       LS      PERF_CTL[3,0]
+ * 0x031       LS      PERF_CTL[2:0] (**)
+ * 0x043       CU      PERF_CTL[2:0]
+ * 0x045       CU      PERF_CTL[2:0]
+ * 0x046       CU      PERF_CTL[2:0]
+ * 0x054       CU      PERF_CTL[2:0]
+ * 0x055       CU      PERF_CTL[2:0]
+ * 0x08F       IC      PERF_CTL[0]
+ * 0x187       DE      PERF_CTL[0]
+ * 0x188       DE      PERF_CTL[0]
+ * 0x0DB       EX      PERF_CTL[5:0]
+ * 0x0DC       LS      PERF_CTL[5:0]
+ * 0x0DD       LS      PERF_CTL[5:0]
+ * 0x0DE       LS      PERF_CTL[5:0]
+ * 0x0DF       LS      PERF_CTL[5:0]
+ * 0x1C0       EX      PERF_CTL[5:3]
+ * 0x1D6       EX      PERF_CTL[5:0]
+ * 0x1D8       EX      PERF_CTL[5:0]
+ *
+ * (*)  depending on the umask all FPU counters may be used
+ * (**) only one unitmask enabled at a time
+ */
+
+static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
+                              struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned int event_code = amd_get_event_code(hwc);
+
+       switch (event_code & AMD_EVENT_TYPE_MASK) {
+       case AMD_EVENT_FP:
+               switch (event_code) {
+               case 0x000:
+                       if (!(hwc->config & 0x0000F000ULL))
+                               break;
+                       if (!(hwc->config & 0x00000F00ULL))
+                               break;
+                       return &amd_f15_PMC3;
+               case 0x004:
+                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+                               break;
+                       return &amd_f15_PMC3;
+               case 0x003:
+               case 0x00B:
+               case 0x00D:
+                       return &amd_f15_PMC3;
+               }
+               return &amd_f15_PMC53;
+       case AMD_EVENT_LS:
+       case AMD_EVENT_DC:
+       case AMD_EVENT_EX_LS:
+               switch (event_code) {
+               case 0x023:
+               case 0x043:
+               case 0x045:
+               case 0x046:
+               case 0x054:
+               case 0x055:
+                       return &amd_f15_PMC20;
+               case 0x02D:
+                       return &amd_f15_PMC3;
+               case 0x02E:
+                       return &amd_f15_PMC30;
+               case 0x031:
+                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+                               return &amd_f15_PMC20;
+                       return &emptyconstraint;
+               case 0x1C0:
+                       return &amd_f15_PMC53;
+               default:
+                       return &amd_f15_PMC50;
+               }
+       case AMD_EVENT_CU:
+       case AMD_EVENT_IC_DE:
+       case AMD_EVENT_DE:
+               switch (event_code) {
+               case 0x08F:
+               case 0x187:
+               case 0x188:
+                       return &amd_f15_PMC0;
+               case 0x0DB ... 0x0DF:
+               case 0x1D6:
+               case 0x1D8:
+                       return &amd_f15_PMC50;
+               default:
+                       return &amd_f15_PMC20;
+               }
+       case AMD_EVENT_NB:
+               /* moved to perf_event_amd_uncore.c */
+               return &emptyconstraint;
+       default:
+               return &emptyconstraint;
+       }
+}
+
+static ssize_t amd_event_sysfs_show(char *page, u64 config)
+{
+       u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
+                   (config & AMD64_EVENTSEL_EVENT) >> 24;
+
+       return x86_event_sysfs_show(page, config, event);
+}
+
+static __initconst const struct x86_pmu amd_pmu = {
+       .name                   = "AMD",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = x86_pmu_disable_all,
+       .enable_all             = x86_pmu_enable_all,
+       .enable                 = x86_pmu_enable_event,
+       .disable                = x86_pmu_disable_event,
+       .hw_config              = amd_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_K7_EVNTSEL0,
+       .perfctr                = MSR_K7_PERFCTR0,
+       .addr_offset            = amd_pmu_addr_offset,
+       .event_map              = amd_pmu_event_map,
+       .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
+       .num_counters           = AMD64_NUM_COUNTERS,
+       .cntval_bits            = 48,
+       .cntval_mask            = (1ULL << 48) - 1,
+       .apic                   = 1,
+       /* use highest bit to detect overflow */
+       .max_period             = (1ULL << 47) - 1,
+       .get_event_constraints  = amd_get_event_constraints,
+       .put_event_constraints  = amd_put_event_constraints,
+
+       .format_attrs           = amd_format_attr,
+       .events_sysfs_show      = amd_event_sysfs_show,
+
+       .cpu_prepare            = amd_pmu_cpu_prepare,
+       .cpu_starting           = amd_pmu_cpu_starting,
+       .cpu_dead               = amd_pmu_cpu_dead,
+};
+
+static int __init amd_core_pmu_init(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+               return 0;
+
+       switch (boot_cpu_data.x86) {
+       case 0x15:
+               pr_cont("Fam15h ");
+               x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
+               break;
+
+       default:
+               pr_err("core perfctr but no constraints; unknown hardware!\n");
+               return -ENODEV;
+       }
+
+       /*
+        * If core performance counter extensions exist, we must use
+        * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
+        * amd_pmu_addr_offset().
+        */
+       x86_pmu.eventsel        = MSR_F15H_PERF_CTL;
+       x86_pmu.perfctr         = MSR_F15H_PERF_CTR;
+       x86_pmu.num_counters    = AMD64_NUM_COUNTERS_CORE;
+
+       pr_cont("core perfctr, ");
+       return 0;
+}
+
+__init int amd_pmu_init(void)
+{
+       int ret;
+
+       /* Performance-monitoring supported from K7 and later: */
+       if (boot_cpu_data.x86 < 6)
+               return -ENODEV;
+
+       x86_pmu = amd_pmu;
+
+       ret = amd_core_pmu_init();
+       if (ret)
+               return ret;
+
+       /* Events are common for all AMDs */
+       memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+              sizeof(hw_cache_event_ids));
+
+       return 0;
+}
+
+void amd_pmu_enable_virt(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       cpuc->perf_ctr_virt_mask = 0;
+
+       /* Reload all events */
+       x86_pmu_disable_all();
+       x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
+
+void amd_pmu_disable_virt(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /*
+        * We only mask out the Host-only bit so that host-only counting works
+        * when SVM is disabled. If someone sets up a guest-only counter when
+        * SVM is disabled the Guest-only bits still gets set and the counter
+        * will not count anything.
+        */
+       cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+
+       /* Reload all events */
+       x86_pmu_disable_all();
+       x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
new file mode 100644 (file)
index 0000000..51087c2
--- /dev/null
@@ -0,0 +1,959 @@
+/*
+ * Performance events - AMD IBS
+ *
+ *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/ptrace.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+static u32 ibs_caps;
+
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK  (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK     IBS_OP_MAX_CNT
+
+enum ibs_states {
+       IBS_ENABLED     = 0,
+       IBS_STARTED     = 1,
+       IBS_STOPPING    = 2,
+
+       IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+       struct perf_event       *event;
+       unsigned long           state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+       struct pmu                      pmu;
+       unsigned int                    msr;
+       u64                             config_mask;
+       u64                             cnt_mask;
+       u64                             enable_mask;
+       u64                             valid_mask;
+       u64                             max_period;
+       unsigned long                   offset_mask[1];
+       int                             offset_max;
+       struct cpu_perf_ibs __percpu    *pcpu;
+
+       struct attribute                **format_attrs;
+       struct attribute_group          format_group;
+       const struct attribute_group    *attr_groups[2];
+
+       u64                             (*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+       u32             size;
+       union {
+               u32     data[0];        /* data buffer starts here */
+               u32     caps;
+       };
+       u64             regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+       s64 left = local64_read(&hwc->period_left);
+       s64 period = hwc->sample_period;
+       int overflow = 0;
+
+       /*
+        * If we are way outside a reasonable range then just skip forward:
+        */
+       if (unlikely(left <= -period)) {
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               overflow = 1;
+       }
+
+       if (unlikely(left < (s64)min)) {
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               overflow = 1;
+       }
+
+       /*
+        * If the hw period that triggers the sw overflow is too short
+        * we might hit the irq handler. This biases the results.
+        * Thus we shorten the next-to-last period and set the last
+        * period to the max period.
+        */
+       if (left > max) {
+               left -= max;
+               if (left > max)
+                       left = max;
+               else if (left < min)
+                       left = min;
+       }
+
+       *hw_period = (u64)left;
+
+       return overflow;
+}
+
+static  int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       int shift = 64 - width;
+       u64 prev_raw_count;
+       u64 delta;
+
+       /*
+        * Careful: an NMI might modify the previous event value.
+        *
+        * Our tactic to handle this is to first atomically read and
+        * exchange a new raw count - then add that new-prev delta
+        * count to the generic event atomically:
+        */
+       prev_raw_count = local64_read(&hwc->prev_count);
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                                       new_raw_count) != prev_raw_count)
+               return 0;
+
+       /*
+        * Now we have the new raw value and have updated the prev
+        * timestamp already. We can now calculate the elapsed delta
+        * (event-)time and add that to the generic event.
+        *
+        * Careful, not all hw sign-extends above the physical width
+        * of the count.
+        */
+       delta = (new_raw_count << shift) - (prev_raw_count << shift);
+       delta >>= shift;
+
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
+
+       return 1;
+}
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+       if (perf_ibs_fetch.pmu.type == type)
+               return &perf_ibs_fetch;
+       if (perf_ibs_op.pmu.type == type)
+               return &perf_ibs_op;
+       return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
+ *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
+ *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+       switch (event->attr.precise_ip) {
+       case 0:
+               return -ENOENT;
+       case 1:
+       case 2:
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       switch (event->attr.type) {
+       case PERF_TYPE_HARDWARE:
+               switch (event->attr.config) {
+               case PERF_COUNT_HW_CPU_CYCLES:
+                       *config = 0;
+                       return 0;
+               }
+               break;
+       case PERF_TYPE_RAW:
+               switch (event->attr.config) {
+               case 0x0076:
+                       *config = 0;
+                       return 0;
+               case 0x00C1:
+                       *config = IBS_OP_CNT_CTL;
+                       return 0;
+               }
+               break;
+       default:
+               return -ENOENT;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static const struct perf_event_attr ibs_notsupp = {
+       .exclude_user   = 1,
+       .exclude_kernel = 1,
+       .exclude_hv     = 1,
+       .exclude_idle   = 1,
+       .exclude_host   = 1,
+       .exclude_guest  = 1,
+};
+
+static int perf_ibs_init(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_ibs *perf_ibs;
+       u64 max_cnt, config;
+       int ret;
+
+       perf_ibs = get_ibs_pmu(event->attr.type);
+       if (perf_ibs) {
+               config = event->attr.config;
+       } else {
+               perf_ibs = &perf_ibs_op;
+               ret = perf_ibs_precise_event(event, &config);
+               if (ret)
+                       return ret;
+       }
+
+       if (event->pmu != &perf_ibs->pmu)
+               return -ENOENT;
+
+       if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+               return -EINVAL;
+
+       if (config & ~perf_ibs->config_mask)
+               return -EINVAL;
+
+       if (hwc->sample_period) {
+               if (config & perf_ibs->cnt_mask)
+                       /* raw max_cnt may not be set */
+                       return -EINVAL;
+               if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+                       /*
+                        * lower 4 bits can not be set in ibs max cnt,
+                        * but allowing it in case we adjust the
+                        * sample period to set a frequency.
+                        */
+                       return -EINVAL;
+               hwc->sample_period &= ~0x0FULL;
+               if (!hwc->sample_period)
+                       hwc->sample_period = 0x10;
+       } else {
+               max_cnt = config & perf_ibs->cnt_mask;
+               config &= ~perf_ibs->cnt_mask;
+               event->attr.sample_period = max_cnt << 4;
+               hwc->sample_period = event->attr.sample_period;
+       }
+
+       if (!hwc->sample_period)
+               return -EINVAL;
+
+       /*
+        * If we modify hwc->sample_period, we also need to update
+        * hwc->last_period and hwc->period_left.
+        */
+       hwc->last_period = hwc->sample_period;
+       local64_set(&hwc->period_left, hwc->sample_period);
+
+       hwc->config_base = perf_ibs->msr;
+       hwc->config = config;
+
+       return 0;
+}
+
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+                              struct hw_perf_event *hwc, u64 *period)
+{
+       int overflow;
+
+       /* ignore lower 4 bits in min count: */
+       overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+       local64_set(&hwc->prev_count, 0);
+
+       return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+       return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+       u64 count = 0;
+
+       if (config & IBS_OP_VAL)
+               count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+       if (ibs_caps & IBS_CAPS_RDWROPCNT)
+               count += (config & IBS_OP_CUR_CNT) >> 32;
+
+       return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+                     u64 *config)
+{
+       u64 count = perf_ibs->get_count(*config);
+
+       /*
+        * Set width to 64 since we do not overflow on max width but
+        * instead on max count. In perf_ibs_set_period() we clear
+        * prev count manually on overflow.
+        */
+       while (!perf_event_try_update(event, count, 64)) {
+               rdmsrl(event->hw.config_base, *config);
+               count = perf_ibs->get_count(*config);
+       }
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+                                        struct hw_perf_event *hwc, u64 config)
+{
+       wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+                                         struct hw_perf_event *hwc, u64 config)
+{
+       config &= ~perf_ibs->cnt_mask;
+       wrmsrl(hwc->config_base, config);
+       config &= ~perf_ibs->enable_mask;
+       wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always need to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags in
+ * perf_ibs_start()/perf_ibs_stop() and instead always do it.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+       u64 period;
+
+       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+               return;
+
+       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+       hwc->state = 0;
+
+       perf_ibs_set_period(perf_ibs, hwc, &period);
+       set_bit(IBS_STARTED, pcpu->state);
+       perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+       perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+       u64 config;
+       int stopping;
+
+       stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+       if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+               return;
+
+       rdmsrl(hwc->config_base, config);
+
+       if (stopping) {
+               set_bit(IBS_STOPPING, pcpu->state);
+               perf_ibs_disable_event(perf_ibs, hwc, config);
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+       }
+
+       if (hwc->state & PERF_HES_UPTODATE)
+               return;
+
+       /*
+        * Clear the valid bit so that rollovers are not counted on
+        * update; rollovers are only handled in the irq handler.
+        */
+       config &= ~perf_ibs->valid_mask;
+
+       perf_ibs_event_update(perf_ibs, event, &config);
+       hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_ibs_add(struct perf_event *event, int flags)
+{
+       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+       if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+               return -ENOSPC;
+
+       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       pcpu->event = event;
+
+       if (flags & PERF_EF_START)
+               perf_ibs_start(event, PERF_EF_RELOAD);
+
+       return 0;
+}
+
+static void perf_ibs_del(struct perf_event *event, int flags)
+{
+       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+       if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+               return;
+
+       perf_ibs_stop(event, PERF_EF_UPDATE);
+
+       pcpu->event = NULL;
+
+       perf_event_update_userpage(event);
+}
+
+static void perf_ibs_read(struct perf_event *event) { }
+
+PMU_FORMAT_ATTR(rand_en,       "config:57");
+PMU_FORMAT_ATTR(cnt_ctl,       "config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+       &format_attr_rand_en.attr,
+       NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+       NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+       NULL,
+};
+
+static struct perf_ibs perf_ibs_fetch = {
+       .pmu = {
+               .task_ctx_nr    = perf_invalid_context,
+
+               .event_init     = perf_ibs_init,
+               .add            = perf_ibs_add,
+               .del            = perf_ibs_del,
+               .start          = perf_ibs_start,
+               .stop           = perf_ibs_stop,
+               .read           = perf_ibs_read,
+       },
+       .msr                    = MSR_AMD64_IBSFETCHCTL,
+       .config_mask            = IBS_FETCH_CONFIG_MASK,
+       .cnt_mask               = IBS_FETCH_MAX_CNT,
+       .enable_mask            = IBS_FETCH_ENABLE,
+       .valid_mask             = IBS_FETCH_VAL,
+       .max_period             = IBS_FETCH_MAX_CNT << 4,
+       .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
+       .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
+       .format_attrs           = ibs_fetch_format_attrs,
+
+       .get_count              = get_ibs_fetch_count,
+};
+
+static struct perf_ibs perf_ibs_op = {
+       .pmu = {
+               .task_ctx_nr    = perf_invalid_context,
+
+               .event_init     = perf_ibs_init,
+               .add            = perf_ibs_add,
+               .del            = perf_ibs_del,
+               .start          = perf_ibs_start,
+               .stop           = perf_ibs_stop,
+               .read           = perf_ibs_read,
+       },
+       .msr                    = MSR_AMD64_IBSOPCTL,
+       .config_mask            = IBS_OP_CONFIG_MASK,
+       .cnt_mask               = IBS_OP_MAX_CNT,
+       .enable_mask            = IBS_OP_ENABLE,
+       .valid_mask             = IBS_OP_VAL,
+       .max_period             = IBS_OP_MAX_CNT << 4,
+       .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
+       .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
+       .format_attrs           = ibs_op_format_attrs,
+
+       .get_count              = get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+       struct perf_event *event = pcpu->event;
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_sample_data data;
+       struct perf_raw_record raw;
+       struct pt_regs regs;
+       struct perf_ibs_data ibs_data;
+       int offset, size, check_rip, offset_max, throttle = 0;
+       unsigned int msr;
+       u64 *buf, *config, period;
+
+       if (!test_bit(IBS_STARTED, pcpu->state)) {
+               /*
+                * Catch spurious interrupts after stopping IBS: after
+                * disabling IBS there could still be incoming NMIs
+                * with samples that even have the valid bit cleared.
+                * Mark all these NMIs as handled.
+                */
+               return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+       }
+
+       msr = hwc->config_base;
+       buf = ibs_data.regs;
+       rdmsrl(msr, *buf);
+       if (!(*buf++ & perf_ibs->valid_mask))
+               return 0;
+
+       config = &ibs_data.regs[0];
+       perf_ibs_event_update(perf_ibs, event, config);
+       perf_sample_data_init(&data, 0, hwc->last_period);
+       if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+               goto out;       /* no sw counter overflow */
+
+       ibs_data.caps = ibs_caps;
+       size = 1;
+       offset = 1;
+       check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+       if (event->attr.sample_type & PERF_SAMPLE_RAW)
+               offset_max = perf_ibs->offset_max;
+       else if (check_rip)
+               offset_max = 2;
+       else
+               offset_max = 1;
+       do {
+               rdmsrl(msr + offset, *buf++);
+               size++;
+               offset = find_next_bit(perf_ibs->offset_mask,
+                                      perf_ibs->offset_max,
+                                      offset + 1);
+       } while (offset < offset_max);
+       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+               /*
+                * Read IbsBrTarget and IbsOpData4 separately
+                * depending on their availability. They can't be
+                * folded into offset_max as these MSRs are staggered.
+                */
+               if (ibs_caps & IBS_CAPS_BRNTRGT) {
+                       rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+                       size++;
+               }
+               if (ibs_caps & IBS_CAPS_OPDATA4) {
+                       rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+                       size++;
+               }
+       }
+       ibs_data.size = sizeof(u64) * size;
+
+       regs = *iregs;
+       if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+               regs.flags &= ~PERF_EFLAGS_EXACT;
+       } else {
+               set_linear_ip(&regs, ibs_data.regs[1]);
+               regs.flags |= PERF_EFLAGS_EXACT;
+       }
+
+       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+               raw.size = sizeof(u32) + ibs_data.size;
+               raw.data = ibs_data.data;
+               data.raw = &raw;
+       }
+
+       throttle = perf_event_overflow(event, &data, &regs);
+out:
+       if (throttle)
+               perf_ibs_disable_event(perf_ibs, hwc, *config);
+       else
+               perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+       perf_event_update_userpage(event);
+
+       return 1;
+}
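
The sample-read loop above walks a bitmask of implemented MSR offsets rather than a contiguous range, because the IBS registers are staggered; the kernel uses find_next_bit() for this. A stand-alone sketch of the same walk with a plain loop (the mask value here is made up for illustration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical mask of implemented offsets: bit n => MSR at base+n. */
        uint64_t offset_mask = 0x27;    /* offsets 0, 1, 2 and 5 */
        int offset_max = 8;             /* stop past the last offset of interest */
        int offset = 0;

        do {
                printf("read MSR at base+%d\n", offset);
                /* advance to the next set bit strictly after 'offset' */
                do {
                        offset++;
                } while (offset < offset_max && !(offset_mask & (1ULL << offset)));
        } while (offset < offset_max);

        return 0;
}
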
+
+static int
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+       int handled = 0;
+
+       handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+       handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+       if (handled)
+               inc_irq_stat(apic_perf_irqs);
+
+       return handled;
+}
+NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+       struct cpu_perf_ibs __percpu *pcpu;
+       int ret;
+
+       pcpu = alloc_percpu(struct cpu_perf_ibs);
+       if (!pcpu)
+               return -ENOMEM;
+
+       perf_ibs->pcpu = pcpu;
+
+       /* register attributes */
+       if (perf_ibs->format_attrs[0]) {
+               memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+               perf_ibs->format_group.name     = "format";
+               perf_ibs->format_group.attrs    = perf_ibs->format_attrs;
+
+               memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+               perf_ibs->attr_groups[0]        = &perf_ibs->format_group;
+               perf_ibs->pmu.attr_groups       = perf_ibs->attr_groups;
+       }
+
+       ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+       if (ret) {
+               perf_ibs->pcpu = NULL;
+               free_percpu(pcpu);
+       }
+
+       return ret;
+}
+
+static __init int perf_event_ibs_init(void)
+{
+       struct attribute **attr = ibs_op_format_attrs;
+
+       if (!ibs_caps)
+               return -ENODEV; /* ibs not supported by the cpu */
+
+       perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+
+       if (ibs_caps & IBS_CAPS_OPCNT) {
+               perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+               *attr++ = &format_attr_cnt_ctl.attr;
+       }
+       perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+
+       register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
+       pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+
+       return 0;
+}
+
+#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
+
+static __init int perf_event_ibs_init(void) { return 0; }
+
+#endif
+
+/* IBS - apic initialization, for perf and oprofile */
+
+static __init u32 __get_ibs_caps(void)
+{
+       u32 caps;
+       unsigned int max_level;
+
+       if (!boot_cpu_has(X86_FEATURE_IBS))
+               return 0;
+
+       /* check IBS cpuid feature flags */
+       max_level = cpuid_eax(0x80000000);
+       if (max_level < IBS_CPUID_FEATURES)
+               return IBS_CAPS_DEFAULT;
+
+       caps = cpuid_eax(IBS_CPUID_FEATURES);
+       if (!(caps & IBS_CAPS_AVAIL))
+               /* cpuid flags not valid */
+               return IBS_CAPS_DEFAULT;
+
+       return caps;
+}
+
+u32 get_ibs_caps(void)
+{
+       return ibs_caps;
+}
+
+EXPORT_SYMBOL(get_ibs_caps);
+
+static inline int get_eilvt(int offset)
+{
+       return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
+}
+
+static inline int put_eilvt(int offset)
+{
+       return !setup_APIC_eilvt(offset, 0, 0, 1);
+}
+
+/*
+ * Check and reserve APIC extended interrupt LVT offset for IBS if available.
+ */
+static inline int ibs_eilvt_valid(void)
+{
+       int offset;
+       u64 val;
+       int valid = 0;
+
+       preempt_disable();
+
+       rdmsrl(MSR_AMD64_IBSCTL, val);
+       offset = val & IBSCTL_LVT_OFFSET_MASK;
+
+       if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
+               pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+               goto out;
+       }
+
+       if (!get_eilvt(offset)) {
+               pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+               goto out;
+       }
+
+       valid = 1;
+out:
+       preempt_enable();
+
+       return valid;
+}
+
+static int setup_ibs_ctl(int ibs_eilvt_off)
+{
+       struct pci_dev *cpu_cfg;
+       int nodes;
+       u32 value = 0;
+
+       nodes = 0;
+       cpu_cfg = NULL;
+       do {
+               cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
+                                        PCI_DEVICE_ID_AMD_10H_NB_MISC,
+                                        cpu_cfg);
+               if (!cpu_cfg)
+                       break;
+               ++nodes;
+               pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
+                                      | IBSCTL_LVT_OFFSET_VALID);
+               pci_read_config_dword(cpu_cfg, IBSCTL, &value);
+               if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
+                       pci_dev_put(cpu_cfg);
+                       pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
+                                value);
+                       return -EINVAL;
+               }
+       } while (1);
+
+       if (!nodes) {
+               pr_debug("No CPU node configured for IBS\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+/*
+ * This runs only on the current cpu. We try to find an LVT offset and
+ * set up the local APIC. For this we must disable preemption. On
+ * success we initialize all nodes with this offset, which then updates
+ * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
+ * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
+ * uses the new offset.
+ */
+static void force_ibs_eilvt_setup(void)
+{
+       int offset;
+       int ret;
+
+       preempt_disable();
+       /* find the next free available EILVT entry, skip offset 0 */
+       for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
+               if (get_eilvt(offset))
+                       break;
+       }
+       preempt_enable();
+
+       if (offset == APIC_EILVT_NR_MAX) {
+               pr_debug("No EILVT entry available\n");
+               return;
+       }
+
+       ret = setup_ibs_ctl(offset);
+       if (ret)
+               goto out;
+
+       if (!ibs_eilvt_valid())
+               goto out;
+
+       pr_info("IBS: LVT offset %d assigned\n", offset);
+
+       return;
+out:
+       preempt_disable();
+       put_eilvt(offset);
+       preempt_enable();
+       return;
+}
+
+static void ibs_eilvt_setup(void)
+{
+       /*
+        * Force LVT offset assignment for family 10h: the offsets are
+        * not assigned by the BIOS for this family, so the OS is
+        * responsible for doing it. If the OS assignment fails, fall
+        * back to the BIOS settings and try to use those.
+        */
+       if (boot_cpu_data.x86 == 0x10)
+               force_ibs_eilvt_setup();
+}
+
+static inline int get_ibs_lvt_offset(void)
+{
+       u64 val;
+
+       rdmsrl(MSR_AMD64_IBSCTL, val);
+       if (!(val & IBSCTL_LVT_OFFSET_VALID))
+               return -EINVAL;
+
+       return val & IBSCTL_LVT_OFFSET_MASK;
+}
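
For reference, decoding a raw IBSCTL value the way get_ibs_lvt_offset() and ibs_eilvt_valid() do; the bit positions used below (valid flag in bit 8, offset in the low nibble) are assumptions matching the usual IBSCTL_LVT_OFFSET_VALID/IBSCTL_LVT_OFFSET_MASK definitions:

#include <stdint.h>
#include <stdio.h>

#define IBSCTL_LVT_OFFSET_VALID (1ULL << 8)     /* assumed layout */
#define IBSCTL_LVT_OFFSET_MASK  0x0FULL         /* assumed layout */

/* Return the LVT offset encoded in a raw IBSCTL value, or -1 if invalid. */
static int decode_ibsctl(uint64_t val)
{
        if (!(val & IBSCTL_LVT_OFFSET_VALID))
                return -1;
        return (int)(val & IBSCTL_LVT_OFFSET_MASK);
}

int main(void)
{
        printf("%d\n", decode_ibsctl(0x101));   /* valid bit set, offset 1 ->  1 */
        printf("%d\n", decode_ibsctl(0x001));   /* valid bit clear          -> -1 */
        return 0;
}
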
+
+static void setup_APIC_ibs(void *dummy)
+{
+       int offset;
+
+       offset = get_ibs_lvt_offset();
+       if (offset < 0)
+               goto failed;
+
+       if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
+               return;
+failed:
+       pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
+               smp_processor_id());
+}
+
+static void clear_APIC_ibs(void *dummy)
+{
+       int offset;
+
+       offset = get_ibs_lvt_offset();
+       if (offset >= 0)
+               setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
+}
+
+#ifdef CONFIG_PM
+
+static int perf_ibs_suspend(void)
+{
+       clear_APIC_ibs(NULL);
+       return 0;
+}
+
+static void perf_ibs_resume(void)
+{
+       ibs_eilvt_setup();
+       setup_APIC_ibs(NULL);
+}
+
+static struct syscore_ops perf_ibs_syscore_ops = {
+       .resume         = perf_ibs_resume,
+       .suspend        = perf_ibs_suspend,
+};
+
+static void perf_ibs_pm_init(void)
+{
+       register_syscore_ops(&perf_ibs_syscore_ops);
+}
+
+#else
+
+static inline void perf_ibs_pm_init(void) { }
+
+#endif
+
+static int
+perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_STARTING:
+               setup_APIC_ibs(NULL);
+               break;
+       case CPU_DYING:
+               clear_APIC_ibs(NULL);
+               break;
+       default:
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static __init int amd_ibs_init(void)
+{
+       u32 caps;
+       int ret = -EINVAL;
+
+       caps = __get_ibs_caps();
+       if (!caps)
+               return -ENODEV; /* ibs not supported by the cpu */
+
+       ibs_eilvt_setup();
+
+       if (!ibs_eilvt_valid())
+               goto out;
+
+       perf_ibs_pm_init();
+       cpu_notifier_register_begin();
+       ibs_caps = caps;
+       /* make ibs_caps visible to other cpus: */
+       smp_mb();
+       smp_call_function(setup_APIC_ibs, NULL, 1);
+       __perf_cpu_notifier(perf_ibs_cpu_notifier);
+       cpu_notifier_register_done();
+
+       ret = perf_event_ibs_init();
+out:
+       if (ret)
+               pr_err("Failed to setup IBS, %d\n", ret);
+       return ret;
+}
+
+/* Since we need the pci subsystem to init ibs we can't do this earlier: */
+device_initcall(amd_ibs_init);
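
For completeness, a hedged sketch of how the PMU registered above would be consumed from user space: the dynamically assigned PMU type is read from sysfs and passed to perf_event_open(). The sysfs path corresponds to the "ibs_op" name used in perf_event_ibs_init(); the sample period is illustrative, and the call needs appropriate privileges.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Open a system-wide ibs_op sampling event on CPU 0 (minimal error handling). */
int main(void)
{
        struct perf_event_attr attr;
        FILE *f;
        int type, fd;

        f = fopen("/sys/bus/event_source/devices/ibs_op/type", "r");
        if (!f || fscanf(f, "%d", &type) != 1)
                return 1;
        fclose(f);

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;               /* dynamic PMU type assigned at registration */
        attr.sample_period = 100000;    /* rounded to a multiple of 16 by the driver */
        attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_RAW;

        fd = syscall(__NR_perf_event_open, &attr, -1 /* any pid */, 0 /* cpu 0 */,
                     -1 /* no group */, 0 /* flags */);
        if (fd < 0)
                return 1;
        /* samples would now be read from the fd's mmap'ed ring buffer */
        close(fd);
        return 0;
}
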
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
new file mode 100644 (file)
index 0000000..635e5eb
--- /dev/null
@@ -0,0 +1,499 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "../perf_event.h"
+#include "iommu.h"
+
+#define COUNTER_SHIFT          16
+
+#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
+#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
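
The _GET_*() macros above slice event->attr.config and config1 along the same boundaries the sysfs format attributes below advertise (csource config:0-7, devid config:8-23, pasid config:24-39, domid config:40-55, and the *_mask fields in config1). A stand-alone sketch of packing and unpacking a config value that way:

#include <stdint.h>
#include <stdio.h>

/* Pack the IOMMU PMU fields as described by the sysfs format strings. */
static uint64_t pack_config(uint64_t csource, uint64_t devid,
                            uint64_t pasid, uint64_t domid)
{
        return (csource & 0xFFULL) |
               ((devid  & 0xFFFFULL) << 8) |
               ((pasid  & 0xFFFFULL) << 24) |
               ((domid  & 0xFFFFULL) << 40);
}

int main(void)
{
        uint64_t config = pack_config(0x05 /* mem_trans_total */, 0x1234, 0x1, 0x7);

        /* The same slicing the _GET_*() macros perform on ev->hw.config: */
        printf("csource=%#llx devid=%#llx pasid=%#llx domid=%#llx\n",
               (unsigned long long)(config & 0xFFULL),
               (unsigned long long)((config >> 8)  & 0xFFFFULL),
               (unsigned long long)((config >> 24) & 0xFFFFULL),
               (unsigned long long)((config >> 40) & 0xFFFFULL));
        return 0;
}
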
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+       struct pmu pmu;
+       u8 max_banks;
+       u8 max_counters;
+       u64 cntr_assign_mask;
+       raw_spinlock_t lock;
+       const struct attribute_group *attr_groups[4];
+};
+
+#define format_group   attr_groups[0]
+#define cpumask_group  attr_groups[1]
+#define events_group   attr_groups[2]
+#define null_group     attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource,    "config:0-7");
+PMU_FORMAT_ATTR(devid,      "config:8-23");
+PMU_FORMAT_ATTR(pasid,      "config:24-39");
+PMU_FORMAT_ATTR(domid,      "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+       &format_attr_csource.attr,
+       &format_attr_devid.attr,
+       &format_attr_pasid.attr,
+       &format_attr_domid.attr,
+       &format_attr_devid_mask.attr,
+       &format_attr_pasid_mask.attr,
+       &format_attr_domid_mask.attr,
+       NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+       .name = "format",
+       .attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+       struct kobj_attribute attr;
+       const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+                               struct kobj_attribute *attr, char *buf)
+{
+       struct amd_iommu_event_desc *event =
+               container_of(attr, struct amd_iommu_event_desc, attr);
+       return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event)                    \
+{                                                              \
+       .attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),  \
+       .event = _event,                                        \
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+       AMD_IOMMU_EVENT_DESC(mem_pass_untrans,        "csource=0x01"),
+       AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,       "csource=0x02"),
+       AMD_IOMMU_EVENT_DESC(mem_pass_excl,           "csource=0x03"),
+       AMD_IOMMU_EVENT_DESC(mem_target_abort,        "csource=0x04"),
+       AMD_IOMMU_EVENT_DESC(mem_trans_total,         "csource=0x05"),
+       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,   "csource=0x06"),
+       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,   "csource=0x07"),
+       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,   "csource=0x08"),
+       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,   "csource=0x09"),
+       AMD_IOMMU_EVENT_DESC(mem_dte_hit,             "csource=0x0a"),
+       AMD_IOMMU_EVENT_DESC(mem_dte_mis,             "csource=0x0b"),
+       AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,       "csource=0x0c"),
+       AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,       "csource=0x0d"),
+       AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,       "csource=0x0e"),
+       AMD_IOMMU_EVENT_DESC(int_dte_hit,             "csource=0x0f"),
+       AMD_IOMMU_EVENT_DESC(int_dte_mis,             "csource=0x10"),
+       AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
+       AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
+       AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
+       { /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buf)
+{
+       return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+       .attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+       unsigned long flags;
+       int shift, bank, cntr, retval;
+       int max_banks = perf_iommu->max_banks;
+       int max_cntrs = perf_iommu->max_counters;
+
+       raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+       for (bank = 0, shift = 0; bank < max_banks; bank++) {
+               for (cntr = 0; cntr < max_cntrs; cntr++) {
+                       shift = bank + (bank*3) + cntr;
+                       if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+                               continue;
+                       } else {
+                               perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+                               retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+                               goto out;
+                       }
+               }
+       }
+       retval = -ENOSPC;
+out:
+       raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+       return retval;
+}
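
The allocator above tracks bank/counter pairs in a single 64-bit assignment mask, using shift = bank*4 + cntr (written as bank + bank*3 + cntr), and hands back the pair packed as (bank << 8) | cntr for storage in extra_reg.reg. A stand-alone sketch of the same bookkeeping, without the locking:

#include <stdint.h>
#include <stdio.h>

/*
 * Find a free (bank, counter) slot in a 64-bit assignment mask and claim it.
 * Returns (bank << 8) | counter, or -1 when everything is taken.
 */
static int claim_bnk_cntr(uint64_t *assign_mask, int max_banks, int max_cntrs)
{
        int bank, cntr;

        for (bank = 0; bank < max_banks; bank++) {
                for (cntr = 0; cntr < max_cntrs; cntr++) {
                        int shift = bank * 4 + cntr;    /* same as bank + bank*3 + cntr */

                        if (*assign_mask & (1ULL << shift))
                                continue;
                        *assign_mask |= 1ULL << shift;
                        return (bank << 8) | cntr;
                }
        }
        return -1;
}

int main(void)
{
        uint64_t mask = 0;
        int slot = claim_bnk_cntr(&mask, 2, 4);

        printf("slot=%#x mask=%#llx\n", slot, (unsigned long long)mask);
        return 0;
}
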
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+                                       u8 bank, u8 cntr)
+{
+       unsigned long flags;
+       int max_banks, max_cntrs;
+       int shift = 0;
+
+       max_banks = perf_iommu->max_banks;
+       max_cntrs = perf_iommu->max_counters;
+
+       if ((bank > max_banks) || (cntr > max_cntrs))
+               return -EINVAL;
+
+       shift = bank + cntr + (bank*3);
+
+       raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+       perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+       raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+       return 0;
+}
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_amd_iommu *perf_iommu;
+       u64 config, config1;
+
+       /* check the event attr type for PMU enumeration */
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /*
+        * The IOMMU counters are shared across all cores, so
+        * per-process mode is not supported. Event sampling
+        * mode is not supported either.
+        */
+       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+               return -EINVAL;
+
+       /* IOMMU counters do not have usr/os/guest/host bits */
+       if (event->attr.exclude_user || event->attr.exclude_kernel ||
+           event->attr.exclude_host || event->attr.exclude_guest)
+               return -EINVAL;
+
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       perf_iommu = &__perf_iommu;
+
+       if (event->pmu != &perf_iommu->pmu)
+               return -ENOENT;
+
+       if (perf_iommu) {
+               config = event->attr.config;
+               config1 = event->attr.config1;
+       } else {
+               return -EINVAL;
+       }
+
+       /* integrate with iommu base devid (0000), assume one iommu */
+       perf_iommu->max_banks =
+               amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+       perf_iommu->max_counters =
+               amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+       if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+               return -EINVAL;
+
+       /* update the hw_perf_event struct with the iommu config data */
+       hwc->config = config;
+       hwc->extra_reg.config = config1;
+
+       return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+       u8 csource = _GET_CSOURCE(ev);
+       u16 devid = _GET_DEVID(ev);
+       u64 reg = 0ULL;
+
+       reg = csource;
+       amd_iommu_pc_get_set_reg_val(devid,
+                       _GET_BANK(ev), _GET_CNTR(ev) ,
+                        IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+       reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+       if (reg)
+               reg |= (1UL << 31);
+       amd_iommu_pc_get_set_reg_val(devid,
+                       _GET_BANK(ev), _GET_CNTR(ev) ,
+                        IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+       reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+       if (reg)
+               reg |= (1UL << 31);
+       amd_iommu_pc_get_set_reg_val(devid,
+                       _GET_BANK(ev), _GET_CNTR(ev) ,
+                        IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+       reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+       if (reg)
+               reg |= (1UL << 31);
+       amd_iommu_pc_get_set_reg_val(devid,
+                       _GET_BANK(ev), _GET_CNTR(ev) ,
+                        IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+       u64 reg = 0ULL;
+
+       amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+                       _GET_BANK(event), _GET_CNTR(event),
+                       IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       pr_debug("perf: amd_iommu:perf_iommu_start\n");
+       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+               return;
+
+       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+       hwc->state = 0;
+
+       if (flags & PERF_EF_RELOAD) {
+               u64 prev_raw_count =  local64_read(&hwc->prev_count);
+               amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+                               _GET_BANK(event), _GET_CNTR(event),
+                               IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+       }
+
+       perf_iommu_enable_event(event);
+       perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+       u64 count = 0ULL;
+       u64 prev_raw_count = 0ULL;
+       u64 delta = 0ULL;
+       struct hw_perf_event *hwc = &event->hw;
+       pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+       amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+                               _GET_BANK(event), _GET_CNTR(event),
+                               IOMMU_PC_COUNTER_REG, &count, false);
+
+       /* IOMMU pc counter register is only 48 bits */
+       count &= 0xFFFFFFFFFFFFULL;
+
+       prev_raw_count =  local64_read(&hwc->prev_count);
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                                       count) != prev_raw_count)
+               return;
+
+       /* Handle 48-bit counter overflow */
+       delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+       delta >>= COUNTER_SHIFT;
+       local64_add(delta, &event->count);
+
+}
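
The shift-up/shift-down dance above is how several of these drivers turn a raw counter narrower than 64 bits into a correct unsigned delta even across a wrap: both values are shifted up by (64 - width), the difference is shifted back down, and two's-complement arithmetic absorbs the rollover. The same pattern appears again in amd_uncore_read() and x86_perf_event_update() below. A stand-alone sketch for a 48-bit counter:

#include <stdint.h>
#include <stdio.h>

#define COUNTER_SHIFT   16      /* 64 - 48: unimplemented high bits of the counter */

/* Delta between two raw 48-bit counter reads, correct across one wrap. */
static uint64_t delta48(uint64_t prev, uint64_t now)
{
        int64_t delta = (now << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);

        return (uint64_t)delta >> COUNTER_SHIFT;
}

int main(void)
{
        /* counter wrapped from near the 48-bit limit back to a small value */
        uint64_t prev = 0xFFFFFFFFFFF0ULL, now = 0x10ULL;

        printf("%llu\n", (unsigned long long)delta48(prev, now));      /* 32 */
        return 0;
}
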
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 config;
+
+       pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+       if (hwc->state & PERF_HES_UPTODATE)
+               return;
+
+       perf_iommu_disable_event(event);
+       WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+       hwc->state |= PERF_HES_STOPPED;
+
+       if (hwc->state & PERF_HES_UPTODATE)
+               return;
+
+       config = hwc->config;
+       perf_iommu_read(event);
+       hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+       int retval;
+       struct perf_amd_iommu *perf_iommu =
+                       container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+       pr_debug("perf: amd_iommu:perf_iommu_add\n");
+       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       /* request an iommu bank/counter */
+       retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+       if (retval != -ENOSPC)
+               event->hw.extra_reg.reg = (u16)retval;
+       else
+               return retval;
+
+       if (flags & PERF_EF_START)
+               perf_iommu_start(event, PERF_EF_RELOAD);
+
+       return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+       struct perf_amd_iommu *perf_iommu =
+                       container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+       pr_debug("perf: amd_iommu:perf_iommu_del\n");
+       perf_iommu_stop(event, PERF_EF_UPDATE);
+
+       /* clear the assigned iommu bank/counter */
+       clear_avail_iommu_bnk_cntr(perf_iommu,
+                                    _GET_BANK(event),
+                                    _GET_CNTR(event));
+
+       perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+       struct attribute **attrs;
+       struct attribute_group *attr_group;
+       int i = 0, j;
+
+       while (amd_iommu_v2_event_descs[i].attr.attr.name)
+               i++;
+
+       attr_group = kzalloc(sizeof(struct attribute *)
+               * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+       if (!attr_group)
+               return -ENOMEM;
+
+       attrs = (struct attribute **)(attr_group + 1);
+       for (j = 0; j < i; j++)
+               attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+       attr_group->name = "events";
+       attr_group->attrs = attrs;
+       perf_iommu->events_group = attr_group;
+
+       return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+       if (__perf_iommu.events_group != NULL) {
+               kfree(__perf_iommu.events_group);
+               __perf_iommu.events_group = NULL;
+       }
+}
+
+static __init int _init_perf_amd_iommu(
+       struct perf_amd_iommu *perf_iommu, char *name)
+{
+       int ret;
+
+       raw_spin_lock_init(&perf_iommu->lock);
+
+       /* Init format attributes */
+       perf_iommu->format_group = &amd_iommu_format_group;
+
+       /* Init cpumask attributes to only core 0 */
+       cpumask_set_cpu(0, &iommu_cpumask);
+       perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+       /* Init events attributes */
+       if (_init_events_attrs(perf_iommu) != 0)
+               pr_err("perf: amd_iommu: Only support raw events.\n");
+
+       /* Init null attributes */
+       perf_iommu->null_group = NULL;
+       perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+       ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+       if (ret) {
+               pr_err("perf: amd_iommu: Failed to initialize.\n");
+               amd_iommu_pc_exit();
+       } else {
+               pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+                       amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+                       amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+       }
+
+       return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+       .pmu = {
+               .event_init     = perf_iommu_event_init,
+               .add            = perf_iommu_add,
+               .del            = perf_iommu_del,
+               .start          = perf_iommu_start,
+               .stop           = perf_iommu_stop,
+               .read           = perf_iommu_read,
+       },
+       .max_banks              = 0x00,
+       .max_counters           = 0x00,
+       .cntr_assign_mask       = 0ULL,
+       .format_group           = NULL,
+       .cpumask_group          = NULL,
+       .events_group           = NULL,
+       .null_group             = NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+       /* Make sure the IOMMU PC resource is available */
+       if (!amd_iommu_pc_supported())
+               return -ENODEV;
+
+       _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+       return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h
new file mode 100644 (file)
index 0000000..845d173
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG                   0x00
+#define IOMMU_PC_COUNTER_SRC_REG               0x08
+#define IOMMU_PC_PASID_MATCH_REG               0x10
+#define IOMMU_PC_DOMID_MATCH_REG               0x18
+#define IOMMU_PC_DEVID_MATCH_REG               0x20
+#define IOMMU_PC_COUNTER_REPORT_REG            0x28
+
+/* maximum specified bank/counters */
+#define PC_MAX_SPEC_BNKS                       64
+#define PC_MAX_SPEC_CNTRS                      16
+
+/* iommu pc reg masks */
+#define IOMMU_BASE_DEVID                       0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+                       u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
new file mode 100644 (file)
index 0000000..3db9569
--- /dev/null
@@ -0,0 +1,603 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+
+#include <asm/cpufeature.h>
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+
+#define NUM_COUNTERS_NB                4
+#define NUM_COUNTERS_L2                4
+#define MAX_COUNTERS           NUM_COUNTERS_NB
+
+#define RDPMC_BASE_NB          6
+#define RDPMC_BASE_L2          10
+
+#define COUNTER_SHIFT          16
+
+struct amd_uncore {
+       int id;
+       int refcnt;
+       int cpu;
+       int num_counters;
+       int rdpmc_base;
+       u32 msr_base;
+       cpumask_t *active_mask;
+       struct pmu *pmu;
+       struct perf_event *events[MAX_COUNTERS];
+       struct amd_uncore *free_when_cpu_online;
+};
+
+static struct amd_uncore * __percpu *amd_uncore_nb;
+static struct amd_uncore * __percpu *amd_uncore_l2;
+
+static struct pmu amd_nb_pmu;
+static struct pmu amd_l2_pmu;
+
+static cpumask_t amd_nb_active_mask;
+static cpumask_t amd_l2_active_mask;
+
+static bool is_nb_event(struct perf_event *event)
+{
+       return event->pmu->type == amd_nb_pmu.type;
+}
+
+static bool is_l2_event(struct perf_event *event)
+{
+       return event->pmu->type == amd_l2_pmu.type;
+}
+
+static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
+{
+       if (is_nb_event(event) && amd_uncore_nb)
+               return *per_cpu_ptr(amd_uncore_nb, event->cpu);
+       else if (is_l2_event(event) && amd_uncore_l2)
+               return *per_cpu_ptr(amd_uncore_l2, event->cpu);
+
+       return NULL;
+}
+
+static void amd_uncore_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 prev, new;
+       s64 delta;
+
+       /*
+        * since we do not enable counter overflow interrupts,
+        * we do not have to worry about prev_count changing on us
+        */
+
+       prev = local64_read(&hwc->prev_count);
+       rdpmcl(hwc->event_base_rdpmc, new);
+       local64_set(&hwc->prev_count, new);
+       delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+       delta >>= COUNTER_SHIFT;
+       local64_add(delta, &event->count);
+}
+
+static void amd_uncore_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (flags & PERF_EF_RELOAD)
+               wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+       hwc->state = 0;
+       wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+       perf_event_update_userpage(event);
+}
+
+static void amd_uncore_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       wrmsrl(hwc->config_base, hwc->config);
+       hwc->state |= PERF_HES_STOPPED;
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               amd_uncore_read(event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+}
+
+static int amd_uncore_add(struct perf_event *event, int flags)
+{
+       int i;
+       struct amd_uncore *uncore = event_to_amd_uncore(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       /* are we already assigned? */
+       if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
+               goto out;
+
+       for (i = 0; i < uncore->num_counters; i++) {
+               if (uncore->events[i] == event) {
+                       hwc->idx = i;
+                       goto out;
+               }
+       }
+
+       /* if not, take the first available counter */
+       hwc->idx = -1;
+       for (i = 0; i < uncore->num_counters; i++) {
+               if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
+                       hwc->idx = i;
+                       break;
+               }
+       }
+
+out:
+       if (hwc->idx == -1)
+               return -EBUSY;
+
+       hwc->config_base = uncore->msr_base + (2 * hwc->idx);
+       hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
+       hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       if (flags & PERF_EF_START)
+               amd_uncore_start(event, PERF_EF_RELOAD);
+
+       return 0;
+}
+
+static void amd_uncore_del(struct perf_event *event, int flags)
+{
+       int i;
+       struct amd_uncore *uncore = event_to_amd_uncore(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       amd_uncore_stop(event, PERF_EF_UPDATE);
+
+       for (i = 0; i < uncore->num_counters; i++) {
+               if (cmpxchg(&uncore->events[i], event, NULL) == event)
+                       break;
+       }
+
+       hwc->idx = -1;
+}
+
+static int amd_uncore_event_init(struct perf_event *event)
+{
+       struct amd_uncore *uncore;
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /*
+        * NB and L2 counters (MSRs) are shared across all cores that share the
+        * same NB / L2 cache. Interrupts can be directed to a single target
+        * core, however, event counts generated by processes running on other
+        * cores cannot be masked out. So we do not support sampling and
+        * per-thread events.
+        */
+       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+               return -EINVAL;
+
+       /* NB and L2 counters do not have usr/os/guest/host bits */
+       if (event->attr.exclude_user || event->attr.exclude_kernel ||
+           event->attr.exclude_host || event->attr.exclude_guest)
+               return -EINVAL;
+
+       /* and we do not enable counter overflow interrupts */
+       hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+       hwc->idx = -1;
+
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       uncore = event_to_amd_uncore(event);
+       if (!uncore)
+               return -ENODEV;
+
+       /*
+        * since requests can come in on any of the shared cores, we
+        * remap the event to a single common cpu.
+        */
+       event->cpu = uncore->cpu;
+
+       return 0;
+}
+
+static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
+                                           struct device_attribute *attr,
+                                           char *buf)
+{
+       cpumask_t *active_mask;
+       struct pmu *pmu = dev_get_drvdata(dev);
+
+       if (pmu->type == amd_nb_pmu.type)
+               active_mask = &amd_nb_active_mask;
+       else if (pmu->type == amd_l2_pmu.type)
+               active_mask = &amd_l2_active_mask;
+       else
+               return 0;
+
+       return cpumap_print_to_pagebuf(true, buf, active_mask);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
+
+static struct attribute *amd_uncore_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group amd_uncore_attr_group = {
+       .attrs = amd_uncore_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15");
+
+static struct attribute *amd_uncore_format_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       NULL,
+};
+
+static struct attribute_group amd_uncore_format_group = {
+       .name = "format",
+       .attrs = amd_uncore_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_attr_groups[] = {
+       &amd_uncore_attr_group,
+       &amd_uncore_format_group,
+       NULL,
+};
+
+static struct pmu amd_nb_pmu = {
+       .attr_groups    = amd_uncore_attr_groups,
+       .name           = "amd_nb",
+       .event_init     = amd_uncore_event_init,
+       .add            = amd_uncore_add,
+       .del            = amd_uncore_del,
+       .start          = amd_uncore_start,
+       .stop           = amd_uncore_stop,
+       .read           = amd_uncore_read,
+};
+
+static struct pmu amd_l2_pmu = {
+       .attr_groups    = amd_uncore_attr_groups,
+       .name           = "amd_l2",
+       .event_init     = amd_uncore_event_init,
+       .add            = amd_uncore_add,
+       .del            = amd_uncore_del,
+       .start          = amd_uncore_start,
+       .stop           = amd_uncore_stop,
+       .read           = amd_uncore_read,
+};
+
+static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
+{
+       return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
+                       cpu_to_node(cpu));
+}
+
+static int amd_uncore_cpu_up_prepare(unsigned int cpu)
+{
+       struct amd_uncore *uncore_nb = NULL, *uncore_l2;
+
+       if (amd_uncore_nb) {
+               uncore_nb = amd_uncore_alloc(cpu);
+               if (!uncore_nb)
+                       goto fail;
+               uncore_nb->cpu = cpu;
+               uncore_nb->num_counters = NUM_COUNTERS_NB;
+               uncore_nb->rdpmc_base = RDPMC_BASE_NB;
+               uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
+               uncore_nb->active_mask = &amd_nb_active_mask;
+               uncore_nb->pmu = &amd_nb_pmu;
+               *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
+       }
+
+       if (amd_uncore_l2) {
+               uncore_l2 = amd_uncore_alloc(cpu);
+               if (!uncore_l2)
+                       goto fail;
+               uncore_l2->cpu = cpu;
+               uncore_l2->num_counters = NUM_COUNTERS_L2;
+               uncore_l2->rdpmc_base = RDPMC_BASE_L2;
+               uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
+               uncore_l2->active_mask = &amd_l2_active_mask;
+               uncore_l2->pmu = &amd_l2_pmu;
+               *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
+       }
+
+       return 0;
+
+fail:
+       if (amd_uncore_nb)
+               *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
+       kfree(uncore_nb);
+       return -ENOMEM;
+}
+
+static struct amd_uncore *
+amd_uncore_find_online_sibling(struct amd_uncore *this,
+                              struct amd_uncore * __percpu *uncores)
+{
+       unsigned int cpu;
+       struct amd_uncore *that;
+
+       for_each_online_cpu(cpu) {
+               that = *per_cpu_ptr(uncores, cpu);
+
+               if (!that)
+                       continue;
+
+               if (this == that)
+                       continue;
+
+               if (this->id == that->id) {
+                       that->free_when_cpu_online = this;
+                       this = that;
+                       break;
+               }
+       }
+
+       this->refcnt++;
+       return this;
+}
+
+static void amd_uncore_cpu_starting(unsigned int cpu)
+{
+       unsigned int eax, ebx, ecx, edx;
+       struct amd_uncore *uncore;
+
+       if (amd_uncore_nb) {
+               uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
+               cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+               uncore->id = ecx & 0xff;
+
+               uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
+               *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+       }
+
+       if (amd_uncore_l2) {
+               unsigned int apicid = cpu_data(cpu).apicid;
+               unsigned int nshared;
+
+               uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
+               cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
+               nshared = ((eax >> 14) & 0xfff) + 1;
+               uncore->id = apicid - (apicid % nshared);
+
+               uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
+               *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+       }
+}
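
The L2 uncore id above groups cores by their shared cache: CPUID leaf 0x8000001d, subleaf 2 reports how many logical CPUs share the L2 (bits 25:14 of EAX, plus one), and the id is the APIC id rounded down to the start of that sharing group. A small sketch of the computation with made-up input values:

#include <stdio.h>

int main(void)
{
        /* Example inputs: eax from CPUID 0x8000001d subleaf 2, this CPU's apicid. */
        unsigned int eax = 1 << 14;     /* bits 25:14 = 1 -> two CPUs share the L2 */
        unsigned int apicid = 5;

        unsigned int nshared = ((eax >> 14) & 0xfff) + 1;
        unsigned int id = apicid - (apicid % nshared);  /* first apicid in the group */

        printf("nshared=%u id=%u\n", nshared, id);      /* nshared=2 id=4 */
        return 0;
}
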
+
+static void uncore_online(unsigned int cpu,
+                         struct amd_uncore * __percpu *uncores)
+{
+       struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+       kfree(uncore->free_when_cpu_online);
+       uncore->free_when_cpu_online = NULL;
+
+       if (cpu == uncore->cpu)
+               cpumask_set_cpu(cpu, uncore->active_mask);
+}
+
+static void amd_uncore_cpu_online(unsigned int cpu)
+{
+       if (amd_uncore_nb)
+               uncore_online(cpu, amd_uncore_nb);
+
+       if (amd_uncore_l2)
+               uncore_online(cpu, amd_uncore_l2);
+}
+
+static void uncore_down_prepare(unsigned int cpu,
+                               struct amd_uncore * __percpu *uncores)
+{
+       unsigned int i;
+       struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
+
+       if (this->cpu != cpu)
+               return;
+
+       /* this cpu is going down, migrate to a shared sibling if possible */
+       for_each_online_cpu(i) {
+               struct amd_uncore *that = *per_cpu_ptr(uncores, i);
+
+               if (cpu == i)
+                       continue;
+
+               if (this == that) {
+                       perf_pmu_migrate_context(this->pmu, cpu, i);
+                       cpumask_clear_cpu(cpu, that->active_mask);
+                       cpumask_set_cpu(i, that->active_mask);
+                       that->cpu = i;
+                       break;
+               }
+       }
+}
+
+static void amd_uncore_cpu_down_prepare(unsigned int cpu)
+{
+       if (amd_uncore_nb)
+               uncore_down_prepare(cpu, amd_uncore_nb);
+
+       if (amd_uncore_l2)
+               uncore_down_prepare(cpu, amd_uncore_l2);
+}
+
+static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
+{
+       struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+       if (cpu == uncore->cpu)
+               cpumask_clear_cpu(cpu, uncore->active_mask);
+
+       if (!--uncore->refcnt)
+               kfree(uncore);
+       *per_cpu_ptr(uncores, cpu) = NULL;
+}
+
+static void amd_uncore_cpu_dead(unsigned int cpu)
+{
+       if (amd_uncore_nb)
+               uncore_dead(cpu, amd_uncore_nb);
+
+       if (amd_uncore_l2)
+               uncore_dead(cpu, amd_uncore_l2);
+}
+
+static int
+amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
+                       void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               if (amd_uncore_cpu_up_prepare(cpu))
+                       return notifier_from_errno(-ENOMEM);
+               break;
+
+       case CPU_STARTING:
+               amd_uncore_cpu_starting(cpu);
+               break;
+
+       case CPU_ONLINE:
+               amd_uncore_cpu_online(cpu);
+               break;
+
+       case CPU_DOWN_PREPARE:
+               amd_uncore_cpu_down_prepare(cpu);
+               break;
+
+       case CPU_UP_CANCELED:
+       case CPU_DEAD:
+               amd_uncore_cpu_dead(cpu);
+               break;
+
+       default:
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block amd_uncore_cpu_notifier_block = {
+       .notifier_call  = amd_uncore_cpu_notifier,
+       .priority       = CPU_PRI_PERF + 1,
+};
+
+static void __init init_cpu_already_online(void *dummy)
+{
+       unsigned int cpu = smp_processor_id();
+
+       amd_uncore_cpu_starting(cpu);
+       amd_uncore_cpu_online(cpu);
+}
+
+static void cleanup_cpu_online(void *dummy)
+{
+       unsigned int cpu = smp_processor_id();
+
+       amd_uncore_cpu_dead(cpu);
+}
+
+static int __init amd_uncore_init(void)
+{
+       unsigned int cpu, cpu2;
+       int ret = -ENODEV;
+
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+               goto fail_nodev;
+
+       if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
+               goto fail_nodev;
+
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
+               amd_uncore_nb = alloc_percpu(struct amd_uncore *);
+               if (!amd_uncore_nb) {
+                       ret = -ENOMEM;
+                       goto fail_nb;
+               }
+               ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+               if (ret)
+                       goto fail_nb;
+
+               pr_info("perf: AMD NB counters detected\n");
+               ret = 0;
+       }
+
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
+               amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
+               if (!amd_uncore_l2) {
+                       ret = -ENOMEM;
+                       goto fail_l2;
+               }
+               ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+               if (ret)
+                       goto fail_l2;
+
+               pr_info("perf: AMD L2I counters detected\n");
+               ret = 0;
+       }
+
+       if (ret)
+               goto fail_nodev;
+
+       cpu_notifier_register_begin();
+
+       /* init cpus already online before registering for hotplug notifier */
+       for_each_online_cpu(cpu) {
+               ret = amd_uncore_cpu_up_prepare(cpu);
+               if (ret)
+                       goto fail_online;
+               smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+       }
+
+       __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+       cpu_notifier_register_done();
+
+       return 0;
+
+
+fail_online:
+       for_each_online_cpu(cpu2) {
+               if (cpu2 == cpu)
+                       break;
+               smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
+       }
+       cpu_notifier_register_done();
+
+       /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
+       amd_uncore_nb = amd_uncore_l2 = NULL;
+
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
+               perf_pmu_unregister(&amd_l2_pmu);
+fail_l2:
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
+               perf_pmu_unregister(&amd_nb_pmu);
+       if (amd_uncore_l2)
+               free_percpu(amd_uncore_l2);
+fail_nb:
+       if (amd_uncore_nb)
+               free_percpu(amd_uncore_nb);
+
+fail_nodev:
+       return ret;
+}
+device_initcall(amd_uncore_init);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
new file mode 100644 (file)
index 0000000..5e830d0
--- /dev/null
@@ -0,0 +1,2442 @@
+/*
+ * Performance events x86 architecture code
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+
+#include <asm/apic.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/alternative.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/timer.h>
+#include <asm/desc.h>
+#include <asm/ldt.h>
+
+#include "perf_event.h"
+
+struct x86_pmu x86_pmu __read_mostly;
+
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+       .enabled = 1,
+};
+
+struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
+
+u64 __read_mostly hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+u64 __read_mostly hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+/*
+ * Propagate event elapsed time into the generic event.
+ * Can only be executed on the CPU where the event is active.
+ * Returns the delta events processed.
+ */
+u64 x86_perf_event_update(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       int shift = 64 - x86_pmu.cntval_bits;
+       u64 prev_raw_count, new_raw_count;
+       int idx = hwc->idx;
+       s64 delta;
+
+       if (idx == INTEL_PMC_IDX_FIXED_BTS)
+               return 0;
+
+       /*
+        * Careful: an NMI might modify the previous event value.
+        *
+        * Our tactic to handle this is to first atomically read and
+        * exchange a new raw count - then add that new-prev delta
+        * count to the generic event atomically:
+        */
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                                       new_raw_count) != prev_raw_count)
+               goto again;
+
+       /*
+        * Now we have the new raw value and have updated the prev
+        * timestamp already. We can now calculate the elapsed delta
+        * (event-)time and add that to the generic event.
+        *
+        * Careful, not all hw sign-extends above the physical width
+        * of the count.
+        */
+       delta = (new_raw_count << shift) - (prev_raw_count << shift);
+       delta >>= shift;
+
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
+
+       return new_raw_count;
+}
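
A quick illustration of the shift pair in x86_perf_event_update() (editorial aside, not part of the patch): left-aligning both raw values discards the bits above the counter width, and the arithmetic right shift sign-extends the difference, so a wrapped counter still yields a correct positive delta. A minimal sketch assuming a 48-bit counter (shift == 16); the values are made up:

        u64 prev_raw = 0x0000ffffffffff00ULL;  /* just below the 48-bit wrap */
        u64 new_raw  = 0x0000000000000010ULL;  /* counter wrapped past zero  */
        s64 delta;

        delta = (new_raw << 16) - (prev_raw << 16);
        delta >>= 16;   /* arithmetic shift sign-extends                     */
        /* delta == 0x110: the true number of events despite the wrap        */
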
+
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg;
+       struct extra_reg *er;
+
+       reg = &event->hw.extra_reg;
+
+       if (!x86_pmu.extra_regs)
+               return 0;
+
+       for (er = x86_pmu.extra_regs; er->msr; er++) {
+               if (er->event != (config & er->config_mask))
+                       continue;
+               if (event->attr.config1 & ~er->valid_mask)
+                       return -EINVAL;
+               /* Check if the extra MSRs can be safely accessed */
+               if (!er->extra_msr_access)
+                       return -ENXIO;
+
+               reg->idx = er->idx;
+               reg->config = event->attr.config1;
+               reg->reg = er->msr;
+               break;
+       }
+       return 0;
+}
+
+static atomic_t active_events;
+static atomic_t pmc_refcount;
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static bool reserve_pmc_hardware(void)
+{
+       int i;
+
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
+                       goto perfctr_fail;
+       }
+
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
+                       goto eventsel_fail;
+       }
+
+       return true;
+
+eventsel_fail:
+       for (i--; i >= 0; i--)
+               release_evntsel_nmi(x86_pmu_config_addr(i));
+
+       i = x86_pmu.num_counters;
+
+perfctr_fail:
+       for (i--; i >= 0; i--)
+               release_perfctr_nmi(x86_pmu_event_addr(i));
+
+       return false;
+}
+
+static void release_pmc_hardware(void)
+{
+       int i;
+
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               release_perfctr_nmi(x86_pmu_event_addr(i));
+               release_evntsel_nmi(x86_pmu_config_addr(i));
+       }
+}
+
+#else
+
+static bool reserve_pmc_hardware(void) { return true; }
+static void release_pmc_hardware(void) {}
+
+#endif
+
+static bool check_hw_exists(void)
+{
+       u64 val, val_fail, val_new = ~0;
+       int i, reg, reg_fail, ret = 0;
+       int bios_fail = 0;
+       int reg_safe = -1;
+
+       /*
+        * Check to see if the BIOS enabled any of the counters; if so,
+        * complain and bail.
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               reg = x86_pmu_config_addr(i);
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
+                       bios_fail = 1;
+                       val_fail = val;
+                       reg_fail = reg;
+               } else {
+                       reg_safe = i;
+               }
+       }
+
+       if (x86_pmu.num_counters_fixed) {
+               reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+                       if (val & (0x03 << i*4)) {
+                               bios_fail = 1;
+                               val_fail = val;
+                               reg_fail = reg;
+                       }
+               }
+       }
+
+       /*
+        * If all the counters are enabled, the below test will always
+        * fail.  The tools will also become useless in this scenario.
+        * Just fail and disable the hardware counters.
+        */
+
+       if (reg_safe == -1) {
+               reg = reg_safe;
+               goto msr_fail;
+       }
+
+       /*
+        * Read the current value, change it and read it back to see if it
+        * matches, this is needed to detect certain hardware emulators
+        * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+        */
+       reg = x86_pmu_event_addr(reg_safe);
+       if (rdmsrl_safe(reg, &val))
+               goto msr_fail;
+       val ^= 0xffffUL;
+       ret = wrmsrl_safe(reg, val);
+       ret |= rdmsrl_safe(reg, &val_new);
+       if (ret || val != val_new)
+               goto msr_fail;
+
+       /*
+        * We still allow the PMU driver to operate:
+        */
+       if (bios_fail) {
+               pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
+               pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
+                             reg_fail, val_fail);
+       }
+
+       return true;
+
+msr_fail:
+       pr_cont("Broken PMU hardware detected, using software events only.\n");
+       pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+               boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
+               reg, val_new);
+
+       return false;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+       x86_release_hardware();
+       atomic_dec(&active_events);
+}
+
+void hw_perf_lbr_event_destroy(struct perf_event *event)
+{
+       hw_perf_event_destroy(event);
+
+       /* undo the lbr/bts event accounting */
+       x86_del_exclusive(x86_lbr_exclusive_lbr);
+}
+
+static inline int x86_pmu_initialized(void)
+{
+       return x86_pmu.handle_irq != NULL;
+}
+
+static inline int
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
+{
+       struct perf_event_attr *attr = &event->attr;
+       unsigned int cache_type, cache_op, cache_result;
+       u64 config, val;
+
+       config = attr->config;
+
+       cache_type = (config >>  0) & 0xff;
+       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+               return -EINVAL;
+
+       cache_op = (config >>  8) & 0xff;
+       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+               return -EINVAL;
+
+       cache_result = (config >> 16) & 0xff;
+       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return -EINVAL;
+
+       val = hw_cache_event_ids[cache_type][cache_op][cache_result];
+
+       if (val == 0)
+               return -ENOENT;
+
+       if (val == -1)
+               return -EINVAL;
+
+       hwc->config |= val;
+       attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+       return x86_pmu_extra_regs(val, event);
+}
+
+int x86_reserve_hardware(void)
+{
+       int err = 0;
+
+       if (!atomic_inc_not_zero(&pmc_refcount)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_read(&pmc_refcount) == 0) {
+                       if (!reserve_pmc_hardware())
+                               err = -EBUSY;
+                       else
+                               reserve_ds_buffers();
+               }
+               if (!err)
+                       atomic_inc(&pmc_refcount);
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+
+       return err;
+}
+
+void x86_release_hardware(void)
+{
+       if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
+               release_pmc_hardware();
+               release_ds_buffers();
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+}
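
The pmc_refcount handling above is the usual "first/last user does the real work" idiom: atomic_inc_not_zero() covers the common fast path, and only the 0 -> 1 and 1 -> 0 transitions take pmc_reserve_mutex. A generic, hedged sketch of the same pattern (the grab/drop helpers are hypothetical, not from this file; assumes <linux/atomic.h> and <linux/mutex.h>):

        static atomic_t res_refcount;
        static DEFINE_MUTEX(res_mutex);

        static int res_get(void)
        {
                int err = 0;

                if (atomic_inc_not_zero(&res_refcount))
                        return 0;                       /* fast path: already reserved */

                mutex_lock(&res_mutex);
                if (atomic_read(&res_refcount) == 0)
                        err = grab_the_resource();      /* hypothetical 0 -> 1 work */
                if (!err)
                        atomic_inc(&res_refcount);
                mutex_unlock(&res_mutex);
                return err;
        }

        static void res_put(void)
        {
                /* 1 -> 0: drop the resource with the mutex held */
                if (atomic_dec_and_mutex_lock(&res_refcount, &res_mutex)) {
                        drop_the_resource();            /* hypothetical cleanup */
                        mutex_unlock(&res_mutex);
                }
        }
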
+
+/*
+ * Check if we can create an event of a certain type (i.e. that no conflicting
+ * events are present).
+ */
+int x86_add_exclusive(unsigned int what)
+{
+       int i;
+
+       if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
+               mutex_lock(&pmc_reserve_mutex);
+               for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
+                       if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
+                               goto fail_unlock;
+               }
+               atomic_inc(&x86_pmu.lbr_exclusive[what]);
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+
+       atomic_inc(&active_events);
+       return 0;
+
+fail_unlock:
+       mutex_unlock(&pmc_reserve_mutex);
+       return -EBUSY;
+}
+
+void x86_del_exclusive(unsigned int what)
+{
+       atomic_dec(&x86_pmu.lbr_exclusive[what]);
+       atomic_dec(&active_events);
+}
+
+int x86_setup_perfctr(struct perf_event *event)
+{
+       struct perf_event_attr *attr = &event->attr;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 config;
+
+       if (!is_sampling_event(event)) {
+               hwc->sample_period = x86_pmu.max_period;
+               hwc->last_period = hwc->sample_period;
+               local64_set(&hwc->period_left, hwc->sample_period);
+       }
+
+       if (attr->type == PERF_TYPE_RAW)
+               return x86_pmu_extra_regs(event->attr.config, event);
+
+       if (attr->type == PERF_TYPE_HW_CACHE)
+               return set_ext_hw_attr(hwc, event);
+
+       if (attr->config >= x86_pmu.max_events)
+               return -EINVAL;
+
+       /*
+        * The generic map:
+        */
+       config = x86_pmu.event_map(attr->config);
+
+       if (config == 0)
+               return -ENOENT;
+
+       if (config == -1LL)
+               return -EINVAL;
+
+       /*
+        * Branch tracing:
+        */
+       if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+           !attr->freq && hwc->sample_period == 1) {
+               /* BTS is not supported by this architecture. */
+               if (!x86_pmu.bts_active)
+                       return -EOPNOTSUPP;
+
+               /* BTS is currently only allowed for user-mode. */
+               if (!attr->exclude_kernel)
+                       return -EOPNOTSUPP;
+
+               /* disallow bts if conflicting events are present */
+               if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+                       return -EBUSY;
+
+               event->destroy = hw_perf_lbr_event_destroy;
+       }
+
+       hwc->config |= config;
+
+       return 0;
+}
+
+/*
+ * check that branch_sample_type is compatible with the
+ * settings needed for precise_ip > 1, which implies
+ * using the LBR to capture ALL taken branches at the
+ * privilege levels of the measurement
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+       u64 m = event->attr.branch_sample_type;
+       u64 b = 0;
+
+       /* must capture all branches */
+       if (!(m & PERF_SAMPLE_BRANCH_ANY))
+               return 0;
+
+       m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+       if (!event->attr.exclude_user)
+               b |= PERF_SAMPLE_BRANCH_USER;
+
+       if (!event->attr.exclude_kernel)
+               b |= PERF_SAMPLE_BRANCH_KERNEL;
+
+       /*
+        * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
+        */
+
+       return m == b;
+}
+
+int x86_pmu_hw_config(struct perf_event *event)
+{
+       if (event->attr.precise_ip) {
+               int precise = 0;
+
+               /* Support for constant skid */
+               if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
+                       precise++;
+
+                       /* Support for IP fixup */
+                       if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
+                               precise++;
+
+                       if (x86_pmu.pebs_prec_dist)
+                               precise++;
+               }
+
+               if (event->attr.precise_ip > precise)
+                       return -EOPNOTSUPP;
+       }
+       /*
+        * check that PEBS LBR correction does not conflict with
+        * whatever the user is asking with attr->branch_sample_type
+        */
+       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
+               u64 *br_type = &event->attr.branch_sample_type;
+
+               if (has_branch_stack(event)) {
+                       if (!precise_br_compat(event))
+                               return -EOPNOTSUPP;
+
+                       /* branch_sample_type is compatible */
+
+               } else {
+                       /*
+                        * the user did not specify branch_sample_type
+                        *
+                        * For PEBS fixups, we capture all
+                        * the branches at the priv level of the
+                        * event.
+                        */
+                       *br_type = PERF_SAMPLE_BRANCH_ANY;
+
+                       if (!event->attr.exclude_user)
+                               *br_type |= PERF_SAMPLE_BRANCH_USER;
+
+                       if (!event->attr.exclude_kernel)
+                               *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+               }
+       }
+
+       if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+               event->attach_state |= PERF_ATTACH_TASK_DATA;
+
+       /*
+        * Generate PMC IRQs:
+        * (keep 'enabled' bit clear for now)
+        */
+       event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
+
+       /*
+        * Count user and OS events unless requested not to
+        */
+       if (!event->attr.exclude_user)
+               event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+       if (!event->attr.exclude_kernel)
+               event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
+
+       if (event->attr.type == PERF_TYPE_RAW)
+               event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+
+       if (event->attr.sample_period && x86_pmu.limit_period) {
+               if (x86_pmu.limit_period(event, event->attr.sample_period) >
+                               event->attr.sample_period)
+                       return -EINVAL;
+       }
+
+       return x86_setup_perfctr(event);
+}
+
+/*
+ * Setup the hardware configuration for a given attr_type
+ */
+static int __x86_pmu_event_init(struct perf_event *event)
+{
+       int err;
+
+       if (!x86_pmu_initialized())
+               return -ENODEV;
+
+       err = x86_reserve_hardware();
+       if (err)
+               return err;
+
+       atomic_inc(&active_events);
+       event->destroy = hw_perf_event_destroy;
+
+       event->hw.idx = -1;
+       event->hw.last_cpu = -1;
+       event->hw.last_tag = ~0ULL;
+
+       /* mark unused */
+       event->hw.extra_reg.idx = EXTRA_REG_NONE;
+       event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
+       return x86_pmu.hw_config(event);
+}
+
+void x86_pmu_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               u64 val;
+
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+               rdmsrl(x86_pmu_config_addr(idx), val);
+               if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
+                       continue;
+               val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+               wrmsrl(x86_pmu_config_addr(idx), val);
+       }
+}
+
+/*
+ * There may be PMI landing after enabled=0. The PMI hitting could be before or
+ * after disable_all.
+ *
+ * If PMI hits before disable_all, the PMU will be disabled in the NMI handler.
+ * It will not be re-enabled in the NMI handler again, because enabled=0. After
+ * handling the NMI, disable_all will be called, which will not change the
+ * state either. If PMI hits after disable_all, the PMU is already disabled
+ * before entering NMI handler. The NMI handler will not change the state
+ * either.
+ *
+ * So either situation is harmless.
+ */
+static void x86_pmu_disable(struct pmu *pmu)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (!x86_pmu_initialized())
+               return;
+
+       if (!cpuc->enabled)
+               return;
+
+       cpuc->n_added = 0;
+       cpuc->enabled = 0;
+       barrier();
+
+       x86_pmu.disable_all();
+}
+
+void x86_pmu_enable_all(int added)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
+
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+
+               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+       }
+}
+
+static struct pmu pmu;
+
+static inline int is_x86_event(struct perf_event *event)
+{
+       return event->pmu == &pmu;
+}
+
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+       int     weight;
+       int     event;          /* event index */
+       int     counter;        /* counter index */
+       int     unassigned;     /* number of events to be assigned left */
+       int     nr_gp;          /* number of GP counters used */
+       unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define        SCHED_STATES_MAX        2
+
+struct perf_sched {
+       int                     max_weight;
+       int                     max_events;
+       int                     max_gp;
+       int                     saved_states;
+       struct event_constraint **constraints;
+       struct sched_state      state;
+       struct sched_state      saved[SCHED_STATES_MAX];
+};
+
+/*
+ * Initialize the iterator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+                           int num, int wmin, int wmax, int gpmax)
+{
+       int idx;
+
+       memset(sched, 0, sizeof(*sched));
+       sched->max_events       = num;
+       sched->max_weight       = wmax;
+       sched->max_gp           = gpmax;
+       sched->constraints      = constraints;
+
+       for (idx = 0; idx < num; idx++) {
+               if (constraints[idx]->weight == wmin)
+                       break;
+       }
+
+       sched->state.event      = idx;          /* start with min weight */
+       sched->state.weight     = wmin;
+       sched->state.unassigned = num;
+}
+
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+       if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+               return;
+
+       sched->saved[sched->saved_states] = sched->state;
+       sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+       if (!sched->saved_states)
+               return false;
+
+       sched->saved_states--;
+       sched->state = sched->saved[sched->saved_states];
+
+       /* continue with next counter: */
+       clear_bit(sched->state.counter++, sched->state.used);
+
+       return true;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool __perf_sched_find_counter(struct perf_sched *sched)
+{
+       struct event_constraint *c;
+       int idx;
+
+       if (!sched->state.unassigned)
+               return false;
+
+       if (sched->state.event >= sched->max_events)
+               return false;
+
+       c = sched->constraints[sched->state.event];
+       /* Prefer fixed purpose counters */
+       if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
+               idx = INTEL_PMC_IDX_FIXED;
+               for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+                       if (!__test_and_set_bit(idx, sched->state.used))
+                               goto done;
+               }
+       }
+
+       /* Grab the first unused counter starting with idx */
+       idx = sched->state.counter;
+       for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
+               if (!__test_and_set_bit(idx, sched->state.used)) {
+                       if (sched->state.nr_gp++ >= sched->max_gp)
+                               return false;
+
+                       goto done;
+               }
+       }
+
+       return false;
+
+done:
+       sched->state.counter = idx;
+
+       if (c->overlap)
+               perf_sched_save_state(sched);
+
+       return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+       while (!__perf_sched_find_counter(sched)) {
+               if (!perf_sched_restore_state(sched))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+       struct event_constraint *c;
+
+       if (!sched->state.unassigned || !--sched->state.unassigned)
+               return false;
+
+       do {
+               /* next event */
+               sched->state.event++;
+               if (sched->state.event >= sched->max_events) {
+                       /* next weight */
+                       sched->state.event = 0;
+                       sched->state.weight++;
+                       if (sched->state.weight > sched->max_weight)
+                               return false;
+               }
+               c = sched->constraints[sched->state.event];
+       } while (c->weight != sched->state.weight);
+
+       sched->state.counter = 0;       /* start with first counter */
+
+       return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+int perf_assign_events(struct event_constraint **constraints, int n,
+                       int wmin, int wmax, int gpmax, int *assign)
+{
+       struct perf_sched sched;
+
+       perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
+
+       do {
+               if (!perf_sched_find_counter(&sched))
+                       break;  /* failed */
+               if (assign)
+                       assign[sched.state.event] = sched.state.counter;
+       } while (perf_sched_next_event(&sched));
+
+       return sched.state.unassigned;
+}
+EXPORT_SYMBOL_GPL(perf_assign_events);
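
A small worked example of the scheduling order (editorial illustration, not from the patch). Suppose three events with these constraints:

        event   allowed counters     weight
          E0    {PMC0}                 1
          E1    {PMC0, PMC1, PMC2}     3
          E2    {PMC1}                 1

perf_sched_init() starts at the lowest weight, so the tightly constrained events are placed first: E0 -> PMC0, E2 -> PMC1, and only then E1 -> PMC2. Placing the loosely constrained E1 first could have consumed PMC0 and left E0 unschedulable. perf_sched_save_state()/perf_sched_restore_state() additionally allow backtracking to a previously saved choice when overlapping constraints are involved.
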
+
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+       struct event_constraint *c;
+       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       struct perf_event *e;
+       int i, wmin, wmax, unsched = 0;
+       struct hw_perf_event *hwc;
+
+       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+       if (x86_pmu.start_scheduling)
+               x86_pmu.start_scheduling(cpuc);
+
+       for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+               cpuc->event_constraint[i] = NULL;
+               c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+               cpuc->event_constraint[i] = c;
+
+               wmin = min(wmin, c->weight);
+               wmax = max(wmax, c->weight);
+       }
+
+       /*
+        * fastpath, try to reuse previous register
+        */
+       for (i = 0; i < n; i++) {
+               hwc = &cpuc->event_list[i]->hw;
+               c = cpuc->event_constraint[i];
+
+               /* never assigned */
+               if (hwc->idx == -1)
+                       break;
+
+               /* constraint still honored */
+               if (!test_bit(hwc->idx, c->idxmsk))
+                       break;
+
+               /* not already used */
+               if (test_bit(hwc->idx, used_mask))
+                       break;
+
+               __set_bit(hwc->idx, used_mask);
+               if (assign)
+                       assign[i] = hwc->idx;
+       }
+
+       /* slow path */
+       if (i != n) {
+               int gpmax = x86_pmu.num_counters;
+
+               /*
+                * Do not allow scheduling of more than half the available
+                * generic counters.
+                *
+                * This helps avoid counter starvation of the sibling thread by
+                * ensuring at most half the counters cannot be in exclusive
+                * mode. There are no designated counters for the limit; any
+                * N/2 counters can be used. This helps with events with
+                * specific counter constraints.
+                */
+               if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+                   READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+                       gpmax /= 2;
+
+               unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
+                                            wmax, gpmax, assign);
+       }
+
+       /*
+        * In case of success (unsched = 0), mark events as committed,
+        * so we do not put_constraint() in case new events are added
+        * and fail to be scheduled
+        *
+        * We invoke the lower level commit callback to lock the resource
+        *
+        * We do not need to do all of this in case we are called to
+        * validate an event group (assign == NULL)
+        */
+       if (!unsched && assign) {
+               for (i = 0; i < n; i++) {
+                       e = cpuc->event_list[i];
+                       e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+                       if (x86_pmu.commit_scheduling)
+                               x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+               }
+       } else {
+               for (i = 0; i < n; i++) {
+                       e = cpuc->event_list[i];
+                       /*
+                        * do not put_constraint() on committed events,
+                        * because they are good to go
+                        */
+                       if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+                               continue;
+
+                       /*
+                        * release events that failed scheduling
+                        */
+                       if (x86_pmu.put_event_constraints)
+                               x86_pmu.put_event_constraints(cpuc, e);
+               }
+       }
+
+       if (x86_pmu.stop_scheduling)
+               x86_pmu.stop_scheduling(cpuc);
+
+       return unsched ? -EINVAL : 0;
+}
+
+/*
+ * dogrp: true if we must collect sibling events (group)
+ * returns the total number of events, or an error code
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+{
+       struct perf_event *event;
+       int n, max_count;
+
+       max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
+
+       /* current number of events already accepted */
+       n = cpuc->n_events;
+
+       if (is_x86_event(leader)) {
+               if (n >= max_count)
+                       return -EINVAL;
+               cpuc->event_list[n] = leader;
+               n++;
+       }
+       if (!dogrp)
+               return n;
+
+       list_for_each_entry(event, &leader->sibling_list, group_entry) {
+               if (!is_x86_event(event) ||
+                   event->state <= PERF_EVENT_STATE_OFF)
+                       continue;
+
+               if (n >= max_count)
+                       return -EINVAL;
+
+               cpuc->event_list[n] = event;
+               n++;
+       }
+       return n;
+}
+
+static inline void x86_assign_hw_event(struct perf_event *event,
+                               struct cpu_hw_events *cpuc, int i)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       hwc->idx = cpuc->assign[i];
+       hwc->last_cpu = smp_processor_id();
+       hwc->last_tag = ++cpuc->tags[i];
+
+       if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
+               hwc->config_base = 0;
+               hwc->event_base = 0;
+       } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+               hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+               hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
+               hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+       } else {
+               hwc->config_base = x86_pmu_config_addr(hwc->idx);
+               hwc->event_base  = x86_pmu_event_addr(hwc->idx);
+               hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
+       }
+}
+
+static inline int match_prev_assignment(struct hw_perf_event *hwc,
+                                       struct cpu_hw_events *cpuc,
+                                       int i)
+{
+       return hwc->idx == cpuc->assign[i] &&
+               hwc->last_cpu == smp_processor_id() &&
+               hwc->last_tag == cpuc->tags[i];
+}
+
+static void x86_pmu_start(struct perf_event *event, int flags);
+
+static void x86_pmu_enable(struct pmu *pmu)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_event *event;
+       struct hw_perf_event *hwc;
+       int i, added = cpuc->n_added;
+
+       if (!x86_pmu_initialized())
+               return;
+
+       if (cpuc->enabled)
+               return;
+
+       if (cpuc->n_added) {
+               int n_running = cpuc->n_events - cpuc->n_added;
+               /*
+                * apply assignment obtained either from
+                * hw_perf_group_sched_in() or x86_pmu_enable()
+                *
+                * step1: save events moving to new counters
+                */
+               for (i = 0; i < n_running; i++) {
+                       event = cpuc->event_list[i];
+                       hwc = &event->hw;
+
+                       /*
+                        * we can avoid reprogramming the counter if:
+                        * - assigned same counter as last time
+                        * - running on same CPU as last time
+                        * - no other event has used the counter since
+                        */
+                       if (hwc->idx == -1 ||
+                           match_prev_assignment(hwc, cpuc, i))
+                               continue;
+
+                       /*
+                        * Ensure we don't accidentally enable a stopped
+                        * counter simply because we rescheduled.
+                        */
+                       if (hwc->state & PERF_HES_STOPPED)
+                               hwc->state |= PERF_HES_ARCH;
+
+                       x86_pmu_stop(event, PERF_EF_UPDATE);
+               }
+
+               /*
+                * step2: reprogram moved events into new counters
+                */
+               for (i = 0; i < cpuc->n_events; i++) {
+                       event = cpuc->event_list[i];
+                       hwc = &event->hw;
+
+                       if (!match_prev_assignment(hwc, cpuc, i))
+                               x86_assign_hw_event(event, cpuc, i);
+                       else if (i < n_running)
+                               continue;
+
+                       if (hwc->state & PERF_HES_ARCH)
+                               continue;
+
+                       x86_pmu_start(event, PERF_EF_RELOAD);
+               }
+               cpuc->n_added = 0;
+               perf_events_lapic_init();
+       }
+
+       cpuc->enabled = 1;
+       barrier();
+
+       x86_pmu.enable_all(added);
+}
+
+static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+int x86_perf_event_set_period(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       s64 left = local64_read(&hwc->period_left);
+       s64 period = hwc->sample_period;
+       int ret = 0, idx = hwc->idx;
+
+       if (idx == INTEL_PMC_IDX_FIXED_BTS)
+               return 0;
+
+       /*
+        * If we are way outside a reasonable range then just skip forward:
+        */
+       if (unlikely(left <= -period)) {
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (unlikely(left <= 0)) {
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+       /*
+        * Quirk: certain CPUs don't like it if just 1 hw_event is left:
+        */
+       if (unlikely(left < 2))
+               left = 2;
+
+       if (left > x86_pmu.max_period)
+               left = x86_pmu.max_period;
+
+       if (x86_pmu.limit_period)
+               left = x86_pmu.limit_period(event, left);
+
+       per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
+
+       if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
+           local64_read(&hwc->prev_count) != (u64)-left) {
+               /*
+                * The hw event starts counting from this event offset,
+                * mark it to be able to extract future deltas:
+                */
+               local64_set(&hwc->prev_count, (u64)-left);
+
+               wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+       }
+
+       /*
+        * Due to an erratum on certain CPUs we need
+        * a second write to be sure the register
+        * is updated properly
+        */
+       if (x86_pmu.perfctr_second_write) {
+               wrmsrl(hwc->event_base,
+                       (u64)(-left) & x86_pmu.cntval_mask);
+       }
+
+       perf_event_update_userpage(event);
+
+       return ret;
+}
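
For illustration (editorial aside, assumed numbers): the counter is programmed with the two's complement of the remaining period, so it counts up and overflows after exactly 'left' events. With left == 200000 and the assumed 48-bit counters:

        wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
        /* writes 0x0000fffffffcf2c0; the PMI fires after 200000 more events */
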
+
+void x86_pmu_enable_event(struct perf_event *event)
+{
+       if (__this_cpu_read(cpu_hw_events.enabled))
+               __x86_pmu_enable_event(&event->hw,
+                                      ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+/*
+ * Add a single event to the PMU.
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scheduled with existing events.
+ */
+static int x86_pmu_add(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc;
+       int assign[X86_PMC_IDX_MAX];
+       int n, n0, ret;
+
+       hwc = &event->hw;
+
+       n0 = cpuc->n_events;
+       ret = n = collect_events(cpuc, event, false);
+       if (ret < 0)
+               goto out;
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+       if (!(flags & PERF_EF_START))
+               hwc->state |= PERF_HES_ARCH;
+
+       /*
+        * If group events scheduling transaction was started,
+        * skip the schedulability test here, it will be performed
+        * at commit time (->commit_txn) as a whole.
+        */
+       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+               goto done_collect;
+
+       ret = x86_pmu.schedule_events(cpuc, n, assign);
+       if (ret)
+               goto out;
+       /*
+        * copy the new assignment; now that we know it is possible,
+        * it will be used by hw_perf_enable()
+        */
+       memcpy(cpuc->assign, assign, n*sizeof(int));
+
+done_collect:
+       /*
+        * Commit the collect_events() state. See x86_pmu_del() and
+        * x86_pmu_*_txn().
+        */
+       cpuc->n_events = n;
+       cpuc->n_added += n - n0;
+       cpuc->n_txn += n - n0;
+
+       ret = 0;
+out:
+       return ret;
+}
+
+static void x86_pmu_start(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx = event->hw.idx;
+
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       if (WARN_ON_ONCE(idx == -1))
+               return;
+
+       if (flags & PERF_EF_RELOAD) {
+               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+               x86_perf_event_set_period(event);
+       }
+
+       event->hw.state = 0;
+
+       cpuc->events[idx] = event;
+       __set_bit(idx, cpuc->active_mask);
+       __set_bit(idx, cpuc->running);
+       x86_pmu.enable(event);
+       perf_event_update_userpage(event);
+}
+
+void perf_event_print_debug(void)
+{
+       u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+       u64 pebs, debugctl;
+       struct cpu_hw_events *cpuc;
+       unsigned long flags;
+       int cpu, idx;
+
+       if (!x86_pmu.num_counters)
+               return;
+
+       local_irq_save(flags);
+
+       cpu = smp_processor_id();
+       cpuc = &per_cpu(cpu_hw_events, cpu);
+
+       if (x86_pmu.version >= 2) {
+               rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+               rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+               rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+               rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+
+               pr_info("\n");
+               pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
+               pr_info("CPU#%d: status:     %016llx\n", cpu, status);
+               pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
+               pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+               if (x86_pmu.pebs_constraints) {
+                       rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
+                       pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
+               }
+               if (x86_pmu.lbr_nr) {
+                       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+                       pr_info("CPU#%d: debugctl:   %016llx\n", cpu, debugctl);
+               }
+       }
+       pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+               rdmsrl(x86_pmu_event_addr(idx), pmc_count);
+
+               prev_left = per_cpu(pmc_prev_left[idx], cpu);
+
+               pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
+                       cpu, idx, pmc_ctrl);
+               pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
+                       cpu, idx, pmc_count);
+               pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
+                       cpu, idx, prev_left);
+       }
+       for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+               rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+
+               pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
+                       cpu, idx, pmc_count);
+       }
+       local_irq_restore(flags);
+}
+
+void x86_pmu_stop(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+               x86_pmu.disable(event);
+               cpuc->events[hwc->idx] = NULL;
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+       }
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               x86_perf_event_update(event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+}
+
+static void x86_pmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int i;
+
+       /*
+        * event is descheduled
+        */
+       event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+       /*
+        * If we're called during a txn, we don't need to do anything.
+        * The events never got scheduled and ->cancel_txn will truncate
+        * the event_list.
+        *
+        * XXX assumes any ->del() called during a TXN will only be on
+        * an event added during that same TXN.
+        */
+       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+               return;
+
+       /*
+        * Not a TXN, therefore cleanup properly.
+        */
+       x86_pmu_stop(event, PERF_EF_UPDATE);
+
+       for (i = 0; i < cpuc->n_events; i++) {
+               if (event == cpuc->event_list[i])
+                       break;
+       }
+
+       if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
+               return;
+
+       /* If we have a newly added event; make sure to decrease n_added. */
+       if (i >= cpuc->n_events - cpuc->n_added)
+               --cpuc->n_added;
+
+       if (x86_pmu.put_event_constraints)
+               x86_pmu.put_event_constraints(cpuc, event);
+
+       /* Delete the array entry. */
+       while (++i < cpuc->n_events) {
+               cpuc->event_list[i-1] = cpuc->event_list[i];
+               cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+       }
+       --cpuc->n_events;
+
+       perf_event_update_userpage(event);
+}
+
+int x86_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc;
+       struct perf_event *event;
+       int idx, handled = 0;
+       u64 val;
+
+       cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /*
+        * Some chipsets need to unmask the LVTPC in a particular spot
+        * inside the nmi handler.  As a result, the unmasking was pushed
+        * into all the nmi handlers.
+        *
+        * This generic handler doesn't seem to have any issues where the
+        * unmasking occurs so it was left at the top.
+        */
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               if (!test_bit(idx, cpuc->active_mask)) {
+                       /*
+                        * Though we deactivated the counter, some CPUs
+                        * might still deliver spurious interrupts that are
+                        * still in flight. Catch them:
+                        */
+                       if (__test_and_clear_bit(idx, cpuc->running))
+                               handled++;
+                       continue;
+               }
+
+               event = cpuc->events[idx];
+
+               val = x86_perf_event_update(event);
+               if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
+                       continue;
+
+               /*
+                * event overflow
+                */
+               handled++;
+               perf_sample_data_init(&data, 0, event->hw.last_period);
+
+               if (!x86_perf_event_set_period(event))
+                       continue;
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+       }
+
+       if (handled)
+               inc_irq_stat(apic_perf_irqs);
+
+       return handled;
+}
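
A note on the overflow test above (editorial, tied to the set_period example earlier): since counters are programmed to -left, a counter that has not yet overflowed still has the top bit of its cntval_bits-wide value set; once it wraps past zero that bit clears, so a clear top bit in the freshly read value marks an overflow that needs handling. With the assumed 48-bit counter and left == 200000:

        0x0000fffffffcf2c0      /* programmed value, bit 47 set            */
        0x0000ffffffffffff      /* after 199999 events, still no overflow  */
        0x0000000000000000      /* after 200000 events: wrapped, overflow  */
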
+
+void perf_events_lapic_init(void)
+{
+       if (!x86_pmu.apic || !x86_pmu_initialized())
+               return;
+
+       /*
+        * Always use NMI for PMU
+        */
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+
+static int
+perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+       u64 start_clock;
+       u64 finish_clock;
+       int ret;
+
+       /*
+        * All PMUs/events that share this PMI handler should make sure to
+        * increment active_events for their events.
+        */
+       if (!atomic_read(&active_events))
+               return NMI_DONE;
+
+       start_clock = sched_clock();
+       ret = x86_pmu.handle_irq(regs);
+       finish_clock = sched_clock();
+
+       perf_sample_event_took(finish_clock - start_clock);
+
+       return ret;
+}
+NOKPROBE_SYMBOL(perf_event_nmi_handler);
+
+struct event_constraint emptyconstraint;
+struct event_constraint unconstrained;
+
+static int
+x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       int i, ret = NOTIFY_OK;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+                       cpuc->kfree_on_online[i] = NULL;
+               if (x86_pmu.cpu_prepare)
+                       ret = x86_pmu.cpu_prepare(cpu);
+               break;
+
+       case CPU_STARTING:
+               if (x86_pmu.cpu_starting)
+                       x86_pmu.cpu_starting(cpu);
+               break;
+
+       case CPU_ONLINE:
+               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+                       kfree(cpuc->kfree_on_online[i]);
+                       cpuc->kfree_on_online[i] = NULL;
+               }
+               break;
+
+       case CPU_DYING:
+               if (x86_pmu.cpu_dying)
+                       x86_pmu.cpu_dying(cpu);
+               break;
+
+       case CPU_UP_CANCELED:
+       case CPU_DEAD:
+               if (x86_pmu.cpu_dead)
+                       x86_pmu.cpu_dead(cpu);
+               break;
+
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+static void __init pmu_check_apic(void)
+{
+       if (cpu_has_apic)
+               return;
+
+       x86_pmu.apic = 0;
+       pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+       pr_info("no hardware sampling interrupt available.\n");
+
+       /*
+        * If we have a PMU initialized but no APIC
+        * interrupts, we cannot sample hardware
+        * events (user-space has to fall back and
+        * sample via a hrtimer based software event):
+        */
+       pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+}
+
+static struct attribute_group x86_pmu_format_group = {
+       .name = "format",
+       .attrs = NULL,
+};
+
+/*
+ * Remove all undefined events (x86_pmu.event_map(id) == 0)
+ * out of events_attr attributes.
+ */
+static void __init filter_events(struct attribute **attrs)
+{
+       struct device_attribute *d;
+       struct perf_pmu_events_attr *pmu_attr;
+       int offset = 0;
+       int i, j;
+
+       for (i = 0; attrs[i]; i++) {
+               d = (struct device_attribute *)attrs[i];
+               pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+               /* str trumps id */
+               if (pmu_attr->event_str)
+                       continue;
+               if (x86_pmu.event_map(i + offset))
+                       continue;
+
+               for (j = i; attrs[j]; j++)
+                       attrs[j] = attrs[j + 1];
+
+               /* Check the shifted attr. */
+               i--;
+
+               /*
+                * event_map() is index based; the attrs array is organized
+                * by increasing event index. If we shift the events, we
+                * need to compensate for the event_map(), otherwise we
+                * are looking up the wrong event in the map
+                */
+               offset++;
+       }
+}
+
+/* Merge two pointer arrays */
+__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+       struct attribute **new;
+       int j, i;
+
+       for (j = 0; a[j]; j++)
+               ;
+       for (i = 0; b[i]; i++)
+               j++;
+       j++;
+
+       new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+       if (!new)
+               return NULL;
+
+       j = 0;
+       for (i = 0; a[i]; i++)
+               new[j++] = a[i];
+       for (i = 0; b[i]; i++)
+               new[j++] = b[i];
+       new[j] = NULL;
+
+       return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+                         char *page)
+{
+       struct perf_pmu_events_attr *pmu_attr = \
+               container_of(attr, struct perf_pmu_events_attr, attr);
+       u64 config = x86_pmu.event_map(pmu_attr->id);
+
+       /* string trumps id */
+       if (pmu_attr->event_str)
+               return sprintf(page, "%s", pmu_attr->event_str);
+
+       return x86_pmu.events_sysfs_show(page, config);
+}
+
+EVENT_ATTR(cpu-cycles,                 CPU_CYCLES              );
+EVENT_ATTR(instructions,               INSTRUCTIONS            );
+EVENT_ATTR(cache-references,           CACHE_REFERENCES        );
+EVENT_ATTR(cache-misses,               CACHE_MISSES            );
+EVENT_ATTR(branch-instructions,                BRANCH_INSTRUCTIONS     );
+EVENT_ATTR(branch-misses,              BRANCH_MISSES           );
+EVENT_ATTR(bus-cycles,                 BUS_CYCLES              );
+EVENT_ATTR(stalled-cycles-frontend,    STALLED_CYCLES_FRONTEND );
+EVENT_ATTR(stalled-cycles-backend,     STALLED_CYCLES_BACKEND  );
+EVENT_ATTR(ref-cycles,                 REF_CPU_CYCLES          );
+
+static struct attribute *empty_attrs;
+
+static struct attribute *events_attr[] = {
+       EVENT_PTR(CPU_CYCLES),
+       EVENT_PTR(INSTRUCTIONS),
+       EVENT_PTR(CACHE_REFERENCES),
+       EVENT_PTR(CACHE_MISSES),
+       EVENT_PTR(BRANCH_INSTRUCTIONS),
+       EVENT_PTR(BRANCH_MISSES),
+       EVENT_PTR(BUS_CYCLES),
+       EVENT_PTR(STALLED_CYCLES_FRONTEND),
+       EVENT_PTR(STALLED_CYCLES_BACKEND),
+       EVENT_PTR(REF_CPU_CYCLES),
+       NULL,
+};
+
+static struct attribute_group x86_pmu_events_group = {
+       .name = "events",
+       .attrs = events_attr,
+};
+
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
+{
+       u64 umask  = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+       u64 cmask  = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
+       bool edge  = (config & ARCH_PERFMON_EVENTSEL_EDGE);
+       bool pc    = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
+       bool any   = (config & ARCH_PERFMON_EVENTSEL_ANY);
+       bool inv   = (config & ARCH_PERFMON_EVENTSEL_INV);
+       ssize_t ret;
+
+       /*
+        * We have a whole page to spend and only a little data
+        * to write, so we can safely use sprintf().
+        */
+       ret = sprintf(page, "event=0x%02llx", event);
+
+       if (umask)
+               ret += sprintf(page + ret, ",umask=0x%02llx", umask);
+
+       if (edge)
+               ret += sprintf(page + ret, ",edge");
+
+       if (pc)
+               ret += sprintf(page + ret, ",pc");
+
+       if (any)
+               ret += sprintf(page + ret, ",any");
+
+       if (inv)
+               ret += sprintf(page + ret, ",inv");
+
+       if (cmask)
+               ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
+
+       ret += sprintf(page + ret, "\n");
+
+       return ret;
+}
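
An example of the string this produces (editorial; the config value is made up): for event == 0xc0 and a config whose umask byte is 0x01 and cmask byte is 0x02, the sysfs file would read:

        event=0xc0,umask=0x01,cmask=0x02
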
+
+static int __init init_hw_perf_events(void)
+{
+       struct x86_pmu_quirk *quirk;
+       int err;
+
+       pr_info("Performance Events: ");
+
+       switch (boot_cpu_data.x86_vendor) {
+       case X86_VENDOR_INTEL:
+               err = intel_pmu_init();
+               break;
+       case X86_VENDOR_AMD:
+               err = amd_pmu_init();
+               break;
+       default:
+               err = -ENOTSUPP;
+       }
+       if (err != 0) {
+               pr_cont("no PMU driver, software events only.\n");
+               return 0;
+       }
+
+       pmu_check_apic();
+
+       /* sanity check that the hardware exists or is emulated */
+       if (!check_hw_exists())
+               return 0;
+
+       pr_cont("%s PMU driver.\n", x86_pmu.name);
+
+       x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
+       for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
+               quirk->func();
+
+       if (!x86_pmu.intel_ctrl)
+               x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+       perf_events_lapic_init();
+       register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
+
+       unconstrained = (struct event_constraint)
+               __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+                                  0, x86_pmu.num_counters, 0, 0);
+
+       x86_pmu_format_group.attrs = x86_pmu.format_attrs;
+
+       if (x86_pmu.event_attrs)
+               x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
+       if (!x86_pmu.events_sysfs_show)
+               x86_pmu_events_group.attrs = &empty_attrs;
+       else
+               filter_events(x86_pmu_events_group.attrs);
+
+       if (x86_pmu.cpu_events) {
+               struct attribute **tmp;
+
+               tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+               if (!WARN_ON(!tmp))
+                       x86_pmu_events_group.attrs = tmp;
+       }
+
+       pr_info("... version:                %d\n",     x86_pmu.version);
+       pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
+       pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
+       pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
+       pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
+       pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
+       pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
+
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+       perf_cpu_notifier(x86_pmu_notifier);
+
+       return 0;
+}
+early_initcall(init_hw_perf_events);
+
+static inline void x86_pmu_read(struct perf_event *event)
+{
+       x86_perf_event_update(event);
+}
+
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test; it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
+ */
+static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       WARN_ON_ONCE(cpuc->txn_flags);          /* txn already in flight */
+
+       cpuc->txn_flags = txn_flags;
+       if (txn_flags & ~PERF_PMU_TXN_ADD)
+               return;
+
+       perf_pmu_disable(pmu);
+       __this_cpu_write(cpu_hw_events.n_txn, 0);
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+static void x86_pmu_cancel_txn(struct pmu *pmu)
+{
+       unsigned int txn_flags;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+       txn_flags = cpuc->txn_flags;
+       cpuc->txn_flags = 0;
+       if (txn_flags & ~PERF_PMU_TXN_ADD)
+               return;
+
+       /*
+        * Truncate collected array by the number of events added in this
+        * transaction. See x86_pmu_add() and x86_pmu_*_txn().
+        */
+       __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
+       __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
+       perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 on success.
+ *
+ * Does not cancel the transaction on failure; expects the caller to do this.
+ */
+static int x86_pmu_commit_txn(struct pmu *pmu)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int assign[X86_PMC_IDX_MAX];
+       int n, ret;
+
+       WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+       if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+               cpuc->txn_flags = 0;
+               return 0;
+       }
+
+       n = cpuc->n_events;
+
+       if (!x86_pmu_initialized())
+               return -EAGAIN;
+
+       ret = x86_pmu.schedule_events(cpuc, n, assign);
+       if (ret)
+               return ret;
+
+       /*
+        * copy the new assignment; now that we know it is possible,
+        * it will be used by hw_perf_enable()
+        */
+       memcpy(cpuc->assign, assign, n*sizeof(int));
+
+       cpuc->txn_flags = 0;
+       perf_pmu_enable(pmu);
+       return 0;
+}
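
For context (editorial sketch of the caller side, based on how the perf core drives PMU transactions): the core opens an ADD transaction, adds every group member without scheduling it, and only asks for the schedulability test at the end, which is why x86_pmu_add() defers the test while PERF_PMU_TXN_ADD is in flight:

        pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
        /* for each event in the group: */
        pmu->add(event, flags);                 /* x86_pmu_add(): collect only     */
        if (pmu->commit_txn(pmu))               /* x86_pmu_commit_txn(): schedule  */
                pmu->cancel_txn(pmu);           /* roll back n_added and n_events  */
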
+/*
+ * a fake_cpuc is used to validate event groups. Due to
+ * the extra reg logic, we need to also allocate a fake
+ * per_core and per_cpu structure. Otherwise, group events
+ * using extra reg may conflict without the kernel being
+ * able to catch this when the last event gets added to
+ * the group.
+ */
+static void free_fake_cpuc(struct cpu_hw_events *cpuc)
+{
+       kfree(cpuc->shared_regs);
+       kfree(cpuc);
+}
+
+static struct cpu_hw_events *allocate_fake_cpuc(void)
+{
+       struct cpu_hw_events *cpuc;
+       int cpu = raw_smp_processor_id();
+
+       cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
+       if (!cpuc)
+               return ERR_PTR(-ENOMEM);
+
+       /* only needed if we have extra_regs */
+       if (x86_pmu.extra_regs) {
+               cpuc->shared_regs = allocate_shared_regs(cpu);
+               if (!cpuc->shared_regs)
+                       goto error;
+       }
+       cpuc->is_fake = 1;
+       return cpuc;
+error:
+       free_fake_cpuc(cpuc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+       struct cpu_hw_events *fake_cpuc;
+       struct event_constraint *c;
+       int ret = 0;
+
+       fake_cpuc = allocate_fake_cpuc();
+       if (IS_ERR(fake_cpuc))
+               return PTR_ERR(fake_cpuc);
+
+       c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
+
+       if (!c || !c->weight)
+               ret = -EINVAL;
+
+       if (x86_pmu.put_event_constraints)
+               x86_pmu.put_event_constraints(fake_cpuc, event);
+
+       free_fake_cpuc(fake_cpuc);
+
+       return ret;
+}
+
+/*
+ * validate a single event group
+ *
+ * validation includes:
+ *     - check events are compatible with each other
+ *     - events do not compete for the same counter
+ *     - number of events <= number of counters
+ *
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
+static int validate_group(struct perf_event *event)
+{
+       struct perf_event *leader = event->group_leader;
+       struct cpu_hw_events *fake_cpuc;
+       int ret = -EINVAL, n;
+
+       fake_cpuc = allocate_fake_cpuc();
+       if (IS_ERR(fake_cpuc))
+               return PTR_ERR(fake_cpuc);
+       /*
+        * The event is not yet connected with its
+        * siblings; therefore we must first collect
+        * the existing siblings, then add the new event
+        * before we can simulate the scheduling.
+        */
+       n = collect_events(fake_cpuc, leader, true);
+       if (n < 0)
+               goto out;
+
+       fake_cpuc->n_events = n;
+       n = collect_events(fake_cpuc, event, false);
+       if (n < 0)
+               goto out;
+
+       fake_cpuc->n_events = n;
+
+       ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
+
+out:
+       free_fake_cpuc(fake_cpuc);
+       return ret;
+}
+
+static int x86_pmu_event_init(struct perf_event *event)
+{
+       struct pmu *tmp;
+       int err;
+
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
+       err = __x86_pmu_event_init(event);
+       if (!err) {
+               /*
+                * we temporarily connect event to its pmu
+                * such that validate_group() can classify
+                * it as an x86 event using is_x86_event()
+                */
+               tmp = event->pmu;
+               event->pmu = &pmu;
+
+               if (event->group_leader != event)
+                       err = validate_group(event);
+               else
+                       err = validate_event(event);
+
+               event->pmu = tmp;
+       }
+       if (err) {
+               if (event->destroy)
+                       event->destroy(event);
+       }
+
+       if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+               event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
+       return err;
+}
+
+static void refresh_pce(void *ignored)
+{
+       if (current->mm)
+               load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+               return;
+
+       if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+       if (!current->mm)
+               return;
+
+       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+               return;
+
+       if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
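+
+/*
+ * Background note (hedged): load_mm_cr4() above recomputes CR4.PCE from
+ * mm->context.perf_rdpmc_allowed and the rdpmc_always_available key, so
+ * a user-space RDPMC will #GP unless the task has an RDPMC-capable event
+ * mapped or the rdpmc attribute below is set to "always available".
+ */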
+
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+       int idx = event->hw.idx;
+
+       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+               return 0;
+
+       if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
+               idx -= INTEL_PMC_IDX_FIXED;
+               idx |= 1 << 30;
+       }
+
+       return idx + 1;
+}
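+
+/*
+ * Hedged usage sketch: the index computed above reaches user space as
+ * "index" in the mmap()ed perf_event_mmap_page.  A self-monitoring task
+ * could read its counter roughly like this (the surrounding seqlock
+ * handling is omitted):
+ *
+ *     unsigned int idx = userpg->index;   // 0: RDPMC not usable here
+ *     if (idx)
+ *             count = userpg->offset + __builtin_ia32_rdpmc(idx - 1);
+ *
+ * Bit 30 in the index selects the fixed-function counter space, matching
+ * what RDPMC expects in ECX.
+ */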
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+                             struct device_attribute *attr,
+                             char *buf)
+{
+       return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+                             struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       unsigned long val;
+       ssize_t ret;
+
+       ret = kstrtoul(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       if (val > 2)
+               return -EINVAL;
+
+       if (x86_pmu.attr_rdpmc_broken)
+               return -ENOTSUPP;
+
+       if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+               /*
+                * Changing into or out of always available, aka
+                * perf-event-bypassing mode.  This path is extremely slow,
+                * but only root can trigger it, so it's okay.
+                */
+               if (val == 2)
+                       static_key_slow_inc(&rdpmc_always_available);
+               else
+                       static_key_slow_dec(&rdpmc_always_available);
+               on_each_cpu(refresh_pce, NULL, 1);
+       }
+
+       x86_pmu.attr_rdpmc = val;
+
+       return count;
+}
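+
+/*
+ * Intended administration interface, sketched (the sysfs path is an
+ * assumption based on how the "cpu" PMU registers this attribute group):
+ *
+ *     # cat  /sys/bus/event_source/devices/cpu/rdpmc
+ *     # echo 2 > /sys/bus/event_source/devices/cpu/rdpmc
+ *
+ * 0 disables user-space RDPMC, 1 (the default) allows it only for tasks
+ * with an RDPMC-capable event mapped, 2 allows it unconditionally.
+ */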
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+       &dev_attr_rdpmc.attr,
+       NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+       .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+       &x86_pmu_attr_group,
+       &x86_pmu_format_group,
+       &x86_pmu_events_group,
+       NULL,
+};
+
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       if (x86_pmu.sched_task)
+               x86_pmu.sched_task(ctx, sched_in);
+}
+
+void perf_check_microcode(void)
+{
+       if (x86_pmu.check_microcode)
+               x86_pmu.check_microcode();
+}
+EXPORT_SYMBOL_GPL(perf_check_microcode);
+
+static struct pmu pmu = {
+       .pmu_enable             = x86_pmu_enable,
+       .pmu_disable            = x86_pmu_disable,
+
+       .attr_groups            = x86_pmu_attr_groups,
+
+       .event_init             = x86_pmu_event_init,
+
+       .event_mapped           = x86_pmu_event_mapped,
+       .event_unmapped         = x86_pmu_event_unmapped,
+
+       .add                    = x86_pmu_add,
+       .del                    = x86_pmu_del,
+       .start                  = x86_pmu_start,
+       .stop                   = x86_pmu_stop,
+       .read                   = x86_pmu_read,
+
+       .start_txn              = x86_pmu_start_txn,
+       .cancel_txn             = x86_pmu_cancel_txn,
+       .commit_txn             = x86_pmu_commit_txn,
+
+       .event_idx              = x86_pmu_event_idx,
+       .sched_task             = x86_pmu_sched_task,
+       .task_ctx_size          = sizeof(struct x86_perf_task_context),
+};
+
+void arch_perf_update_userpage(struct perf_event *event,
+                              struct perf_event_mmap_page *userpg, u64 now)
+{
+       struct cyc2ns_data *data;
+
+       userpg->cap_user_time = 0;
+       userpg->cap_user_time_zero = 0;
+       userpg->cap_user_rdpmc =
+               !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
+       userpg->pmc_width = x86_pmu.cntval_bits;
+
+       if (!sched_clock_stable())
+               return;
+
+       data = cyc2ns_read_begin();
+
+       /*
+        * Internal timekeeping for enabled/running/stopped times
+        * is always in the local_clock domain.
+        */
+       userpg->cap_user_time = 1;
+       userpg->time_mult = data->cyc2ns_mul;
+       userpg->time_shift = data->cyc2ns_shift;
+       userpg->time_offset = data->cyc2ns_offset - now;
+
+       /*
+        * cap_user_time_zero doesn't make sense when we're using a different
+        * time base for the records.
+        */
+       if (event->clock == &local_clock) {
+               userpg->cap_user_time_zero = 1;
+               userpg->time_zero = data->cyc2ns_offset;
+       }
+
+       cyc2ns_read_end(data);
+}
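+
+/*
+ * Hedged sketch of the user-space side: with cap_user_time set, a
+ * monitoring task can turn a TSC delta into nanoseconds from the
+ * mult/shift/offset published above (this mirrors the documented mmap
+ * ABI; the surrounding seqlock loop is omitted):
+ *
+ *     u64 quot  = cyc >> userpg->time_shift;
+ *     u64 rem   = cyc & (((u64)1 << userpg->time_shift) - 1);
+ *     u64 delta = userpg->time_offset + quot * userpg->time_mult +
+ *                 ((rem * userpg->time_mult) >> userpg->time_shift);
+ */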
+
+/*
+ * callchain support
+ */
+
+static int backtrace_stack(void *data, char *name)
+{
+       return 0;
+}
+
+static void backtrace_address(void *data, unsigned long addr, int reliable)
+{
+       struct perf_callchain_entry *entry = data;
+
+       perf_callchain_store(entry, addr);
+}
+
+static const struct stacktrace_ops backtrace_ops = {
+       .stack                  = backtrace_stack,
+       .address                = backtrace_address,
+       .walk_stack             = print_context_stack_bp,
+};
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest OS callchains yet */
+               return;
+       }
+
+       perf_callchain_store(entry, regs->ip);
+
+       dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
+
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+       return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
+static unsigned long get_segment_base(unsigned int segment)
+{
+       struct desc_struct *desc;
+       int idx = segment >> 3;
+
+       if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+               struct ldt_struct *ldt;
+
+               if (idx >= LDT_ENTRIES)
+                       return 0;
+
+               /* IRQs are off, so this synchronizes with smp_store_release */
+               ldt = lockless_dereference(current->active_mm->context.ldt);
+               if (!ldt || idx >= ldt->size)
+                       return 0;
+
+               desc = &ldt->entries[idx];
+#else
+               return 0;
+#endif
+       } else {
+               if (idx >= GDT_ENTRIES)
+                       return 0;
+
+               desc = raw_cpu_ptr(gdt_page.gdt) + idx;
+       }
+
+       return get_desc_base(desc);
+}
+
+#ifdef CONFIG_IA32_EMULATION
+
+#include <asm/compat.h>
+
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+       /* 32-bit process in 64-bit kernel. */
+       unsigned long ss_base, cs_base;
+       struct stack_frame_ia32 frame;
+       const void __user *fp;
+
+       if (!test_thread_flag(TIF_IA32))
+               return 0;
+
+       cs_base = get_segment_base(regs->cs);
+       ss_base = get_segment_base(regs->ss);
+
+       fp = compat_ptr(ss_base + regs->bp);
+       pagefault_disable();
+       while (entry->nr < PERF_MAX_STACK_DEPTH) {
+               unsigned long bytes;
+               frame.next_frame     = 0;
+               frame.return_address = 0;
+
+               if (!access_ok(VERIFY_READ, fp, 8))
+                       break;
+
+               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
+               if (bytes != 0)
+                       break;
+               bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
+               if (bytes != 0)
+                       break;
+
+               if (!valid_user_frame(fp, sizeof(frame)))
+                       break;
+
+               perf_callchain_store(entry, cs_base + frame.return_address);
+               fp = compat_ptr(ss_base + frame.next_frame);
+       }
+       pagefault_enable();
+       return 1;
+}
+#else
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+       return 0;
+}
+#endif
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct stack_frame frame;
+       const void __user *fp;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest OS callchains yet */
+               return;
+       }
+
+       /*
+        * We don't know what to do with VM86 stacks... ignore them for now.
+        */
+       if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+               return;
+
+       fp = (void __user *)regs->bp;
+
+       perf_callchain_store(entry, regs->ip);
+
+       if (!current->mm)
+               return;
+
+       if (perf_callchain_user32(regs, entry))
+               return;
+
+       pagefault_disable();
+       while (entry->nr < PERF_MAX_STACK_DEPTH) {
+               unsigned long bytes;
+               frame.next_frame             = NULL;
+               frame.return_address = 0;
+
+               if (!access_ok(VERIFY_READ, fp, 16))
+                       break;
+
+               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
+               if (bytes != 0)
+                       break;
+               bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
+               if (bytes != 0)
+                       break;
+
+               if (!valid_user_frame(fp, sizeof(frame)))
+                       break;
+
+               perf_callchain_store(entry, frame.return_address);
+               fp = (void __user *)frame.next_frame;
+       }
+       pagefault_enable();
+}
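+
+/*
+ * Frame layout assumed by the walker above (x86_64 built with frame
+ * pointers); this is the usual ABI convention, noted for illustration:
+ *
+ *     fp + 0 : saved caller frame pointer  -> frame.next_frame
+ *     fp + 8 : return address              -> frame.return_address
+ *
+ * hence the 16-byte access_ok() check and the two 8-byte copies per
+ * iteration.
+ */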
+
+/*
+ * Deal with code segment offsets for the various execution modes:
+ *
+ *   VM86 - the good olde 16 bit days, where the linear address is
+ *          20 bits and we use regs->ip + 0x10 * regs->cs.
+ *
+ *   IA32 - Where we need to look at GDT/LDT segment descriptor tables
+ *          to figure out what the 32bit base address is.
+ *
+ *    X32 - has TIF_X32 set, but is running in x86_64
+ *
+ * X86_64 - CS,DS,SS,ES are all zero based.
+ */
+static unsigned long code_segment_base(struct pt_regs *regs)
+{
+       /*
+        * For IA32 we look at the GDT/LDT segment base to convert the
+        * effective IP to a linear address.
+        */
+
+#ifdef CONFIG_X86_32
+       /*
+        * If we are in VM86 mode, add the segment offset to convert to a
+        * linear address.
+        */
+       if (regs->flags & X86_VM_MASK)
+               return 0x10 * regs->cs;
+
+       if (user_mode(regs) && regs->cs != __USER_CS)
+               return get_segment_base(regs->cs);
+#else
+       if (user_mode(regs) && !user_64bit_mode(regs) &&
+           regs->cs != __USER32_CS)
+               return get_segment_base(regs->cs);
+#endif
+       return 0;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+               return perf_guest_cbs->get_guest_ip();
+
+       return regs->ip + code_segment_base(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+       int misc = 0;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               if (perf_guest_cbs->is_user_mode())
+                       misc |= PERF_RECORD_MISC_GUEST_USER;
+               else
+                       misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+       } else {
+               if (user_mode(regs))
+                       misc |= PERF_RECORD_MISC_USER;
+               else
+                       misc |= PERF_RECORD_MISC_KERNEL;
+       }
+
+       if (regs->flags & PERF_EFLAGS_EXACT)
+               misc |= PERF_RECORD_MISC_EXACT_IP;
+
+       return misc;
+}
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+       cap->version            = x86_pmu.version;
+       cap->num_counters_gp    = x86_pmu.num_counters;
+       cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+       cap->bit_width_gp       = x86_pmu.cntval_bits;
+       cap->bit_width_fixed    = x86_pmu.cntval_bits;
+       cap->events_mask        = (unsigned int)x86_pmu.events_maskl;
+       cap->events_mask_len    = x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
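+
+/*
+ * Hedged usage note: an in-kernel consumer (KVM's vPMU is the usual
+ * example) can size what it exposes from this, e.g.:
+ *
+ *     struct x86_pmu_capability cap;
+ *
+ *     perf_get_x86_pmu_capability(&cap);
+ *     nr_gp_counters = cap.num_counters_gp;
+ */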
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
new file mode 100644
index 0000000..b99dc92
--- /dev/null
@@ -0,0 +1,544 @@
+/*
+ * BTS PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/coredump.h>
+
+#include <asm-generic/sizes.h>
+#include <asm/perf_event.h>
+
+#include "../perf_event.h"
+
+struct bts_ctx {
+       struct perf_output_handle       handle;
+       struct debug_store              ds_back;
+       int                             started;
+};
+
+static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
+
+#define BTS_RECORD_SIZE                24
+#define BTS_SAFETY_MARGIN      4080
+
+struct bts_phys {
+       struct page     *page;
+       unsigned long   size;
+       unsigned long   offset;
+       unsigned long   displacement;
+};
+
+struct bts_buffer {
+       size_t          real_size;      /* multiple of BTS_RECORD_SIZE */
+       unsigned int    nr_pages;
+       unsigned int    nr_bufs;
+       unsigned int    cur_buf;
+       bool            snapshot;
+       local_t         data_size;
+       local_t         lost;
+       local_t         head;
+       unsigned long   end;
+       void            **data_pages;
+       struct bts_phys buf[0];
+};
+
+struct pmu bts_pmu;
+
+static size_t buf_size(struct page *page)
+{
+       return 1 << (PAGE_SHIFT + page_private(page));
+}
+
+static void *
+bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+{
+       struct bts_buffer *buf;
+       struct page *page;
+       int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+       unsigned long offset;
+       size_t size = nr_pages << PAGE_SHIFT;
+       int pg, nbuf, pad;
+
+       /* count all the high order buffers */
+       for (pg = 0, nbuf = 0; pg < nr_pages;) {
+               page = virt_to_page(pages[pg]);
+               if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+                       return NULL;
+               pg += 1 << page_private(page);
+               nbuf++;
+       }
+
+       /*
+        * to avoid interrupts in overwrite mode, only allow one physical buffer
+        */
+       if (overwrite && nbuf > 1)
+               return NULL;
+
+       buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
+       if (!buf)
+               return NULL;
+
+       buf->nr_pages = nr_pages;
+       buf->nr_bufs = nbuf;
+       buf->snapshot = overwrite;
+       buf->data_pages = pages;
+       buf->real_size = size - size % BTS_RECORD_SIZE;
+
+       for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+               unsigned int __nr_pages;
+
+               page = virt_to_page(pages[pg]);
+               __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+               buf->buf[nbuf].page = page;
+               buf->buf[nbuf].offset = offset;
+               buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+               buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
+               pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
+               buf->buf[nbuf].size -= pad;
+
+               pg += __nr_pages;
+               offset += __nr_pages << PAGE_SHIFT;
+       }
+
+       return buf;
+}
+
+static void bts_buffer_free_aux(void *data)
+{
+       kfree(data);
+}
+
+static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+{
+       return buf->buf[idx].offset + buf->buf[idx].displacement;
+}
+
+static void
+bts_config_buffer(struct bts_buffer *buf)
+{
+       int cpu = raw_smp_processor_id();
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       struct bts_phys *phys = &buf->buf[buf->cur_buf];
+       unsigned long index, thresh = 0, end = phys->size;
+       struct page *page = phys->page;
+
+       index = local_read(&buf->head);
+
+       if (!buf->snapshot) {
+               if (buf->end < phys->offset + buf_size(page))
+                       end = buf->end - phys->offset - phys->displacement;
+
+               index -= phys->offset + phys->displacement;
+
+               if (end - index > BTS_SAFETY_MARGIN)
+                       thresh = end - BTS_SAFETY_MARGIN;
+               else if (end - index > BTS_RECORD_SIZE)
+                       thresh = end - BTS_RECORD_SIZE;
+               else
+                       thresh = end;
+       }
+
+       ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
+       ds->bts_index = ds->bts_buffer_base + index;
+       ds->bts_absolute_maximum = ds->bts_buffer_base + end;
+       ds->bts_interrupt_threshold = !buf->snapshot
+               ? ds->bts_buffer_base + thresh
+               : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
+}
+
+static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
+{
+       unsigned long index = head - phys->offset;
+
+       memset(page_address(phys->page) + index, 0, phys->size - index);
+}
+
+static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
+{
+       if (buf->snapshot)
+               return false;
+
+       if (local_read(&buf->data_size) >= bts->handle.size ||
+           bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
+               return true;
+
+       return false;
+}
+
+static void bts_update(struct bts_ctx *bts)
+{
+       int cpu = raw_smp_processor_id();
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       struct bts_buffer *buf = perf_get_aux(&bts->handle);
+       unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
+
+       if (!buf)
+               return;
+
+       head = index + bts_buffer_offset(buf, buf->cur_buf);
+       old = local_xchg(&buf->head, head);
+
+       if (!buf->snapshot) {
+               if (old == head)
+                       return;
+
+               if (ds->bts_index >= ds->bts_absolute_maximum)
+                       local_inc(&buf->lost);
+
+               /*
+                * old and head are always in the same physical buffer, so we
+                * can subtract them to get the data size.
+                */
+               local_add(head - old, &buf->data_size);
+       } else {
+               local_set(&buf->data_size, head);
+       }
+}
+
+static void __bts_event_start(struct perf_event *event)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct bts_buffer *buf = perf_get_aux(&bts->handle);
+       u64 config = 0;
+
+       if (!buf || bts_buffer_is_full(buf, bts))
+               return;
+
+       event->hw.itrace_started = 1;
+       event->hw.state = 0;
+
+       if (!buf->snapshot)
+               config |= ARCH_PERFMON_EVENTSEL_INT;
+       if (!event->attr.exclude_kernel)
+               config |= ARCH_PERFMON_EVENTSEL_OS;
+       if (!event->attr.exclude_user)
+               config |= ARCH_PERFMON_EVENTSEL_USR;
+
+       bts_config_buffer(buf);
+
+       /*
+        * local barrier to make sure that ds configuration made it
+        * before we enable BTS
+        */
+       wmb();
+
+       intel_pmu_enable_bts(config);
+}
+
+static void bts_event_start(struct perf_event *event, int flags)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       __bts_event_start(event);
+
+       /* PMI handler: this counter is running and likely generating PMIs */
+       ACCESS_ONCE(bts->started) = 1;
+}
+
+static void __bts_event_stop(struct perf_event *event)
+{
+       /*
+        * No extra synchronization is mandated by the documentation to have
+        * BTS data stores globally visible.
+        */
+       intel_pmu_disable_bts();
+
+       if (event->hw.state & PERF_HES_STOPPED)
+               return;
+
+       ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
+}
+
+static void bts_event_stop(struct perf_event *event, int flags)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       /* PMI handler: don't restart this counter */
+       ACCESS_ONCE(bts->started) = 0;
+
+       __bts_event_stop(event);
+
+       if (flags & PERF_EF_UPDATE)
+               bts_update(bts);
+}
+
+void intel_bts_enable_local(void)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       if (bts->handle.event && bts->started)
+               __bts_event_start(bts->handle.event);
+}
+
+void intel_bts_disable_local(void)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       if (bts->handle.event)
+               __bts_event_stop(bts->handle.event);
+}
+
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
+{
+       unsigned long head, space, next_space, pad, gap, skip, wakeup;
+       unsigned int next_buf;
+       struct bts_phys *phys, *next_phys;
+       int ret;
+
+       if (buf->snapshot)
+               return 0;
+
+       head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+       if (WARN_ON_ONCE(head != local_read(&buf->head)))
+               return -EINVAL;
+
+       phys = &buf->buf[buf->cur_buf];
+       space = phys->offset + phys->displacement + phys->size - head;
+       pad = space;
+       if (space > handle->size) {
+               space = handle->size;
+               space -= space % BTS_RECORD_SIZE;
+       }
+       if (space <= BTS_SAFETY_MARGIN) {
+               /* See if next phys buffer has more space */
+               next_buf = buf->cur_buf + 1;
+               if (next_buf >= buf->nr_bufs)
+                       next_buf = 0;
+               next_phys = &buf->buf[next_buf];
+               gap = buf_size(phys->page) - phys->displacement - phys->size +
+                     next_phys->displacement;
+               skip = pad + gap;
+               if (handle->size >= skip) {
+                       next_space = next_phys->size;
+                       if (next_space + skip > handle->size) {
+                               next_space = handle->size - skip;
+                               next_space -= next_space % BTS_RECORD_SIZE;
+                       }
+                       if (next_space > space || !space) {
+                               if (pad)
+                                       bts_buffer_pad_out(phys, head);
+                               ret = perf_aux_output_skip(handle, skip);
+                               if (ret)
+                                       return ret;
+                               /* Advance to next phys buffer */
+                               phys = next_phys;
+                               space = next_space;
+                               head = phys->offset + phys->displacement;
+                               /*
+                                * After this, cur_buf and head won't match ds
+                                * anymore, so we must not be racing with
+                                * bts_update().
+                                */
+                               buf->cur_buf = next_buf;
+                               local_set(&buf->head, head);
+                       }
+               }
+       }
+
+       /* Don't go far beyond wakeup watermark */
+       wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
+                handle->head;
+       if (space > wakeup) {
+               space = wakeup;
+               space -= space % BTS_RECORD_SIZE;
+       }
+
+       buf->end = head + space;
+
+       /*
+        * If we have no space, the lost notification would have been sent when
+        * we hit absolute_maximum - see bts_update()
+        */
+       if (!space)
+               return -ENOSPC;
+
+       return 0;
+}
+
+int intel_bts_interrupt(void)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct perf_event *event = bts->handle.event;
+       struct bts_buffer *buf;
+       s64 old_head;
+       int err;
+
+       if (!event || !bts->started)
+               return 0;
+
+       buf = perf_get_aux(&bts->handle);
+       /*
+        * Skip snapshot counters: they don't use the interrupt, but
+        * there's no other way of telling, because the pointer will
+        * keep moving
+        */
+       if (!buf || buf->snapshot)
+               return 0;
+
+       old_head = local_read(&buf->head);
+       bts_update(bts);
+
+       /* no new data */
+       if (old_head == local_read(&buf->head))
+               return 0;
+
+       perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+                           !!local_xchg(&buf->lost, 0));
+
+       buf = perf_aux_output_begin(&bts->handle, event);
+       if (!buf)
+               return 1;
+
+       err = bts_buffer_reset(buf, &bts->handle);
+       if (err)
+               perf_aux_output_end(&bts->handle, 0, false);
+
+       return 1;
+}
+
+static void bts_event_del(struct perf_event *event, int mode)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct bts_buffer *buf = perf_get_aux(&bts->handle);
+
+       bts_event_stop(event, PERF_EF_UPDATE);
+
+       if (buf) {
+               if (buf->snapshot)
+                       bts->handle.head =
+                               local_xchg(&buf->data_size,
+                                          buf->nr_pages << PAGE_SHIFT);
+               perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+                                   !!local_xchg(&buf->lost, 0));
+       }
+
+       cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+       cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+       cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+       cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+}
+
+static int bts_event_add(struct perf_event *event, int mode)
+{
+       struct bts_buffer *buf;
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int ret = -EBUSY;
+
+       event->hw.state = PERF_HES_STOPPED;
+
+       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+               return -EBUSY;
+
+       if (bts->handle.event)
+               return -EBUSY;
+
+       buf = perf_aux_output_begin(&bts->handle, event);
+       if (!buf)
+               return -EINVAL;
+
+       ret = bts_buffer_reset(buf, &bts->handle);
+       if (ret) {
+               perf_aux_output_end(&bts->handle, 0, false);
+               return ret;
+       }
+
+       bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+       bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+       bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+       if (mode & PERF_EF_START) {
+               bts_event_start(event, 0);
+               if (hwc->state & PERF_HES_STOPPED) {
+                       bts_event_del(event, 0);
+                       return -EBUSY;
+               }
+       }
+
+       return 0;
+}
+
+static void bts_event_destroy(struct perf_event *event)
+{
+       x86_release_hardware();
+       x86_del_exclusive(x86_lbr_exclusive_bts);
+}
+
+static int bts_event_init(struct perf_event *event)
+{
+       int ret;
+
+       if (event->attr.type != bts_pmu.type)
+               return -ENOENT;
+
+       if (x86_add_exclusive(x86_lbr_exclusive_bts))
+               return -EBUSY;
+
+       /*
+        * BTS leaks kernel addresses even when CPL0 tracing is
+        * disabled, so disallow intel_bts driver for unprivileged
+        * users on paranoid systems since it provides trace data
+        * to the user in a zero-copy fashion.
+        *
+        * Note that the default paranoia setting permits unprivileged
+        * users to profile the kernel.
+        */
+       if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
+           !capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       ret = x86_reserve_hardware();
+       if (ret) {
+               x86_del_exclusive(x86_lbr_exclusive_bts);
+               return ret;
+       }
+
+       event->destroy = bts_event_destroy;
+
+       return 0;
+}
+
+static void bts_event_read(struct perf_event *event)
+{
+}
+
+static __init int bts_init(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+               return -ENODEV;
+
+       bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
+       bts_pmu.task_ctx_nr     = perf_sw_context;
+       bts_pmu.event_init      = bts_event_init;
+       bts_pmu.add             = bts_event_add;
+       bts_pmu.del             = bts_event_del;
+       bts_pmu.start           = bts_event_start;
+       bts_pmu.stop            = bts_event_stop;
+       bts_pmu.read            = bts_event_read;
+       bts_pmu.setup_aux       = bts_buffer_setup_aux;
+       bts_pmu.free_aux        = bts_buffer_free_aux;
+
+       return perf_pmu_register(&bts_pmu, "intel_bts", -1);
+}
+arch_initcall(bts_init);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
new file mode 100644 (file)
index 0000000..68fa55b
--- /dev/null
@@ -0,0 +1,3796 @@
+/*
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/nmi.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+/*
+ * Intel PerfMon, used on Core and later.
+ */
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
+{
+       [PERF_COUNT_HW_CPU_CYCLES]              = 0x003c,
+       [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
+       [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x4f2e,
+       [PERF_COUNT_HW_CACHE_MISSES]            = 0x412e,
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
+       [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
+       [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
+       [PERF_COUNT_HW_REF_CPU_CYCLES]          = 0x0300, /* pseudo-encoding */
+};
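+
+/*
+ * Reading the encodings above (hedged, per the architectural PerfMon
+ * event list): the low byte is the event select and the next byte the
+ * unit mask, e.g. 0x003c is event 0x3c/umask 0x00 (unhalted core
+ * cycles) and 0x412e is event 0x2e/umask 0x41 (LLC misses).  0x0300
+ * only exists as the pseudo-encoding backing fixed counter 2.
+ */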
+
+static struct event_constraint intel_core_event_constraints[] __read_mostly =
+{
+       INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+       INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+       INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+       INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+       INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+       INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_core2_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+       INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+       INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+       INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+       INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+       INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+       INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+       INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+       INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
+       INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+       INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+       INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+       INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+       INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+       INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+       INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+       INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
+{
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
+       EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+       INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+       INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+       INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_snb_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+       INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+       INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+       INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
+       INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
+       INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+       INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
+{
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
+       EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_v1_event_constraints[] __read_mostly =
+{
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_slm_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_skl_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),    /* INST_RETIRED.PREC_DIST */
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
+       INTEL_UEVENT_EXTRA_REG(0x01b7,
+                              MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7,
+                              MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+       EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+       EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+       EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+       /*
+        * Note that the low 8 bits of the eventsel code are not a continuous
+        * field; they contain some bits that #GP when set. These are masked out.
+        */
+       INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+       EVENT_EXTRA_END
+};
+
+EVENT_ATTR_STR(mem-loads,      mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
+
+struct attribute *nhm_events_attrs[] = {
+       EVENT_PTR(mem_ld_nhm),
+       NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+       EVENT_PTR(mem_ld_snb),
+       EVENT_PTR(mem_st_snb),
+       NULL,
+};
+
+static struct event_constraint intel_hsw_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+       /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
+       /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
+       /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_bdw_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
+       INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4),        /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
+       EVENT_CONSTRAINT_END
+};
+
+static u64 intel_pmu_event_map(int hw_event)
+{
+       return intel_perfmon_event_map[hw_event];
+}
+
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts.
+ * - icache miss does not include decoded icache
+ */
+
+#define SKL_DEMAND_DATA_RD             BIT_ULL(0)
+#define SKL_DEMAND_RFO                 BIT_ULL(1)
+#define SKL_ANY_RESPONSE               BIT_ULL(16)
+#define SKL_SUPPLIER_NONE              BIT_ULL(17)
+#define SKL_L3_MISS_LOCAL_DRAM         BIT_ULL(26)
+#define SKL_L3_MISS_REMOTE_HOP0_DRAM   BIT_ULL(27)
+#define SKL_L3_MISS_REMOTE_HOP1_DRAM   BIT_ULL(28)
+#define SKL_L3_MISS_REMOTE_HOP2P_DRAM  BIT_ULL(29)
+#define SKL_L3_MISS                    (SKL_L3_MISS_LOCAL_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+#define SKL_SPL_HIT                    BIT_ULL(30)
+#define SKL_SNOOP_NONE                 BIT_ULL(31)
+#define SKL_SNOOP_NOT_NEEDED           BIT_ULL(32)
+#define SKL_SNOOP_MISS                 BIT_ULL(33)
+#define SKL_SNOOP_HIT_NO_FWD           BIT_ULL(34)
+#define SKL_SNOOP_HIT_WITH_FWD         BIT_ULL(35)
+#define SKL_SNOOP_HITM                 BIT_ULL(36)
+#define SKL_SNOOP_NON_DRAM             BIT_ULL(37)
+#define SKL_ANY_SNOOP                  (SKL_SPL_HIT|SKL_SNOOP_NONE| \
+                                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+                                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+                                        SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
+#define SKL_DEMAND_READ                        SKL_DEMAND_DATA_RD
+#define SKL_SNOOP_DRAM                 (SKL_SNOOP_NONE| \
+                                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+                                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+                                        SKL_SNOOP_HITM|SKL_SPL_HIT)
+#define SKL_DEMAND_WRITE               SKL_DEMAND_RFO
+#define SKL_LLC_ACCESS                 SKL_ANY_RESPONSE
+#define SKL_L3_MISS_REMOTE             (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+
+static __initconst const u64 skl_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x283,   /* ICACHE_64B.MISS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x608,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x649,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2085,  /* ITLB_MISSES.STLB_HIT */
+               [ C(RESULT_MISS)   ] = 0xe85,   /* ITLB_MISSES.WALK_COMPLETED */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+};
+
+static __initconst const u64 skl_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+                                      SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+               [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
+                                      SKL_L3_MISS|SKL_ANY_SNOOP|
+                                      SKL_SUPPLIER_NONE,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+                                      SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+               [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
+                                      SKL_L3_MISS|SKL_ANY_SNOOP|
+                                      SKL_SUPPLIER_NONE,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+                                      SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+               [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
+                                      SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+                                      SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+               [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
+                                      SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+};
+
+#define SNB_DMND_DATA_RD       (1ULL << 0)
+#define SNB_DMND_RFO           (1ULL << 1)
+#define SNB_DMND_IFETCH                (1ULL << 2)
+#define SNB_DMND_WB            (1ULL << 3)
+#define SNB_PF_DATA_RD         (1ULL << 4)
+#define SNB_PF_RFO             (1ULL << 5)
+#define SNB_PF_IFETCH          (1ULL << 6)
+#define SNB_LLC_DATA_RD                (1ULL << 7)
+#define SNB_LLC_RFO            (1ULL << 8)
+#define SNB_LLC_IFETCH         (1ULL << 9)
+#define SNB_BUS_LOCKS          (1ULL << 10)
+#define SNB_STRM_ST            (1ULL << 11)
+#define SNB_OTHER              (1ULL << 15)
+#define SNB_RESP_ANY           (1ULL << 16)
+#define SNB_NO_SUPP            (1ULL << 17)
+#define SNB_LLC_HITM           (1ULL << 18)
+#define SNB_LLC_HITE           (1ULL << 19)
+#define SNB_LLC_HITS           (1ULL << 20)
+#define SNB_LLC_HITF           (1ULL << 21)
+#define SNB_LOCAL              (1ULL << 22)
+#define SNB_REMOTE             (0xffULL << 23)
+#define SNB_SNP_NONE           (1ULL << 31)
+#define SNB_SNP_NOT_NEEDED     (1ULL << 32)
+#define SNB_SNP_MISS           (1ULL << 33)
+#define SNB_NO_FWD             (1ULL << 34)
+#define SNB_SNP_FWD            (1ULL << 35)
+#define SNB_HITM               (1ULL << 36)
+#define SNB_NON_DRAM           (1ULL << 37)
+
+#define SNB_DMND_READ          (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
+#define SNB_DMND_WRITE         (SNB_DMND_RFO|SNB_LLC_RFO)
+#define SNB_DMND_PREFETCH      (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SNB_SNP_ANY            (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
+                                SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
+                                SNB_HITM)
+
+#define SNB_DRAM_ANY           (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
+#define SNB_DRAM_REMOTE                (SNB_REMOTE|SNB_SNP_ANY)
+
+#define SNB_L3_ACCESS          SNB_RESP_ANY
+#define SNB_L3_MISS            (SNB_DRAM_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 snb_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
+               [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
+               [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
+               [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
+       },
+ },
+};
+
+static __initconst const u64 snb_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
+               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
+               [ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
+               [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+};
+
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts because they are not
+ *   reliably counted.
+ */
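+
+/*
+ * Note on the table encodings (illustrative): each non-zero entry is the raw
+ * value written into IA32_PERFEVTSELx -- the low byte is the event code, the
+ * next byte the unit mask, and higher bits carry other PERFEVTSEL flags.
+ * E.g. 0x81d0 below is event 0xd0 with umask 0x81 (MEM_UOPS_RETIRED.ALL_LOADS).
+ * A value of 0 means no suitable event exists, while -1 marks an unsupported
+ * combination.
+ */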
+
+#define HSW_DEMAND_DATA_RD             BIT_ULL(0)
+#define HSW_DEMAND_RFO                 BIT_ULL(1)
+#define HSW_ANY_RESPONSE               BIT_ULL(16)
+#define HSW_SUPPLIER_NONE              BIT_ULL(17)
+#define HSW_L3_MISS_LOCAL_DRAM         BIT_ULL(22)
+#define HSW_L3_MISS_REMOTE_HOP0                BIT_ULL(27)
+#define HSW_L3_MISS_REMOTE_HOP1                BIT_ULL(28)
+#define HSW_L3_MISS_REMOTE_HOP2P       BIT_ULL(29)
+#define HSW_L3_MISS                    (HSW_L3_MISS_LOCAL_DRAM| \
+                                        HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+                                        HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_SNOOP_NONE                 BIT_ULL(31)
+#define HSW_SNOOP_NOT_NEEDED           BIT_ULL(32)
+#define HSW_SNOOP_MISS                 BIT_ULL(33)
+#define HSW_SNOOP_HIT_NO_FWD           BIT_ULL(34)
+#define HSW_SNOOP_HIT_WITH_FWD         BIT_ULL(35)
+#define HSW_SNOOP_HITM                 BIT_ULL(36)
+#define HSW_SNOOP_NON_DRAM             BIT_ULL(37)
+#define HSW_ANY_SNOOP                  (HSW_SNOOP_NONE| \
+                                        HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
+                                        HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
+                                        HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
+#define HSW_SNOOP_DRAM                 (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
+#define HSW_DEMAND_READ                        HSW_DEMAND_DATA_RD
+#define HSW_DEMAND_WRITE               HSW_DEMAND_RFO
+#define HSW_L3_MISS_REMOTE             (HSW_L3_MISS_REMOTE_HOP0|\
+                                        HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_LLC_ACCESS                 HSW_ANY_RESPONSE
+
+#define BDW_L3_MISS_LOCAL              BIT(26)
+#define BDW_L3_MISS                    (BDW_L3_MISS_LOCAL| \
+                                        HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+                                        HSW_L3_MISS_REMOTE_HOP2P)
+
+static __initconst const u64 hsw_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x280,   /* ICACHE.MISSES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x108,   /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x149,   /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x6085,  /* ITLB_MISSES.STLB_HIT */
+               [ C(RESULT_MISS)   ] = 0x185,   /* ITLB_MISSES.MISS_CAUSES_A_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+};
+
+static __initconst const u64 hsw_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+                                      HSW_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
+                                      HSW_L3_MISS|HSW_ANY_SNOOP,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+                                      HSW_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
+                                      HSW_L3_MISS|HSW_ANY_SNOOP,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+                                      HSW_L3_MISS_LOCAL_DRAM|
+                                      HSW_SNOOP_DRAM,
+               [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
+                                      HSW_L3_MISS_REMOTE|
+                                      HSW_SNOOP_DRAM,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+                                      HSW_L3_MISS_LOCAL_DRAM|
+                                      HSW_SNOOP_DRAM,
+               [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
+                                      HSW_L3_MISS_REMOTE|
+                                      HSW_SNOOP_DRAM,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+};
+
+static __initconst const u64 westmere_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+               [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+               [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       /*
+        * Use RFO, not WRITEBACK, because a write miss would typically occur
+        * on RFO.
+        */
+       [ C(OP_WRITE) ] = {
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+               [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+               [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+};
+
+/*
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
+ */
+
+#define NHM_DMND_DATA_RD       (1 << 0)
+#define NHM_DMND_RFO           (1 << 1)
+#define NHM_DMND_IFETCH                (1 << 2)
+#define NHM_DMND_WB            (1 << 3)
+#define NHM_PF_DATA_RD         (1 << 4)
+#define NHM_PF_DATA_RFO                (1 << 5)
+#define NHM_PF_IFETCH          (1 << 6)
+#define NHM_OFFCORE_OTHER      (1 << 7)
+#define NHM_UNCORE_HIT         (1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
+#define NHM_OTHER_CORE_HITM    (1 << 10)
+                               /* reserved */
+#define NHM_REMOTE_CACHE_FWD   (1 << 12)
+#define NHM_REMOTE_DRAM                (1 << 13)
+#define NHM_LOCAL_DRAM         (1 << 14)
+#define NHM_NON_DRAM           (1 << 15)
+
+#define NHM_LOCAL              (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_REMOTE             (NHM_REMOTE_DRAM)
+
+#define NHM_DMND_READ          (NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE         (NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH      (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT     (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS  (NHM_L3_HIT|NHM_L3_MISS)
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
+       },
+ },
+};
+
+static __initconst const u64 nehalem_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+               [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+               [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       /*
+        * Use RFO, not WRITEBACK, because a write miss would typically occur
+        * on RFO.
+        */
+       [ C(OP_WRITE) ] = {
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+               [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+               [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+};
+
+static __initconst const u64 core2_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
+               [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
+               [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+               [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+               [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+               [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+               [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+               [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+               [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+               [ C(RESULT_MISS)   ] = 0x00c5, /* BR_INST_RETIRED.MISPRED    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+static __initconst const u64 atom_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
+               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+               [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+               [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
+               [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
+               [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+               [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+               [ C(RESULT_MISS)   ] = 0x00c5, /* BR_INST_RETIRED.MISPRED    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+static struct extra_reg intel_slm_extra_regs[] __read_mostly =
+{
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
+       EVENT_EXTRA_END
+};
+
+#define SLM_DMND_READ          SNB_DMND_DATA_RD
+#define SLM_DMND_WRITE         SNB_DMND_RFO
+#define SLM_DMND_PREFETCH      (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SLM_SNP_ANY            (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
+#define SLM_LLC_ACCESS         SNB_RESP_ANY
+#define SLM_LLC_MISS           (SLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 slm_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
+       },
+ },
+};
+
+static __initconst const u64 slm_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
+               [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+               [ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+               [ C(RESULT_MISS)   ] = 0x00c5, /* BR_INST_RETIRED.MISPRED */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+#define KNL_OT_L2_HITE         BIT_ULL(19) /* Other Tile L2 Hit (E-state) */
+#define KNL_OT_L2_HITF         BIT_ULL(20) /* Other Tile L2 Hit (F-state) */
+#define KNL_MCDRAM_LOCAL       BIT_ULL(21)
+#define KNL_MCDRAM_FAR         BIT_ULL(22)
+#define KNL_DDR_LOCAL          BIT_ULL(23)
+#define KNL_DDR_FAR            BIT_ULL(24)
+#define KNL_DRAM_ANY           (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
+                                   KNL_DDR_LOCAL | KNL_DDR_FAR)
+#define KNL_L2_READ            SLM_DMND_READ
+#define KNL_L2_WRITE           SLM_DMND_WRITE
+#define KNL_L2_PREFETCH                SLM_DMND_PREFETCH
+#define KNL_L2_ACCESS          SLM_LLC_ACCESS
+#define KNL_L2_MISS            (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
+                                  KNL_DRAM_ANY | SNB_SNP_ANY | \
+                                                 SNB_NON_DRAM)
+
+static __initconst const u64 knl_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
+                       [C(RESULT_MISS)]   = 0,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
+                       [C(RESULT_MISS)]   = KNL_L2_WRITE | KNL_L2_MISS,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
+                       [C(RESULT_MISS)]   = KNL_L2_PREFETCH | KNL_L2_MISS,
+               },
+       },
+};
+
+/*
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function may be called consecutively; the PMU must remain in the
+ * disabled state across such calls.
+ *
+ * During consecutive calls, the same disable value is written to the related
+ * registers, so the PMU state remains unchanged. Likewise, hw.state in
+ * intel_bts_disable_local() stays PERF_HES_STOPPED across consecutive calls.
+ */
+static void __intel_pmu_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+               intel_pmu_disable_bts();
+       else
+               intel_bts_disable_local();
+
+       intel_pmu_pebs_disable_all();
+}
+
+static void intel_pmu_disable_all(void)
+{
+       __intel_pmu_disable_all();
+       intel_pmu_lbr_disable_all();
+}
+
+static void __intel_pmu_enable_all(int added, bool pmi)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       intel_pmu_pebs_enable_all();
+       intel_pmu_lbr_enable_all(pmi);
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
+                       x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
+
+       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+               struct perf_event *event =
+                       cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
+
+               if (WARN_ON_ONCE(!event))
+                       return;
+
+               intel_pmu_enable_bts(event->hw.config);
+       } else
+               intel_bts_enable_local();
+}
+
+static void intel_pmu_enable_all(int added)
+{
+       __intel_pmu_enable_all(added, false);
+}
+
+/*
+ * Workaround for:
+ *   Intel Errata AAK100 (model 26)
+ *   Intel Errata AAP53  (model 30)
+ *   Intel Errata BD53   (model 44)
+ *
+ * The official story:
+ *   These chips need to be 'reset' when adding counters by programming the
+ *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ *   in sequence on the same PMC or on different PMCs.
+ *
+ * In practice it appears some of these events do in fact count, and
+ * we need to program all 4 events.
+ */
+static void intel_pmu_nhm_workaround(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       static const unsigned long nhm_magic[4] = {
+               0x4300B5,
+               0x4300D2,
+               0x4300B1,
+               0x4300B1
+       };
+       struct perf_event *event;
+       int i;
+
+       /*
+        * The errata requires the following steps:
+        * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+        * 2) Configure 4 PERFEVTSELx with the magic events and clear
+        *    the corresponding PMCx;
+        * 3) Set bits 0-3 of MSR_CORE_PERF_GLOBAL_CTRL;
+        * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+        * 5) Clear the 4 pairs of PERFEVTSELx and PMCx.
+        */
+
+       /*
+        * The real steps we take are a little different from above:
+        * A) To reduce MSR operations, we don't run step 1) as those
+        *    MSRs are already cleared before this function is called;
+        * B) Call x86_perf_event_update to save PMCx before configuring
+        *    PERFEVTSELx with the magic events;
+        * C) For step 5), we clear PERFEVTSELx only when it is not
+        *    currently in use;
+        * D) Call x86_perf_event_set_period to restore PMCx.
+        */
+
+       /* We always operate 4 pairs of PERF Counters */
+       for (i = 0; i < 4; i++) {
+               event = cpuc->events[i];
+               if (event)
+                       x86_perf_event_update(event);
+       }
+
+       for (i = 0; i < 4; i++) {
+               wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+               wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+       }
+
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+       for (i = 0; i < 4; i++) {
+               event = cpuc->events[i];
+
+               if (event) {
+                       x86_perf_event_set_period(event);
+                       __x86_pmu_enable_event(&event->hw,
+                                       ARCH_PERFMON_EVENTSEL_ENABLE);
+               } else
+                       wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+       }
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+       if (added)
+               intel_pmu_nhm_workaround();
+       intel_pmu_enable_all(added);
+}
+
+static inline u64 intel_pmu_get_status(void)
+{
+       u64 status;
+
+       rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+       return status;
+}
+
+static inline void intel_pmu_ack_status(u64 ack)
+{
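+       /*
+        * Writing a set bit to GLOBAL_OVF_CTRL clears the corresponding
+        * overflow bit in GLOBAL_STATUS.
+        */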
+       wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+{
+       int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+       u64 ctrl_val, mask;
+
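+       /*
+        * Each fixed counter owns a 4-bit field in the fixed-counter control
+        * MSR; clear this counter's field to stop it.
+        */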
+       mask = 0xfULL << (idx * 4);
+
+       rdmsrl(hwc->config_base, ctrl_val);
+       ctrl_val &= ~mask;
+       wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+       return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
+static void intel_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+               intel_pmu_disable_bts();
+               intel_pmu_drain_bts_buffer();
+               return;
+       }
+
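+       /*
+        * Drop this counter from the guest/host enable masks and from the
+        * checkpointed-counter status.
+        */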
+       cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
+       cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+       cpuc->intel_cp_status &= ~(1ull << hwc->idx);
+
+       /*
+        * Must be disabled before any actual event,
+        * because any event may be combined with LBR.
+        */
+       if (needs_branch_stack(event))
+               intel_pmu_lbr_disable(event);
+
+       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+               intel_pmu_disable_fixed(hwc);
+               return;
+       }
+
+       x86_pmu_disable_event(event);
+
+       if (unlikely(event->attr.precise_ip))
+               intel_pmu_pebs_disable(event);
+}
+
+static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+{
+       int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+       u64 ctrl_val, bits, mask;
+
+       /*
+        * Enable IRQ generation (0x8),
+        * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+        * if requested:
+        */
+       bits = 0x8ULL;
+       if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+               bits |= 0x2;
+       if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+               bits |= 0x1;
+
+       /*
+        * ANY bit is supported in v3 and up
+        */
+       if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+               bits |= 0x4;
+
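+       /*
+        * Each fixed counter owns a 4-bit field in the fixed-counter control
+        * MSR; shift the enable bits into this counter's field.
+        */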
+       bits <<= (idx * 4);
+       mask = 0xfULL << (idx * 4);
+
+       rdmsrl(hwc->config_base, ctrl_val);
+       ctrl_val &= ~mask;
+       ctrl_val |= bits;
+       wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+               if (!__this_cpu_read(cpu_hw_events.enabled))
+                       return;
+
+               intel_pmu_enable_bts(hwc->config);
+               return;
+       }
+       /*
+        * Must be enabled before any actual event,
+        * because any event may be combined with LBR.
+        */
+       if (needs_branch_stack(event))
+               intel_pmu_lbr_enable(event);
+
+       if (event->attr.exclude_host)
+               cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
+       if (event->attr.exclude_guest)
+               cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
+
+       if (unlikely(event_is_checkpointed(event)))
+               cpuc->intel_cp_status |= (1ull << hwc->idx);
+
+       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+               intel_pmu_enable_fixed(hwc);
+               return;
+       }
+
+       if (unlikely(event->attr.precise_ip))
+               intel_pmu_pebs_enable(event);
+
+       __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+/*
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
+ */
+int intel_pmu_save_and_restart(struct perf_event *event)
+{
+       x86_perf_event_update(event);
+       /*
+        * For a checkpointed counter always reset back to 0.  This
+        * avoids a situation where the counter overflows, aborts the
+        * transaction and is then set back to shortly before the
+        * overflow, and overflows and aborts again.
+        */
+       if (unlikely(event_is_checkpointed(event))) {
+               /* No race with NMIs because the counter should not be armed */
+               wrmsrl(event->hw.event_base, 0);
+               local64_set(&event->hw.prev_count, 0);
+       }
+       return x86_perf_event_set_period(event);
+}
+
+static void intel_pmu_reset(void)
+{
+       struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+       unsigned long flags;
+       int idx;
+
+       if (!x86_pmu.num_counters)
+               return;
+
+       local_irq_save(flags);
+
+       pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
+
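+       /* Clear the config and count registers of all generic counters. */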
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
+               wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
+       }
+       for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
+               wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+
+       if (ds)
+               ds->bts_index = ds->bts_buffer_base;
+
+       /* Ack all overflows and disable fixed counters */
+       if (x86_pmu.version >= 2) {
+               intel_pmu_ack_status(intel_pmu_get_status());
+               wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+       }
+
+       /* Reset LBRs and LBR freezing */
+       if (x86_pmu.lbr_nr) {
+               update_debugctlmsr(get_debugctlmsr() &
+                       ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
+       }
+
+       local_irq_restore(flags);
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc;
+       int bit, loops;
+       u64 status;
+       int handled;
+
+       cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /*
+        * There is no known reason not to always do the late ACK,
+        * but keep it opt-in just in case.
+        */
+       if (!x86_pmu.late_ack)
+               apic_write(APIC_LVTPC, APIC_DM_NMI);
+       __intel_pmu_disable_all();
+       handled = intel_pmu_drain_bts_buffer();
+       handled += intel_bts_interrupt();
+       status = intel_pmu_get_status();
+       if (!status)
+               goto done;
+
+       loops = 0;
+again:
+       intel_pmu_lbr_read();
+       intel_pmu_ack_status(status);
+       if (++loops > 100) {
+               static bool warned = false;
+               if (!warned) {
+                       WARN(1, "perfevents: irq loop stuck!\n");
+                       perf_event_print_debug();
+                       warned = true;
+               }
+               intel_pmu_reset();
+               goto done;
+       }
+
+       inc_irq_stat(apic_perf_irqs);
+
+       /*
+        * Ignore a range of extra bits in status that do not indicate
+        * overflow by themselves.
+        */
+       status &= ~(GLOBAL_STATUS_COND_CHG |
+                   GLOBAL_STATUS_ASIF |
+                   GLOBAL_STATUS_LBRS_FROZEN);
+       if (!status)
+               goto done;
+
+       /*
+        * PEBS overflow sets bit 62 in the global status register
+        */
+       if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+               handled++;
+               x86_pmu.drain_pebs(regs);
+               /*
+                * There are cases where, even though the PEBS ovfl bit is set
+                * in GLOBAL_OVF_STATUS, the PEBS events may also have their
+                * overflow bits set for their counters. We must clear them
+                * here because they have been processed as exact samples in
+                * the drain_pebs() routine. They must not be processed again
+                * in the for_each_bit_set() loop for regular samples below.
+                */
+               status &= ~cpuc->pebs_enabled;
+               status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+       }
+
+       /*
+        * Intel PT
+        */
+       if (__test_and_clear_bit(55, (unsigned long *)&status)) {
+               handled++;
+               intel_pt_interrupt();
+       }
+
+       /*
+        * Checkpointed counters can lead to 'spurious' PMIs because the
+        * rollback caused by the PMI will have cleared the overflow status
+        * bit. Therefore always force probe these counters.
+        */
+       status |= cpuc->intel_cp_status;
+
+       for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+               struct perf_event *event = cpuc->events[bit];
+
+               handled++;
+
+               if (!test_bit(bit, cpuc->active_mask))
+                       continue;
+
+               if (!intel_pmu_save_and_restart(event))
+                       continue;
+
+               perf_sample_data_init(&data, 0, event->hw.last_period);
+
+               if (has_branch_stack(event))
+                       data.br_stack = &cpuc->lbr_stack;
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+       }
+
+       /*
+        * Repeat if there is more work to be done:
+        */
+       status = intel_pmu_get_status();
+       if (status)
+               goto again;
+
+done:
+       /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+       if (cpuc->enabled)
+               __intel_pmu_enable_all(0, true);
+
+       /*
+        * Only unmask the NMI after the overflow counters
+        * have been reset. This avoids spurious NMIs on
+        * Haswell CPUs.
+        */
+       if (x86_pmu.late_ack)
+               apic_write(APIC_LVTPC, APIC_DM_NMI);
+       return handled;
+}
+
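+/*
+ * BTS is only usable for the branch-instructions event sampled with a fixed
+ * period of 1 (i.e. tracing every branch); frequency-based sampling cannot
+ * use it.
+ */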
+static struct event_constraint *
+intel_bts_constraints(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned int hw_event, bts_event;
+
+       if (event->attr.freq)
+               return NULL;
+
+       hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+       bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+       if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
+               return &bts_constraint;
+
+       return NULL;
+}
+
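+/*
+ * Map an OFFCORE_RSP extra register index to its sibling (RSP_0 <-> RSP_1)
+ * when the CPU has both MSRs and the event config is valid for the alternate
+ * register; otherwise keep the original index.
+ */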
+static int intel_alt_er(int idx, u64 config)
+{
+       int alt_idx = idx;
+
+       if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
+               return idx;
+
+       if (idx == EXTRA_REG_RSP_0)
+               alt_idx = EXTRA_REG_RSP_1;
+
+       if (idx == EXTRA_REG_RSP_1)
+               alt_idx = EXTRA_REG_RSP_0;
+
+       if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
+               return idx;
+
+       return alt_idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+       event->hw.extra_reg.idx = idx;
+
+       if (idx == EXTRA_REG_RSP_0) {
+               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+               event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
+               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+       } else if (idx == EXTRA_REG_RSP_1) {
+               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+               event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
+               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
+       }
+}
+
+/*
+ * manage allocation of shared extra msr for certain events
+ *
+ * sharing can be:
+ * per-cpu: to be shared between the various events on a single PMU
+ * per-core: per-cpu + shared by HT threads
+ */
+static struct event_constraint *
+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
+                                  struct perf_event *event,
+                                  struct hw_perf_event_extra *reg)
+{
+       struct event_constraint *c = &emptyconstraint;
+       struct er_account *era;
+       unsigned long flags;
+       int idx = reg->idx;
+
+       /*
+        * reg->alloc can be set due to existing state, so for fake cpuc we
+        * need to ignore this, otherwise we might fail to allocate proper fake
+        * state for this extra reg constraint. Also see the comment below.
+        */
+       if (reg->alloc && !cpuc->is_fake)
+               return NULL; /* call x86_get_event_constraint() */
+
+again:
+       era = &cpuc->shared_regs->regs[idx];
+       /*
+        * We use raw_spin_lock_irqsave() to avoid lockdep issues when
+        * passing a fake cpuc.
+        */
+       raw_spin_lock_irqsave(&era->lock, flags);
+
+       if (!atomic_read(&era->ref) || era->config == reg->config) {
+
+               /*
+                * If it's a fake cpuc -- as per validate_{group,event}() -- we
+                * shouldn't touch the event state and we can avoid doing so
+                * since both will only call get_event_constraints() once
+                * per event; this avoids the need for reg->alloc.
+                *
+                * Not doing the ER fixup will only result in era->reg being
+                * wrong, but since we won't actually try and program hardware
+                * this isn't a problem either.
+                */
+               if (!cpuc->is_fake) {
+                       if (idx != reg->idx)
+                               intel_fixup_er(event, idx);
+
+                       /*
+                        * x86_schedule_events() can call get_event_constraints()
+                        * multiple times on events in the case of incremental
+                        * scheduling. reg->alloc ensures we only do the ER
+                        * allocation once.
+                        */
+                       reg->alloc = 1;
+               }
+
+               /* lock in msr value */
+               era->config = reg->config;
+               era->reg = reg->reg;
+
+               /* one more user */
+               atomic_inc(&era->ref);
+
+               /*
+                * need to call x86_get_event_constraint()
+                * to check if associated event has constraints
+                */
+               c = NULL;
+       } else {
+               idx = intel_alt_er(idx, reg->config);
+               if (idx != reg->idx) {
+                       raw_spin_unlock_irqrestore(&era->lock, flags);
+                       goto again;
+               }
+       }
+       raw_spin_unlock_irqrestore(&era->lock, flags);
+
+       return c;
+}
+
+static void
+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
+                                  struct hw_perf_event_extra *reg)
+{
+       struct er_account *era;
+
+       /*
+        * Only put the constraint if the extra reg was actually allocated. This
+        * also takes care of events which do not use an extra shared reg.
+        *
+        * Also, if this is a fake cpuc we shouldn't touch any event state
+        * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+        * either since it'll be thrown out.
+        */
+       if (!reg->alloc || cpuc->is_fake)
+               return;
+
+       era = &cpuc->shared_regs->regs[reg->idx];
+
+       /* one fewer user */
+       atomic_dec(&era->ref);
+
+       /* allocate again next time */
+       reg->alloc = 0;
+}
+
+static struct event_constraint *
+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
+                             struct perf_event *event)
+{
+       struct event_constraint *c = NULL, *d;
+       struct hw_perf_event_extra *xreg, *breg;
+
+       xreg = &event->hw.extra_reg;
+       if (xreg->idx != EXTRA_REG_NONE) {
+               c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+               if (c == &emptyconstraint)
+                       return c;
+       }
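+       /*
+        * The branch_reg (LBR filter state) is a shared extra register as
+        * well; if it cannot be allocated, release the extra_reg taken
+        * above and propagate the empty constraint.
+        */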
+       breg = &event->hw.branch_reg;
+       if (breg->idx != EXTRA_REG_NONE) {
+               d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+               if (d == &emptyconstraint) {
+                       __intel_shared_reg_put_constraints(cpuc, xreg);
+                       c = d;
+               }
+       }
+       return c;
+}
+
+struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       if (x86_pmu.event_constraints) {
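+               /* return the first table entry whose masked config matches */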
+               for_each_event_constraint(c, x86_pmu.event_constraints) {
+                       if ((event->hw.config & c->cmask) == c->code) {
+                               event->hw.flags |= c->flags;
+                               return c;
+                       }
+               }
+       }
+
+       return &unconstrained;
+}
+
+static struct event_constraint *
+__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                           struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       c = intel_bts_constraints(event);
+       if (c)
+               return c;
+
+       c = intel_shared_regs_constraints(cpuc, event);
+       if (c)
+               return c;
+
+       c = intel_pebs_constraints(event);
+       if (c)
+               return c;
+
+       return x86_get_event_constraints(cpuc, idx, event);
+}
+
+static void
+intel_start_scheduling(struct cpu_hw_events *cpuc)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl;
+       int tid = cpuc->excl_thread_id;
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return;
+
+       /*
+        * no exclusion needed
+        */
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       xl = &excl_cntrs->states[tid];
+
+       xl->sched_started = true;
+       /*
+        * lock shared state until we are done scheduling; the lock is
+        * released in intel_stop_scheduling(), which makes scheduling
+        * appear as a transaction
+        */
+       raw_spin_lock(&excl_cntrs->lock);
+}
+
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct event_constraint *c = cpuc->event_constraint[idx];
+       struct intel_excl_states *xl;
+       int tid = cpuc->excl_thread_id;
+
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return;
+
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+               return;
+
+       xl = &excl_cntrs->states[tid];
+
+       lockdep_assert_held(&excl_cntrs->lock);
+
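+       /*
+        * Record whether this counter now holds an exclusive or a shared
+        * event; the HT sibling consults this state in
+        * intel_get_excl_constraints() when scheduling its own events.
+        */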
+       if (c->flags & PERF_X86_EVENT_EXCL)
+               xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
+       else
+               xl->state[cntr] = INTEL_EXCL_SHARED;
+}
+
+static void
+intel_stop_scheduling(struct cpu_hw_events *cpuc)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl;
+       int tid = cpuc->excl_thread_id;
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return;
+       /*
+        * no exclusion needed
+        */
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       xl = &excl_cntrs->states[tid];
+
+       xl->sched_started = false;
+       /*
+        * release shared state lock (acquired in intel_start_scheduling())
+        */
+       raw_spin_unlock(&excl_cntrs->lock);
+}
+
+static struct event_constraint *
+intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+                          int idx, struct event_constraint *c)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xlo;
+       int tid = cpuc->excl_thread_id;
+       int is_excl, i;
+
+       /*
+        * validating a group does not require
+        * enforcing cross-thread  exclusion
+        */
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return c;
+
+       /*
+        * no exclusion needed
+        */
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return c;
+
+       /*
+        * because we modify the constraint, we need
+        * to make a copy. Static constraints come
+        * from static const tables.
+        *
+        * only needed when constraint has not yet
+        * been cloned (marked dynamic)
+        */
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+               struct event_constraint *cx;
+
+               /*
+                * grab pre-allocated constraint entry
+                */
+               cx = &cpuc->constraint_list[idx];
+
+               /*
+                * initialize dynamic constraint
+                * with static constraint
+                */
+               *cx = *c;
+
+               /*
+                * mark constraint as dynamic, so we
+                * can free it later on
+                */
+               cx->flags |= PERF_X86_EVENT_DYNAMIC;
+               c = cx;
+       }
+
+       /*
+        * From here on, the constraint is dynamic.
+        * Either it was just allocated above, or it
+        * was allocated during an earlier invocation
+        * of this function
+        */
+
+       /*
+        * state of sibling HT
+        */
+       xlo = &excl_cntrs->states[tid ^ 1];
+
+       /*
+        * event requires exclusive counter access
+        * across HT threads
+        */
+       is_excl = c->flags & PERF_X86_EVENT_EXCL;
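+       /*
+        * Account exclusive events only once per event (EXCL_ACCT); the
+        * first one marks has_exclusive[] for this thread so the scheduler
+        * can restrict counter usage and avoid starving the sibling.
+        */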
+       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+               if (!cpuc->n_excl++)
+                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+       }
+
+       /*
+        * Modify static constraint with current dynamic
+        * state of thread
+        *
+        * EXCLUSIVE: sibling counter measuring exclusive event
+        * SHARED   : sibling counter measuring non-exclusive event
+        * UNUSED   : sibling counter unused
+        */
+       for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
+               /*
+                * an exclusive event in the sibling counter means our
+                * corresponding counter cannot be used, regardless of our
+                * event
+                */
+               if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+                       __clear_bit(i, c->idxmsk);
+               /*
+                * if we are measuring an exclusive event and the sibling is
+                * measuring a non-exclusive one, then the counter cannot
+                * be used
+                */
+               if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+                       __clear_bit(i, c->idxmsk);
+       }
+
+       /*
+        * recompute actual bit weight for scheduling algorithm
+        */
+       c->weight = hweight64(c->idxmsk64);
+
+       /*
+        * if we return an empty mask, then switch
+        * back to static empty constraint to avoid
+        * the cost of freeing later on
+        */
+       if (c->weight == 0)
+               c = &emptyconstraint;
+
+       return c;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                           struct perf_event *event)
+{
+       struct event_constraint *c1 = NULL;
+       struct event_constraint *c2;
+
+       if (idx >= 0) /* fake does < 0 */
+               c1 = cpuc->event_constraint[idx];
+
+       /*
+        * first time only
+        * - static constraint: no change across incremental scheduling calls
+        * - dynamic constraint: handled by intel_get_excl_constraints()
+        */
+       c2 = __intel_get_event_constraints(cpuc, idx, event);
+       if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+               bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
+               c1->weight = c2->weight;
+               c2 = c1;
+       }
+
+       if (cpuc->excl_cntrs)
+               return intel_get_excl_constraints(cpuc, event, idx, c2);
+
+       return c2;
+}
+
+static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
+               struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       int tid = cpuc->excl_thread_id;
+       struct intel_excl_states *xl;
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake)
+               return;
+
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+               hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+               if (!--cpuc->n_excl)
+                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+       }
+
+       /*
+        * If event was actually assigned, then mark the counter state as
+        * unused now.
+        */
+       if (hwc->idx >= 0) {
+               xl = &excl_cntrs->states[tid];
+
+               /*
+                * put_constraint may be called from x86_schedule_events()
+                * which already has the lock held so here make locking
+                * conditional.
+                */
+               if (!xl->sched_started)
+                       raw_spin_lock(&excl_cntrs->lock);
+
+               xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+               if (!xl->sched_started)
+                       raw_spin_unlock(&excl_cntrs->lock);
+       }
+}
+
+static void
+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
+                                       struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg;
+
+       reg = &event->hw.extra_reg;
+       if (reg->idx != EXTRA_REG_NONE)
+               __intel_shared_reg_put_constraints(cpuc, reg);
+
+       reg = &event->hw.branch_reg;
+       if (reg->idx != EXTRA_REG_NONE)
+               __intel_shared_reg_put_constraints(cpuc, reg);
+}
+
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+                                       struct perf_event *event)
+{
+       intel_put_shared_regs_event_constraints(cpuc, event);
+
+       /*
+        * if the PMU has exclusive counter restrictions, then all
+        * events are subject to them and must call the
+        * put_excl_constraints() routine
+        */
+       if (cpuc->excl_cntrs)
+               intel_put_excl_constraints(cpuc, event);
+}
+
+static void intel_pebs_aliases_core2(struct perf_event *event)
+{
+       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use INST_RETIRED.ANY_P
+                * (0x00c0), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * INST_RETIRED.ANY_P counts the number of cycles that retire
+                * CNTMASK instructions. By setting CNTMASK to a value (16)
+                * larger than the maximum number of instructions that can be
+                * retired per cycle (4) and then inverting the condition, we
+                * count all cycles that retire 16 or fewer instructions, which
+                * is every cycle.
+                *
+                * Thereby we gain a PEBS capable cycle counter.
+                */
+               u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use UOPS_RETIRED.ALL
+                * (0x01c2), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * UOPS_RETIRED.ALL counts the number of cycles that retire
+                * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+                * larger than the maximum number of micro-ops that can be
+                * retired per cycle (4) and then inverting the condition, we
+                * count all cycles that retire 16 or fewer micro-ops, which
+                * is every cycle.
+                *
+                * Thereby we gain a PEBS capable cycle counter.
+                */
+               u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+}
+
+static void intel_pebs_aliases_precdist(struct perf_event *event)
+{
+       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use INST_RETIRED.PREC_DIST
+                * (0x01c0), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * The PREC_DIST event has special support to minimize sample
+                * shadowing effects. One drawback is that it can only be
+                * programmed on counter 1, but that seems like an
+                * acceptable trade off.
+                */
+               u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+}
+
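+/*
+ * Only the most precise request (precise_ip == 3) gets the PREC_DIST
+ * encoding (which is restricted to a single counter); less precise cycle
+ * events keep the regular PEBS alias for their CPU generation.
+ */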
+static void intel_pebs_aliases_ivb(struct perf_event *event)
+{
+       if (event->attr.precise_ip < 3)
+               return intel_pebs_aliases_snb(event);
+       return intel_pebs_aliases_precdist(event);
+}
+
+static void intel_pebs_aliases_skl(struct perf_event *event)
+{
+       if (event->attr.precise_ip < 3)
+               return intel_pebs_aliases_core2(event);
+       return intel_pebs_aliases_precdist(event);
+}
+
+static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+{
+       unsigned long flags = x86_pmu.free_running_flags;
+
+       if (event->attr.use_clockid)
+               flags &= ~PERF_SAMPLE_TIME;
+       return flags;
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+       int ret = x86_pmu_hw_config(event);
+
+       if (ret)
+               return ret;
+
+       if (event->attr.precise_ip) {
+               if (!event->attr.freq) {
+                       event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+                       if (!(event->attr.sample_type &
+                             ~intel_pmu_free_running_flags(event)))
+                               event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+               }
+               if (x86_pmu.pebs_aliases)
+                       x86_pmu.pebs_aliases(event);
+       }
+
+       if (needs_branch_stack(event)) {
+               ret = intel_pmu_setup_lbr_filter(event);
+               if (ret)
+                       return ret;
+
+               /*
+                * BTS is set up earlier in this path, so don't account twice
+                */
+               if (!intel_pmu_has_bts(event)) {
+                       /* disallow lbr if conflicting events are present */
+                       if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+                               return -EBUSY;
+
+                       event->destroy = hw_perf_lbr_event_destroy;
+               }
+       }
+
+       if (event->attr.type != PERF_TYPE_RAW)
+               return 0;
+
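+       /*
+        * Raw events may request the ANY-thread bit (count on both HT
+        * siblings); this requires architectural perfmon v3+ and, under
+        * paranoid settings, CAP_SYS_ADMIN.
+        */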
+       if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
+               return 0;
+
+       if (x86_pmu.version < 3)
+               return -EINVAL;
+
+       if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
+
+       return 0;
+}
+
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+       if (x86_pmu.guest_get_msrs)
+               return x86_pmu.guest_get_msrs(nr);
+       *nr = 0;
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
+
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+
+       arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
+       arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
+       arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
+       /*
+        * If PMU counter has PEBS enabled it is not enough to disable counter
+        * on a guest entry since PEBS memory write can overshoot guest entry
+        * and corrupt guest memory. Disabling PEBS solves the problem.
+        */
+       arr[1].msr = MSR_IA32_PEBS_ENABLE;
+       arr[1].host = cpuc->pebs_enabled;
+       arr[1].guest = 0;
+
+       *nr = 2;
+       return arr;
+}
+
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+       int idx;
+
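+       /*
+        * Build a per-counter EVENTSEL switch list: keep each counter
+        * enabled only on the side (host or guest) its event is allowed
+        * to count on.
+        */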
+       for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
+               struct perf_event *event = cpuc->events[idx];
+
+               arr[idx].msr = x86_pmu_config_addr(idx);
+               arr[idx].host = arr[idx].guest = 0;
+
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+
+               arr[idx].host = arr[idx].guest =
+                       event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
+
+               if (event->attr.exclude_host)
+                       arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+               else if (event->attr.exclude_guest)
+                       arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+       }
+
+       *nr = x86_pmu.num_counters;
+       return arr;
+}
+
+static void core_pmu_enable_event(struct perf_event *event)
+{
+       if (!event->attr.exclude_host)
+               x86_pmu_enable_event(event);
+}
+
+static void core_pmu_enable_all(int added)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
+
+               if (!test_bit(idx, cpuc->active_mask) ||
+                               cpuc->events[idx]->attr.exclude_host)
+                       continue;
+
+               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+       }
+}
+
+static int hsw_hw_config(struct perf_event *event)
+{
+       int ret = intel_pmu_hw_config(event);
+
+       if (ret)
+               return ret;
+       if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+               return 0;
+       event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+       /*
+        * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+        * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
+        * this combination.
+        */
+       if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+            ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+             event->attr.precise_ip > 0))
+               return -EOPNOTSUPP;
+
+       if (event_is_checkpointed(event)) {
+               /*
+                * Sampling of checkpointed events can cause situations where
+                * the CPU constantly aborts because of an overflow, which is
+                * then checkpointed back and ignored. Forbid checkpointing
+                * for sampling.
+                *
+                * But still allow a long sampling period, so that perf stat
+                * from KVM works.
+                */
+               if (event->attr.sample_period > 0 &&
+                   event->attr.sample_period < 0x7fffffff)
+                       return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
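+/* only generic counter 2 (mask 0x4) may count checkpointed (in_tx_cp) events */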
+static struct event_constraint counter2_constraint =
+                       EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       c = intel_get_event_constraints(cpuc, idx, event);
+
+       /* Handle special quirk on in_tx_checkpointed only in counter 2 */
+       if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+               if (c->idxmsk64 & (1U << 2))
+                       return &counter2_constraint;
+               return &emptyconstraint;
+       }
+
+       return c;
+}
+
+/*
+ * Broadwell:
+ *
+ * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
+ * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
+ * the two to enforce a minimum period of 128 (the smallest value that has bits
+ * 0-5 cleared and >= 100).
+ *
+ * Because of how the code in x86_perf_event_set_period() works, the truncation
+ * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
+ * to make up for the 'lost' events due to carrying the 'error' in period_left.
+ *
+ * Therefore the effective (average) period matches the requested period,
+ * despite coarser hardware granularity.
+ */
+static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+{
+       if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+                       X86_CONFIG(.event=0xc0, .umask=0x01)) {
+               if (left < 128)
+                       left = 128;
+               left &= ~0x3fu;
+       }
+       return left;
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7"    );
+PMU_FORMAT_ATTR(umask, "config:8-15"   );
+PMU_FORMAT_ATTR(edge,  "config:18"     );
+PMU_FORMAT_ATTR(pc,    "config:19"     );
+PMU_FORMAT_ATTR(any,   "config:21"     ); /* v3 + */
+PMU_FORMAT_ATTR(inv,   "config:23"     );
+PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+PMU_FORMAT_ATTR(in_tx,  "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
+
+static struct attribute *intel_arch_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_pc.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       NULL,
+};
+
+ssize_t intel_event_sysfs_show(char *page, u64 config)
+{
+       u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
+
+       return x86_event_sysfs_show(page, config, event);
+}
+
+struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+       struct intel_shared_regs *regs;
+       int i;
+
+       regs = kzalloc_node(sizeof(struct intel_shared_regs),
+                           GFP_KERNEL, cpu_to_node(cpu));
+       if (regs) {
+               /*
+                * initialize the locks to keep lockdep happy
+                */
+               for (i = 0; i < EXTRA_REG_MAX; i++)
+                       raw_spin_lock_init(&regs->regs[i].lock);
+
+               regs->core_id = -1;
+       }
+       return regs;
+}
+
+static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+{
+       struct intel_excl_cntrs *c;
+
+       c = kzalloc_node(sizeof(struct intel_excl_cntrs),
+                        GFP_KERNEL, cpu_to_node(cpu));
+       if (c) {
+               raw_spin_lock_init(&c->lock);
+               c->core_id = -1;
+       }
+       return c;
+}
+
+static int intel_pmu_cpu_prepare(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+       if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+               cpuc->shared_regs = allocate_shared_regs(cpu);
+               if (!cpuc->shared_regs)
+                       goto err;
+       }
+
+       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
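+               /*
+                * The cross-thread exclusion workaround needs a per-CPU
+                * list of dynamic constraints plus counter state shared
+                * with the HT sibling.
+                */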
+               size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
+
+               cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+               if (!cpuc->constraint_list)
+                       goto err_shared_regs;
+
+               cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+               if (!cpuc->excl_cntrs)
+                       goto err_constraint_list;
+
+               cpuc->excl_thread_id = 0;
+       }
+
+       return NOTIFY_OK;
+
+err_constraint_list:
+       kfree(cpuc->constraint_list);
+       cpuc->constraint_list = NULL;
+
+err_shared_regs:
+       kfree(cpuc->shared_regs);
+       cpuc->shared_regs = NULL;
+
+err:
+       return NOTIFY_BAD;
+}
+
+static void intel_pmu_cpu_starting(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       int core_id = topology_core_id(cpu);
+       int i;
+
+       init_debug_store_on_cpu(cpu);
+       /*
+        * Deal with CPUs that don't clear their LBRs on power-up.
+        */
+       intel_pmu_lbr_reset();
+
+       cpuc->lbr_sel = NULL;
+
+       if (!cpuc->shared_regs)
+               return;
+
+       if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
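+               /*
+                * Reuse the shared_regs structure of an already-online HT
+                * sibling on the same core; our own allocation is queued
+                * on kfree_on_online[] to be freed once the CPU is up.
+                */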
+               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+                       struct intel_shared_regs *pc;
+
+                       pc = per_cpu(cpu_hw_events, i).shared_regs;
+                       if (pc && pc->core_id == core_id) {
+                               cpuc->kfree_on_online[0] = cpuc->shared_regs;
+                               cpuc->shared_regs = pc;
+                               break;
+                       }
+               }
+               cpuc->shared_regs->core_id = core_id;
+               cpuc->shared_regs->refcnt++;
+       }
+
+       if (x86_pmu.lbr_sel_map)
+               cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
+
+       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
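+               /*
+                * Likewise share the exclusive-counter state with the HT
+                * sibling; the second thread on the core gets
+                * excl_thread_id = 1.
+                */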
+               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+                       struct intel_excl_cntrs *c;
+
+                       c = per_cpu(cpu_hw_events, i).excl_cntrs;
+                       if (c && c->core_id == core_id) {
+                               cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
+                               cpuc->excl_cntrs = c;
+                               cpuc->excl_thread_id = 1;
+                               break;
+                       }
+               }
+               cpuc->excl_cntrs->core_id = core_id;
+               cpuc->excl_cntrs->refcnt++;
+       }
+}
+
+static void free_excl_cntrs(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       struct intel_excl_cntrs *c;
+
+       c = cpuc->excl_cntrs;
+       if (c) {
+               if (c->core_id == -1 || --c->refcnt == 0)
+                       kfree(c);
+               cpuc->excl_cntrs = NULL;
+               kfree(cpuc->constraint_list);
+               cpuc->constraint_list = NULL;
+       }
+}
+
+static void intel_pmu_cpu_dying(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       struct intel_shared_regs *pc;
+
+       pc = cpuc->shared_regs;
+       if (pc) {
+               if (pc->core_id == -1 || --pc->refcnt == 0)
+                       kfree(pc);
+               cpuc->shared_regs = NULL;
+       }
+
+       free_excl_cntrs(cpu);
+
+       fini_debug_store_on_cpu(cpu);
+}
+
+static void intel_pmu_sched_task(struct perf_event_context *ctx,
+                                bool sched_in)
+{
+       if (x86_pmu.pebs_active)
+               intel_pmu_pebs_sched_task(ctx, sched_in);
+       if (x86_pmu.lbr_nr)
+               intel_pmu_lbr_sched_task(ctx, sched_in);
+}
+
+PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
+
+PMU_FORMAT_ATTR(ldlat, "config1:0-15");
+
+PMU_FORMAT_ATTR(frontend, "config1:0-23");
+
+static struct attribute *intel_arch3_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_pc.attr,
+       &format_attr_any.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       &format_attr_in_tx.attr,
+       &format_attr_in_tx_cp.attr,
+
+       &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+       &format_attr_ldlat.attr, /* PEBS load latency */
+       NULL,
+};
+
+static struct attribute *skl_format_attr[] = {
+       &format_attr_frontend.attr,
+       NULL,
+};
+
+static __initconst const struct x86_pmu core_pmu = {
+       .name                   = "core",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = x86_pmu_disable_all,
+       .enable_all             = core_pmu_enable_all,
+       .enable                 = core_pmu_enable_event,
+       .disable                = x86_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
+       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
+       .event_map              = intel_pmu_event_map,
+       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
+       .free_running_flags     = PEBS_FREERUNNING_FLAGS,
+
+       /*
+        * Intel PMCs cannot be accessed sanely above 32-bit width,
+        * so we install an artificial 1<<31 period regardless of
+        * the generic event period:
+        */
+       .max_period             = (1ULL<<31) - 1,
+       .get_event_constraints  = intel_get_event_constraints,
+       .put_event_constraints  = intel_put_event_constraints,
+       .event_constraints      = intel_core_event_constraints,
+       .guest_get_msrs         = core_guest_get_msrs,
+       .format_attrs           = intel_arch_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+       /*
+        * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+        * together with PMU version 1 and thus be using core_pmu with
+        * shared_regs. We need the following callbacks here to allocate
+        * it properly.
+        */
+       .cpu_prepare            = intel_pmu_cpu_prepare,
+       .cpu_starting           = intel_pmu_cpu_starting,
+       .cpu_dying              = intel_pmu_cpu_dying,
+};
+
+static __initconst const struct x86_pmu intel_pmu = {
+       .name                   = "Intel",
+       .handle_irq             = intel_pmu_handle_irq,
+       .disable_all            = intel_pmu_disable_all,
+       .enable_all             = intel_pmu_enable_all,
+       .enable                 = intel_pmu_enable_event,
+       .disable                = intel_pmu_disable_event,
+       .hw_config              = intel_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
+       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
+       .event_map              = intel_pmu_event_map,
+       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
+       .free_running_flags     = PEBS_FREERUNNING_FLAGS,
+       /*
+        * Intel PMCs cannot be accessed sanely above 32 bit width,
+        * so we install an artificial 1<<31 period regardless of
+        * the generic event period:
+        */
+       .max_period             = (1ULL << 31) - 1,
+       .get_event_constraints  = intel_get_event_constraints,
+       .put_event_constraints  = intel_put_event_constraints,
+       .pebs_aliases           = intel_pebs_aliases_core2,
+
+       .format_attrs           = intel_arch3_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+       .cpu_prepare            = intel_pmu_cpu_prepare,
+       .cpu_starting           = intel_pmu_cpu_starting,
+       .cpu_dying              = intel_pmu_cpu_dying,
+       .guest_get_msrs         = intel_guest_get_msrs,
+       .sched_task             = intel_pmu_sched_task,
+};
+
+static __init void intel_clovertown_quirk(void)
+{
+       /*
+        * PEBS is unreliable due to:
+        *
+        *   AJ67  - PEBS may experience CPL leaks
+        *   AJ68  - PEBS PMI may be delayed by one event
+        *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
+        *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
+        *
+        * AJ67 could be worked around by restricting the OS/USR flags.
+        * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
+        *
+        * AJ106 could possibly be worked around by not allowing LBR
+        *       usage from PEBS, including the fixup.
+        * AJ68  could possibly be worked around by always programming
+        *       a pebs_event_reset[0] value and coping with the lost events.
+        *
+        * But taken together it might just make sense to not enable PEBS on
+        * these chips.
+        */
+       pr_warn("PEBS disabled due to CPU errata\n");
+       x86_pmu.pebs = 0;
+       x86_pmu.pebs_constraints = NULL;
+}
+
+static int intel_snb_pebs_broken(int cpu)
+{
+       u32 rev = UINT_MAX; /* default to broken for unknown models */
+
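+       /* minimum microcode revision required for working PEBS, per model/stepping */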
+       switch (cpu_data(cpu).x86_model) {
+       case 42: /* SNB */
+               rev = 0x28;
+               break;
+
+       case 45: /* SNB-EP */
+               switch (cpu_data(cpu).x86_mask) {
+               case 6: rev = 0x618; break;
+               case 7: rev = 0x70c; break;
+               }
+       }
+
+       return (cpu_data(cpu).microcode < rev);
+}
+
+static void intel_snb_check_microcode(void)
+{
+       int pebs_broken = 0;
+       int cpu;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               if ((pebs_broken = intel_snb_pebs_broken(cpu)))
+                       break;
+       }
+       put_online_cpus();
+
+       if (pebs_broken == x86_pmu.pebs_broken)
+               return;
+
+       /*
+        * Serialized by the microcode lock.
+        */
+       if (x86_pmu.pebs_broken) {
+               pr_info("PEBS enabled due to microcode update\n");
+               x86_pmu.pebs_broken = 0;
+       } else {
+               pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
+               x86_pmu.pebs_broken = 1;
+       }
+}
+
+/*
+ * Under certain circumstances, accessing certain MSRs may cause #GP.
+ * This function tests whether the given MSR can be safely accessed.
+ */
+static bool check_msr(unsigned long msr, u64 mask)
+{
+       u64 val_old, val_new, val_tmp;
+
+       /*
+        * Read the current value, change it and read it back to see if it
+        * matches, this is needed to detect certain hardware emulators
+        * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+        */
+       if (rdmsrl_safe(msr, &val_old))
+               return false;
+
+       /*
+        * Only change the bits which can be updated by wrmsrl.
+        */
+       val_tmp = val_old ^ mask;
+       if (wrmsrl_safe(msr, val_tmp) ||
+           rdmsrl_safe(msr, &val_new))
+               return false;
+
+       if (val_new != val_tmp)
+               return false;
+
+       /*
+        * At this point the MSR is known to be safely accessible.
+        * Restore the old value and return.
+        */
+       wrmsrl(msr, val_old);
+
+       return true;
+}
+
+static __init void intel_sandybridge_quirk(void)
+{
+       x86_pmu.check_microcode = intel_snb_check_microcode;
+       intel_snb_check_microcode();
+}
+
+static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
+       { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+       { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+       { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+       { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+       { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+       { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+       { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void intel_arch_events_quirk(void)
+{
+       int bit;
+
+       /* disable events that are reported as not present by CPUID */
+       for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
+               intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+               pr_warn("CPUID marked event: \'%s\' unavailable\n",
+                       intel_arch_events_map[bit].name);
+       }
+}
+
+static __init void intel_nehalem_quirk(void)
+{
+       union cpuid10_ebx ebx;
+
+       ebx.full = x86_pmu.events_maskl;
+       if (ebx.split.no_branch_misses_retired) {
+               /*
+                * Erratum AAJ80 detected, we work it around by using
+                * the BR_MISP_EXEC.ANY event. This will over-count
+                * branch-misses, but it's still much better than the
+                * architectural event which is often completely bogus:
+                */
+               intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+               ebx.split.no_branch_misses_retired = 0;
+               x86_pmu.events_maskl = ebx.full;
+               pr_info("CPU erratum AAJ80 worked around\n");
+       }
+}
+
+/*
+ * enable software workaround for errata:
+ * SNB: BJ122
+ * IVB: BV98
+ * HSW: HSD29
+ *
+ * Only needed when HT is enabled. However, detecting whether HT is
+ * enabled is difficult (model specific). So instead we enable the
+ * workaround at early boot and verify whether it is actually needed
+ * in a later initcall phase, once we have valid topology information
+ * to check if HT is enabled
+ */
+static __init void intel_ht_bug(void)
+{
+       x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
+
+       x86_pmu.start_scheduling = intel_start_scheduling;
+       x86_pmu.commit_scheduling = intel_commit_scheduling;
+       x86_pmu.stop_scheduling = intel_stop_scheduling;
+}
+
+EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start,       tx_start,       "event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit,      tx_commit,      "event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort,       tx_abort,       "event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity,    tx_capacity,    "event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict,    tx_conflict,    "event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start,       el_start,       "event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit,      el_commit,      "event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort,       el_abort,       "event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity,    el_capacity,    "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict,    el_conflict,    "event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t,       cycles_t,       "event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct,      cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
+
+static struct attribute *hsw_events_attrs[] = {
+       EVENT_PTR(tx_start),
+       EVENT_PTR(tx_commit),
+       EVENT_PTR(tx_abort),
+       EVENT_PTR(tx_capacity),
+       EVENT_PTR(tx_conflict),
+       EVENT_PTR(el_start),
+       EVENT_PTR(el_commit),
+       EVENT_PTR(el_abort),
+       EVENT_PTR(el_capacity),
+       EVENT_PTR(el_conflict),
+       EVENT_PTR(cycles_t),
+       EVENT_PTR(cycles_ct),
+       EVENT_PTR(mem_ld_hsw),
+       EVENT_PTR(mem_st_hsw),
+       NULL
+};
+
+__init int intel_pmu_init(void)
+{
+       union cpuid10_edx edx;
+       union cpuid10_eax eax;
+       union cpuid10_ebx ebx;
+       struct event_constraint *c;
+       unsigned int unused;
+       struct extra_reg *er;
+       int version, i;
+
+       if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
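+               /*
+                * Pre-architectural-perfmon CPUs: the P6 family, Knights
+                * Corner (family 0xb) and Pentium 4 (family 0xf) have
+                * their own drivers.
+                */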
+               switch (boot_cpu_data.x86) {
+               case 0x6:
+                       return p6_pmu_init();
+               case 0xb:
+                       return knc_pmu_init();
+               case 0xf:
+                       return p4_pmu_init();
+               }
+               return -ENODEV;
+       }
+
+       /*
+        * Check whether the Architectural PerfMon supports
+        * Branch Misses Retired hw_event or not.
+        */
+       cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+       if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
+               return -ENODEV;
+
+       version = eax.split.version_id;
+       if (version < 2)
+               x86_pmu = core_pmu;
+       else
+               x86_pmu = intel_pmu;
+
+       x86_pmu.version                 = version;
+       x86_pmu.num_counters            = eax.split.num_counters;
+       x86_pmu.cntval_bits             = eax.split.bit_width;
+       x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
+
+       x86_pmu.events_maskl            = ebx.full;
+       x86_pmu.events_mask_len         = eax.split.mask_length;
+
+       x86_pmu.max_pebs_events         = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
+       /*
+        * Quirk: v2 perfmon does not report fixed-purpose events, so
+        * assume at least 3 events:
+        */
+       if (version > 1)
+               x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+
+       if (boot_cpu_has(X86_FEATURE_PDCM)) {
+               u64 capabilities;
+
+               rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+               x86_pmu.intel_cap.capabilities = capabilities;
+       }
+
+       intel_ds_init();
+
+       x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+
+       /*
+        * Install the hw-cache-events table:
+        */
+       switch (boot_cpu_data.x86_model) {
+       case 14: /* 65nm Core "Yonah" */
+               pr_cont("Core events, ");
+               break;
+
+       case 15: /* 65nm Core2 "Merom"          */
+               x86_add_quirk(intel_clovertown_quirk);
+       case 22: /* 65nm Core2 "Merom-L"        */
+       case 23: /* 45nm Core2 "Penryn"         */
+       case 29: /* 45nm Core2 "Dunnington" (MP)  */
+               memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+
+               intel_pmu_lbr_init_core();
+
+               x86_pmu.event_constraints = intel_core2_event_constraints;
+               x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
+               pr_cont("Core2 events, ");
+               break;
+
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+               memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_nhm();
+
+               x86_pmu.event_constraints = intel_nehalem_event_constraints;
+               x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+               x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+               x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+               x86_pmu.cpu_events = nhm_events_attrs;
+
+               /* UOPS_ISSUED.STALLED_CYCLES */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+               /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+
+               intel_pmu_pebs_data_source_nhm();
+               x86_add_quirk(intel_nehalem_quirk);
+
+               pr_cont("Nehalem events, ");
+               break;
+
+       case 28: /* 45nm Atom "Pineview"   */
+       case 38: /* 45nm Atom "Lincroft"   */
+       case 39: /* 32nm Atom "Penwell"    */
+       case 53: /* 32nm Atom "Cloverview" */
+       case 54: /* 32nm Atom "Cedarview"  */
+               memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+
+               intel_pmu_lbr_init_atom();
+
+               x86_pmu.event_constraints = intel_gen_event_constraints;
+               x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
+               pr_cont("Atom events, ");
+               break;
+
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 76: /* 14nm Atom "Airmont"                   */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+               memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
+                       sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_atom();
+
+               x86_pmu.event_constraints = intel_slm_event_constraints;
+               x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_slm_extra_regs;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               pr_cont("Silvermont events, ");
+               break;
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+               memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_nhm();
+
+               x86_pmu.event_constraints = intel_westmere_event_constraints;
+               x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+               x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_westmere_extra_regs;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+
+               x86_pmu.cpu_events = nhm_events_attrs;
+
+               /* UOPS_ISSUED.STALLED_CYCLES */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+               /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+
+               intel_pmu_pebs_data_source_nhm();
+               pr_cont("Westmere events, ");
+               break;
+
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+               x86_add_quirk(intel_sandybridge_quirk);
+               x86_add_quirk(intel_ht_bug);
+               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_snb();
+
+               x86_pmu.event_constraints = intel_snb_event_constraints;
+               x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+               if (boot_cpu_data.x86_model == 45)
+                       x86_pmu.extra_regs = intel_snbep_extra_regs;
+               else
+                       x86_pmu.extra_regs = intel_snb_extra_regs;
+
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.cpu_events = snb_events_attrs;
+
+               /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+               /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
+
+               pr_cont("SandyBridge events, ");
+               break;
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+               x86_add_quirk(intel_ht_bug);
+               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               /* dTLB-load-misses on IVB is different than SNB */
+               hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
+
+               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_snb();
+
+               x86_pmu.event_constraints = intel_ivb_event_constraints;
+               x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+               x86_pmu.pebs_prec_dist = true;
+               if (boot_cpu_data.x86_model == 62)
+                       x86_pmu.extra_regs = intel_snbep_extra_regs;
+               else
+                       x86_pmu.extra_regs = intel_snb_extra_regs;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.cpu_events = snb_events_attrs;
+
+               /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+               pr_cont("IvyBridge events, ");
+               break;
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 69: /* 22nm Haswell ULT */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+               x86_add_quirk(intel_ht_bug);
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_hsw();
+
+               x86_pmu.event_constraints = intel_hsw_event_constraints;
+               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_snbep_extra_regs;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+               x86_pmu.pebs_prec_dist = true;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
+               x86_pmu.cpu_events = hsw_events_attrs;
+               x86_pmu.lbr_double_abort = true;
+               pr_cont("Haswell events, ");
+               break;
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+               /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
+               hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
+                                                                        BDW_L3_MISS|HSW_SNOOP_DRAM;
+               hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
+                                                                         HSW_SNOOP_DRAM;
+               hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
+                                                                            BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+               hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
+                                                                             BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+
+               intel_pmu_lbr_init_hsw();
+
+               x86_pmu.event_constraints = intel_bdw_event_constraints;
+               x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_snbep_extra_regs;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+               x86_pmu.pebs_prec_dist = true;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
+               x86_pmu.cpu_events = hsw_events_attrs;
+               x86_pmu.limit_period = bdw_limit_period;
+               pr_cont("Broadwell events, ");
+               break;
+
+       case 87: /* Knights Landing Xeon Phi */
+               memcpy(hw_cache_event_ids,
+                      slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs,
+                      knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+               intel_pmu_lbr_init_knl();
+
+               x86_pmu.event_constraints = intel_slm_event_constraints;
+               x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_knl_extra_regs;
+
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               pr_cont("Knights Landing events, ");
+               break;
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+               intel_pmu_lbr_init_skl();
+
+               x86_pmu.event_constraints = intel_skl_event_constraints;
+               x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_skl_extra_regs;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
+               x86_pmu.pebs_prec_dist = true;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
+               x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
+                                                 skl_format_attr);
+               WARN_ON(!x86_pmu.format_attrs);
+               x86_pmu.cpu_events = hsw_events_attrs;
+               pr_cont("Skylake events, ");
+               break;
+
+       default:
+               switch (x86_pmu.version) {
+               case 1:
+                       x86_pmu.event_constraints = intel_v1_event_constraints;
+                       pr_cont("generic architected perfmon v1, ");
+                       break;
+               default:
+                       /*
+                        * default constraints for v2 and up
+                        */
+                       x86_pmu.event_constraints = intel_gen_event_constraints;
+                       pr_cont("generic architected perfmon, ");
+                       break;
+               }
+       }
+
+       if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
+               WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+                    x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
+               x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
+       }
+       x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+       if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
+               WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+                    x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
+               x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+       }
+
+       x86_pmu.intel_ctrl |=
+               ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
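+       /*
+        * Illustrative arithmetic (the counter counts are hypothetical):
+        * with 4 generic and 3 fixed counters, the two statements above
+        * give intel_ctrl = 0xf | (0x7ULL << 32) = 0x70000000f, i.e.
+        * generic counters in bits 0-3 and fixed counters starting at
+        * bit 32 (INTEL_PMC_IDX_FIXED).
+        */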
+
+       if (x86_pmu.event_constraints) {
+               /*
+                * event on fixed counter2 (REF_CYCLES) only works on this
+                * counter, so do not extend mask to generic counters
+                */
+               for_each_event_constraint(c, x86_pmu.event_constraints) {
+                       if (c->cmask == FIXED_EVENT_FLAGS
+                           && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+                               c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+                       }
+                       c->idxmsk64 &=
+                               ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
+                       c->weight = hweight64(c->idxmsk64);
+               }
+       }
+
+       /*
+        * Accessing LBR MSRs may cause a #GP under certain circumstances,
+        * e.g. KVM doesn't support the LBR MSRs.
+        * Check all LBR MSRs here.
+        * Disable LBR access if any LBR MSR cannot be accessed.
+        */
+       if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+               x86_pmu.lbr_nr = 0;
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
+                     check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
+                       x86_pmu.lbr_nr = 0;
+       }
+
+       /*
+        * Accessing an extra MSR may cause a #GP under certain circumstances,
+        * e.g. KVM doesn't support offcore events.
+        * Check all extra_regs here.
+        */
+       if (x86_pmu.extra_regs) {
+               for (er = x86_pmu.extra_regs; er->msr; er++) {
+                       er->extra_msr_access = check_msr(er->msr, 0x11UL);
+                       /* Disable LBR select mapping */
+                       if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
+                               x86_pmu.lbr_sel_map = NULL;
+               }
+       }
+
+       /* Support full width counters using alternative MSR range */
+       if (x86_pmu.intel_cap.full_width_write) {
+               x86_pmu.max_period = x86_pmu.cntval_mask;
+               x86_pmu.perfctr = MSR_IA32_PMC0;
+               pr_cont("full-width counters, ");
+       }
+
+       return 0;
+}
+
+/*
+ * HT bug: phase 2 init
+ * Called once we have valid topology information to check
+ * whether or not HT is enabled
+ * If HT is off, then we disable the workaround
+ */
+static __init int fixup_ht_bug(void)
+{
+       int cpu = smp_processor_id();
+       int w, c;
+       /*
+        * problem not present on this CPU model, nothing to do
+        */
+       if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
+               return 0;
+
+       w = cpumask_weight(topology_sibling_cpumask(cpu));
+       if (w > 1) {
+               pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
+               return 0;
+       }
+
+       if (lockup_detector_suspend() != 0) {
+               pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
+               return 0;
+       }
+
+       x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
+
+       x86_pmu.start_scheduling = NULL;
+       x86_pmu.commit_scheduling = NULL;
+       x86_pmu.stop_scheduling = NULL;
+
+       lockup_detector_resume();
+
+       get_online_cpus();
+
+       for_each_online_cpu(c) {
+               free_excl_cntrs(c);
+       }
+
+       put_online_cpus();
+       pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
+       return 0;
+}
+subsys_initcall(fixup_ht_bug)
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
new file mode 100644
index 0000000..93cb412
--- /dev/null
+++ b/arch/x86/events/intel/cqm.c
@@ -0,0 +1,1381 @@
+/*
+ * Intel Cache Quality-of-Service Monitoring (CQM) support.
+ *
+ * Based very, very heavily on work by Peter Zijlstra.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define MSR_IA32_PQR_ASSOC     0x0c8f
+#define MSR_IA32_QM_CTR                0x0c8e
+#define MSR_IA32_QM_EVTSEL     0x0c8d
+
+static u32 cqm_max_rmid = -1;
+static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid:              The cached Resource Monitoring ID
+ * @closid:            The cached Class Of Service ID
+ * @rmid_usecnt:       The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+       u32                     rmid;
+       u32                     closid;
+       int                     rmid_usecnt;
+};
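+
+/*
+ * For illustration only (this is not a helper the driver defines): the
+ * MSR update done in intel_cqm_event_start()/_stop() below boils down to
+ *
+ *     wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+ *
+ * i.e. the RMID goes into the low 32 bits (of which only bits 0-9 are
+ * used) and the CLOSID into the upper 32 bits of the MSR value.
+ */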
+
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+/*
+ * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
+ * Also protects event->hw.cqm_rmid
+ *
+ * Hold either for stability, both for modification of ->hw.cqm_rmid.
+ */
+static DEFINE_MUTEX(cache_mutex);
+static DEFINE_RAW_SPINLOCK(cache_lock);
+
+/*
+ * Groups of events that have the same target(s), one RMID per group.
+ */
+static LIST_HEAD(cache_groups);
+
+/*
+ * Mask of CPUs for reading CQM values. We only need one CPU per socket.
+ */
+static cpumask_t cqm_cpumask;
+
+#define RMID_VAL_ERROR         (1ULL << 63)
+#define RMID_VAL_UNAVAIL       (1ULL << 62)
+
+#define QOS_L3_OCCUP_EVENT_ID  (1 << 0)
+
+#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
+
+/*
+ * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
+ *
+ * This rmid is always free and is guaranteed to have an associated
+ * near-zero occupancy value, i.e. no cachelines are tagged with this
+ * RMID, once __intel_cqm_rmid_rotate() returns.
+ */
+static u32 intel_cqm_rotation_rmid;
+
+#define INVALID_RMID           (-1)
+
+/*
+ * Is @rmid valid for programming the hardware?
+ *
+ * rmid 0 is reserved by the hardware for all non-monitored tasks, which
+ * means that we should never come across an rmid with that value.
+ * Likewise, an rmid value of -1 is used to indicate "no rmid currently
+ * assigned" and is used as part of the rotation code.
+ */
+static inline bool __rmid_valid(u32 rmid)
+{
+       if (!rmid || rmid == INVALID_RMID)
+               return false;
+
+       return true;
+}
+
+static u64 __rmid_read(u32 rmid)
+{
+       u64 val;
+
+       /*
+        * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
+        * it just says that to increase confusion.
+        */
+       wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
+       rdmsrl(MSR_IA32_QM_CTR, val);
+
+       /*
+        * Aside from the ERROR and UNAVAIL bits, assume this thing returns
+        * the number of cachelines tagged with @rmid.
+        */
+       return val;
+}
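+
+/*
+ * Purely illustrative sketch (no such helper exists in this driver): the
+ * raw MSR_IA32_QM_CTR value is expressed in units of cqm_l3_scale bytes,
+ * so a byte-granular occupancy read could look roughly like this.
+ */
+static inline u64 __rmid_read_bytes(u32 rmid)
+{
+       u64 val = __rmid_read(rmid);
+
+       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+               return 0;
+
+       return val * cqm_l3_scale;
+}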
+
+enum rmid_recycle_state {
+       RMID_YOUNG = 0,
+       RMID_AVAILABLE,
+       RMID_DIRTY,
+};
+
+struct cqm_rmid_entry {
+       u32 rmid;
+       enum rmid_recycle_state state;
+       struct list_head list;
+       unsigned long queue_time;
+};
+
+/*
+ * cqm_rmid_free_lru - A least recently used list of RMIDs.
+ *
+ * Oldest entry at the head, newest (most recently used) entry at the
+ * tail. This list is never traversed, it's only used to keep track of
+ * the lru order. That is, we only pick entries off the head or insert
+ * them on the tail.
+ *
+ * All entries on the list are 'free', and their RMIDs are not currently
+ * in use. To mark an RMID as in use, remove its entry from the lru
+ * list.
+ *
+ *
+ * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
+ *
+ * This list contains RMIDs that no one is currently using but that
+ * may have a non-zero occupancy value associated with them. The
+ * rotation worker moves RMIDs from the limbo list to the free list once
+ * the occupancy value drops below __intel_cqm_threshold.
+ *
+ * Both lists are protected by cache_mutex.
+ */
+static LIST_HEAD(cqm_rmid_free_lru);
+static LIST_HEAD(cqm_rmid_limbo_lru);
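+
+/*
+ * Rough lifecycle sketch (informational only, see the helpers below):
+ *
+ *     cqm_rmid_free_lru  --__get_rmid()-->  in use by a cache group
+ *     in use             --__put_rmid()-->  cqm_rmid_limbo_lru
+ *     cqm_rmid_limbo_lru --rotation worker-> cqm_rmid_free_lru
+ *                          (once its occupancy drops below the threshold)
+ */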
+
+/*
+ * We use a simple array of pointers so that we can look up a struct
+ * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
+ * and __put_rmid() from having to worry about dealing with struct
+ * cqm_rmid_entry - they just deal with rmids, i.e. integers.
+ *
+ * Once this array is initialized it is read-only. No locks are required
+ * to access it.
+ *
+ * All entries for all RMIDs can be looked up in this array at all
+ * times.
+ */
+static struct cqm_rmid_entry **cqm_rmid_ptrs;
+
+static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
+{
+       struct cqm_rmid_entry *entry;
+
+       entry = cqm_rmid_ptrs[rmid];
+       WARN_ON(entry->rmid != rmid);
+
+       return entry;
+}
+
+/*
+ * Returns INVALID_RMID on failure.
+ *
+ * We expect to be called with cache_mutex held.
+ */
+static u32 __get_rmid(void)
+{
+       struct cqm_rmid_entry *entry;
+
+       lockdep_assert_held(&cache_mutex);
+
+       if (list_empty(&cqm_rmid_free_lru))
+               return INVALID_RMID;
+
+       entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
+       list_del(&entry->list);
+
+       return entry->rmid;
+}
+
+static void __put_rmid(u32 rmid)
+{
+       struct cqm_rmid_entry *entry;
+
+       lockdep_assert_held(&cache_mutex);
+
+       WARN_ON(!__rmid_valid(rmid));
+       entry = __rmid_entry(rmid);
+
+       entry->queue_time = jiffies;
+       entry->state = RMID_YOUNG;
+
+       list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
+}
+
+static int intel_cqm_setup_rmid_cache(void)
+{
+       struct cqm_rmid_entry *entry;
+       unsigned int nr_rmids;
+       int r = 0;
+
+       nr_rmids = cqm_max_rmid + 1;
+       cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+                               nr_rmids, GFP_KERNEL);
+       if (!cqm_rmid_ptrs)
+               return -ENOMEM;
+
+       for (; r <= cqm_max_rmid; r++) {
+               struct cqm_rmid_entry *entry;
+
+               entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+               if (!entry)
+                       goto fail;
+
+               INIT_LIST_HEAD(&entry->list);
+               entry->rmid = r;
+               cqm_rmid_ptrs[r] = entry;
+
+               list_add_tail(&entry->list, &cqm_rmid_free_lru);
+       }
+
+       /*
+        * RMID 0 is special and is always allocated. It's used for all
+        * tasks that are not monitored.
+        */
+       entry = __rmid_entry(0);
+       list_del(&entry->list);
+
+       mutex_lock(&cache_mutex);
+       intel_cqm_rotation_rmid = __get_rmid();
+       mutex_unlock(&cache_mutex);
+
+       return 0;
+fail:
+       while (r--)
+               kfree(cqm_rmid_ptrs[r]);
+
+       kfree(cqm_rmid_ptrs);
+       return -ENOMEM;
+}
+
+/*
+ * Determine if @a and @b measure the same set of tasks.
+ *
+ * If @a and @b measure the same set of tasks then we want to share a
+ * single RMID.
+ */
+static bool __match_event(struct perf_event *a, struct perf_event *b)
+{
+       /* Per-cpu and task events don't mix */
+       if ((a->attach_state & PERF_ATTACH_TASK) !=
+           (b->attach_state & PERF_ATTACH_TASK))
+               return false;
+
+#ifdef CONFIG_CGROUP_PERF
+       if (a->cgrp != b->cgrp)
+               return false;
+#endif
+
+       /* If not task event, we're machine wide */
+       if (!(b->attach_state & PERF_ATTACH_TASK))
+               return true;
+
+       /*
+        * Events that target the same task are placed into the same cache group.
+        */
+       if (a->hw.target == b->hw.target)
+               return true;
+
+       /*
+        * Are we an inherited event?
+        */
+       if (b->parent == a)
+               return true;
+
+       return false;
+}
+
+#ifdef CONFIG_CGROUP_PERF
+static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
+{
+       if (event->attach_state & PERF_ATTACH_TASK)
+               return perf_cgroup_from_task(event->hw.target, event->ctx);
+
+       return event->cgrp;
+}
+#endif
+
+/*
+ * Determine if @a's tasks intersect with @b's tasks
+ *
+ * There are combinations of events that we explicitly prohibit,
+ *
+ *                     PROHIBITS
+ *     system-wide    ->        cgroup and task
+ *     cgroup         ->        system-wide
+ *                    ->        task in cgroup
+ *     task           ->        system-wide
+ *                    ->        task in cgroup
+ *
+ * Call this function before allocating an RMID.
+ */
+static bool __conflict_event(struct perf_event *a, struct perf_event *b)
+{
+#ifdef CONFIG_CGROUP_PERF
+       /*
+        * We can have any number of cgroups but only one system-wide
+        * event at a time.
+        */
+       if (a->cgrp && b->cgrp) {
+               struct perf_cgroup *ac = a->cgrp;
+               struct perf_cgroup *bc = b->cgrp;
+
+               /*
+                * This condition should have been caught in
+                * __match_event() and we should be sharing an RMID.
+                */
+               WARN_ON_ONCE(ac == bc);
+
+               if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+                   cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+                       return true;
+
+               return false;
+       }
+
+       if (a->cgrp || b->cgrp) {
+               struct perf_cgroup *ac, *bc;
+
+               /*
+                * cgroup and system-wide events are mutually exclusive
+                */
+               if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
+                   (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
+                       return true;
+
+               /*
+                * Ensure neither event is part of the other's cgroup
+                */
+               ac = event_to_cgroup(a);
+               bc = event_to_cgroup(b);
+               if (ac == bc)
+                       return true;
+
+               /*
+                * Must have cgroup and non-intersecting task events.
+                */
+               if (!ac || !bc)
+                       return false;
+
+               /*
+                * We have cgroup and task events, and the task belongs
+                * to a cgroup. Check for overlap.
+                */
+               if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+                   cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+                       return true;
+
+               return false;
+       }
+#endif
+       /*
+        * If one of them is not a task, same story as above with cgroups.
+        */
+       if (!(a->attach_state & PERF_ATTACH_TASK) ||
+           !(b->attach_state & PERF_ATTACH_TASK))
+               return true;
+
+       /*
+        * Must be non-overlapping.
+        */
+       return false;
+}
+
+struct rmid_read {
+       u32 rmid;
+       atomic64_t value;
+};
+
+static void __intel_cqm_event_count(void *info);
+
+/*
+ * Exchange the RMID of a group of events.
+ */
+static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
+{
+       struct perf_event *event;
+       struct list_head *head = &group->hw.cqm_group_entry;
+       u32 old_rmid = group->hw.cqm_rmid;
+
+       lockdep_assert_held(&cache_mutex);
+
+       /*
+        * If our RMID is being deallocated, perform a read now.
+        */
+       if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
+               struct rmid_read rr = {
+                       .value = ATOMIC64_INIT(0),
+                       .rmid = old_rmid,
+               };
+
+               on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
+                                &rr, 1);
+               local64_set(&group->count, atomic64_read(&rr.value));
+       }
+
+       raw_spin_lock_irq(&cache_lock);
+
+       group->hw.cqm_rmid = rmid;
+       list_for_each_entry(event, head, hw.cqm_group_entry)
+               event->hw.cqm_rmid = rmid;
+
+       raw_spin_unlock_irq(&cache_lock);
+
+       return old_rmid;
+}
+
+/*
+ * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
+ * cachelines are still tagged with RMIDs in limbo, we progressively
+ * increment the threshold until we find an RMID in limbo with <=
+ * __intel_cqm_threshold lines tagged. This is designed to mitigate the
+ * problem where cachelines tagged with an RMID are not steadily being
+ * evicted.
+ *
+ * On successful rotations we decrease the threshold back towards zero.
+ *
+ * __intel_cqm_max_threshold provides an upper bound on the threshold,
+ * and is measured in bytes because it's exposed to userland.
+ */
+static unsigned int __intel_cqm_threshold;
+static unsigned int __intel_cqm_max_threshold;
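+
+/*
+ * Worked example (the numbers are purely illustrative): if cqm_l3_scale
+ * reports 64 bytes per counter unit, a userspace max_recycle_threshold of
+ * 65536 bytes maps to a limit of 65536 / 64 = 1024 occupancy units, which
+ * is the scale __intel_cqm_threshold is compared against.
+ */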
+
+/*
+ * Test whether an RMID's occupancy is at or below the threshold on this cpu.
+ */
+static void intel_cqm_stable(void *arg)
+{
+       struct cqm_rmid_entry *entry;
+
+       list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+               if (entry->state != RMID_AVAILABLE)
+                       break;
+
+               if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
+                       entry->state = RMID_DIRTY;
+       }
+}
+
+/*
+ * If we have group events waiting for an RMID that don't conflict with
+ * events already running, assign @rmid.
+ */
+static bool intel_cqm_sched_in_event(u32 rmid)
+{
+       struct perf_event *leader, *event;
+
+       lockdep_assert_held(&cache_mutex);
+
+       leader = list_first_entry(&cache_groups, struct perf_event,
+                                 hw.cqm_groups_entry);
+       event = leader;
+
+       list_for_each_entry_continue(event, &cache_groups,
+                                    hw.cqm_groups_entry) {
+               if (__rmid_valid(event->hw.cqm_rmid))
+                       continue;
+
+               if (__conflict_event(event, leader))
+                       continue;
+
+               intel_cqm_xchg_rmid(event, rmid);
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Initially use this constant for both the limbo queue time and the
+ * rotation timer interval, pmu::hrtimer_interval_ms.
+ *
+ * They don't need to be the same, but the two are related since if you
+ * rotate faster than you recycle RMIDs, you may run out of available
+ * RMIDs.
+ */
+#define RMID_DEFAULT_QUEUE_TIME 250    /* ms */
+
+static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
+
+/*
+ * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
+ * @available: number of freeable RMIDs on the limbo list
+ *
+ * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
+ * cachelines are tagged with those RMIDs. After this we can reuse them
+ * and know that the current set of active RMIDs is stable.
+ *
+ * Return %true or %false depending on whether stabilization needs to be
+ * reattempted.
+ *
+ * If we return %true then @available is updated to indicate the
+ * number of RMIDs on the limbo list that have been queued for the
+ * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
+ * are above __intel_cqm_threshold.
+ */
+static bool intel_cqm_rmid_stabilize(unsigned int *available)
+{
+       struct cqm_rmid_entry *entry, *tmp;
+
+       lockdep_assert_held(&cache_mutex);
+
+       *available = 0;
+       list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+               unsigned long min_queue_time;
+               unsigned long now = jiffies;
+
+               /*
+                * We hold RMIDs placed into limbo for a minimum queue
+                * time. Before the minimum queue time has elapsed we do
+                * not recycle RMIDs.
+                *
+                * The reasoning is that until a sufficient time has
+                * passed since we stopped using an RMID, any RMID
+                * placed onto the limbo list will likely still have
+                * data tagged in the cache, which means we'll probably
+                * fail to recycle it anyway.
+                *
+                * We can save ourselves an expensive IPI by skipping
+                * any RMIDs that have not been queued for the minimum
+                * time.
+                */
+               min_queue_time = entry->queue_time +
+                       msecs_to_jiffies(__rmid_queue_time_ms);
+
+               if (time_after(min_queue_time, now))
+                       break;
+
+               entry->state = RMID_AVAILABLE;
+               (*available)++;
+       }
+
+       /*
+        * Fast return if none of the RMIDs on the limbo list have been
+        * sitting on the queue for the minimum queue time.
+        */
+       if (!*available)
+               return false;
+
+       /*
+        * Test whether an RMID is free for each package.
+        */
+       on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
+
+       list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
+               /*
+                * Exhausted all RMIDs that have waited min queue time.
+                */
+               if (entry->state == RMID_YOUNG)
+                       break;
+
+               if (entry->state == RMID_DIRTY)
+                       continue;
+
+               list_del(&entry->list); /* remove from limbo */
+
+               /*
+                * The rotation RMID gets priority if it's
+                * currently invalid, in which case we skip adding
+                * the RMID to the free lru.
+                */
+               if (!__rmid_valid(intel_cqm_rotation_rmid)) {
+                       intel_cqm_rotation_rmid = entry->rmid;
+                       continue;
+               }
+
+               /*
+                * If we have groups waiting for RMIDs, hand
+                * them one now provided they don't conflict.
+                */
+               if (intel_cqm_sched_in_event(entry->rmid))
+                       continue;
+
+               /*
+                * Otherwise place it onto the free list.
+                */
+               list_add_tail(&entry->list, &cqm_rmid_free_lru);
+       }
+
+
+       return __rmid_valid(intel_cqm_rotation_rmid);
+}
+
+/*
+ * Pick a victim group and move it to the tail of the group list.
+ * @next: The first group without an RMID
+ */
+static void __intel_cqm_pick_and_rotate(struct perf_event *next)
+{
+       struct perf_event *rotor;
+       u32 rmid;
+
+       lockdep_assert_held(&cache_mutex);
+
+       rotor = list_first_entry(&cache_groups, struct perf_event,
+                                hw.cqm_groups_entry);
+
+       /*
+        * The group at the front of the list should always have a valid
+        * RMID. If it doesn't then no groups have RMIDs assigned and we
+        * don't need to rotate the list.
+        */
+       if (next == rotor)
+               return;
+
+       rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
+       __put_rmid(rmid);
+
+       list_rotate_left(&cache_groups);
+}
+
+/*
+ * Deallocate the RMIDs from any events that conflict with @event, and
+ * place them on the back of the group list.
+ */
+static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
+{
+       struct perf_event *group, *g;
+       u32 rmid;
+
+       lockdep_assert_held(&cache_mutex);
+
+       list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
+               if (group == event)
+                       continue;
+
+               rmid = group->hw.cqm_rmid;
+
+               /*
+                * Skip events that don't have a valid RMID.
+                */
+               if (!__rmid_valid(rmid))
+                       continue;
+
+               /*
+                * No conflict? No problem! Leave the event alone.
+                */
+               if (!__conflict_event(group, event))
+                       continue;
+
+               intel_cqm_xchg_rmid(group, INVALID_RMID);
+               __put_rmid(rmid);
+       }
+}
+
+/*
+ * Attempt to rotate the groups and assign new RMIDs.
+ *
+ * We rotate for two reasons,
+ *   1. To handle the scheduling of conflicting events
+ *   2. To recycle RMIDs
+ *
+ * Rotating RMIDs is complicated because the hardware doesn't give us
+ * any clues.
+ *
+ * There are problems with the hardware interface; when you change the
+ * task:RMID map, cachelines retain their 'old' tags, giving a skewed
+ * picture. In order to work around this, we must always keep one free
+ * RMID - intel_cqm_rotation_rmid.
+ *
+ * Rotation works by taking away an RMID from a group (the old RMID),
+ * and assigning the free RMID to another group (the new RMID). We must
+ * then wait for the old RMID to not be used (no cachelines tagged).
+ * This ensures that all cachelines are tagged with 'active' RMIDs. At
+ * this point we can start reading values for the new RMID and treat the
+ * old RMID as the free RMID for the next rotation.
+ *
+ * Return %true or %false depending on whether we did any rotating.
+ */
+static bool __intel_cqm_rmid_rotate(void)
+{
+       struct perf_event *group, *start = NULL;
+       unsigned int threshold_limit;
+       unsigned int nr_needed = 0;
+       unsigned int nr_available;
+       bool rotated = false;
+
+       mutex_lock(&cache_mutex);
+
+again:
+       /*
+        * Fast path through this function if there are no groups and no
+        * RMIDs that need cleaning.
+        */
+       if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
+               goto out;
+
+       list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
+               if (!__rmid_valid(group->hw.cqm_rmid)) {
+                       if (!start)
+                               start = group;
+                       nr_needed++;
+               }
+       }
+
+       /*
+        * We have some event groups, but they all have RMIDs assigned
+        * and no RMIDs need cleaning.
+        */
+       if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
+               goto out;
+
+       if (!nr_needed)
+               goto stabilize;
+
+       /*
+        * We have more event groups without RMIDs than available RMIDs,
+        * or we have event groups that conflict with the ones currently
+        * scheduled.
+        *
+        * We force deallocate the rmid of the group at the head of
+        * cache_groups. The first event group without an RMID then gets
+        * assigned intel_cqm_rotation_rmid. This ensures we always make
+        * forward progress.
+        *
+        * Rotate the cache_groups list so the previous head is now the
+        * tail.
+        */
+       __intel_cqm_pick_and_rotate(start);
+
+       /*
+        * If the rotation is going to succeed, reduce the threshold so
+        * that we don't needlessly reuse dirty RMIDs.
+        */
+       if (__rmid_valid(intel_cqm_rotation_rmid)) {
+               intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
+               intel_cqm_rotation_rmid = __get_rmid();
+
+               intel_cqm_sched_out_conflicting_events(start);
+
+               if (__intel_cqm_threshold)
+                       __intel_cqm_threshold--;
+       }
+
+       rotated = true;
+
+stabilize:
+       /*
+        * We now need to stabilize the RMID we freed above (if any) to
+        * ensure that the next time we rotate we have an RMID with zero
+        * occupancy value.
+        *
+        * Alternatively, if we didn't need to perform any rotation,
+        * we'll have a bunch of RMIDs in limbo that need stabilizing.
+        */
+       threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
+
+       while (intel_cqm_rmid_stabilize(&nr_available) &&
+              __intel_cqm_threshold < threshold_limit) {
+               unsigned int steal_limit;
+
+               /*
+                * Don't spin if nobody is actively waiting for an RMID,
+                * the rotation worker will be kicked as soon as an
+                * event needs an RMID anyway.
+                */
+               if (!nr_needed)
+                       break;
+
+               /* Allow max 25% of RMIDs to be in limbo. */
+               steal_limit = (cqm_max_rmid + 1) / 4;
+
+               /*
+                * We failed to stabilize any RMIDs so our rotation
+                * logic is now stuck. In order to make forward progress
+                * we have a few options:
+                *
+                *   1. rotate ("steal") another RMID
+                *   2. increase the threshold
+                *   3. do nothing
+                *
+                * We do both of 1. and 2. until we hit the steal limit.
+                *
+                * The steal limit prevents all RMIDs ending up on the
+                * limbo list. This can happen if every RMID has a
+                * non-zero occupancy above threshold_limit, and the
+                * occupancy values aren't dropping fast enough.
+                *
+                * Note that there is prioritisation at work here - we'd
+                * rather increase the number of RMIDs on the limbo list
+                * than increase the threshold, because increasing the
+                * threshold skews the event data (because we reuse
+                * dirty RMIDs) - threshold bumps are a last resort.
+                */
+               if (nr_available < steal_limit)
+                       goto again;
+
+               __intel_cqm_threshold++;
+       }
+
+out:
+       mutex_unlock(&cache_mutex);
+       return rotated;
+}
+
+static void intel_cqm_rmid_rotate(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
+
+static struct pmu intel_cqm_pmu;
+
+static void intel_cqm_rmid_rotate(struct work_struct *work)
+{
+       unsigned long delay;
+
+       __intel_cqm_rmid_rotate();
+
+       delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
+       schedule_delayed_work(&intel_cqm_rmid_work, delay);
+}
+
+/*
+ * Find a group and setup RMID.
+ *
+ * If we're part of a group, we use the group's RMID.
+ */
+static void intel_cqm_setup_event(struct perf_event *event,
+                                 struct perf_event **group)
+{
+       struct perf_event *iter;
+       bool conflict = false;
+       u32 rmid;
+
+       list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+               rmid = iter->hw.cqm_rmid;
+
+               if (__match_event(iter, event)) {
+                       /* All tasks in a group share an RMID */
+                       event->hw.cqm_rmid = rmid;
+                       *group = iter;
+                       return;
+               }
+
+               /*
+                * We only care about conflicts for events that are
+                * actually scheduled in (and hence have a valid RMID).
+                */
+               if (__conflict_event(iter, event) && __rmid_valid(rmid))
+                       conflict = true;
+       }
+
+       if (conflict)
+               rmid = INVALID_RMID;
+       else
+               rmid = __get_rmid();
+
+       event->hw.cqm_rmid = rmid;
+}
+
+static void intel_cqm_event_read(struct perf_event *event)
+{
+       unsigned long flags;
+       u32 rmid;
+       u64 val;
+
+       /*
+        * Task events are handled by intel_cqm_event_count().
+        */
+       if (event->cpu == -1)
+               return;
+
+       raw_spin_lock_irqsave(&cache_lock, flags);
+       rmid = event->hw.cqm_rmid;
+
+       if (!__rmid_valid(rmid))
+               goto out;
+
+       val = __rmid_read(rmid);
+
+       /*
+        * Ignore this reading on error states and do not update the value.
+        */
+       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+               goto out;
+
+       local64_set(&event->count, val);
+out:
+       raw_spin_unlock_irqrestore(&cache_lock, flags);
+}
+
+static void __intel_cqm_event_count(void *info)
+{
+       struct rmid_read *rr = info;
+       u64 val;
+
+       val = __rmid_read(rr->rmid);
+
+       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+               return;
+
+       atomic64_add(val, &rr->value);
+}
+
+static inline bool cqm_group_leader(struct perf_event *event)
+{
+       return !list_empty(&event->hw.cqm_groups_entry);
+}
+
+static u64 intel_cqm_event_count(struct perf_event *event)
+{
+       unsigned long flags;
+       struct rmid_read rr = {
+               .value = ATOMIC64_INIT(0),
+       };
+
+       /*
+        * We only need to worry about task events. System-wide events
+        * are handled like usual, i.e. entirely with
+        * intel_cqm_event_read().
+        */
+       if (event->cpu != -1)
+               return __perf_event_count(event);
+
+       /*
+        * Only the group leader gets to report values. This stops us
+        * reporting duplicate values to userspace, and gives us a clear
+        * rule for which task gets to report the values.
+        *
+        * Note that it is impossible to attribute these values to
+        * specific packages - we forfeit that ability when we create
+        * task events.
+        */
+       if (!cqm_group_leader(event))
+               return 0;
+
+       /*
+        * Getting up-to-date values requires an SMP IPI which is not
+        * possible if we're being called in interrupt context. Return
+        * the cached values instead.
+        */
+       if (unlikely(in_interrupt()))
+               goto out;
+
+       /*
+        * Notice that we don't perform the reading of an RMID
+        * atomically, because we can't hold a spin lock across the
+        * IPIs.
+        *
+        * Speculatively perform the read, since @event might be
+        * assigned a different (possibly invalid) RMID while we're
+        * busy performing the IPI calls. It's therefore necessary to
+        * check @event's RMID afterwards, and if it has changed,
+        * discard the result of the read.
+        */
+       rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
+
+       if (!__rmid_valid(rr.rmid))
+               goto out;
+
+       on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+
+       raw_spin_lock_irqsave(&cache_lock, flags);
+       if (event->hw.cqm_rmid == rr.rmid)
+               local64_set(&event->count, atomic64_read(&rr.value));
+       raw_spin_unlock_irqrestore(&cache_lock, flags);
+out:
+       return __perf_event_count(event);
+}
+
+static void intel_cqm_event_start(struct perf_event *event, int mode)
+{
+       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+       u32 rmid = event->hw.cqm_rmid;
+
+       if (!(event->hw.cqm_state & PERF_HES_STOPPED))
+               return;
+
+       event->hw.cqm_state &= ~PERF_HES_STOPPED;
+
+       if (state->rmid_usecnt++) {
+               if (!WARN_ON_ONCE(state->rmid != rmid))
+                       return;
+       } else {
+               WARN_ON_ONCE(state->rmid);
+       }
+
+       state->rmid = rmid;
+       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+}
+
+static void intel_cqm_event_stop(struct perf_event *event, int mode)
+{
+       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+       if (event->hw.cqm_state & PERF_HES_STOPPED)
+               return;
+
+       event->hw.cqm_state |= PERF_HES_STOPPED;
+
+       intel_cqm_event_read(event);
+
+       if (!--state->rmid_usecnt) {
+               state->rmid = 0;
+               wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
+       } else {
+               WARN_ON_ONCE(!state->rmid);
+       }
+}
+
+static int intel_cqm_event_add(struct perf_event *event, int mode)
+{
+       unsigned long flags;
+       u32 rmid;
+
+       raw_spin_lock_irqsave(&cache_lock, flags);
+
+       event->hw.cqm_state = PERF_HES_STOPPED;
+       rmid = event->hw.cqm_rmid;
+
+       if (__rmid_valid(rmid) && (mode & PERF_EF_START))
+               intel_cqm_event_start(event, mode);
+
+       raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+       return 0;
+}
+
+static void intel_cqm_event_destroy(struct perf_event *event)
+{
+       struct perf_event *group_other = NULL;
+
+       mutex_lock(&cache_mutex);
+
+       /*
+        * If there's another event in this group...
+        */
+       if (!list_empty(&event->hw.cqm_group_entry)) {
+               group_other = list_first_entry(&event->hw.cqm_group_entry,
+                                              struct perf_event,
+                                              hw.cqm_group_entry);
+               list_del(&event->hw.cqm_group_entry);
+       }
+
+       /*
+        * And we're the group leader..
+        */
+       if (cqm_group_leader(event)) {
+               /*
+                * If there was a group_other, make that leader, otherwise
+                * destroy the group and return the RMID.
+                */
+               if (group_other) {
+                       list_replace(&event->hw.cqm_groups_entry,
+                                    &group_other->hw.cqm_groups_entry);
+               } else {
+                       u32 rmid = event->hw.cqm_rmid;
+
+                       if (__rmid_valid(rmid))
+                               __put_rmid(rmid);
+                       list_del(&event->hw.cqm_groups_entry);
+               }
+       }
+
+       mutex_unlock(&cache_mutex);
+}
+
+static int intel_cqm_event_init(struct perf_event *event)
+{
+       struct perf_event *group = NULL;
+       bool rotate = false;
+
+       if (event->attr.type != intel_cqm_pmu.type)
+               return -ENOENT;
+
+       if (event->attr.config & ~QOS_EVENT_MASK)
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       INIT_LIST_HEAD(&event->hw.cqm_group_entry);
+       INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
+
+       event->destroy = intel_cqm_event_destroy;
+
+       mutex_lock(&cache_mutex);
+
+       /* Will also set rmid */
+       intel_cqm_setup_event(event, &group);
+
+       if (group) {
+               list_add_tail(&event->hw.cqm_group_entry,
+                             &group->hw.cqm_group_entry);
+       } else {
+               list_add_tail(&event->hw.cqm_groups_entry,
+                             &cache_groups);
+
+               /*
+                * All RMIDs are either in use or have recently been
+                * used. Kick the rotation worker to clean/free some.
+                *
+                * We only do this for the group leader, rather than for
+                * every event in a group to save on needless work.
+                */
+               if (!__rmid_valid(event->hw.cqm_rmid))
+                       rotate = true;
+       }
+
+       mutex_unlock(&cache_mutex);
+
+       if (rotate)
+               schedule_delayed_work(&intel_cqm_rmid_work, 0);
+
+       return 0;
+}
+
+EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
+EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
+EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
+EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
+EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
+
+static struct attribute *intel_cqm_events_attr[] = {
+       EVENT_PTR(intel_cqm_llc),
+       EVENT_PTR(intel_cqm_llc_pkg),
+       EVENT_PTR(intel_cqm_llc_unit),
+       EVENT_PTR(intel_cqm_llc_scale),
+       EVENT_PTR(intel_cqm_llc_snapshot),
+       NULL,
+};
+
+static struct attribute_group intel_cqm_events_group = {
+       .name = "events",
+       .attrs = intel_cqm_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+static struct attribute *intel_cqm_formats_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+static struct attribute_group intel_cqm_format_group = {
+       .name = "format",
+       .attrs = intel_cqm_formats_attr,
+};
+
+static ssize_t
+max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
+                          char *page)
+{
+       ssize_t rv;
+
+       mutex_lock(&cache_mutex);
+       rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
+       mutex_unlock(&cache_mutex);
+
+       return rv;
+}
+
+static ssize_t
+max_recycle_threshold_store(struct device *dev,
+                           struct device_attribute *attr,
+                           const char *buf, size_t count)
+{
+       unsigned int bytes, cachelines;
+       int ret;
+
+       ret = kstrtouint(buf, 0, &bytes);
+       if (ret)
+               return ret;
+
+       mutex_lock(&cache_mutex);
+
+       __intel_cqm_max_threshold = bytes;
+       cachelines = bytes / cqm_l3_scale;
+
+       /*
+        * The new maximum takes effect immediately.
+        */
+       if (__intel_cqm_threshold > cachelines)
+               __intel_cqm_threshold = cachelines;
+
+       mutex_unlock(&cache_mutex);
+
+       return count;
+}
+
+static DEVICE_ATTR_RW(max_recycle_threshold);
+
+static struct attribute *intel_cqm_attrs[] = {
+       &dev_attr_max_recycle_threshold.attr,
+       NULL,
+};
+
+static const struct attribute_group intel_cqm_group = {
+       .attrs = intel_cqm_attrs,
+};
+
+static const struct attribute_group *intel_cqm_attr_groups[] = {
+       &intel_cqm_events_group,
+       &intel_cqm_format_group,
+       &intel_cqm_group,
+       NULL,
+};
+
+static struct pmu intel_cqm_pmu = {
+       .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
+       .attr_groups         = intel_cqm_attr_groups,
+       .task_ctx_nr         = perf_sw_context,
+       .event_init          = intel_cqm_event_init,
+       .add                 = intel_cqm_event_add,
+       .del                 = intel_cqm_event_stop,
+       .start               = intel_cqm_event_start,
+       .stop                = intel_cqm_event_stop,
+       .read                = intel_cqm_event_read,
+       .count               = intel_cqm_event_count,
+};
+
+static inline void cqm_pick_event_reader(int cpu)
+{
+       int reader;
+
+       /* First online cpu in package becomes the reader */
+       reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
+       if (reader >= nr_cpu_ids)
+               cpumask_set_cpu(cpu, &cqm_cpumask);
+}
+
+static void intel_cqm_cpu_starting(unsigned int cpu)
+{
+       struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
+       struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+       state->rmid = 0;
+       state->closid = 0;
+       state->rmid_usecnt = 0;
+
+       WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
+       WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
+}
+
+static void intel_cqm_cpu_exit(unsigned int cpu)
+{
+       int target;
+
+       /* Is @cpu the current cqm reader for this package ? */
+       if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
+               return;
+
+       /* Find another online reader in this package */
+       target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+       if (target < nr_cpu_ids)
+               cpumask_set_cpu(target, &cqm_cpumask);
+}
+
+static int intel_cqm_cpu_notifier(struct notifier_block *nb,
+                                 unsigned long action, void *hcpu)
+{
+       unsigned int cpu  = (unsigned long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_PREPARE:
+               intel_cqm_cpu_exit(cpu);
+               break;
+       case CPU_STARTING:
+               intel_cqm_cpu_starting(cpu);
+               cqm_pick_event_reader(cpu);
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static const struct x86_cpu_id intel_cqm_match[] = {
+       { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
+       {}
+};
+
+static int __init intel_cqm_init(void)
+{
+       char *str, scale[20];
+       int i, cpu, ret;
+
+       if (!x86_match_cpu(intel_cqm_match))
+               return -ENODEV;
+
+       cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
+
+       /*
+        * It's possible that not all resources support the same number
+        * of RMIDs. Instead of making scheduling much more complicated
+        * (where we have to match a task's RMID to a cpu that supports
+        * that many RMIDs) just find the minimum number of RMIDs
+        * supported across all cpus.
+        *
+        * Also, check that the scales match on all cpus.
+        */
+       cpu_notifier_register_begin();
+
+       for_each_online_cpu(cpu) {
+               struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+               if (c->x86_cache_max_rmid < cqm_max_rmid)
+                       cqm_max_rmid = c->x86_cache_max_rmid;
+
+               if (c->x86_cache_occ_scale != cqm_l3_scale) {
+                       pr_err("Multiple LLC scale values, disabling\n");
+                       ret = -EINVAL;
+                       goto out;
+               }
+       }
+
+       /*
+        * A reasonable upper limit on the max threshold is the number
+        * of lines tagged per RMID if all RMIDs have the same number of
+        * lines tagged in the LLC.
+        *
+        * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
+        */
+       __intel_cqm_max_threshold =
+               boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
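+       /*
+        * Worked example for the figure above (values are illustrative):
+        * x86_cache_size is in KB, so a 35MB LLC gives 35840 * 1024 =
+        * 36700160 bytes; with 56 RMIDs that is 36700160 / 56 = 655360
+        * bytes per RMID, i.e. ~1.8% of the LLC.
+        */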
+
+       snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
+       str = kstrdup(scale, GFP_KERNEL);
+       if (!str) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       event_attr_intel_cqm_llc_scale.event_str = str;
+
+       ret = intel_cqm_setup_rmid_cache();
+       if (ret)
+               goto out;
+
+       for_each_online_cpu(i) {
+               intel_cqm_cpu_starting(i);
+               cqm_pick_event_reader(i);
+       }
+
+       __perf_cpu_notifier(intel_cqm_cpu_notifier);
+
+       ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
+       if (ret)
+               pr_err("Intel CQM perf registration failed: %d\n", ret);
+       else
+               pr_info("Intel CQM monitoring enabled\n");
+
+out:
+       cpu_notifier_register_done();
+
+       return ret;
+}
+device_initcall(intel_cqm_init);
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
new file mode 100644
index 0000000..7946c42
--- /dev/null
+++ b/arch/x86/events/intel/cstate.c
@@ -0,0 +1,694 @@
+/*
+ * perf_event_intel_cstate.c: support cstate residency counters
+ *
+ * Copyright (C) 2015, Intel Corp.
+ * Author: Kan Liang (kan.liang@intel.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ */
+
+/*
+ * This file exports cstate-related free-running (read-only) counters
+ * for perf. These counters may be used simultaneously by other tools,
+ * such as turbostat. However, it still makes sense to implement them
+ * in perf, because we can conveniently collect them together with
+ * other events and use them from tools without special MSR access
+ * code.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it is not supported by the hardware.
+ *
+ * According to counters' scope and category, two PMUs are registered
+ * with the perf_event core subsystem.
+ *  - 'cstate_core': The counter is available for each physical core.
+ *    The counters include CORE_C*_RESIDENCY.
+ *  - 'cstate_pkg': The counter is available for each physical package.
+ *    The counters include PKG_C*_RESIDENCY.
+ *
+ * All of these counters are specified in the Intel® 64 and IA-32
+ * Architectures Software Developer's Manual Vol3b.
+ *
+ * Model specific counters:
+ *     MSR_CORE_C1_RES: CORE C1 Residency Counter
+ *                      perf code: 0x00
+ *                      Available model: SLM,AMT
+ *                      Scope: Core (each processor core has a MSR)
+ *     MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
+ *                            perf code: 0x01
+ *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Core
+ *     MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
+ *                            perf code: 0x02
+ *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Core
+ *     MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
+ *                            perf code: 0x03
+ *                            Available model: SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Core
+ *     MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
+ *                            perf code: 0x00
+ *                            Available model: SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
+ *                            perf code: 0x01
+ *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
+ *                            perf code: 0x02
+ *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
+ *                            perf code: 0x03
+ *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
+ *                            perf code: 0x04
+ *                            Available model: HSW ULT only
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
+ *                            perf code: 0x05
+ *                            Available model: HSW ULT only
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
+ *                            perf code: 0x06
+ *                            Available model: HSW ULT only
+ *                            Scope: Package (physical package)
+ *
+ */
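+
+/*
+ * Usage illustration (not part of the driver itself): once the PMUs below
+ * are registered, the counters are visible to tooling as ordinary perf
+ * events, e.g.
+ *
+ *     perf stat -e cstate_core/c6-residency/ -e cstate_pkg/c2-residency/ -a sleep 1
+ *
+ * provided the running CPU model exposes the corresponding events.
+ */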
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)                \
+static ssize_t __cstate_##_var##_show(struct kobject *kobj,    \
+                               struct kobj_attribute *attr,    \
+                               char *page)                     \
+{                                                              \
+       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
+       return sprintf(page, _format "\n");                     \
+}                                                              \
+static struct kobj_attribute format_attr_##_var =              \
+       __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buf);
+
+struct perf_cstate_msr {
+       u64     msr;
+       struct  perf_pmu_events_attr *attr;
+       bool    (*test)(int idx);
+};
+
+
+/* cstate_core PMU */
+
+static struct pmu cstate_core_pmu;
+static bool has_cstate_core;
+
+enum perf_cstate_core_id {
+       /*
+        * cstate_core events
+        */
+       PERF_CSTATE_CORE_C1_RES = 0,
+       PERF_CSTATE_CORE_C3_RES,
+       PERF_CSTATE_CORE_C6_RES,
+       PERF_CSTATE_CORE_C7_RES,
+
+       PERF_CSTATE_CORE_EVENT_MAX,
+};
+
+bool test_core(int idx)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+           boot_cpu_data.x86 != 6)
+               return false;
+
+       switch (boot_cpu_data.x86_model) {
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+               if (idx == PERF_CSTATE_CORE_C3_RES ||
+                   idx == PERF_CSTATE_CORE_C6_RES)
+                       return true;
+               break;
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 69: /* 22nm Haswell ULT */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               if (idx == PERF_CSTATE_CORE_C3_RES ||
+                   idx == PERF_CSTATE_CORE_C6_RES ||
+                   idx == PERF_CSTATE_CORE_C7_RES)
+                       return true;
+               break;
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case 76: /* 14nm Atom "Airmont"                   */
+               if (idx == PERF_CSTATE_CORE_C1_RES ||
+                   idx == PERF_CSTATE_CORE_C6_RES)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
+PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+
+static struct perf_cstate_msr core_msr[] = {
+       [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1, test_core, },
+       [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3, test_core, },
+       [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6, test_core, },
+       [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7, test_core, },
+};
+
+static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+       NULL,
+};
+
+static struct attribute_group core_events_attr_group = {
+       .name = "events",
+       .attrs = core_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
+static struct attribute *core_format_attrs[] = {
+       &format_attr_core_event.attr,
+       NULL,
+};
+
+static struct attribute_group core_format_attr_group = {
+       .name = "format",
+       .attrs = core_format_attrs,
+};
+
+static cpumask_t cstate_core_cpu_mask;
+static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
+
+static struct attribute *cstate_cpumask_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+       .attrs = cstate_cpumask_attrs,
+};
+
+static const struct attribute_group *core_attr_groups[] = {
+       &core_events_attr_group,
+       &core_format_attr_group,
+       &cpumask_attr_group,
+       NULL,
+};
+
+/* cstate_core PMU end */
+
+
+/* cstate_pkg PMU */
+
+static struct pmu cstate_pkg_pmu;
+static bool has_cstate_pkg;
+
+enum perf_cstate_pkg_id {
+       /*
+        * cstate_pkg events
+        */
+       PERF_CSTATE_PKG_C2_RES = 0,
+       PERF_CSTATE_PKG_C3_RES,
+       PERF_CSTATE_PKG_C6_RES,
+       PERF_CSTATE_PKG_C7_RES,
+       PERF_CSTATE_PKG_C8_RES,
+       PERF_CSTATE_PKG_C9_RES,
+       PERF_CSTATE_PKG_C10_RES,
+
+       PERF_CSTATE_PKG_EVENT_MAX,
+};
+
+bool test_pkg(int idx)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+           boot_cpu_data.x86 != 6)
+               return false;
+
+       switch (boot_cpu_data.x86_model) {
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+               if (idx == PERF_CSTATE_CORE_C3_RES ||
+                   idx == PERF_CSTATE_CORE_C6_RES ||
+                   idx == PERF_CSTATE_CORE_C7_RES)
+                       return true;
+               break;
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               if (idx == PERF_CSTATE_PKG_C2_RES ||
+                   idx == PERF_CSTATE_PKG_C3_RES ||
+                   idx == PERF_CSTATE_PKG_C6_RES ||
+                   idx == PERF_CSTATE_PKG_C7_RES)
+                       return true;
+               break;
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case 76: /* 14nm Atom "Airmont"                   */
+               if (idx == PERF_CSTATE_CORE_C6_RES)
+                       return true;
+               break;
+       case 69: /* 22nm Haswell ULT */
+               if (idx == PERF_CSTATE_PKG_C2_RES ||
+                   idx == PERF_CSTATE_PKG_C3_RES ||
+                   idx == PERF_CSTATE_PKG_C6_RES ||
+                   idx == PERF_CSTATE_PKG_C7_RES ||
+                   idx == PERF_CSTATE_PKG_C8_RES ||
+                   idx == PERF_CSTATE_PKG_C9_RES ||
+                   idx == PERF_CSTATE_PKG_C10_RES)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
+PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
+
+static struct perf_cstate_msr pkg_msr[] = {
+       [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2,  test_pkg, },
+       [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3,  test_pkg, },
+       [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6,  test_pkg, },
+       [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7,  test_pkg, },
+       [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8,  test_pkg, },
+       [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9,  test_pkg, },
+       [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10, test_pkg, },
+};
+
+static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
+       NULL,
+};
+
+static struct attribute_group pkg_events_attr_group = {
+       .name = "events",
+       .attrs = pkg_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
+static struct attribute *pkg_format_attrs[] = {
+       &format_attr_pkg_event.attr,
+       NULL,
+};
+static struct attribute_group pkg_format_attr_group = {
+       .name = "format",
+       .attrs = pkg_format_attrs,
+};
+
+static cpumask_t cstate_pkg_cpu_mask;
+
+static const struct attribute_group *pkg_attr_groups[] = {
+       &pkg_events_attr_group,
+       &pkg_format_attr_group,
+       &cpumask_attr_group,
+       NULL,
+};
+
+/* cstate_pkg PMU end */
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buf)
+{
+       struct pmu *pmu = dev_get_drvdata(dev);
+
+       if (pmu == &cstate_core_pmu)
+               return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
+       else if (pmu == &cstate_pkg_pmu)
+               return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
+       else
+               return 0;
+}
+
+static int cstate_pmu_event_init(struct perf_event *event)
+{
+       u64 cfg = event->attr.config;
+       int ret = 0;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       if (event->pmu == &cstate_core_pmu) {
+               if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
+                       return -EINVAL;
+               if (!core_msr[cfg].attr)
+                       return -EINVAL;
+               event->hw.event_base = core_msr[cfg].msr;
+       } else if (event->pmu == &cstate_pkg_pmu) {
+               if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
+                       return -EINVAL;
+               if (!pkg_msr[cfg].attr)
+                       return -EINVAL;
+               event->hw.event_base = pkg_msr[cfg].msr;
+       } else
+               return -ENOENT;
+
+       /* must be done before validate_group */
+       event->hw.config = cfg;
+       event->hw.idx = -1;
+
+       return ret;
+}
+
+static inline u64 cstate_pmu_read_counter(struct perf_event *event)
+{
+       u64 val;
+
+       rdmsrl(event->hw.event_base, val);
+       return val;
+}
+
+static void cstate_pmu_event_update(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 prev_raw_count, new_raw_count;
+
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       new_raw_count = cstate_pmu_read_counter(event);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                           new_raw_count) != prev_raw_count)
+               goto again;
+
+       local64_add(new_raw_count - prev_raw_count, &event->count);
+}
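
The read path above follows the usual pattern for a free-running counter: take the current MSR value, publish it as the new prev_count with a compare-and-swap so concurrent readers agree on the baseline, then accumulate only the delta. A rough user-space analogue of that loop, sketched with C11 atomics rather than the kernel's local64_t (the names and read_raw() are illustrative assumptions, not kernel code):

/* Sketch of the same delta-accumulation loop with C11 atomics; illustrative only. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct free_running_counter {
	_Atomic uint64_t prev;	/* last published raw snapshot */
	_Atomic uint64_t total;	/* accumulated delta           */
};

static void counter_update(struct free_running_counter *c, uint64_t (*read_raw)(void))
{
	uint64_t prev, now;

	do {
		prev = atomic_load(&c->prev);
		now = read_raw();	/* stands in for rdmsrl() here */
		/* Retry if another reader published a newer snapshot first. */
	} while (!atomic_compare_exchange_weak(&c->prev, &prev, now));

	atomic_fetch_add(&c->total, now - prev);
}

static uint64_t fake_msr;
static uint64_t fake_read(void) { return fake_msr += 1000; }

int main(void)
{
	struct free_running_counter c = { 0, 0 };

	counter_update(&c, fake_read);
	counter_update(&c, fake_read);
	printf("total delta: %llu\n", (unsigned long long)atomic_load(&c.total));
	return 0;
}
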
+
+static void cstate_pmu_event_start(struct perf_event *event, int mode)
+{
+       local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
+}
+
+static void cstate_pmu_event_stop(struct perf_event *event, int mode)
+{
+       cstate_pmu_event_update(event);
+}
+
+static void cstate_pmu_event_del(struct perf_event *event, int mode)
+{
+       cstate_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int cstate_pmu_event_add(struct perf_event *event, int mode)
+{
+       if (mode & PERF_EF_START)
+               cstate_pmu_event_start(event, mode);
+
+       return 0;
+}
+
+static void cstate_cpu_exit(int cpu)
+{
+       int i, id, target;
+
+       /* cpu exit for cstate core */
+       if (has_cstate_core) {
+               id = topology_core_id(cpu);
+               target = -1;
+
+               for_each_online_cpu(i) {
+                       if (i == cpu)
+                               continue;
+                       if (id == topology_core_id(i)) {
+                               target = i;
+                               break;
+                       }
+               }
+               if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+                       cpumask_set_cpu(target, &cstate_core_cpu_mask);
+               WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
+               if (target >= 0)
+                       perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+       }
+
+       /* cpu exit for cstate pkg */
+       if (has_cstate_pkg) {
+               id = topology_physical_package_id(cpu);
+               target = -1;
+
+               for_each_online_cpu(i) {
+                       if (i == cpu)
+                               continue;
+                       if (id == topology_physical_package_id(i)) {
+                               target = i;
+                               break;
+                       }
+               }
+               if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+                       cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
+               WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
+               if (target >= 0)
+                       perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+       }
+}
+
+static void cstate_cpu_init(int cpu)
+{
+       int i, id;
+
+       /* cpu init for cstate core */
+       if (has_cstate_core) {
+               id = topology_core_id(cpu);
+               for_each_cpu(i, &cstate_core_cpu_mask) {
+                       if (id == topology_core_id(i))
+                               break;
+               }
+               if (i >= nr_cpu_ids)
+                       cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+       }
+
+       /* cpu init for cstate pkg */
+       if (has_cstate_pkg) {
+               id = topology_physical_package_id(cpu);
+               for_each_cpu(i, &cstate_pkg_cpu_mask) {
+                       if (id == topology_physical_package_id(i))
+                               break;
+               }
+               if (i >= nr_cpu_ids)
+                       cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
+       }
+}
+
+static int cstate_cpu_notifier(struct notifier_block *self,
+                                 unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               break;
+       case CPU_STARTING:
+               cstate_cpu_init(cpu);
+               break;
+       case CPU_UP_CANCELED:
+       case CPU_DYING:
+               break;
+       case CPU_ONLINE:
+       case CPU_DEAD:
+               break;
+       case CPU_DOWN_PREPARE:
+               cstate_cpu_exit(cpu);
+               break;
+       default:
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+/*
+ * Probe the cstate events and insert the available ones into the sysfs attrs.
+ * Return false if there are no available events.
+ */
+static bool cstate_probe_msr(struct perf_cstate_msr *msr,
+                            struct attribute   **events_attrs,
+                            int max_event_nr)
+{
+       int i, j = 0;
+       u64 val;
+
+       /* Probe the cstate events. */
+       for (i = 0; i < max_event_nr; i++) {
+               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+                       msr[i].attr = NULL;
+       }
+
+       /* List remaining events in the sysfs attrs. */
+       for (i = 0; i < max_event_nr; i++) {
+               if (msr[i].attr)
+                       events_attrs[j++] = &msr[i].attr->attr.attr;
+       }
+       events_attrs[j] = NULL;
+
+       return (j > 0) ? true : false;
+}
+
+static int __init cstate_init(void)
+{
+       /* SLM has different MSR for PKG C6 */
+       switch (boot_cpu_data.x86_model) {
+       case 55:
+       case 76:
+       case 77:
+               pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+       }
+
+       if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
+               has_cstate_core = true;
+
+       if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
+               has_cstate_pkg = true;
+
+       return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static void __init cstate_cpumask_init(void)
+{
+       int cpu;
+
+       cpu_notifier_register_begin();
+
+       for_each_online_cpu(cpu)
+               cstate_cpu_init(cpu);
+
+       __perf_cpu_notifier(cstate_cpu_notifier);
+
+       cpu_notifier_register_done();
+}
+
+static struct pmu cstate_core_pmu = {
+       .attr_groups    = core_attr_groups,
+       .name           = "cstate_core",
+       .task_ctx_nr    = perf_invalid_context,
+       .event_init     = cstate_pmu_event_init,
+       .add            = cstate_pmu_event_add, /* must have */
+       .del            = cstate_pmu_event_del, /* must have */
+       .start          = cstate_pmu_event_start,
+       .stop           = cstate_pmu_event_stop,
+       .read           = cstate_pmu_event_update,
+       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static struct pmu cstate_pkg_pmu = {
+       .attr_groups    = pkg_attr_groups,
+       .name           = "cstate_pkg",
+       .task_ctx_nr    = perf_invalid_context,
+       .event_init     = cstate_pmu_event_init,
+       .add            = cstate_pmu_event_add, /* must have */
+       .del            = cstate_pmu_event_del, /* must have */
+       .start          = cstate_pmu_event_start,
+       .stop           = cstate_pmu_event_stop,
+       .read           = cstate_pmu_event_update,
+       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static void __init cstate_pmus_register(void)
+{
+       int err;
+
+       if (has_cstate_core) {
+               err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
+               if (WARN_ON(err))
+                       pr_info("Failed to register PMU %s error %d\n",
+                               cstate_core_pmu.name, err);
+       }
+
+       if (has_cstate_pkg) {
+               err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+               if (WARN_ON(err))
+                       pr_info("Failed to register PMU %s error %d\n",
+                               cstate_pkg_pmu.name, err);
+       }
+}
+
+static int __init cstate_pmu_init(void)
+{
+       int err;
+
+       if (cpu_has_hypervisor)
+               return -ENODEV;
+
+       err = cstate_init();
+       if (err)
+               return err;
+
+       cstate_cpumask_init();
+
+       cstate_pmus_register();
+
+       return 0;
+}
+
+device_initcall(cstate_pmu_init);
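
A minimal user-space sketch of how the cstate_pkg PMU registered above might be read once this driver is in place. This is illustrative only: it assumes the standard sysfs location for dynamic PMU types, that the host actually exposes c2-residency (config 0, per the event attributes above), and sufficient perf privileges. The cstate PMUs are counting-only and use perf_invalid_context, so the event is bound to a CPU and no sampling fields are set.

/* Minimal sketch, assuming the cstate_pkg PMU above is present and readable. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long residency;
	FILE *f;
	int type, fd;

	/* Dynamic PMU type is exported by perf_pmu_register() via sysfs. */
	f = fopen("/sys/bus/event_source/devices/cstate_pkg/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0;	/* c2-residency: event=0x00 in the attrs above */

	/* Counting only: cstate_pmu_event_init() rejects sampling and the
	 * exclude_* filters; pid == -1 with cpu >= 0 binds it to CPU 0. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &residency, sizeof(residency)) == sizeof(residency))
		printf("pkg C2 residency delta: %llu\n", residency);
	close(fd);
	return 0;
}

The perf tool would reach the same counters as cstate_pkg/c2-residency/ and friends, subject to the cpumask attribute exported above.
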
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
new file mode 100644 (file)
index 0000000..ce7211a
--- /dev/null
@@ -0,0 +1,1410 @@
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include <asm/perf_event.h>
+#include <asm/insn.h>
+
+#include "../perf_event.h"
+
+/* The size of a BTS record in bytes: */
+#define BTS_RECORD_SIZE                24
+
+#define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
+#define PEBS_FIXUP_SIZE                PAGE_SIZE
+
+/*
+ * pebs_record_32 for p4 and core not supported
+
+struct pebs_record_32 {
+       u32 flags, ip;
+       u32 ax, bx, cx, dx;
+       u32 si, di, bp, sp;
+};
+
+ */
+
+union intel_x86_pebs_dse {
+       u64 val;
+       struct {
+               unsigned int ld_dse:4;
+               unsigned int ld_stlb_miss:1;
+               unsigned int ld_locked:1;
+               unsigned int ld_reserved:26;
+       };
+       struct {
+               unsigned int st_l1d_hit:1;
+               unsigned int st_reserved1:3;
+               unsigned int st_stlb_miss:1;
+               unsigned int st_locked:1;
+               unsigned int st_reserved2:26;
+       };
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+/* Version for Sandy Bridge and later */
+static u64 pebs_data_source[] = {
+       P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+       OP_LH | P(LVL, L1)  | P(SNOOP, NONE),   /* 0x01: L1 local */
+       OP_LH | P(LVL, LFB) | P(SNOOP, NONE),   /* 0x02: LFB hit */
+       OP_LH | P(LVL, L2)  | P(SNOOP, NONE),   /* 0x03: L2 hit */
+       OP_LH | P(LVL, L3)  | P(SNOOP, NONE),   /* 0x04: L3 hit */
+       OP_LH | P(LVL, L3)  | P(SNOOP, MISS),   /* 0x05: L3 hit, snoop miss */
+       OP_LH | P(LVL, L3)  | P(SNOOP, HIT),    /* 0x06: L3 hit, snoop hit */
+       OP_LH | P(LVL, L3)  | P(SNOOP, HITM),   /* 0x07: L3 hit, snoop hitm */
+       OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
+       OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+       OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
+       OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
+       OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+       OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+       OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
+       OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+/* Patch up minor differences in the bits */
+void __init intel_pmu_pebs_data_source_nhm(void)
+{
+       pebs_data_source[0x05] = OP_LH | P(LVL, L3)  | P(SNOOP, HIT);
+       pebs_data_source[0x06] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+       pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+}
+
+static u64 precise_store_data(u64 status)
+{
+       union intel_x86_pebs_dse dse;
+       u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+       dse.val = status;
+
+       /*
+        * bit 4: TLB access
+        * 1 = store missed the 2nd level TLB
+        *
+        * so it either hit the walker or the OS
+        * otherwise hit 2nd level TLB
+        */
+       if (dse.st_stlb_miss)
+               val |= P(TLB, MISS);
+       else
+               val |= P(TLB, HIT);
+
+       /*
+        * bit 0: hit L1 data cache
+        * if not set, then all we know is that
+        * it missed L1D
+        */
+       if (dse.st_l1d_hit)
+               val |= P(LVL, HIT);
+       else
+               val |= P(LVL, MISS);
+
+       /*
+        * bit 5: Locked prefix
+        */
+       if (dse.st_locked)
+               val |= P(LOCK, LOCKED);
+
+       return val;
+}
+
+static u64 precise_datala_hsw(struct perf_event *event, u64 status)
+{
+       union perf_mem_data_src dse;
+
+       dse.val = PERF_MEM_NA;
+
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+               dse.mem_op = PERF_MEM_OP_STORE;
+       else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
+               dse.mem_op = PERF_MEM_OP_LOAD;
+
+       /*
+        * L1 info only valid for following events:
+        *
+        * MEM_UOPS_RETIRED.STLB_MISS_STORES
+        * MEM_UOPS_RETIRED.LOCK_STORES
+        * MEM_UOPS_RETIRED.SPLIT_STORES
+        * MEM_UOPS_RETIRED.ALL_STORES
+        */
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
+               if (status & 1)
+                       dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+               else
+                       dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+       }
+       return dse.val;
+}
+
+static u64 load_latency_data(u64 status)
+{
+       union intel_x86_pebs_dse dse;
+       u64 val;
+       int model = boot_cpu_data.x86_model;
+       int fam = boot_cpu_data.x86;
+
+       dse.val = status;
+
+       /*
+        * use the mapping table for bit 0-3
+        */
+       val = pebs_data_source[dse.ld_dse];
+
+       /*
+        * Nehalem models do not provide TLB or lock information
+        */
+       if (fam == 0x6 && (model == 26 || model == 30
+           || model == 31 || model == 46)) {
+               val |= P(TLB, NA) | P(LOCK, NA);
+               return val;
+       }
+       /*
+        * bit 4: TLB access
+        * 0 = did not miss 2nd level TLB
+        * 1 = missed 2nd level TLB
+        */
+       if (dse.ld_stlb_miss)
+               val |= P(TLB, MISS) | P(TLB, L2);
+       else
+               val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+       /*
+        * bit 5: locked prefix
+        */
+       if (dse.ld_locked)
+               val |= P(LOCK, LOCKED);
+
+       return val;
+}
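
The load-latency path above packs the data-source encoding into the low bits of the PEBS dse field (ld_dse in bits 0-3, ld_stlb_miss in bit 4, ld_locked in bit 5). Below is a standalone sketch of that decode, mirroring the intel_x86_pebs_dse layout for illustration only; it assumes a little-endian GCC bitfield layout and is not kernel code.

/* Illustration of the dse bit layout used by load_latency_data(); not kernel code. */
#include <stdio.h>
#include <stdint.h>

union dse_example {			/* mirrors intel_x86_pebs_dse */
	uint64_t val;
	struct {
		unsigned int ld_dse:4;		/* bits 0-3: pebs_data_source[] index */
		unsigned int ld_stlb_miss:1;	/* bit 4: missed the 2nd level TLB    */
		unsigned int ld_locked:1;	/* bit 5: locked prefix               */
	};
};

int main(void)
{
	union dse_example dse = { .val = 0x21 };

	/* 0x21 -> ld_dse = 1 (L1 hit in pebs_data_source[]), stlb_miss = 0,
	 * locked = 1; load_latency_data() would report an L1 load hit that
	 * hit the TLB and carried a lock prefix. */
	printf("dse=%u stlb_miss=%u locked=%u\n",
	       dse.ld_dse, dse.ld_stlb_miss, dse.ld_locked);
	return 0;
}
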
+
+struct pebs_record_core {
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+};
+
+struct pebs_record_nhm {
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+       u64 status, dla, dse, lat;
+};
+
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+       u64 status, dla, dse, lat;
+       u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+       struct {
+               u32 cycles_last_block     : 32,
+                   hle_abort             : 1,
+                   rtm_abort             : 1,
+                   instruction_abort     : 1,
+                   non_instruction_abort : 1,
+                   retry                 : 1,
+                   data_conflict         : 1,
+                   capacity_writes       : 1,
+                   capacity_reads        : 1;
+       };
+       u64         value;
+};
+
+#define PEBS_HSW_TSX_FLAGS     0xff00000000ULL
+
+/* Same as HSW, plus TSC */
+
+struct pebs_record_skl {
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+       u64 status, dla, dse, lat;
+       u64 real_ip, tsx_tuning;
+       u64 tsc;
+};
+
+void init_debug_store_on_cpu(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds)
+               return;
+
+       wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
+                    (u32)((u64)(unsigned long)ds),
+                    (u32)((u64)(unsigned long)ds >> 32));
+}
+
+void fini_debug_store_on_cpu(int cpu)
+{
+       if (!per_cpu(cpu_hw_events, cpu).ds)
+               return;
+
+       wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
+}
+
+static DEFINE_PER_CPU(void *, insn_buffer);
+
+static int alloc_pebs_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       int node = cpu_to_node(cpu);
+       int max;
+       void *buffer, *ibuffer;
+
+       if (!x86_pmu.pebs)
+               return 0;
+
+       buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+       if (unlikely(!buffer))
+               return -ENOMEM;
+
+       /*
+        * HSW+ already provides us the eventing ip; no need to allocate this
+        * buffer then.
+        */
+       if (x86_pmu.intel_cap.pebs_format < 2) {
+               ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+               if (!ibuffer) {
+                       kfree(buffer);
+                       return -ENOMEM;
+               }
+               per_cpu(insn_buffer, cpu) = ibuffer;
+       }
+
+       max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
+
+       ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+       ds->pebs_index = ds->pebs_buffer_base;
+       ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+               max * x86_pmu.pebs_record_size;
+
+       return 0;
+}
+
+static void release_pebs_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds || !x86_pmu.pebs)
+               return;
+
+       kfree(per_cpu(insn_buffer, cpu));
+       per_cpu(insn_buffer, cpu) = NULL;
+
+       kfree((void *)(unsigned long)ds->pebs_buffer_base);
+       ds->pebs_buffer_base = 0;
+}
+
+static int alloc_bts_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       int node = cpu_to_node(cpu);
+       int max, thresh;
+       void *buffer;
+
+       if (!x86_pmu.bts)
+               return 0;
+
+       buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+       if (unlikely(!buffer)) {
+               WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
+               return -ENOMEM;
+       }
+
+       max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+       thresh = max / 16;
+
+       ds->bts_buffer_base = (u64)(unsigned long)buffer;
+       ds->bts_index = ds->bts_buffer_base;
+       ds->bts_absolute_maximum = ds->bts_buffer_base +
+               max * BTS_RECORD_SIZE;
+       ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+               thresh * BTS_RECORD_SIZE;
+
+       return 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds || !x86_pmu.bts)
+               return;
+
+       kfree((void *)(unsigned long)ds->bts_buffer_base);
+       ds->bts_buffer_base = 0;
+}
+
+static int alloc_ds_buffer(int cpu)
+{
+       int node = cpu_to_node(cpu);
+       struct debug_store *ds;
+
+       ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
+       if (unlikely(!ds))
+               return -ENOMEM;
+
+       per_cpu(cpu_hw_events, cpu).ds = ds;
+
+       return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds)
+               return;
+
+       per_cpu(cpu_hw_events, cpu).ds = NULL;
+       kfree(ds);
+}
+
+void release_ds_buffers(void)
+{
+       int cpu;
+
+       if (!x86_pmu.bts && !x86_pmu.pebs)
+               return;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu)
+               fini_debug_store_on_cpu(cpu);
+
+       for_each_possible_cpu(cpu) {
+               release_pebs_buffer(cpu);
+               release_bts_buffer(cpu);
+               release_ds_buffer(cpu);
+       }
+       put_online_cpus();
+}
+
+void reserve_ds_buffers(void)
+{
+       int bts_err = 0, pebs_err = 0;
+       int cpu;
+
+       x86_pmu.bts_active = 0;
+       x86_pmu.pebs_active = 0;
+
+       if (!x86_pmu.bts && !x86_pmu.pebs)
+               return;
+
+       if (!x86_pmu.bts)
+               bts_err = 1;
+
+       if (!x86_pmu.pebs)
+               pebs_err = 1;
+
+       get_online_cpus();
+
+       for_each_possible_cpu(cpu) {
+               if (alloc_ds_buffer(cpu)) {
+                       bts_err = 1;
+                       pebs_err = 1;
+               }
+
+               if (!bts_err && alloc_bts_buffer(cpu))
+                       bts_err = 1;
+
+               if (!pebs_err && alloc_pebs_buffer(cpu))
+                       pebs_err = 1;
+
+               if (bts_err && pebs_err)
+                       break;
+       }
+
+       if (bts_err) {
+               for_each_possible_cpu(cpu)
+                       release_bts_buffer(cpu);
+       }
+
+       if (pebs_err) {
+               for_each_possible_cpu(cpu)
+                       release_pebs_buffer(cpu);
+       }
+
+       if (bts_err && pebs_err) {
+               for_each_possible_cpu(cpu)
+                       release_ds_buffer(cpu);
+       } else {
+               if (x86_pmu.bts && !bts_err)
+                       x86_pmu.bts_active = 1;
+
+               if (x86_pmu.pebs && !pebs_err)
+                       x86_pmu.pebs_active = 1;
+
+               for_each_online_cpu(cpu)
+                       init_debug_store_on_cpu(cpu);
+       }
+
+       put_online_cpus();
+}
+
+/*
+ * BTS
+ */
+
+struct event_constraint bts_constraint =
+       EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
+
+void intel_pmu_enable_bts(u64 config)
+{
+       unsigned long debugctlmsr;
+
+       debugctlmsr = get_debugctlmsr();
+
+       debugctlmsr |= DEBUGCTLMSR_TR;
+       debugctlmsr |= DEBUGCTLMSR_BTS;
+       if (config & ARCH_PERFMON_EVENTSEL_INT)
+               debugctlmsr |= DEBUGCTLMSR_BTINT;
+
+       if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
+
+       if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
+
+       update_debugctlmsr(debugctlmsr);
+}
+
+void intel_pmu_disable_bts(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       unsigned long debugctlmsr;
+
+       if (!cpuc->ds)
+               return;
+
+       debugctlmsr = get_debugctlmsr();
+
+       debugctlmsr &=
+               ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
+                 DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
+
+       update_debugctlmsr(debugctlmsr);
+}
+
+int intel_pmu_drain_bts_buffer(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct debug_store *ds = cpuc->ds;
+       struct bts_record {
+               u64     from;
+               u64     to;
+               u64     flags;
+       };
+       struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
+       struct bts_record *at, *base, *top;
+       struct perf_output_handle handle;
+       struct perf_event_header header;
+       struct perf_sample_data data;
+       unsigned long skip = 0;
+       struct pt_regs regs;
+
+       if (!event)
+               return 0;
+
+       if (!x86_pmu.bts_active)
+               return 0;
+
+       base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+       top  = (struct bts_record *)(unsigned long)ds->bts_index;
+
+       if (top <= base)
+               return 0;
+
+       memset(&regs, 0, sizeof(regs));
+
+       ds->bts_index = ds->bts_buffer_base;
+
+       perf_sample_data_init(&data, 0, event->hw.last_period);
+
+       /*
+        * BTS leaks kernel addresses in branches across the cpl boundary,
+        * such as traps or system calls, so unless the user is asking for
+        * kernel tracing (and right now it's not possible), we'd need to
+        * filter them out. But first we need to count how many of those we
+        * have in the current batch. This is an extra O(n) pass, however,
+        * it's much faster than the other one especially considering that
+        * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
+        * alloc_bts_buffer()).
+        */
+       for (at = base; at < top; at++) {
+               /*
+                * Note that right now *this* BTS code only works if
+                * attr::exclude_kernel is set, but let's keep this extra
+                * check here in case that changes.
+                */
+               if (event->attr.exclude_kernel &&
+                   (kernel_ip(at->from) || kernel_ip(at->to)))
+                       skip++;
+       }
+
+       /*
+        * Prepare a generic sample, i.e. fill in the invariant fields.
+        * We will overwrite the from and to address before we output
+        * the sample.
+        */
+       perf_prepare_sample(&header, &data, event, &regs);
+
+       if (perf_output_begin(&handle, event, header.size *
+                             (top - base - skip)))
+               return 1;
+
+       for (at = base; at < top; at++) {
+               /* Filter out any records that contain kernel addresses. */
+               if (event->attr.exclude_kernel &&
+                   (kernel_ip(at->from) || kernel_ip(at->to)))
+                       continue;
+
+               data.ip         = at->from;
+               data.addr       = at->to;
+
+               perf_output_sample(&handle, &header, &data, event);
+       }
+
+       perf_output_end(&handle);
+
+       /* There's new data available. */
+       event->hw.interrupts++;
+       event->pending_kill = POLL_IN;
+       return 1;
+}
+
+static inline void intel_pmu_drain_pebs_buffer(void)
+{
+       struct pt_regs regs;
+
+       x86_pmu.drain_pebs(&regs);
+}
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       if (!sched_in)
+               intel_pmu_drain_pebs_buffer();
+}
+
+/*
+ * PEBS
+ */
+struct event_constraint intel_core2_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_atom_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_slm_pebs_event_constraints[] = {
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_nehalem_pebs_event_constraints[] = {
+       INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
+       INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_westmere_pebs_event_constraints[] = {
+       INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_snb_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+       INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
+       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
+       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+        EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+       INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
+       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_bdw_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+       INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
+       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+
+struct event_constraint intel_skl_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
+       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+       /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+       INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint *intel_pebs_constraints(struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       if (!event->attr.precise_ip)
+               return NULL;
+
+       if (x86_pmu.pebs_constraints) {
+               for_each_event_constraint(c, x86_pmu.pebs_constraints) {
+                       if ((event->hw.config & c->cmask) == c->code) {
+                               event->hw.flags |= c->flags;
+                               return c;
+                       }
+               }
+       }
+
+       return &emptyconstraint;
+}
+
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+       return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
+void intel_pmu_pebs_enable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       struct debug_store *ds = cpuc->ds;
+       bool first_pebs;
+       u64 threshold;
+
+       hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+
+       first_pebs = !pebs_is_enabled(cpuc);
+       cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+               cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+               cpuc->pebs_enabled |= 1ULL << 63;
+
+       /*
+        * When the event is constrained enough we can use a larger
+        * threshold and run the event with less frequent PMI.
+        */
+       if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
+               threshold = ds->pebs_absolute_maximum -
+                       x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+
+               if (first_pebs)
+                       perf_sched_cb_inc(event->ctx->pmu);
+       } else {
+               threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+               /*
+                * If not all events can use the larger buffer,
+                * roll back to a threshold of one record.
+                */
+               if (!first_pebs &&
+                   (ds->pebs_interrupt_threshold > threshold))
+                       perf_sched_cb_dec(event->ctx->pmu);
+       }
+
+       /* Use auto-reload if possible to save a MSR write in the PMI */
+       if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+               ds->pebs_event_reset[hwc->idx] =
+                       (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+       }
+
+       if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+               ds->pebs_interrupt_threshold = threshold;
+}
+
+void intel_pmu_pebs_disable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       struct debug_store *ds = cpuc->ds;
+       bool large_pebs = ds->pebs_interrupt_threshold >
+               ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+       if (large_pebs)
+               intel_pmu_drain_pebs_buffer();
+
+       cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+               cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
+       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+               cpuc->pebs_enabled &= ~(1ULL << 63);
+
+       if (large_pebs && !pebs_is_enabled(cpuc))
+               perf_sched_cb_dec(event->ctx->pmu);
+
+       if (cpuc->enabled)
+               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+
+       hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+}
+
+void intel_pmu_pebs_enable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->pebs_enabled)
+               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+}
+
+void intel_pmu_pebs_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->pebs_enabled)
+               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+}
+
+static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       unsigned long from = cpuc->lbr_entries[0].from;
+       unsigned long old_to, to = cpuc->lbr_entries[0].to;
+       unsigned long ip = regs->ip;
+       int is_64bit = 0;
+       void *kaddr;
+       int size;
+
+       /*
+        * We don't need to fix up if the PEBS assist is fault-like
+        */
+       if (!x86_pmu.intel_cap.pebs_trap)
+               return 1;
+
+       /*
+        * No LBR entry, no basic block, no rewinding
+        */
+       if (!cpuc->lbr_stack.nr || !from || !to)
+               return 0;
+
+       /*
+        * Basic blocks should never cross user/kernel boundaries
+        */
+       if (kernel_ip(ip) != kernel_ip(to))
+               return 0;
+
+       /*
+        * unsigned math, either ip is before the start (impossible) or
+        * the basic block is larger than 1 page (sanity)
+        */
+       if ((ip - to) > PEBS_FIXUP_SIZE)
+               return 0;
+
+       /*
+        * We sampled a branch insn, rewind using the LBR stack
+        */
+       if (ip == to) {
+               set_linear_ip(regs, from);
+               return 1;
+       }
+
+       size = ip - to;
+       if (!kernel_ip(ip)) {
+               int bytes;
+               u8 *buf = this_cpu_read(insn_buffer);
+
+               /* 'size' must fit our buffer, see above */
+               bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+               if (bytes != 0)
+                       return 0;
+
+               kaddr = buf;
+       } else {
+               kaddr = (void *)to;
+       }
+
+       do {
+               struct insn insn;
+
+               old_to = to;
+
+#ifdef CONFIG_X86_64
+               is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
+#endif
+               insn_init(&insn, kaddr, size, is_64bit);
+               insn_get_length(&insn);
+               /*
+                * Make sure there was not a problem decoding the
+                * instruction and getting the length.  This is
+                * doubly important because we have an infinite
+                * loop if insn.length=0.
+                */
+               if (!insn.length)
+                       break;
+
+               to += insn.length;
+               kaddr += insn.length;
+               size -= insn.length;
+       } while (to < ip);
+
+       if (to == ip) {
+               set_linear_ip(regs, old_to);
+               return 1;
+       }
+
+       /*
+        * Even though we decoded the basic block, the instruction stream
+        * never matched the given IP, either the TO or the IP got corrupted.
+        */
+       return 0;
+}
+
+static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
+{
+       if (pebs->tsx_tuning) {
+               union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+               return tsx.cycles_last_block;
+       }
+       return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
+{
+       u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+       /* For RTM XABORTs also log the abort code from AX */
+       if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+               txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+       return txn;
+}
+
+static void setup_pebs_sample_data(struct perf_event *event,
+                                  struct pt_regs *iregs, void *__pebs,
+                                  struct perf_sample_data *data,
+                                  struct pt_regs *regs)
+{
+#define PERF_X86_EVENT_PEBS_HSW_PREC \
+               (PERF_X86_EVENT_PEBS_ST_HSW | \
+                PERF_X86_EVENT_PEBS_LD_HSW | \
+                PERF_X86_EVENT_PEBS_NA_HSW)
+       /*
+        * We cast to the biggest pebs_record but are careful not to
+        * unconditionally access the 'extra' entries.
+        */
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct pebs_record_skl *pebs = __pebs;
+       u64 sample_type;
+       int fll, fst, dsrc;
+       int fl = event->hw.flags;
+
+       if (pebs == NULL)
+               return;
+
+       sample_type = event->attr.sample_type;
+       dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
+
+       fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
+       fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+
+       perf_sample_data_init(data, 0, event->hw.last_period);
+
+       data->period = event->hw.last_period;
+
+       /*
+        * Use latency for weight (only avail with PEBS-LL)
+        */
+       if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+               data->weight = pebs->lat;
+
+       /*
+        * data.data_src encodes the data source
+        */
+       if (dsrc) {
+               u64 val = PERF_MEM_NA;
+               if (fll)
+                       val = load_latency_data(pebs->dse);
+               else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
+                       val = precise_datala_hsw(event, pebs->dse);
+               else if (fst)
+                       val = precise_store_data(pebs->dse);
+               data->data_src.val = val;
+       }
+
+       /*
+        * We use the interrupt regs as a base because the PEBS record
+        * does not contain a full regs set, specifically it seems to
+        * lack segment descriptors, which get used by things like
+        * user_mode().
+        *
+        * In the simple case fix up only the IP and BP,SP regs, for
+        * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
+        * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+        */
+       *regs = *iregs;
+       regs->flags = pebs->flags;
+       set_linear_ip(regs, pebs->ip);
+       regs->bp = pebs->bp;
+       regs->sp = pebs->sp;
+
+       if (sample_type & PERF_SAMPLE_REGS_INTR) {
+               regs->ax = pebs->ax;
+               regs->bx = pebs->bx;
+               regs->cx = pebs->cx;
+               regs->dx = pebs->dx;
+               regs->si = pebs->si;
+               regs->di = pebs->di;
+               regs->bp = pebs->bp;
+               regs->sp = pebs->sp;
+
+               regs->flags = pebs->flags;
+#ifndef CONFIG_X86_32
+               regs->r8 = pebs->r8;
+               regs->r9 = pebs->r9;
+               regs->r10 = pebs->r10;
+               regs->r11 = pebs->r11;
+               regs->r12 = pebs->r12;
+               regs->r13 = pebs->r13;
+               regs->r14 = pebs->r14;
+               regs->r15 = pebs->r15;
+#endif
+       }
+
+       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+               regs->ip = pebs->real_ip;
+               regs->flags |= PERF_EFLAGS_EXACT;
+       } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
+               regs->flags |= PERF_EFLAGS_EXACT;
+       else
+               regs->flags &= ~PERF_EFLAGS_EXACT;
+
+       if ((sample_type & PERF_SAMPLE_ADDR) &&
+           x86_pmu.intel_cap.pebs_format >= 1)
+               data->addr = pebs->dla;
+
+       if (x86_pmu.intel_cap.pebs_format >= 2) {
+               /* Only set the TSX weight when no memory weight. */
+               if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+                       data->weight = intel_hsw_weight(pebs);
+
+               if (sample_type & PERF_SAMPLE_TRANSACTION)
+                       data->txn = intel_hsw_transaction(pebs);
+       }
+
+       /*
+        * v3 supplies an accurate time stamp, so we use that
+        * for the time stamp.
+        *
+        * We can only do this for the default trace clock.
+        */
+       if (x86_pmu.intel_cap.pebs_format >= 3 &&
+               event->attr.use_clockid == 0)
+               data->time = native_sched_clock_from_tsc(pebs->tsc);
+
+       if (has_branch_stack(event))
+               data->br_stack = &cpuc->lbr_stack;
+}
+
+static inline void *
+get_next_pebs_record_by_bit(void *base, void *top, int bit)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       void *at;
+       u64 pebs_status;
+
+       /*
+        * fmt0 does not have a status bitfield (does not use
+        * pebs_record_nhm format)
+        */
+       if (x86_pmu.intel_cap.pebs_format < 1)
+               return base;
+
+       if (base == NULL)
+               return NULL;
+
+       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+               struct pebs_record_nhm *p = at;
+
+               if (test_bit(bit, (unsigned long *)&p->status)) {
+                       /* PEBS v3 has accurate status bits */
+                       if (x86_pmu.intel_cap.pebs_format >= 3)
+                               return at;
+
+                       if (p->status == (1 << bit))
+                               return at;
+
+                       /* clear non-PEBS bit and re-check */
+                       pebs_status = p->status & cpuc->pebs_enabled;
+                       pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+                       if (pebs_status == (1 << bit))
+                               return at;
+               }
+       }
+       return NULL;
+}
+
+static void __intel_pmu_pebs_event(struct perf_event *event,
+                                  struct pt_regs *iregs,
+                                  void *base, void *top,
+                                  int bit, int count)
+{
+       struct perf_sample_data data;
+       struct pt_regs regs;
+       void *at = get_next_pebs_record_by_bit(base, top, bit);
+
+       if (!intel_pmu_save_and_restart(event) &&
+           !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+               return;
+
+       while (count > 1) {
+               setup_pebs_sample_data(event, iregs, at, &data, &regs);
+               perf_event_output(event, &data, &regs);
+               at += x86_pmu.pebs_record_size;
+               at = get_next_pebs_record_by_bit(at, top, bit);
+               count--;
+       }
+
+       setup_pebs_sample_data(event, iregs, at, &data, &regs);
+
+       /*
+        * All but the last record are processed above.
+        * The last one is left so that the overflow handler can be called.
+        */
+       if (perf_event_overflow(event, &data, &regs)) {
+               x86_pmu_stop(event, 0);
+               return;
+       }
+
+}
+
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct debug_store *ds = cpuc->ds;
+       struct perf_event *event = cpuc->events[0]; /* PMC0 only */
+       struct pebs_record_core *at, *top;
+       int n;
+
+       if (!x86_pmu.pebs_active)
+               return;
+
+       at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
+       top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
+
+       /*
+        * Whatever else happens, drain the thing
+        */
+       ds->pebs_index = ds->pebs_buffer_base;
+
+       if (!test_bit(0, cpuc->active_mask))
+               return;
+
+       WARN_ON_ONCE(!event);
+
+       if (!event->attr.precise_ip)
+               return;
+
+       n = top - at;
+       if (n <= 0)
+               return;
+
+       __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
+}
+
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct debug_store *ds = cpuc->ds;
+       struct perf_event *event;
+       void *base, *at, *top;
+       short counts[MAX_PEBS_EVENTS] = {};
+       short error[MAX_PEBS_EVENTS] = {};
+       int bit, i;
+
+       if (!x86_pmu.pebs_active)
+               return;
+
+       base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+       ds->pebs_index = ds->pebs_buffer_base;
+
+       if (unlikely(base >= top))
+               return;
+
+       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+               struct pebs_record_nhm *p = at;
+               u64 pebs_status;
+
+               /* PEBS v3 has accurate status bits */
+               if (x86_pmu.intel_cap.pebs_format >= 3) {
+                       for_each_set_bit(bit, (unsigned long *)&p->status,
+                                        MAX_PEBS_EVENTS)
+                               counts[bit]++;
+
+                       continue;
+               }
+
+               pebs_status = p->status & cpuc->pebs_enabled;
+               pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+               /*
+                * On some CPUs the PEBS status can be zero when PEBS is
+                * racing with clearing of GLOBAL_STATUS.
+                *
+                * Normally we would drop that record, but in the
+                * case when there is only a single active PEBS event
+                * we can assume it's for that event.
+                */
+               if (!pebs_status && cpuc->pebs_enabled &&
+                       !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
+                       pebs_status = cpuc->pebs_enabled;
+
+               bit = find_first_bit((unsigned long *)&pebs_status,
+                                       x86_pmu.max_pebs_events);
+               if (bit >= x86_pmu.max_pebs_events)
+                       continue;
+
+               /*
+                * The PEBS hardware does not deal well with the situation
+                * when events happen near to each other and multiple bits
+                * are set. But it should happen rarely.
+                *
+                * If these events include one PEBS and multiple non-PEBS
+                * events, it doesn't impact PEBS record. The record will
+                * be handled normally. (slow path)
+                *
+                * If these events include two or more PEBS events, the
+                * records for the events can be collapsed into a single
+                * one, and it's not possible to reconstruct all events
+                * that caused the PEBS record. It's called collision.
+                * If collision happened, the record will be dropped.
+                */
+               if (p->status != (1ULL << bit)) {
+                       for_each_set_bit(i, (unsigned long *)&pebs_status,
+                                        x86_pmu.max_pebs_events)
+                               error[i]++;
+                       continue;
+               }
+
+               counts[bit]++;
+       }
+
+       for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+               if ((counts[bit] == 0) && (error[bit] == 0))
+                       continue;
+
+               event = cpuc->events[bit];
+               WARN_ON_ONCE(!event);
+               WARN_ON_ONCE(!event->attr.precise_ip);
+
+               /* log the number of dropped samples */
+               if (error[bit])
+                       perf_log_lost_samples(event, error[bit]);
+
+               if (counts[bit]) {
+                       __intel_pmu_pebs_event(event, iregs, base,
+                                              top, bit, counts[bit]);
+               }
+       }
+}
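
The drain loop above tallies one sample per counter for cleanly attributed
records and treats records whose status carries more than one PEBS bit as
collisions, which are dropped and later reported through
perf_log_lost_samples(). A standalone sketch of just that classification step,
with made-up status words and an arbitrary counter limit:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_EVENTS 8

    int main(void)
    {
            /* status words as they might appear in successive records */
            uint64_t records[] = { 0x1, 0x2, 0x3 /* bits 0+1: collision */, 0x1 };
            int counts[MAX_EVENTS] = { 0 };
            int errors[MAX_EVENTS] = { 0 };
            size_t i;
            int bit;

            for (i = 0; i < sizeof(records) / sizeof(records[0]); i++) {
                    uint64_t status = records[i];

                    /* find the lowest set bit */
                    for (bit = 0; bit < MAX_EVENTS; bit++)
                            if (status & (1ULL << bit))
                                    break;
                    if (bit == MAX_EVENTS)
                            continue;       /* nothing attributable */

                    if (status != (1ULL << bit)) {
                            /* more than one bit set: cannot attribute, drop */
                            for (; bit < MAX_EVENTS; bit++)
                                    if (status & (1ULL << bit))
                                            errors[bit]++;
                            continue;
                    }
                    counts[bit]++;
            }

            for (bit = 0; bit < MAX_EVENTS; bit++)
                    if (counts[bit] || errors[bit])
                            printf("counter %d: %d samples, %d dropped\n",
                                   bit, counts[bit], errors[bit]);
            return 0;
    }

On PEBS v3 the status bits are accurate, so the kernel skips the collision
check entirely, as the first branch of the loop above shows.
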
+
+/*
+ * BTS, PEBS probe and setup
+ */
+
+void __init intel_ds_init(void)
+{
+       /*
+        * No support for 32bit formats
+        */
+       if (!boot_cpu_has(X86_FEATURE_DTES64))
+               return;
+
+       x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
+       x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+       x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+       if (x86_pmu.pebs) {
+               char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
+               int format = x86_pmu.intel_cap.pebs_format;
+
+               switch (format) {
+               case 0:
+                       pr_cont("PEBS fmt0%c, ", pebs_type);
+                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+                       /*
+                        * Using >PAGE_SIZE buffers makes the WRMSR to
+                        * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
+                        * mysteriously hang on Core2.
+                        *
+                        * As a workaround, we don't do this.
+                        */
+                       x86_pmu.pebs_buffer_size = PAGE_SIZE;
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
+                       break;
+
+               case 1:
+                       pr_cont("PEBS fmt1%c, ", pebs_type);
+                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+                       break;
+
+               case 2:
+                       pr_cont("PEBS fmt2%c, ", pebs_type);
+                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+                       break;
+
+               case 3:
+                       pr_cont("PEBS fmt3%c, ", pebs_type);
+                       x86_pmu.pebs_record_size =
+                                               sizeof(struct pebs_record_skl);
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+                       x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
+                       break;
+
+               default:
+                       pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
+                       x86_pmu.pebs = 0;
+               }
+       }
+}
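
intel_ds_init() keys both the record size and the drain handler off the PEBS
format version reported in x86_pmu.intel_cap, and an unknown version disables
PEBS outright. A toy model of that capability-dispatch pattern; the struct and
function names below are invented for illustration, and the buffer-size and
free-running-flag adjustments are left out:

    #include <stddef.h>
    #include <stdio.h>

    struct record_v1 { unsigned long ip; };
    struct record_v2 { unsigned long ip; unsigned long tsx; };

    static void drain_v1(void) { puts("draining fmt1 records"); }
    static void drain_v2(void) { puts("draining fmt2 records"); }

    static size_t record_size;
    static void (*drain)(void);

    static int ds_init(int format)
    {
            switch (format) {
            case 1:
                    record_size = sizeof(struct record_v1);
                    drain = drain_v1;
                    return 0;
            case 2:
                    record_size = sizeof(struct record_v2);
                    drain = drain_v2;
                    return 0;
            default:
                    return -1;      /* unknown format: keep the feature off */
            }
    }

    int main(void)
    {
            if (ds_init(2) == 0) {
                    printf("record size: %zu bytes\n", record_size);
                    drain();
            }
            return 0;
    }
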
+
+void perf_restore_debug_store(void)
+{
+       struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+
+       if (!x86_pmu.bts && !x86_pmu.pebs)
+               return;
+
+       wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
+}
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
new file mode 100644 (file)
index 0000000..548d5f7
--- /dev/null
@@ -0,0 +1,321 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/hardirq.h>
+
+#include "../perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]           = 0x002a,
+  [PERF_COUNT_HW_INSTRUCTIONS]         = 0x0016,
+  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0028,
+  [PERF_COUNT_HW_CACHE_MISSES]         = 0x0029,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x0012,
+  [PERF_COUNT_HW_BRANCH_MISSES]        = 0x002b,
+};
+
+static const u64 __initconst knc_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               /* On Xeon Phi event "0" is a valid DATA_READ          */
+               /*   (L1 Data Cache Reads) Instruction.                */
+               /* We code this as ARCH_PERFMON_EVENTSEL_INT as this   */
+               /* bit will always be set in x86_pmu_hw_config().      */
+               [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+                                               /* DATA_READ           */
+               [ C(RESULT_MISS)   ] = 0x0003,  /* DATA_READ_MISS      */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE          */
+               [ C(RESULT_MISS)   ] = 0x0004,  /* DATA_WRITE_MISS     */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0011,  /* L1_DATA_PF1         */
+               [ C(RESULT_MISS)   ] = 0x001c,  /* L1_DATA_PF1_MISS    */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ          */
+               [ C(RESULT_MISS)   ] = 0x000e,  /* CODE_CACHE_MISS    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x10cb,  /* L2_READ_MISS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x10cc,  /* L2_WRITE_HIT */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x10fc,  /* L2_DATA_PF2      */
+               [ C(RESULT_MISS)   ] = 0x10fe,  /* L2_DATA_PF2_MISS */
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+                                               /* DATA_READ */
+                                               /* see note on L1 OP_READ */
+               [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE */
+               [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ */
+               [ C(RESULT_MISS)   ] = 0x000d,  /* CODE_PAGE_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0012,  /* BRANCHES */
+               [ C(RESULT_MISS)   ] = 0x002b,  /* BRANCHES_MISPREDICTED */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+       return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+       INTEL_EVENT_CONSTRAINT(0xc3, 0x1),      /* HWP_L2HIT */
+       INTEL_EVENT_CONSTRAINT(0xc4, 0x1),      /* HWP_L2MISS */
+       INTEL_EVENT_CONSTRAINT(0xc8, 0x1),      /* L2_READ_HIT_E */
+       INTEL_EVENT_CONSTRAINT(0xc9, 0x1),      /* L2_READ_HIT_M */
+       INTEL_EVENT_CONSTRAINT(0xca, 0x1),      /* L2_READ_HIT_S */
+       INTEL_EVENT_CONSTRAINT(0xcb, 0x1),      /* L2_READ_MISS */
+       INTEL_EVENT_CONSTRAINT(0xcc, 0x1),      /* L2_WRITE_HIT */
+       INTEL_EVENT_CONSTRAINT(0xce, 0x1),      /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+       INTEL_EVENT_CONSTRAINT(0xcf, 0x1),      /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+       INTEL_EVENT_CONSTRAINT(0xd7, 0x1),      /* L2_VICTIM_REQ_WITH_DATA */
+       INTEL_EVENT_CONSTRAINT(0xe3, 0x1),      /* SNP_HITM_BUNIT */
+       INTEL_EVENT_CONSTRAINT(0xe6, 0x1),      /* SNP_HIT_L2 */
+       INTEL_EVENT_CONSTRAINT(0xe7, 0x1),      /* SNP_HITM_L2 */
+       INTEL_EVENT_CONSTRAINT(0xf1, 0x1),      /* L2_DATA_READ_MISS_CACHE_FILL */
+       INTEL_EVENT_CONSTRAINT(0xf2, 0x1),      /* L2_DATA_WRITE_MISS_CACHE_FILL */
+       INTEL_EVENT_CONSTRAINT(0xf6, 0x1),      /* L2_DATA_READ_MISS_MEM_FILL */
+       INTEL_EVENT_CONSTRAINT(0xf7, 0x1),      /* L2_DATA_WRITE_MISS_MEM_FILL */
+       INTEL_EVENT_CONSTRAINT(0xfc, 0x1),      /* L2_DATA_PF2 */
+       INTEL_EVENT_CONSTRAINT(0xfd, 0x1),      /* L2_DATA_PF2_DROP */
+       INTEL_EVENT_CONSTRAINT(0xfe, 0x1),      /* L2_DATA_PF2_MISS */
+       INTEL_EVENT_CONSTRAINT(0xff, 0x1),      /* L2_DATA_HIT_INFLIGHT_PF2 */
+       EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS         0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL    0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL           0x0000002f
+
+#define KNC_ENABLE_COUNTER0                    0x00000001
+#define KNC_ENABLE_COUNTER1                    0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+       u64 val;
+
+       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+       val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+       u64 val;
+
+       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+       val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 val;
+
+       val = hwc->config;
+       val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+       (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 val;
+
+       val = hwc->config;
+       val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+       (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static inline u64 knc_pmu_get_status(void)
+{
+       u64 status;
+
+       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+       return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc;
+       int handled = 0;
+       int bit, loops;
+       u64 status;
+
+       cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       knc_pmu_disable_all();
+
+       status = knc_pmu_get_status();
+       if (!status) {
+               knc_pmu_enable_all(0);
+               return handled;
+       }
+
+       loops = 0;
+again:
+       knc_pmu_ack_status(status);
+       if (++loops > 100) {
+               WARN_ONCE(1, "perf: irq loop stuck!\n");
+               perf_event_print_debug();
+               goto done;
+       }
+
+       inc_irq_stat(apic_perf_irqs);
+
+       for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+               struct perf_event *event = cpuc->events[bit];
+
+               handled++;
+
+               if (!test_bit(bit, cpuc->active_mask))
+                       continue;
+
+               if (!intel_pmu_save_and_restart(event))
+                       continue;
+
+               perf_sample_data_init(&data, 0, event->hw.last_period);
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+       }
+
+       /*
+        * Repeat if there is more work to be done:
+        */
+       status = knc_pmu_get_status();
+       if (status)
+               goto again;
+
+done:
+       /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+       if (cpuc->enabled)
+               knc_pmu_enable_all(0);
+
+       return handled;
+}
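
knc_pmu_handle_irq() follows the usual PMI shape: disable the counters, read
the global overflow status, ack it, service each overflowed counter by bit,
then re-read the status and loop, with a hard cap so a misbehaving counter
cannot wedge the handler. A self-contained sketch of that ack-and-retry loop,
with a fake in-memory status word standing in for the MSRs:

    #include <stdint.h>
    #include <stdio.h>

    /* Pretend overflow-status source: returns pending bits until acked. */
    static uint64_t fake_status = 0x5;      /* counters 0 and 2 pending */

    static uint64_t get_status(void)   { return fake_status; }
    static void ack_status(uint64_t s) { fake_status &= ~s; }

    int main(void)
    {
            uint64_t status;
            int loops = 0, handled = 0, bit;

            while ((status = get_status()) != 0) {
                    if (++loops > 100) {
                            puts("irq loop stuck, giving up");
                            break;
                    }
                    ack_status(status);
                    for (bit = 0; bit < 64; bit++) {
                            if (status & (1ULL << bit)) {
                                    printf("handling overflow of counter %d\n",
                                           bit);
                                    handled++;
                            }
                    }
            }
            printf("handled %d overflows\n", handled);
            return 0;
    }
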
+
+
+PMU_FORMAT_ATTR(event, "config:0-7"    );
+PMU_FORMAT_ATTR(umask, "config:8-15"   );
+PMU_FORMAT_ATTR(edge,  "config:18"     );
+PMU_FORMAT_ATTR(inv,   "config:23"     );
+PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+
+static struct attribute *intel_knc_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       NULL,
+};
+
+static const struct x86_pmu knc_pmu __initconst = {
+       .name                   = "knc",
+       .handle_irq             = knc_pmu_handle_irq,
+       .disable_all            = knc_pmu_disable_all,
+       .enable_all             = knc_pmu_enable_all,
+       .enable                 = knc_pmu_enable_event,
+       .disable                = knc_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_KNC_EVNTSEL0,
+       .perfctr                = MSR_KNC_PERFCTR0,
+       .event_map              = knc_pmu_event_map,
+       .max_events             = ARRAY_SIZE(knc_perfmon_event_map),
+       .apic                   = 1,
+       .max_period             = (1ULL << 39) - 1,
+       .version                = 0,
+       .num_counters           = 2,
+       .cntval_bits            = 40,
+       .cntval_mask            = (1ULL << 40) - 1,
+       .get_event_constraints  = x86_get_event_constraints,
+       .event_constraints      = knc_event_constraints,
+       .format_attrs           = intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+       x86_pmu = knc_pmu;
+
+       memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, 
+               sizeof(hw_cache_event_ids));
+
+       return 0;
+}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
new file mode 100644 (file)
index 0000000..69dd118
--- /dev/null
@@ -0,0 +1,1062 @@
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+#include <asm/insn.h>
+
+#include "../perf_event.h"
+
+enum {
+       LBR_FORMAT_32           = 0x00,
+       LBR_FORMAT_LIP          = 0x01,
+       LBR_FORMAT_EIP          = 0x02,
+       LBR_FORMAT_EIP_FLAGS    = 0x03,
+       LBR_FORMAT_EIP_FLAGS2   = 0x04,
+       LBR_FORMAT_INFO         = 0x05,
+       LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
+};
+
+static enum {
+       LBR_EIP_FLAGS           = 1,
+       LBR_TSX                 = 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+       [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
+       [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
+};
+
+/*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT         0 /* do not capture at ring0 */
+#define LBR_USER_BIT           1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT            2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT       3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT       4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT         5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT                6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT                7 /* do not capture relative jumps */
+#define LBR_FAR_BIT            8 /* do not capture far branches */
+#define LBR_CALL_STACK_BIT     9 /* enable call stack */
+
+/*
+ * The following bit only exists in Linux; we mask it out before writing it to
+ * the actual MSR. But it helps the constraint perf code to understand
+ * that this is a separate configuration.
+ */
+#define LBR_NO_INFO_BIT               63 /* don't read LBR_INFO. */
+
+#define LBR_KERNEL     (1 << LBR_KERNEL_BIT)
+#define LBR_USER       (1 << LBR_USER_BIT)
+#define LBR_JCC                (1 << LBR_JCC_BIT)
+#define LBR_REL_CALL   (1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL   (1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN     (1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP    (1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP    (1 << LBR_IND_JMP_BIT)
+#define LBR_FAR                (1 << LBR_FAR_BIT)
+#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
+#define LBR_NO_INFO    (1ULL << LBR_NO_INFO_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK   0x1ff   /* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP   -1      /* LBR filter not supported */
+#define LBR_IGN                0       /* ignored */
+
+#define LBR_ANY                 \
+       (LBR_JCC        |\
+        LBR_REL_CALL   |\
+        LBR_IND_CALL   |\
+        LBR_RETURN     |\
+        LBR_REL_JMP    |\
+        LBR_IND_JMP    |\
+        LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
+#define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
+#define LBR_FROM_FLAG_ABORT    (1ULL << 61)
+
+/*
+ * x86 control flow change classification
+ * x86 control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+       X86_BR_NONE             = 0,      /* unknown */
+
+       X86_BR_USER             = 1 << 0, /* branch target is user */
+       X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */
+
+       X86_BR_CALL             = 1 << 2, /* call */
+       X86_BR_RET              = 1 << 3, /* return */
+       X86_BR_SYSCALL          = 1 << 4, /* syscall */
+       X86_BR_SYSRET           = 1 << 5, /* syscall return */
+       X86_BR_INT              = 1 << 6, /* sw interrupt */
+       X86_BR_IRET             = 1 << 7, /* return from interrupt */
+       X86_BR_JCC              = 1 << 8, /* conditional */
+       X86_BR_JMP              = 1 << 9, /* jump */
+       X86_BR_IRQ              = 1 << 10,/* hw interrupt or trap or fault */
+       X86_BR_IND_CALL         = 1 << 11,/* indirect calls */
+       X86_BR_ABORT            = 1 << 12,/* transaction abort */
+       X86_BR_IN_TX            = 1 << 13,/* in transaction */
+       X86_BR_NO_TX            = 1 << 14,/* not in transaction */
+       X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
+       X86_BR_CALL_STACK       = 1 << 16,/* call stack */
+       X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
+
+#define X86_BR_ANY       \
+       (X86_BR_CALL    |\
+        X86_BR_RET     |\
+        X86_BR_SYSCALL |\
+        X86_BR_SYSRET  |\
+        X86_BR_INT     |\
+        X86_BR_IRET    |\
+        X86_BR_JCC     |\
+        X86_BR_JMP      |\
+        X86_BR_IRQ      |\
+        X86_BR_ABORT    |\
+        X86_BR_IND_CALL |\
+        X86_BR_IND_JMP  |\
+        X86_BR_ZERO_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL                 \
+       (X86_BR_CALL            |\
+        X86_BR_IND_CALL        |\
+        X86_BR_ZERO_CALL       |\
+        X86_BR_SYSCALL         |\
+        X86_BR_IRQ             |\
+        X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
+ * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
+ * otherwise it becomes nearly impossible to get a reliable stack.
+ */
+
+static void __intel_pmu_lbr_enable(bool pmi)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       u64 debugctl, lbr_select = 0, orig_debugctl;
+
+       /*
+        * No need to unfreeze manually, as v4 can do that as part
+        * of the GLOBAL_STATUS ack.
+        */
+       if (pmi && x86_pmu.version >= 4)
+               return;
+
+       /*
+        * No need to reprogram LBR_SELECT in a PMI, as it
+        * did not change.
+        */
+       if (cpuc->lbr_sel)
+               lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
+       if (!pmi && cpuc->lbr_sel)
+               wrmsrl(MSR_LBR_SELECT, lbr_select);
+
+       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+       orig_debugctl = debugctl;
+       debugctl |= DEBUGCTLMSR_LBR;
+       /*
+        * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
+        * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
+        * may cause superfluous increase/decrease of LBR_TOS.
+        */
+       if (!(lbr_select & LBR_CALL_STACK))
+               debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+       if (orig_debugctl != debugctl)
+               wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void __intel_pmu_lbr_disable(void)
+{
+       u64 debugctl;
+
+       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+       debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void intel_pmu_lbr_reset_32(void)
+{
+       int i;
+
+       for (i = 0; i < x86_pmu.lbr_nr; i++)
+               wrmsrl(x86_pmu.lbr_from + i, 0);
+}
+
+static void intel_pmu_lbr_reset_64(void)
+{
+       int i;
+
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               wrmsrl(x86_pmu.lbr_from + i, 0);
+               wrmsrl(x86_pmu.lbr_to   + i, 0);
+               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+                       wrmsrl(MSR_LBR_INFO_0 + i, 0);
+       }
+}
+
+void intel_pmu_lbr_reset(void)
+{
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
+               intel_pmu_lbr_reset_32();
+       else
+               intel_pmu_lbr_reset_64();
+}
+
+/*
+ * TOS = most recently recorded branch
+ */
+static inline u64 intel_pmu_lbr_tos(void)
+{
+       u64 tos;
+
+       rdmsrl(x86_pmu.lbr_tos, tos);
+       return tos;
+}
+
+enum {
+       LBR_NONE,
+       LBR_VALID,
+};
+
+static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+{
+       int i;
+       unsigned lbr_idx, mask;
+       u64 tos;
+
+       if (task_ctx->lbr_callstack_users == 0 ||
+           task_ctx->lbr_stack_state == LBR_NONE) {
+               intel_pmu_lbr_reset();
+               return;
+       }
+
+       mask = x86_pmu.lbr_nr - 1;
+       tos = task_ctx->tos;
+       for (i = 0; i < tos; i++) {
+               lbr_idx = (tos - i) & mask;
+               wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+               wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+                       wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+       }
+       wrmsrl(x86_pmu.lbr_tos, tos);
+       task_ctx->lbr_stack_state = LBR_NONE;
+}
+
+static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+{
+       int i;
+       unsigned lbr_idx, mask;
+       u64 tos;
+
+       if (task_ctx->lbr_callstack_users == 0) {
+               task_ctx->lbr_stack_state = LBR_NONE;
+               return;
+       }
+
+       mask = x86_pmu.lbr_nr - 1;
+       tos = intel_pmu_lbr_tos();
+       for (i = 0; i < tos; i++) {
+               lbr_idx = (tos - i) & mask;
+               rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+               rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+       }
+       task_ctx->tos = tos;
+       task_ctx->lbr_stack_state = LBR_VALID;
+}
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
+
+       /*
+        * If LBR callstack feature is enabled and the stack was saved when
+        * the task was scheduled out, restore the stack. Otherwise flush
+        * the LBR stack.
+        */
+       task_ctx = ctx ? ctx->task_ctx_data : NULL;
+       if (task_ctx) {
+               if (sched_in) {
+                       __intel_pmu_lbr_restore(task_ctx);
+                       cpuc->lbr_context = ctx;
+               } else {
+                       __intel_pmu_lbr_save(task_ctx);
+               }
+               return;
+       }
+
+       /*
+        * When sampling the branch stack in system-wide mode, it may be
+        * necessary to flush the stack on context switch. This happens
+        * when the branch stack does not tag its entries with the pid
+        * of the current task. Otherwise it becomes impossible to
+        * associate a branch entry with a task. This ambiguity is more
+        * likely to appear when the branch stack supports priv level
+        * filtering and the user sets it to monitor only at the user
+        * level (which could be a useful measurement in system-wide
+        * mode). In that case, the risk is high of having a branch
+        * stack with branches from multiple tasks.
+        */
+       if (sched_in) {
+               intel_pmu_lbr_reset();
+               cpuc->lbr_context = ctx;
+       }
+}
+
+static inline bool branch_user_callstack(unsigned br_sel)
+{
+       return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
+}
+
+void intel_pmu_lbr_enable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
+
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       /*
+        * Reset the LBR stack if we changed task context to
+        * avoid data leaks.
+        */
+       if (event->ctx->task && cpuc->lbr_context != event->ctx) {
+               intel_pmu_lbr_reset();
+               cpuc->lbr_context = event->ctx;
+       }
+       cpuc->br_sel = event->hw.branch_reg.reg;
+
+       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+                                       event->ctx->task_ctx_data) {
+               task_ctx = event->ctx->task_ctx_data;
+               task_ctx->lbr_callstack_users++;
+       }
+
+       cpuc->lbr_users++;
+       perf_sched_cb_inc(event->ctx->pmu);
+}
+
+void intel_pmu_lbr_disable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
+
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+                                       event->ctx->task_ctx_data) {
+               task_ctx = event->ctx->task_ctx_data;
+               task_ctx->lbr_callstack_users--;
+       }
+
+       cpuc->lbr_users--;
+       WARN_ON_ONCE(cpuc->lbr_users < 0);
+       perf_sched_cb_dec(event->ctx->pmu);
+
+       if (cpuc->enabled && !cpuc->lbr_users) {
+               __intel_pmu_lbr_disable();
+               /* avoid stale pointer */
+               cpuc->lbr_context = NULL;
+       }
+}
+
+void intel_pmu_lbr_enable_all(bool pmi)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->lbr_users)
+               __intel_pmu_lbr_enable(pmi);
+}
+
+void intel_pmu_lbr_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->lbr_users)
+               __intel_pmu_lbr_disable();
+}
+
+static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+{
+       unsigned long mask = x86_pmu.lbr_nr - 1;
+       u64 tos = intel_pmu_lbr_tos();
+       int i;
+
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               unsigned long lbr_idx = (tos - i) & mask;
+               union {
+                       struct {
+                               u32 from;
+                               u32 to;
+                       };
+                       u64     lbr;
+               } msr_lastbranch;
+
+               rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+
+               cpuc->lbr_entries[i].from       = msr_lastbranch.from;
+               cpuc->lbr_entries[i].to         = msr_lastbranch.to;
+               cpuc->lbr_entries[i].mispred    = 0;
+               cpuc->lbr_entries[i].predicted  = 0;
+               cpuc->lbr_entries[i].reserved   = 0;
+       }
+       cpuc->lbr_stack.nr = i;
+}
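
Both read paths walk the LBR stack backwards from the top-of-stack index and
wrap with (tos - i) & mask, which works because the number of LBR entries is a
power of two. A small demonstration of that wrap-around indexing; the array
contents and the TOS value are arbitrary:

    #include <stdio.h>

    #define LBR_NR 8        /* must be a power of two for the mask trick */

    int main(void)
    {
            unsigned long from[LBR_NR] = { 10, 11, 12, 13, 14, 15, 16, 17 };
            unsigned long mask = LBR_NR - 1;
            unsigned long tos = 2;  /* index of the most recent entry */
            int i;

            /* newest first: slots 2, 1, 0, 7, 6, ... */
            for (i = 0; i < LBR_NR; i++) {
                    unsigned long idx = (tos - i) & mask;
                    printf("entry %d: slot %lu, from=%lu\n", i, idx, from[idx]);
            }
            return 0;
    }
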
+
+/*
+ * Due to lack of segmentation in Linux the effective address (offset)
+ * is the same as the linear address, allowing us to merge the LIP and EIP
+ * LBR formats.
+ */
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+{
+       bool need_info = false;
+       unsigned long mask = x86_pmu.lbr_nr - 1;
+       int lbr_format = x86_pmu.intel_cap.lbr_format;
+       u64 tos = intel_pmu_lbr_tos();
+       int i;
+       int out = 0;
+       int num = x86_pmu.lbr_nr;
+
+       if (cpuc->lbr_sel) {
+               need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
+               if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+                       num = tos;
+       }
+
+       for (i = 0; i < num; i++) {
+               unsigned long lbr_idx = (tos - i) & mask;
+               u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+               int skip = 0;
+               u16 cycles = 0;
+               int lbr_flags = lbr_desc[lbr_format];
+
+               rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+               rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
+
+               if (lbr_format == LBR_FORMAT_INFO && need_info) {
+                       u64 info;
+
+                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
+                       mis = !!(info & LBR_INFO_MISPRED);
+                       pred = !mis;
+                       in_tx = !!(info & LBR_INFO_IN_TX);
+                       abort = !!(info & LBR_INFO_ABORT);
+                       cycles = (info & LBR_INFO_CYCLES);
+               }
+               if (lbr_flags & LBR_EIP_FLAGS) {
+                       mis = !!(from & LBR_FROM_FLAG_MISPRED);
+                       pred = !mis;
+                       skip = 1;
+               }
+               if (lbr_flags & LBR_TSX) {
+                       in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+                       abort = !!(from & LBR_FROM_FLAG_ABORT);
+                       skip = 3;
+               }
+               from = (u64)((((s64)from) << skip) >> skip);
+
+               /*
+                * Some CPUs report duplicated abort records,
+                * with the second entry not having an abort bit set.
+                * Skip them here. This loop runs backwards,
+                * so we need to undo the previous record.
+                * If the abort just happened outside the window
+                * the extra entry cannot be removed.
+                */
+               if (abort && x86_pmu.lbr_double_abort && out > 0)
+                       out--;
+
+               cpuc->lbr_entries[out].from      = from;
+               cpuc->lbr_entries[out].to        = to;
+               cpuc->lbr_entries[out].mispred   = mis;
+               cpuc->lbr_entries[out].predicted = pred;
+               cpuc->lbr_entries[out].in_tx     = in_tx;
+               cpuc->lbr_entries[out].abort     = abort;
+               cpuc->lbr_entries[out].cycles    = cycles;
+               cpuc->lbr_entries[out].reserved  = 0;
+               out++;
+       }
+       cpuc->lbr_stack.nr = out;
+}
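
On formats that keep the MISPRED/IN_TX/ABORT flags in the top bits of the FROM
address, the line "from = (u64)((((s64)from) << skip) >> skip);" clears those
flag bits while sign-extending what remains, so canonical kernel addresses get
their high bits back. A standalone demonstration of the shift trick; the sample
addresses are arbitrary, and the helper shifts the unsigned value first to
avoid left-shifting a negative quantity:

    #include <stdint.h>
    #include <stdio.h>

    /* Clear the top 'skip' flag bits and sign-extend the remaining address. */
    static uint64_t strip_flags(uint64_t from, int skip)
    {
            return (uint64_t)((int64_t)(from << skip) >> skip);
    }

    int main(void)
    {
            int skip = 3;   /* MISPRED, IN_TX, ABORT live in bits 63..61 */

            /* user-style address with all three flag bits set */
            uint64_t user = 0x0000000000401000ULL | (7ULL << 61);
            /* kernel-style address whose flag bits happen to be clear */
            uint64_t kern = 0xffffffff81000000ULL & ~(7ULL << 61);

            printf("user: 0x%016llx -> 0x%016llx\n",
                   (unsigned long long)user,
                   (unsigned long long)strip_flags(user, skip));
            printf("kern: 0x%016llx -> 0x%016llx\n",
                   (unsigned long long)kern,
                   (unsigned long long)strip_flags(kern, skip));
            return 0;
    }

The user address comes back with the flag bits removed, while the kernel
address is restored to its canonical 0xffffffff81000000 form.
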
+
+void intel_pmu_lbr_read(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (!cpuc->lbr_users)
+               return;
+
+       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
+               intel_pmu_lbr_read_32(cpuc);
+       else
+               intel_pmu_lbr_read_64(cpuc);
+
+       intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+       u64 br_type = event->attr.branch_sample_type;
+       int mask = 0;
+
+       if (br_type & PERF_SAMPLE_BRANCH_USER)
+               mask |= X86_BR_USER;
+
+       if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+               mask |= X86_BR_KERNEL;
+
+       /* we ignore BRANCH_HV here */
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY)
+               mask |= X86_BR_ANY;
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+               mask |= X86_BR_ANY_CALL;
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+               mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+       if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+               mask |= X86_BR_IND_CALL;
+
+       if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+               mask |= X86_BR_ABORT;
+
+       if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+               mask |= X86_BR_IN_TX;
+
+       if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+               mask |= X86_BR_NO_TX;
+
+       if (br_type & PERF_SAMPLE_BRANCH_COND)
+               mask |= X86_BR_JCC;
+
+       if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+               if (!x86_pmu_has_lbr_callstack())
+                       return -EOPNOTSUPP;
+               if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
+                       return -EINVAL;
+               mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
+                       X86_BR_CALL_STACK;
+       }
+
+       if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+               mask |= X86_BR_IND_JMP;
+
+       if (br_type & PERF_SAMPLE_BRANCH_CALL)
+               mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+       /*
+        * stash the actual user request into reg; it may
+        * be used by fixup code for some CPUs
+        */
+       event->hw.branch_reg.reg = mask;
+       return 0;
+}
+
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg;
+       u64 br_type = event->attr.branch_sample_type;
+       u64 mask = 0, v;
+       int i;
+
+       for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+               if (!(br_type & (1ULL << i)))
+                       continue;
+
+               v = x86_pmu.lbr_sel_map[i];
+               if (v == LBR_NOT_SUPP)
+                       return -EOPNOTSUPP;
+
+               if (v != LBR_IGN)
+                       mask |= v;
+       }
+
+       reg = &event->hw.branch_reg;
+       reg->idx = EXTRA_REG_LBR;
+
+       /*
+        * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
+        * in suppress mode. So LBR_SELECT should be set to
+        * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+        */
+       reg->config = mask ^ x86_pmu.lbr_sel_mask;
+
+       if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+           (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+           (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+               reg->config |= LBR_NO_INFO;
+
+       return 0;
+}
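
Because the low nine LBR_SELECT bits are suppress bits, the requested capture
mask is turned into the MSR value with "mask ^ x86_pmu.lbr_sel_mask" (with
lbr_sel_mask set to LBR_SEL_MASK in the init routines below): the XOR inverts
exactly the bits inside LBR_SEL_MASK and passes higher bits, such as the
call-stack enable, through unchanged, which matches the
(~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) form quoted in the comment.
A quick check of that identity with an example mask:

    #include <stdint.h>
    #include <stdio.h>

    #define SEL_MASK   0x1ffULL    /* low 9 bits of LBR_SELECT: suppress bits */
    #define CALL_STACK (1ULL << 9) /* example bit outside the suppress range */

    int main(void)
    {
            /* "capture conditionals and returns, plus call-stack mode" */
            uint64_t mask = (1ULL << 2) | (1ULL << 5) | CALL_STACK;

            uint64_t config   = mask ^ SEL_MASK;
            uint64_t expanded = (~mask & SEL_MASK) | (mask & ~SEL_MASK);

            printf("mask     = 0x%03llx\n", (unsigned long long)mask);
            printf("config   = 0x%03llx\n", (unsigned long long)config);
            printf("expanded = 0x%03llx (identical)\n",
                   (unsigned long long)expanded);
            return 0;
    }
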
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+       int ret = 0;
+
+       /*
+        * no LBR on this PMU
+        */
+       if (!x86_pmu.lbr_nr)
+               return -EOPNOTSUPP;
+
+       /*
+        * setup SW LBR filter
+        */
+       ret = intel_pmu_setup_sw_lbr_filter(event);
+       if (ret)
+               return ret;
+
+       /*
+        * setup HW LBR filter, if any
+        */
+       if (x86_pmu.lbr_sel_map)
+               ret = intel_pmu_setup_hw_lbr_filter(event);
+
+       return ret;
+}
+
+/*
+ * return the type of control flow change at address "from".
+ * The instruction is not necessarily a branch (e.g., in case of an interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to, int abort)
+{
+       struct insn insn;
+       void *addr;
+       int bytes_read, bytes_left;
+       int ret = X86_BR_NONE;
+       int ext, to_plm, from_plm;
+       u8 buf[MAX_INSN_SIZE];
+       int is64 = 0;
+
+       to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+       from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+       /*
+        * may be zero if the LBR did not fill up after a reset by the time
+        * we get a PMU interrupt
+        */
+       if (from == 0 || to == 0)
+               return X86_BR_NONE;
+
+       if (abort)
+               return X86_BR_ABORT | to_plm;
+
+       if (from_plm == X86_BR_USER) {
+               /*
+                * can happen if measuring at the user level only
+                * and we interrupt in a kernel thread, e.g., idle.
+                */
+               if (!current->mm)
+                       return X86_BR_NONE;
+
+               /* may fail if text not present */
+               bytes_left = copy_from_user_nmi(buf, (void __user *)from,
+                                               MAX_INSN_SIZE);
+               bytes_read = MAX_INSN_SIZE - bytes_left;
+               if (!bytes_read)
+                       return X86_BR_NONE;
+
+               addr = buf;
+       } else {
+               /*
+                * The LBR logs any address in the IP, even if the IP just
+                * faulted. This means userspace can control the from address.
+                * Ensure we don't blindly read any address by validating it is
+                * a known text address.
+                */
+               if (kernel_text_address(from)) {
+                       addr = (void *)from;
+                       /*
+                        * Assume we can get the maximum possible size
+                        * when grabbing kernel data.  This is not
+                        * _strictly_ true since we could possibly be
+                        * executing up next to a memory hole, but
+                        * it is very unlikely to be a problem.
+                        */
+                       bytes_read = MAX_INSN_SIZE;
+               } else {
+                       return X86_BR_NONE;
+               }
+       }
+
+       /*
+        * the decoder needs to know the ABI, especially
+        * on 64-bit systems running 32-bit apps
+        */
+#ifdef CONFIG_X86_64
+       is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+       insn_init(&insn, addr, bytes_read, is64);
+       insn_get_opcode(&insn);
+       if (!insn.opcode.got)
+               return X86_BR_ABORT;
+
+       switch (insn.opcode.bytes[0]) {
+       case 0xf:
+               switch (insn.opcode.bytes[1]) {
+               case 0x05: /* syscall */
+               case 0x34: /* sysenter */
+                       ret = X86_BR_SYSCALL;
+                       break;
+               case 0x07: /* sysret */
+               case 0x35: /* sysexit */
+                       ret = X86_BR_SYSRET;
+                       break;
+               case 0x80 ... 0x8f: /* conditional */
+                       ret = X86_BR_JCC;
+                       break;
+               default:
+                       ret = X86_BR_NONE;
+               }
+               break;
+       case 0x70 ... 0x7f: /* conditional */
+               ret = X86_BR_JCC;
+               break;
+       case 0xc2: /* near ret */
+       case 0xc3: /* near ret */
+       case 0xca: /* far ret */
+       case 0xcb: /* far ret */
+               ret = X86_BR_RET;
+               break;
+       case 0xcf: /* iret */
+               ret = X86_BR_IRET;
+               break;
+       case 0xcc ... 0xce: /* int */
+               ret = X86_BR_INT;
+               break;
+       case 0xe8: /* call near rel */
+               insn_get_immediate(&insn);
+               if (insn.immediate1.value == 0) {
+                       /* zero length call */
+                       ret = X86_BR_ZERO_CALL;
+                       break;
+               }
+       case 0x9a: /* call far absolute */
+               ret = X86_BR_CALL;
+               break;
+       case 0xe0 ... 0xe3: /* loop jmp */
+               ret = X86_BR_JCC;
+               break;
+       case 0xe9 ... 0xeb: /* jmp */
+               ret = X86_BR_JMP;
+               break;
+       case 0xff: /* call near absolute, call far absolute ind */
+               insn_get_modrm(&insn);
+               ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+               switch (ext) {
+               case 2: /* near ind call */
+               case 3: /* far ind call */
+                       ret = X86_BR_IND_CALL;
+                       break;
+               case 4:
+               case 5:
+                       ret = X86_BR_IND_JMP;
+                       break;
+               }
+               break;
+       default:
+               ret = X86_BR_NONE;
+       }
+       /*
+        * interrupts, traps, faults (and thus ring transitions) may
+        * occur on any instruction. Thus, to classify them correctly,
+        * we need to first look at the from and to priv levels. If they
+        * are different and to is in the kernel, then it indicates
+        * a ring transition. If the from instruction is not a ring
+        * transition instr (syscall, sysenter, int), then it means
+        * it was an irq, trap or fault.
+        *
+        * we have no way of detecting kernel to kernel faults.
+        */
+       if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+           && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+               ret = X86_BR_IRQ;
+
+       /*
+        * branch priv level determined by target as
+        * is done by HW when LBR_SELECT is implemented
+        */
+       if (ret != X86_BR_NONE)
+               ret |= to_plm;
+
+       return ret;
+}
+
+/*
+ * implement actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+       u64 from, to;
+       int br_sel = cpuc->br_sel;
+       int i, j, type;
+       bool compress = false;
+
+       /* if sampling all branches, then nothing to filter */
+       if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+               return;
+
+       for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+               from = cpuc->lbr_entries[i].from;
+               to = cpuc->lbr_entries[i].to;
+
+               type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+               if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+                       if (cpuc->lbr_entries[i].in_tx)
+                               type |= X86_BR_IN_TX;
+                       else
+                               type |= X86_BR_NO_TX;
+               }
+
+               /* if type does not correspond, then discard */
+               if (type == X86_BR_NONE || (br_sel & type) != type) {
+                       cpuc->lbr_entries[i].from = 0;
+                       compress = true;
+               }
+       }
+
+       if (!compress)
+               return;
+
+       /* remove all entries with from=0 */
+       for (i = 0; i < cpuc->lbr_stack.nr; ) {
+               if (!cpuc->lbr_entries[i].from) {
+                       j = i;
+                       while (++j < cpuc->lbr_stack.nr)
+                               cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+                       cpuc->lbr_stack.nr--;
+                       if (!cpuc->lbr_entries[i].from)
+                               continue;
+               }
+               i++;
+       }
+}
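
The second loop in intel_pmu_lbr_filter() compacts the entry array in place:
entries rejected by the software filter had their 'from' field zeroed in the
first pass, and each hole is closed by shifting the tail down so the surviving
entries keep their order. A tiny standalone version of that compaction, using
plain integers in place of LBR entries:

    #include <stdio.h>

    int main(void)
    {
            /* 0 marks an entry that was filtered out */
            int entries[] = { 5, 0, 7, 0, 0, 9 };
            int nr = sizeof(entries) / sizeof(entries[0]);
            int i, j;

            for (i = 0; i < nr; ) {
                    if (!entries[i]) {
                            /* shift the tail down over the discarded slot */
                            for (j = i + 1; j < nr; j++)
                                    entries[j - 1] = entries[j];
                            nr--;
                            continue;   /* slot i may hold another zero now */
                    }
                    i++;
            }

            for (i = 0; i < nr; i++)
                    printf("%d ", entries[i]);
            printf("(nr=%d)\n", nr);
            return 0;
    }
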
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
+                                               | LBR_IND_JMP | LBR_FAR,
+       /*
+        * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+        */
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
+        LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+       /*
+        * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+        */
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
+};
+
+static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_RETURN | LBR_CALL_STACK,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
+};
+
+/* core */
+void __init intel_pmu_lbr_init_core(void)
+{
+       x86_pmu.lbr_nr     = 4;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+       /*
+        * SW branch filter usage:
+        * - compensate for lack of HW filter
+        */
+       pr_cont("4-deep LBR, ");
+}
+
+/* nehalem/westmere */
+void __init intel_pmu_lbr_init_nhm(void)
+{
+       x86_pmu.lbr_nr     = 16;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - workaround LBR_SEL errata (see above)
+        * - support syscall, sysret capture.
+        *   That requires LBR_FAR but that means far
+        *   jmps need to be filtered out
+        */
+       pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void __init intel_pmu_lbr_init_snb(void)
+{
+       x86_pmu.lbr_nr   = 16;
+       x86_pmu.lbr_tos  = MSR_LBR_TOS;
+       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - support syscall, sysret capture.
+        *   That requires LBR_FAR but that means far
+        *   jmps need to be filtered out
+        */
+       pr_cont("16-deep LBR, ");
+}
+
+/* haswell */
+void intel_pmu_lbr_init_hsw(void)
+{
+       x86_pmu.lbr_nr   = 16;
+       x86_pmu.lbr_tos  = MSR_LBR_TOS;
+       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+       pr_cont("16-deep LBR, ");
+}
+
+/* skylake */
+__init void intel_pmu_lbr_init_skl(void)
+{
+       x86_pmu.lbr_nr   = 32;
+       x86_pmu.lbr_tos  = MSR_LBR_TOS;
+       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - support syscall, sysret capture.
+        *   That requires LBR_FAR but that means far
+        *   jmps need to be filtered out
+        */
+       pr_cont("32-deep LBR, ");
+}
+
+/* atom */
+void __init intel_pmu_lbr_init_atom(void)
+{
+       /*
+        * only models starting at stepping 10 seem
+        * to have an operational LBR which can freeze
+        * on PMU interrupt
+        */
+       if (boot_cpu_data.x86_model == 28
+           && boot_cpu_data.x86_mask < 10) {
+               pr_cont("LBR disabled due to erratum");
+               return;
+       }
+
+       x86_pmu.lbr_nr     = 8;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+       /*
+        * SW branch filter usage:
+        * - compensate for lack of HW filter
+        */
+       pr_cont("8-deep LBR, ");
+}
+
+/* Knights Landing */
+void intel_pmu_lbr_init_knl(void)
+{
+       x86_pmu.lbr_nr     = 8;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+       pr_cont("8-deep LBR, ");
+}
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
new file mode 100644 (file)
index 0000000..0a5ede1
--- /dev/null
@@ -0,0 +1,1376 @@
+/*
+ * Netburst Performance Events (P4, old Xeon)
+ *
+ *  Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
+ *  Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+
+#include <asm/perf_event_p4.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+#define P4_CNTR_LIMIT 3
+/*
+ * array indices: 0,1 - HT threads, used with HT enabled cpu
+ */
+struct p4_event_bind {
+       unsigned int opcode;                    /* Event code and ESCR selector */
+       unsigned int escr_msr[2];               /* ESCR MSR for this event */
+       unsigned int escr_emask;                /* valid ESCR EventMask bits */
+       unsigned int shared;                    /* event is shared across threads */
+       char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on absence */
+};
+
+struct p4_pebs_bind {
+       unsigned int metric_pebs;
+       unsigned int metric_vert;
+};
+
+/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
+#define P4_GEN_PEBS_BIND(name, pebs, vert)                     \
+       [P4_PEBS_METRIC__##name] = {                            \
+               .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,   \
+               .metric_vert = vert,                            \
+       }
+
+/*
+ * note we have P4_PEBS_ENABLE_UOP_TAG always set here
+ *
+ * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
+ * event configuration to find out which values are to be
+ * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * registers
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+       P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,  0x0000001, 0x0000001),
+       P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,  0x0000002, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_load_miss_retired,        0x0000004, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_store_miss_retired,       0x0000004, 0x0000002),
+       P4_GEN_PEBS_BIND(dtlb_all_miss_retired,         0x0000004, 0x0000003),
+       P4_GEN_PEBS_BIND(tagged_mispred_branch,         0x0018000, 0x0000010),
+       P4_GEN_PEBS_BIND(mob_load_replay_retired,       0x0000200, 0x0000001),
+       P4_GEN_PEBS_BIND(split_load_retired,            0x0000400, 0x0000001),
+       P4_GEN_PEBS_BIND(split_store_retired,           0x0000400, 0x0000002),
+};
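+
+/*
+ * For example, the P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,
+ * 0x0000001, 0x0000001) entry above expands to metric_pebs =
+ * 0x0000001 | P4_PEBS_ENABLE_UOP_TAG and metric_vert = 0x0000001;
+ * these two values are what p4_pmu_enable_pebs() later writes into
+ * MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT respectively.
+ */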
+
+/*
+ * Note that we don't use CCCR1 here; there is an
+ * exception for P4_BSQ_ALLOCATION but we simply have
+ * no workaround for it.
+ *
+ * Consider this binding as the resources a particular
+ * event may borrow; it doesn't contain EventMask,
+ * Tags and friends -- those are left to the caller.
+ */
+static struct p4_event_bind p4_event_bind_map[] = {
+       [P4_EVENT_TC_DELIVER_MODE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
+               .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
+               .shared         = 1,
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_BPU_FETCH_REQUEST] = {
+               .opcode         = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
+               .escr_msr       = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_ITLB_REFERENCE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
+               .escr_msr       = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_MEMORY_CANCEL] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
+               .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_MEMORY_COMPLETE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
+               .escr_msr       = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_LOAD_PORT_REPLAY] = {
+               .opcode         = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
+               .escr_msr       = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_STORE_PORT_REPLAY] = {
+               .opcode         = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
+               .escr_msr       = { MSR_P4_SAAT_ESCR0 ,  MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_MOB_LOAD_REPLAY] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
+               .escr_msr       = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_PAGE_WALK_TYPE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
+               .escr_msr       = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
+               .shared         = 1,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_BSQ_CACHE_REFERENCE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
+               .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_IOQ_ALLOCATION] = {
+               .opcode         = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER)               |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_IOQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
+               .opcode         = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
+               .escr_msr       = { MSR_P4_FSB_ESCR1,  MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
+               .cntr           = { {2, -1, -1}, {3, -1, -1} },
+       },
+       [P4_EVENT_FSB_DATA_ACTIVITY] = {
+               .opcode         = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
+               .shared         = 1,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_BSQ_ALLOCATION] = {           /* shared ESCR, broken CCCR1 */
+               .opcode         = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
+               .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE)      |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE)      |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
+               .cntr           = { {0, -1, -1}, {1, -1, -1} },
+       },
+       [P4_EVENT_BSQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
+               .opcode         = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
+               .escr_msr       = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE)     |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE)  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE)  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
+               .cntr           = { {2, -1, -1}, {3, -1, -1} },
+       },
+       [P4_EVENT_SSE_INPUT_ASSIST] = {
+               .opcode         = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_PACKED_SP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_PACKED_DP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_SCALAR_SP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_SCALAR_DP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_64BIT_MMX_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_128BIT_MMX_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_X87_FP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_X87_FP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_TC_MISC] = {
+               .opcode         = P4_OPCODE(P4_EVENT_TC_MISC),
+               .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_GLOBAL_POWER_EVENTS] = {
+               .opcode         = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_TC_MS_XFER] = {
+               .opcode         = P4_OPCODE(P4_EVENT_TC_MS_XFER),
+               .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_UOP_QUEUE_WRITES] = {
+               .opcode         = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
+               .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD)     |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
+               .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_RETIRED_BRANCH_TYPE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
+               .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_RESOURCE_STALL] = {
+               .opcode         = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
+               .escr_msr       = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_WC_BUFFER] = {
+               .opcode         = P4_OPCODE(P4_EVENT_WC_BUFFER),
+               .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS)               |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_B2B_CYCLES] = {
+               .opcode         = P4_OPCODE(P4_EVENT_B2B_CYCLES),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_BNR] = {
+               .opcode         = P4_OPCODE(P4_EVENT_BNR),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_SNOOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_SNOOP),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_RESPONSE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_RESPONSE),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_FRONT_END_EVENT] = {
+               .opcode         = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_EXECUTION_EVENT] = {
+               .opcode         = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_REPLAY_EVENT] = {
+               .opcode         = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_INSTR_RETIRED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
+               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_UOPS_RETIRED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
+               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_UOP_TYPE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_UOP_TYPE),
+               .escr_msr       = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS)                  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_BRANCH_RETIRED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
+               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_X87_ASSIST] = {
+               .opcode         = P4_OPCODE(P4_EVENT_X87_ASSIST),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_MACHINE_CLEAR] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_INSTR_COMPLETED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
+               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+};
+
+#define P4_GEN_CACHE_EVENT(event, bit, metric)                           \
+       p4_config_pack_escr(P4_ESCR_EVENT(event)                        | \
+                           P4_ESCR_EMASK_BIT(event, bit))              | \
+       p4_config_pack_cccr(metric                                      | \
+                           P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
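+
+/*
+ * P4_GEN_CACHE_EVENT() packs a complete cache event config: the ESCR half
+ * carries the event opcode plus the requested EventMask bit, while the
+ * CCCR half carries the PEBS metric and the ESEL value derived from the
+ * event opcode. The table below uses it to describe which replay/ITLB
+ * events back the generic cache events.
+ */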
+
+static __initconst const u64 p4_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+                                               P4_PEBS_METRIC__1stl_cache_load_miss_retired),
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+                                               P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
+       },
+},
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+                                               P4_PEBS_METRIC__dtlb_load_miss_retired),
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+                                               P4_PEBS_METRIC__dtlb_store_miss_retired),
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
+                                               P4_PEBS_METRIC__none),
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
+                                               P4_PEBS_METRIC__none),
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+/*
+ * Because Netburst is quite restricted in how many
+ * identical events may run simultaneously, we introduce event aliases,
+ * i.e. different events which have the same functionality but
+ * utilize non-intersecting resources (ESCR/CCCR/counter registers).
+ *
+ * This allows us to relax restrictions a bit and run two or more
+ * identical events together.
+ *
+ * Never set any custom internal bits such as P4_CONFIG_HT,
+ * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
+ * either up to date automatically or not applicable at all.
+ */
+struct p4_event_alias {
+       u64 original;
+       u64 alternative;
+} p4_event_aliases[] = {
+       {
+               /*
+                * Non-halted cycles can be substituted with non-sleeping cycles (see
+                * Intel SDM Vol3b for details). We need this alias to be able
+                * to run nmi-watchdog and 'perf top' (or any other user space tool
+                * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
+                * simultaneously.
+                */
+       .original       =
+               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
+       .alternative    =
+               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT)             |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
+               p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT          |
+                                   P4_CCCR_COMPARE),
+       },
+};
+
+static u64 p4_get_alias_event(u64 config)
+{
+       u64 config_match;
+       int i;
+
+       /*
+        * Only an event with the special mark is allowed;
+        * this way we're sure it didn't come in as a malformed
+        * RAW event.
+        */
+       if (!(config & P4_CONFIG_ALIASABLE))
+               return 0;
+
+       config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
+
+       for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
+               if (config_match == p4_event_aliases[i].original) {
+                       config_match = p4_event_aliases[i].alternative;
+                       break;
+               } else if (config_match == p4_event_aliases[i].alternative) {
+                       config_match = p4_event_aliases[i].original;
+                       break;
+               }
+       }
+
+       if (i >= ARRAY_SIZE(p4_event_aliases))
+               return 0;
+
+       return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
+}
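+
+/*
+ * For example, PERF_COUNT_HW_CPU_CYCLES below is marked P4_CONFIG_ALIASABLE
+ * and encoded as GLOBAL_POWER_EVENTS/RUNNING; when its counter or ESCR is
+ * already taken, p4_get_alias_event() rewrites the config to the
+ * EXECUTION_EVENT encoding from p4_event_aliases[] (threshold 15,
+ * complement, compare), which counts non-sleeping cycles as a substitute,
+ * so both users can run at once.
+ */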
+
+static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
+  /* non-halted CPU clocks */
+  [PERF_COUNT_HW_CPU_CYCLES] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING))       |
+               P4_CONFIG_ALIASABLE,
+
+  /*
+   * retired instructions
+   * for the sake of simplicity we don't use FSB tagging
+   */
+  [PERF_COUNT_HW_INSTRUCTIONS] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED)               |
+               P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
+               P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
+
+  /* cache hits */
+  [PERF_COUNT_HW_CACHE_REFERENCES] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
+
+  /* cache misses */
+  [PERF_COUNT_HW_CACHE_MISSES] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
+
+  /* branch instructions retired */
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
+               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
+               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
+
+  /* mispredicted branches retired */
+  [PERF_COUNT_HW_BRANCH_MISSES]        =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED)      |
+               P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
+
+  /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN):  */
+  [PERF_COUNT_HW_BUS_CYCLES] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY)           |
+               P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN))        |
+       p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
+};
+
+static struct p4_event_bind *p4_config_get_bind(u64 config)
+{
+       unsigned int evnt = p4_config_unpack_event(config);
+       struct p4_event_bind *bind = NULL;
+
+       if (evnt < ARRAY_SIZE(p4_event_bind_map))
+               bind = &p4_event_bind_map[evnt];
+
+       return bind;
+}
+
+static u64 p4_pmu_event_map(int hw_event)
+{
+       struct p4_event_bind *bind;
+       unsigned int esel;
+       u64 config;
+
+       config = p4_general_events[hw_event];
+       bind = p4_config_get_bind(config);
+       esel = P4_OPCODE_ESEL(bind->opcode);
+       config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
+
+       return config;
+}
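+
+/*
+ * p4_pmu_event_map() thus returns the generic encoding from
+ * p4_general_events[] with the matching ESCR select (ESEL) folded into
+ * the CCCR half, so the CCCR ends up pointing at the right ESCR.
+ */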
+
+/* check cpu model specifics */
+static bool p4_event_match_cpu_model(unsigned int event_idx)
+{
+       /* the INSTR_COMPLETED event only exists on models 3, 4 and 6 (Prescott) */
+       if (event_idx == P4_EVENT_INSTR_COMPLETED) {
+               if (boot_cpu_data.x86_model != 3 &&
+                       boot_cpu_data.x86_model != 4 &&
+                       boot_cpu_data.x86_model != 6)
+                       return false;
+       }
+
+       /*
+        * For reference:
+        * - IQ_ESCR0 and IQ_ESCR1 exist only on models 1 and 2
+        */
+
+       return true;
+}
+
+static int p4_validate_raw_event(struct perf_event *event)
+{
+       unsigned int v, emask;
+
+       /* User data may have an out-of-bounds event index */
+       v = p4_config_unpack_event(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_event_bind_map))
+               return -EINVAL;
+
+       /* It may be unsupported: */
+       if (!p4_event_match_cpu_model(v))
+               return -EINVAL;
+
+       /*
+        * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
+        * in Architectural Performance Monitoring. It selects not
+        * _which_ logical cpu to count on but rather _when_, i.e. it
+        * depends on the logical cpu state -- count the event if one cpu
+        * is active, none, both or any -- so we just allow the user to
+        * pass any desired value.
+        *
+        * In turn we always set the Tx_OS/Tx_USR bits bound to this logical
+        * cpu without propagating them to the other cpu.
+        */
+
+       /*
+        * if an event is shared across the logical threads
+        * the user needs special permissions to be able to use it
+        */
+       if (p4_ht_active() && p4_event_bind_map[v].shared) {
+               if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+                       return -EACCES;
+       }
+
+       /* ESCR EventMask bits may be invalid */
+       emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
+       if (emask & ~p4_event_bind_map[v].escr_emask)
+               return -EINVAL;
+
+       /*
+        * it may have some invalid PEBS bits
+        */
+       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
+               return -EINVAL;
+
+       v = p4_config_unpack_metric(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_pebs_bind_map))
+               return -EINVAL;
+
+       return 0;
+}
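+
+/*
+ * In short, a RAW config is accepted only if its event index and PEBS
+ * metric index are within the bind tables, the event exists on this CPU
+ * model, no EventMask bits outside escr_emask are set, the PEBS enable
+ * bit is not smuggled in, and shared (per-core) events are only used
+ * with sufficient privileges when HT is active.
+ */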
+
+static int p4_hw_config(struct perf_event *event)
+{
+       int cpu = get_cpu();
+       int rc = 0;
+       u32 escr, cccr;
+
+       /*
+        * the reason we take the cpu this early is that if we get scheduled
+        * for the first time on the same cpu, we will not need to swap the
+        * thread-specific flags in the config (and will save some cpu cycles)
+        */
+
+       cccr = p4_default_cccr_conf(cpu);
+       escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
+                                        event->attr.exclude_user);
+       event->hw.config = p4_config_pack_escr(escr) |
+                          p4_config_pack_cccr(cccr);
+
+       if (p4_ht_active() && p4_ht_thread(cpu))
+               event->hw.config = p4_set_ht_bit(event->hw.config);
+
+       if (event->attr.type == PERF_TYPE_RAW) {
+               struct p4_event_bind *bind;
+               unsigned int esel;
+               /*
+                * Clear the bits we reserve to be managed by the kernel
+                * itself and never allow from user space
+                */
+                event->attr.config &= P4_CONFIG_MASK;
+
+               rc = p4_validate_raw_event(event);
+               if (rc)
+                       goto out;
+
+               /*
+                * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
+                * bits since we keep additional info here (for cache events etc.)
+                */
+               event->hw.config |= event->attr.config;
+               bind = p4_config_get_bind(event->attr.config);
+               if (!bind) {
+                       rc = -EINVAL;
+                       goto out;
+               }
+               esel = P4_OPCODE_ESEL(bind->opcode);
+               event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
+       }
+
+       rc = x86_setup_perfctr(event);
+out:
+       put_cpu();
+       return rc;
+}
+
+static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
+{
+       u64 v;
+
+       /* an official way for overflow indication */
+       rdmsrl(hwc->config_base, v);
+       if (v & P4_CCCR_OVF) {
+               wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
+               return 1;
+       }
+
+       /*
+        * In some circumstances the overflow might issue an NMI but not
+        * set the P4_CCCR_OVF bit. Because a counter holds a negative value
+        * we simply check for the high bit being set; if it's cleared, the
+        * counter has reached zero and continued counting before the
+        * real NMI signal was received:
+        */
+       rdmsrl(hwc->event_base, v);
+       if (!(v & ARCH_P4_UNFLAGGED_BIT))
+               return 1;
+
+       return 0;
+}
+
+static void p4_pmu_disable_pebs(void)
+{
+       /*
+        * FIXME
+        *
+        * It's still allowed for two threads to set up the same cache
+        * events, so we can't simply clear the metrics until we know
+        * no one is depending on us; we would need some kind of counter
+        * for "ReplayEvent" users.
+        *
+        * What is more complex are RAW events: if the user (for some
+        * reason) passes a cache event metric with an improper event
+        * opcode, it's fine from the hardware point of view but complete
+        * nonsense as far as the "meaning" of such an action goes.
+        *
+        * So for the moment leave the metrics turned on forever -- it's
+        * ok for now but needs to be revisited!
+        *
+        * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
+        * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
+        */
+}
+
+static inline void p4_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /*
+        * If the event gets disabled while the counter is in an overflowed
+        * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
+        * asserted again and again
+        */
+       (void)wrmsrl_safe(hwc->config_base,
+               p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
+}
+
+static void p4_pmu_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct perf_event *event = cpuc->events[idx];
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+               p4_pmu_disable_event(event);
+       }
+
+       p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+       struct p4_pebs_bind *bind;
+       unsigned int idx;
+
+       BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+       idx = p4_config_unpack_metric(config);
+       if (idx == P4_PEBS_METRIC__none)
+               return;
+
+       bind = &p4_pebs_bind_map[idx];
+
+       (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
+       (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT,      (u64)bind->metric_vert);
+}
+
+static void p4_pmu_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       int thread = p4_ht_config_thread(hwc->config);
+       u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
+       unsigned int idx = p4_config_unpack_event(hwc->config);
+       struct p4_event_bind *bind;
+       u64 escr_addr, cccr;
+
+       bind = &p4_event_bind_map[idx];
+       escr_addr = bind->escr_msr[thread];
+
+       /*
+        * - we don't support cascaded counters yet
+        * - and counter 1 is broken (erratum)
+        */
+       WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
+       WARN_ON_ONCE(hwc->idx == 1);
+
+       /* we need a real Event value */
+       escr_conf &= ~P4_ESCR_EVENT_MASK;
+       escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
+
+       cccr = p4_config_unpack_cccr(hwc->config);
+
+       /*
+        * it could be a cache event, so we need to write the metrics
+        * into the additional MSRs
+        */
+       p4_pmu_enable_pebs(hwc->config);
+
+       (void)wrmsrl_safe(escr_addr, escr_conf);
+       (void)wrmsrl_safe(hwc->config_base,
+                               (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
+}
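+
+/*
+ * Enabling is therefore a three step dance: program the PEBS metric MSRs
+ * (if any), write the thread-local ESCR with the real event code and
+ * mask, and finally arm the CCCR by setting P4_CCCR_ENABLE with the
+ * reserved bits cleared.
+ */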
+
+static void p4_pmu_enable_all(int added)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct perf_event *event = cpuc->events[idx];
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+               p4_pmu_enable_event(event);
+       }
+}
+
+static int p4_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc;
+       struct perf_event *event;
+       struct hw_perf_event *hwc;
+       int idx, handled = 0;
+       u64 val;
+
+       cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               int overflow;
+
+               if (!test_bit(idx, cpuc->active_mask)) {
+                       /* catch in-flight IRQs */
+                       if (__test_and_clear_bit(idx, cpuc->running))
+                               handled++;
+                       continue;
+               }
+
+               event = cpuc->events[idx];
+               hwc = &event->hw;
+
+               WARN_ON_ONCE(hwc->idx != idx);
+
+               /* it might be an unflagged overflow */
+               overflow = p4_pmu_clear_cccr_ovf(hwc);
+
+               val = x86_perf_event_update(event);
+               if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
+                       continue;
+
+               handled += overflow;
+
+               /* event overflow for sure */
+               perf_sample_data_init(&data, 0, hwc->last_period);
+
+               if (!x86_perf_event_set_period(event))
+                       continue;
+
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+       }
+
+       if (handled)
+               inc_irq_stat(apic_perf_irqs);
+
+       /*
+        * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+        * been observed that the OVF bit flag has to be cleared first _before_
+        * the LVTPC can be unmasked.
+        *
+        * The reason is the NMI line will continue to be asserted while the OVF
+        * bit is set.  This causes a second NMI to be generated if the LVTPC is
+        * unmasked before the OVF bit is cleared, leading to unknown NMI
+        * messages.
+        */
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+       return handled;
+}
+
+/*
+ * swap thread specific fields according to a thread
+ * we are going to run on
+ */
+static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
+{
+       u32 escr, cccr;
+
+       /*
+        * either we got lucky and continue on the same cpu, or there is no HT support
+        */
+       if (!p4_should_swap_ts(hwc->config, cpu))
+               return;
+
+       /*
+        * the event was migrated from another logical
+        * cpu, so we need to swap the thread-specific flags
+        */
+
+       escr = p4_config_unpack_escr(hwc->config);
+       cccr = p4_config_unpack_cccr(hwc->config);
+
+       if (p4_ht_thread(cpu)) {
+               cccr &= ~P4_CCCR_OVF_PMI_T0;
+               cccr |= P4_CCCR_OVF_PMI_T1;
+               if (escr & P4_ESCR_T0_OS) {
+                       escr &= ~P4_ESCR_T0_OS;
+                       escr |= P4_ESCR_T1_OS;
+               }
+               if (escr & P4_ESCR_T0_USR) {
+                       escr &= ~P4_ESCR_T0_USR;
+                       escr |= P4_ESCR_T1_USR;
+               }
+               hwc->config  = p4_config_pack_escr(escr);
+               hwc->config |= p4_config_pack_cccr(cccr);
+               hwc->config |= P4_CONFIG_HT;
+       } else {
+               cccr &= ~P4_CCCR_OVF_PMI_T1;
+               cccr |= P4_CCCR_OVF_PMI_T0;
+               if (escr & P4_ESCR_T1_OS) {
+                       escr &= ~P4_ESCR_T1_OS;
+                       escr |= P4_ESCR_T0_OS;
+               }
+               if (escr & P4_ESCR_T1_USR) {
+                       escr &= ~P4_ESCR_T1_USR;
+                       escr |= P4_ESCR_T0_USR;
+               }
+               hwc->config  = p4_config_pack_escr(escr);
+               hwc->config |= p4_config_pack_cccr(cccr);
+               hwc->config &= ~P4_CONFIG_HT;
+       }
+}
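+
+/*
+ * I.e. when an event moves between HT siblings, the OVF_PMI_Tx routing
+ * bit and any Tx_OS/Tx_USR qualifier bits are rewritten from the T0 to
+ * the T1 variants (or back), and P4_CONFIG_HT is set or cleared to
+ * record which thread the config is currently encoded for.
+ */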
+
+/*
+ * ESCR address hashing is tricky: ESCRs are not sequential
+ * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and
+ * every ESCR address lies in the range [0x3a0, 0x3e1],
+ *
+ * so we end up with a ~70% filled hashtable.
+ */
+
+#define P4_ESCR_MSR_BASE               0x000003a0
+#define P4_ESCR_MSR_MAX                        0x000003e1
+#define P4_ESCR_MSR_TABLE_SIZE         (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
+#define P4_ESCR_MSR_IDX(msr)           (msr - P4_ESCR_MSR_BASE)
+#define P4_ESCR_MSR_TABLE_ENTRY(msr)   [P4_ESCR_MSR_IDX(msr)] = msr
+
+static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
+};
+
+static int p4_get_escr_idx(unsigned int addr)
+{
+       unsigned int idx = P4_ESCR_MSR_IDX(addr);
+
+       if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE      ||
+                       !p4_escr_table[idx]             ||
+                       p4_escr_table[idx] != addr)) {
+               WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
+               return -1;
+       }
+
+       return idx;
+}
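+
+/*
+ * E.g. p4_get_escr_idx(MSR_P4_BSU_ESCR0) returns 0 (0x3a0 - 0x3a0), while
+ * an address outside the table, or one hitting an unpopulated slot,
+ * triggers the WARN and returns -1.
+ */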
+
+static int p4_next_cntr(int thread, unsigned long *used_mask,
+                       struct p4_event_bind *bind)
+{
+       int i, j;
+
+       for (i = 0; i < P4_CNTR_LIMIT; i++) {
+               j = bind->cntr[thread][i];
+               if (j != -1 && !test_bit(j, used_mask))
+                       return j;
+       }
+
+       return -1;
+}
+
+static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
+       int cpu = smp_processor_id();
+       struct hw_perf_event *hwc;
+       struct p4_event_bind *bind;
+       unsigned int i, thread, num;
+       int cntr_idx, escr_idx;
+       u64 config_alias;
+       int pass;
+
+       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+       bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
+
+       for (i = 0, num = n; i < n; i++, num--) {
+
+               hwc = &cpuc->event_list[i]->hw;
+               thread = p4_ht_thread(cpu);
+               pass = 0;
+
+again:
+               /*
+                * It's possible to hit a circular lock
+                * between original and alternative events
+                * if both are scheduled already.
+                */
+               if (pass > 2)
+                       goto done;
+
+               bind = p4_config_get_bind(hwc->config);
+               escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
+               if (unlikely(escr_idx == -1))
+                       goto done;
+
+               if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
+                       cntr_idx = hwc->idx;
+                       if (assign)
+                               assign[i] = hwc->idx;
+                       goto reserve;
+               }
+
+               cntr_idx = p4_next_cntr(thread, used_mask, bind);
+               if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
+                       /*
+                        * Check whether an event alias is still available.
+                        */
+                       config_alias = p4_get_alias_event(hwc->config);
+                       if (!config_alias)
+                               goto done;
+                       hwc->config = config_alias;
+                       pass++;
+                       goto again;
+               }
+               /*
+                * Perf does test runs to see if a whole group can be assigned
+                * together successfully.  There can be multiple rounds of this.
+                * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
+                * bits, such that the next round of group assignments will
+                * cause the above p4_should_swap_ts to pass instead of fail.
+                * This leads to counters exclusive to thread0 being used by
+                * thread1.
+                *
+                * Solve this with a cheap hack, reset the idx back to -1 to
+                * force a new lookup (p4_next_cntr) to get the right counter
+                * for the right thread.
+                *
+                * This probably doesn't comply with the general spirit of how
+                * perf wants to work, but P4 is special. :-(
+                */
+               if (p4_should_swap_ts(hwc->config, cpu))
+                       hwc->idx = -1;
+               p4_pmu_swap_config_ts(hwc, cpu);
+               if (assign)
+                       assign[i] = cntr_idx;
+reserve:
+               set_bit(cntr_idx, used_mask);
+               set_bit(escr_idx, escr_mask);
+       }
+
+done:
+       return num ? -EINVAL : 0;
+}
+
+PMU_FORMAT_ATTR(cccr, "config:0-31" );
+PMU_FORMAT_ATTR(escr, "config:32-62");
+PMU_FORMAT_ATTR(ht,   "config:63"   );
+
+static struct attribute *intel_p4_formats_attr[] = {
+       &format_attr_cccr.attr,
+       &format_attr_escr.attr,
+       &format_attr_ht.attr,
+       NULL,
+};
+
+static __initconst const struct x86_pmu p4_pmu = {
+       .name                   = "Netburst P4/Xeon",
+       .handle_irq             = p4_pmu_handle_irq,
+       .disable_all            = p4_pmu_disable_all,
+       .enable_all             = p4_pmu_enable_all,
+       .enable                 = p4_pmu_enable_event,
+       .disable                = p4_pmu_disable_event,
+       .eventsel               = MSR_P4_BPU_CCCR0,
+       .perfctr                = MSR_P4_BPU_PERFCTR0,
+       .event_map              = p4_pmu_event_map,
+       .max_events             = ARRAY_SIZE(p4_general_events),
+       .get_event_constraints  = x86_get_event_constraints,
+       /*
+        * If HT is disabled we may need to use all
+        * ARCH_P4_MAX_CCCR counters simultaneously;
+        * for the moment leave it restricted, assuming
+        * HT is on.
+        */
+       .num_counters           = ARCH_P4_MAX_CCCR,
+       .apic                   = 1,
+       .cntval_bits            = ARCH_P4_CNTRVAL_BITS,
+       .cntval_mask            = ARCH_P4_CNTRVAL_MASK,
+       .max_period             = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
+       .hw_config              = p4_hw_config,
+       .schedule_events        = p4_pmu_schedule_events,
+       /*
+        * This handles erratum N15 in Intel doc 249199-029:
+        * the counter may not be updated correctly on write,
+        * so we need a second write operation to do the trick
+        * (the official workaround didn't work).
+        *
+        * The idea is taken from the OProfile code.
+        */
+       .perfctr_second_write   = 1,
+
+       .format_attrs           = intel_p4_formats_attr,
+};
+
+__init int p4_pmu_init(void)
+{
+       unsigned int low, high;
+       int i, reg;
+
+       /* If we get stripped -- indexing fails */
+       BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
+
+       rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+       if (!(low & (1 << 7))) {
+               pr_cont("unsupported Netburst CPU model %d ",
+                       boot_cpu_data.x86_model);
+               return -ENODEV;
+       }
+
+       memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
+               sizeof(hw_cache_event_ids));
+
+       pr_cont("Netburst events, ");
+
+       x86_pmu = p4_pmu;
+
+       /*
+        * Even though the counters are configured to interrupt a particular
+        * logical processor when an overflow happens, testing has shown that
+        * on kdump kernels (which use a single CPU), thread1's counter
+        * continues to run and will report an NMI on thread0.  Due to the
+        * overflow bug, this leads to a stream of unknown NMIs.
+        *
+        * Solve this by zeroing out the registers to mimic a reset.
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               reg = x86_pmu_config_addr(i);
+               wrmsrl_safe(reg, 0ULL);
+       }
+
+       return 0;
+}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
new file mode 100644 (file)
index 0000000..1f5c47a
--- /dev/null
@@ -0,0 +1,279 @@
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "../perf_event.h"
+
+/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]           = 0x0079,       /* CPU_CLK_UNHALTED */
+  [PERF_COUNT_HW_INSTRUCTIONS]         = 0x00c0,       /* INST_RETIRED     */
+  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0f2e,       /* L2_RQSTS:M:E:S:I */
+  [PERF_COUNT_HW_CACHE_MISSES]         = 0x012e,       /* L2_RQSTS:I       */
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x00c4,       /* BR_INST_RETIRED  */
+  [PERF_COUNT_HW_BRANCH_MISSES]                = 0x00c5,       /* BR_MISS_PRED_RETIRED */
+  [PERF_COUNT_HW_BUS_CYCLES]           = 0x0062,       /* BUS_DRDY_CLOCKS  */
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2,    /* RESOURCE_STALLS  */
+
+};
+
+static const u64 __initconst p6_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0043,  /* DATA_MEM_REFS       */
+                [ C(RESULT_MISS)   ] = 0x0045, /* DCU_LINES_IN        */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x0f29,  /* L2_LD:M:E:S:I       */
+       },
+        [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+        },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080,  /* IFU_IFETCH         */
+               [ C(RESULT_MISS)   ] = 0x0f28,  /* L2_IFETCH:M:E:S:I  */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x0025,  /* L2_M_LINES_INM     */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0043,  /* DATA_MEM_REFS      */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080,  /* IFU_IFETCH         */
+               [ C(RESULT_MISS)   ] = 0x0085,  /* ITLB_MISS          */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4,  /* BR_INST_RETIRED      */
+               [ C(RESULT_MISS)   ] = 0x00c5,  /* BR_MISS_PRED_RETIRED */
+        },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+static u64 p6_pmu_event_map(int hw_event)
+{
+       return p6_perfmon_event_map[hw_event];
+}
+
+/*
+ * Event setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_EVENT                   0x0000002EULL
+
+static struct event_constraint p6_event_constraints[] =
+{
+       INTEL_EVENT_CONSTRAINT(0xc1, 0x1),      /* FLOPS */
+       INTEL_EVENT_CONSTRAINT(0x10, 0x1),      /* FP_COMP_OPS_EXE */
+       INTEL_EVENT_CONSTRAINT(0x11, 0x2),      /* FP_ASSIST */
+       INTEL_EVENT_CONSTRAINT(0x12, 0x2),      /* MUL */
+       INTEL_EVENT_CONSTRAINT(0x13, 0x2),      /* DIV */
+       INTEL_EVENT_CONSTRAINT(0x14, 0x1),      /* CYCLES_DIV_BUSY */
+       EVENT_CONSTRAINT_END
+};
+
+static void p6_pmu_disable_all(void)
+{
+       u64 val;
+
+       /* p6 only has one enable register */
+       rdmsrl(MSR_P6_EVNTSEL0, val);
+       val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+       wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void p6_pmu_enable_all(int added)
+{
+       unsigned long val;
+
+       /* p6 only has one enable register */
+       rdmsrl(MSR_P6_EVNTSEL0, val);
+       val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+       wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static inline void
+p6_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 val = P6_NOP_EVENT;
+
+       (void)wrmsrl_safe(hwc->config_base, val);
+}
+
+static void p6_pmu_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 val;
+
+       val = hwc->config;
+
+       /*
+        * p6 only has a global event enable, set on PerfEvtSel0.
+        * We "disable" events by programming P6_NOP_EVENT
+        * and we rely on p6_pmu_enable_all() being called
+        * to actually enable the events.
+        */
+
+       (void)wrmsrl_safe(hwc->config_base, val);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7"    );
+PMU_FORMAT_ATTR(umask, "config:8-15"   );
+PMU_FORMAT_ATTR(edge,  "config:18"     );
+PMU_FORMAT_ATTR(pc,    "config:19"     );
+PMU_FORMAT_ATTR(inv,   "config:23"     );
+PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+
+static struct attribute *intel_p6_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_pc.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       NULL,
+};
+
+static __initconst const struct x86_pmu p6_pmu = {
+       .name                   = "p6",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = p6_pmu_disable_all,
+       .enable_all             = p6_pmu_enable_all,
+       .enable                 = p6_pmu_enable_event,
+       .disable                = p6_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_P6_EVNTSEL0,
+       .perfctr                = MSR_P6_PERFCTR0,
+       .event_map              = p6_pmu_event_map,
+       .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
+       .apic                   = 1,
+       .max_period             = (1ULL << 31) - 1,
+       .version                = 0,
+       .num_counters           = 2,
+       /*
+        * Events have 40 bits implemented. However, they are designed such
+        * that bits [32-39] are sign extensions of bit 31. As such, the
+        * effective width of an event for a P6-like PMU is 32 bits only.
+        *
+        * See IA-32 Intel Architecture Software developer manual Vol 3B
+        */
+       .cntval_bits            = 32,
+       .cntval_mask            = (1ULL << 32) - 1,
+       .get_event_constraints  = x86_get_event_constraints,
+       .event_constraints      = p6_event_constraints,
+
+       .format_attrs           = intel_p6_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+};
+
+static __init void p6_pmu_rdpmc_quirk(void)
+{
+       if (boot_cpu_data.x86_mask < 9) {
+               /*
+                * PPro erratum 26; fixed in stepping 9 and above.
+                */
+               pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
+               x86_pmu.attr_rdpmc_broken = 1;
+               x86_pmu.attr_rdpmc = 0;
+       }
+}
+
+__init int p6_pmu_init(void)
+{
+       x86_pmu = p6_pmu;
+
+       switch (boot_cpu_data.x86_model) {
+       case  1: /* Pentium Pro */
+               x86_add_quirk(p6_pmu_rdpmc_quirk);
+               break;
+
+       case  3: /* Pentium II - Klamath */
+       case  5: /* Pentium II - Deschutes */
+       case  6: /* Pentium II - Mendocino */
+               break;
+
+       case  7: /* Pentium III - Katmai */
+       case  8: /* Pentium III - Coppermine */
+       case 10: /* Pentium III Xeon */
+       case 11: /* Pentium III - Tualatin */
+               break;
+
+       case  9: /* Pentium M - Banias */
+       case 13: /* Pentium M - Dothan */
+               break;
+
+       default:
+               pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
+               return -ENODEV;
+       }
+
+       memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
+               sizeof(hw_cache_event_ids));
+
+       return 0;
+}
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
new file mode 100644 (file)
index 0000000..6af7cf7
--- /dev/null
@@ -0,0 +1,1188 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include <asm/perf_event.h>
+#include <asm/insn.h>
+#include <asm/io.h>
+#include <asm/intel_pt.h>
+
+#include "../perf_event.h"
+#include "pt.h"
+
+static DEFINE_PER_CPU(struct pt, pt_ctx);
+
+static struct pt_pmu pt_pmu;
+
+enum cpuid_regs {
+       CR_EAX = 0,
+       CR_ECX,
+       CR_EDX,
+       CR_EBX
+};
+
+/*
+ * Capabilities of Intel PT hardware, such as number of address bits or
+ * supported output schemes, are cached and exported to userspace as "caps"
+ * attribute group of pt pmu device
+ * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
+ * relevant bits together with intel_pt traces.
+ *
+ * These are necessary both for trace decoding (payloads_lip contains the
+ * address width encoded in IP-related packets) and for event configuration
+ * (bitmasks with permitted values for certain bit fields).
+ */
+#define PT_CAP(_n, _l, _r, _m)                                         \
+       [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l,        \
+                           .reg = _r, .mask = _m }
+
+static struct pt_cap_desc {
+       const char      *name;
+       u32             leaf;
+       u8              reg;
+       u32             mask;
+} pt_caps[] = {
+       PT_CAP(max_subleaf,             0, CR_EAX, 0xffffffff),
+       PT_CAP(cr3_filtering,           0, CR_EBX, BIT(0)),
+       PT_CAP(psb_cyc,                 0, CR_EBX, BIT(1)),
+       PT_CAP(mtc,                     0, CR_EBX, BIT(3)),
+       PT_CAP(topa_output,             0, CR_ECX, BIT(0)),
+       PT_CAP(topa_multiple_entries,   0, CR_ECX, BIT(1)),
+       PT_CAP(single_range_output,     0, CR_ECX, BIT(2)),
+       PT_CAP(payloads_lip,            0, CR_ECX, BIT(31)),
+       PT_CAP(mtc_periods,             1, CR_EAX, 0xffff0000),
+       PT_CAP(cycle_thresholds,        1, CR_EBX, 0xffff),
+       PT_CAP(psb_periods,             1, CR_EBX, 0xffff0000),
+};
+
+static u32 pt_cap_get(enum pt_capabilities cap)
+{
+       struct pt_cap_desc *cd = &pt_caps[cap];
+       u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
+       unsigned int shift = __ffs(cd->mask);
+
+       return (c & cd->mask) >> shift;
+}
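+
+/*
+ * Worked example of the table above (for illustration only):
+ * PT_CAP_mtc_periods is declared with leaf 1, CR_EAX and mask 0xffff0000,
+ * so pt_cap_get(PT_CAP_mtc_periods) shifts by __ffs(0xffff0000) == 16 and
+ * returns bits 31:16 of CPUID(0x14, 1).EAX, i.e. the bitmask of supported
+ * MTC period encodings that pt_event_valid() checks requests against.
+ */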
+
+static ssize_t pt_cap_show(struct device *cdev,
+                          struct device_attribute *attr,
+                          char *buf)
+{
+       struct dev_ext_attribute *ea =
+               container_of(attr, struct dev_ext_attribute, attr);
+       enum pt_capabilities cap = (long)ea->var;
+
+       return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
+}
+
+static struct attribute_group pt_cap_group = {
+       .name   = "caps",
+};
+
+PMU_FORMAT_ATTR(cyc,           "config:1"      );
+PMU_FORMAT_ATTR(mtc,           "config:9"      );
+PMU_FORMAT_ATTR(tsc,           "config:10"     );
+PMU_FORMAT_ATTR(noretcomp,     "config:11"     );
+PMU_FORMAT_ATTR(mtc_period,    "config:14-17"  );
+PMU_FORMAT_ATTR(cyc_thresh,    "config:19-22"  );
+PMU_FORMAT_ATTR(psb_period,    "config:24-27"  );
+
+static struct attribute *pt_formats_attr[] = {
+       &format_attr_cyc.attr,
+       &format_attr_mtc.attr,
+       &format_attr_tsc.attr,
+       &format_attr_noretcomp.attr,
+       &format_attr_mtc_period.attr,
+       &format_attr_cyc_thresh.attr,
+       &format_attr_psb_period.attr,
+       NULL,
+};
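+
+/*
+ * Illustrative sketch only (the values are made up): given the format
+ * strings above, a hypothetical event requesting TSC and MTC packets with
+ * MTC period encoding 3 would use
+ *
+ *     attr.config = (1ULL << 10) | (1ULL << 9) | (3ULL << 14);
+ *
+ * i.e. tsc=1, mtc=1, mtc_period=3. pt_event_valid() then checks such a
+ * config against the capability bitmasks cached from CPUID.
+ */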
+
+static struct attribute_group pt_format_group = {
+       .name   = "format",
+       .attrs  = pt_formats_attr,
+};
+
+static const struct attribute_group *pt_attr_groups[] = {
+       &pt_cap_group,
+       &pt_format_group,
+       NULL,
+};
+
+static int __init pt_pmu_hw_init(void)
+{
+       struct dev_ext_attribute *de_attrs;
+       struct attribute **attrs;
+       size_t size;
+       int ret;
+       long i;
+
+       attrs = NULL;
+
+       for (i = 0; i < PT_CPUID_LEAVES; i++) {
+               cpuid_count(20, i,
+                           &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
+       }
+
+       ret = -ENOMEM;
+       size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
+       attrs = kzalloc(size, GFP_KERNEL);
+       if (!attrs)
+               goto fail;
+
+       size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
+       de_attrs = kzalloc(size, GFP_KERNEL);
+       if (!de_attrs)
+               goto fail;
+
+       for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
+               struct dev_ext_attribute *de_attr = de_attrs + i;
+
+               de_attr->attr.attr.name = pt_caps[i].name;
+
+               sysfs_attr_init(&de_attr->attr.attr);
+
+               de_attr->attr.attr.mode         = S_IRUGO;
+               de_attr->attr.show              = pt_cap_show;
+               de_attr->var                    = (void *)i;
+
+               attrs[i] = &de_attr->attr.attr;
+       }
+
+       pt_cap_group.attrs = attrs;
+
+       return 0;
+
+fail:
+       kfree(attrs);
+
+       return ret;
+}
+
+#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC    | \
+                         RTIT_CTL_CYC_THRESH   | \
+                         RTIT_CTL_PSB_FREQ)
+
+#define RTIT_CTL_MTC   (RTIT_CTL_MTC_EN        | \
+                        RTIT_CTL_MTC_RANGE)
+
+#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN                | \
+                       RTIT_CTL_DISRETC        | \
+                       RTIT_CTL_CYC_PSB        | \
+                       RTIT_CTL_MTC)
+
+static bool pt_event_valid(struct perf_event *event)
+{
+       u64 config = event->attr.config;
+       u64 allowed, requested;
+
+       if ((config & PT_CONFIG_MASK) != config)
+               return false;
+
+       if (config & RTIT_CTL_CYC_PSB) {
+               if (!pt_cap_get(PT_CAP_psb_cyc))
+                       return false;
+
+               allowed = pt_cap_get(PT_CAP_psb_periods);
+               requested = (config & RTIT_CTL_PSB_FREQ) >>
+                       RTIT_CTL_PSB_FREQ_OFFSET;
+               if (requested && (!(allowed & BIT(requested))))
+                       return false;
+
+               allowed = pt_cap_get(PT_CAP_cycle_thresholds);
+               requested = (config & RTIT_CTL_CYC_THRESH) >>
+                       RTIT_CTL_CYC_THRESH_OFFSET;
+               if (requested && (!(allowed & BIT(requested))))
+                       return false;
+       }
+
+       if (config & RTIT_CTL_MTC) {
+               /*
+                * In the unlikely case that CPUID lists valid mtc periods,
+                * but not the mtc capability, drop out here.
+                *
+                * Spec says that setting mtc period bits while mtc bit in
+                * CPUID is 0 will #GP, so better safe than sorry.
+                */
+               if (!pt_cap_get(PT_CAP_mtc))
+                       return false;
+
+               allowed = pt_cap_get(PT_CAP_mtc_periods);
+               if (!allowed)
+                       return false;
+
+               requested = (config & RTIT_CTL_MTC_RANGE) >>
+                       RTIT_CTL_MTC_RANGE_OFFSET;
+
+               if (!(allowed & BIT(requested)))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * PT configuration helpers
+ * These all are cpu affine and operate on a local PT
+ */
+
+static void pt_config(struct perf_event *event)
+{
+       u64 reg;
+
+       if (!event->hw.itrace_started) {
+               event->hw.itrace_started = 1;
+               wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+       }
+
+       reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+
+       if (!event->attr.exclude_kernel)
+               reg |= RTIT_CTL_OS;
+       if (!event->attr.exclude_user)
+               reg |= RTIT_CTL_USR;
+
+       reg |= (event->attr.config & PT_CONFIG_MASK);
+
+       wrmsrl(MSR_IA32_RTIT_CTL, reg);
+}
+
+static void pt_config_start(bool start)
+{
+       u64 ctl;
+
+       rdmsrl(MSR_IA32_RTIT_CTL, ctl);
+       if (start)
+               ctl |= RTIT_CTL_TRACEEN;
+       else
+               ctl &= ~RTIT_CTL_TRACEEN;
+       wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+       /*
+        * A wrmsr that disables trace generation serializes other PT
+        * registers and causes all data packets to be written to memory,
+        * but a fence is required for the data to become globally visible.
+        *
+        * The below WMB, separating data store and aux_head store, matches
+        * the consumer's RMB that separates aux_head load and data load.
+        */
+       if (!start)
+               wmb();
+}
+
+static void pt_config_buffer(void *buf, unsigned int topa_idx,
+                            unsigned int output_off)
+{
+       u64 reg;
+
+       wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
+
+       reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
+
+       wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+}
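+
+/*
+ * For reference, the OUTPUT_MASK value written above encodes (and
+ * pt_read_offset() below decodes) the following layout: bits 6:0 are set
+ * to 0x7f, bits 31:7 hold the index of the current ToPA entry within the
+ * table and bits 63:32 hold the byte offset within the current output
+ * region.
+ */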
+
+/*
+ * Keep ToPA table-related metadata on the same page as the actual table,
+ * taking up a few words from the top
+ */
+
+#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
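+/*
+ * For example, with 4KiB pages and 8-byte ToPA entries this evaluates to
+ * ((4096 - 40) / 8) - 1 = 506 entries, leaving room in the page for the
+ * struct topa bookkeeping fields below.
+ */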
+
+/**
+ * struct topa - page-sized ToPA table with metadata at the top
+ * @table:     actual ToPA table entries, as understood by PT hardware
+ * @list:      linkage to struct pt_buffer's list of tables
+ * @phys:      physical address of this page
+ * @offset:    offset of the first entry in this table in the buffer
+ * @size:      total size of all entries in this table
+ * @last:      index of the last initialized entry in this table
+ */
+struct topa {
+       struct topa_entry       table[TENTS_PER_PAGE];
+       struct list_head        list;
+       u64                     phys;
+       u64                     offset;
+       size_t                  size;
+       int                     last;
+};
+
+/* make -1 stand for the last table entry */
+#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
+
+/**
+ * topa_alloc() - allocate page-sized ToPA table
+ * @cpu:       CPU on which to allocate.
+ * @gfp:       Allocation flags.
+ *
+ * Return:     On success, return the pointer to ToPA table page.
+ */
+static struct topa *topa_alloc(int cpu, gfp_t gfp)
+{
+       int node = cpu_to_node(cpu);
+       struct topa *topa;
+       struct page *p;
+
+       p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
+       if (!p)
+               return NULL;
+
+       topa = page_address(p);
+       topa->last = 0;
+       topa->phys = page_to_phys(p);
+
+       /*
+        * In case of single-entry ToPA, always put the self-referencing END
+        * link as the 2nd entry in the table
+        */
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
+               TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
+               TOPA_ENTRY(topa, 1)->end = 1;
+       }
+
+       return topa;
+}
+
+/**
+ * topa_free() - free a page-sized ToPA table
+ * @topa:      Table to deallocate.
+ */
+static void topa_free(struct topa *topa)
+{
+       free_page((unsigned long)topa);
+}
+
+/**
+ * topa_insert_table() - insert a ToPA table into a buffer
+ * @buf:        PT buffer that's being extended.
+ * @topa:       New topa table to be inserted.
+ *
+ * If it's the first table in this buffer, set up buffer's pointers
+ * accordingly; otherwise, add an END=1 link entry pointing to @topa in the
+ * current "last" table and adjust the last table pointer to @topa.
+ */
+static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
+{
+       struct topa *last = buf->last;
+
+       list_add_tail(&topa->list, &buf->tables);
+
+       if (!buf->first) {
+               buf->first = buf->last = buf->cur = topa;
+               return;
+       }
+
+       topa->offset = last->offset + last->size;
+       buf->last = topa;
+
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+               return;
+
+       BUG_ON(last->last != TENTS_PER_PAGE - 1);
+
+       TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
+       TOPA_ENTRY(last, -1)->end = 1;
+}
+
+/**
+ * topa_table_full() - check if a ToPA table is filled up
+ * @topa:      ToPA table.
+ */
+static bool topa_table_full(struct topa *topa)
+{
+       /* single-entry ToPA is a special case */
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+               return !!topa->last;
+
+       return topa->last == TENTS_PER_PAGE - 1;
+}
+
+/**
+ * topa_insert_pages() - create a list of ToPA tables
+ * @buf:       PT buffer being initialized.
+ * @gfp:       Allocation flags.
+ *
+ * This initializes a list of ToPA tables with entries from
+ * the data_pages provided by rb_alloc_aux().
+ *
+ * Return:     0 on success or error code.
+ */
+static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
+{
+       struct topa *topa = buf->last;
+       int order = 0;
+       struct page *p;
+
+       p = virt_to_page(buf->data_pages[buf->nr_pages]);
+       if (PagePrivate(p))
+               order = page_private(p);
+
+       if (topa_table_full(topa)) {
+               topa = topa_alloc(buf->cpu, gfp);
+               if (!topa)
+                       return -ENOMEM;
+
+               topa_insert_table(buf, topa);
+       }
+
+       TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
+       TOPA_ENTRY(topa, -1)->size = order;
+       if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
+               TOPA_ENTRY(topa, -1)->intr = 1;
+               TOPA_ENTRY(topa, -1)->stop = 1;
+       }
+
+       topa->last++;
+       topa->size += sizes(order);
+
+       buf->nr_pages += 1ul << order;
+
+       return 0;
+}
+
+/**
+ * pt_topa_dump() - print ToPA tables and their entries
+ * @buf:       PT buffer.
+ */
+static void pt_topa_dump(struct pt_buffer *buf)
+{
+       struct topa *topa;
+
+       list_for_each_entry(topa, &buf->tables, list) {
+               int i;
+
+               pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
+                        topa->phys, topa->offset, topa->size);
+               for (i = 0; i < TENTS_PER_PAGE; i++) {
+                       pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
+                                &topa->table[i],
+                                (unsigned long)topa->table[i].base << TOPA_SHIFT,
+                                sizes(topa->table[i].size),
+                                topa->table[i].end ?  'E' : ' ',
+                                topa->table[i].intr ? 'I' : ' ',
+                                topa->table[i].stop ? 'S' : ' ',
+                                *(u64 *)&topa->table[i]);
+                       if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
+                            topa->table[i].stop) ||
+                           topa->table[i].end)
+                               break;
+               }
+       }
+}
+
+/**
+ * pt_buffer_advance() - advance to the next output region
+ * @buf:       PT buffer.
+ *
+ * Advance the current pointers in the buffer to the next ToPA entry.
+ */
+static void pt_buffer_advance(struct pt_buffer *buf)
+{
+       buf->output_off = 0;
+       buf->cur_idx++;
+
+       if (buf->cur_idx == buf->cur->last) {
+               if (buf->cur == buf->last)
+                       buf->cur = buf->first;
+               else
+                       buf->cur = list_entry(buf->cur->list.next, struct topa,
+                                             list);
+               buf->cur_idx = 0;
+       }
+}
+
+/**
+ * pt_update_head() - calculate current offsets and sizes
+ * @pt:                Per-cpu pt context.
+ *
+ * Update buffer's current write pointer position and data size.
+ */
+static void pt_update_head(struct pt *pt)
+{
+       struct pt_buffer *buf = perf_get_aux(&pt->handle);
+       u64 topa_idx, base, old;
+
+       /* offset of the first region in this table from the beginning of buf */
+       base = buf->cur->offset + buf->output_off;
+
+       /* offset of the current output region within this table */
+       for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
+               base += sizes(buf->cur->table[topa_idx].size);
+
+       if (buf->snapshot) {
+               local_set(&buf->data_size, base);
+       } else {
+               old = (local64_xchg(&buf->head, base) &
+                      ((buf->nr_pages << PAGE_SHIFT) - 1));
+               if (base < old)
+                       base += buf->nr_pages << PAGE_SHIFT;
+
+               local_add(base - old, &buf->data_size);
+       }
+}
+
+/**
+ * pt_buffer_region() - obtain current output region's address
+ * @buf:       PT buffer.
+ */
+static void *pt_buffer_region(struct pt_buffer *buf)
+{
+       return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
+}
+
+/**
+ * pt_buffer_region_size() - obtain current output region's size
+ * @buf:       PT buffer.
+ */
+static size_t pt_buffer_region_size(struct pt_buffer *buf)
+{
+       return sizes(buf->cur->table[buf->cur_idx].size);
+}
+
+/**
+ * pt_handle_status() - take care of possible status conditions
+ * @pt:                Per-cpu pt context.
+ */
+static void pt_handle_status(struct pt *pt)
+{
+       struct pt_buffer *buf = perf_get_aux(&pt->handle);
+       int advance = 0;
+       u64 status;
+
+       rdmsrl(MSR_IA32_RTIT_STATUS, status);
+
+       if (status & RTIT_STATUS_ERROR) {
+               pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
+               pt_topa_dump(buf);
+               status &= ~RTIT_STATUS_ERROR;
+       }
+
+       if (status & RTIT_STATUS_STOPPED) {
+               status &= ~RTIT_STATUS_STOPPED;
+
+               /*
+                * On systems that only do single-entry ToPA, hitting STOP
+                * means we are already losing data; need to let the decoder
+                * know.
+                */
+               if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
+                   buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+                       local_inc(&buf->lost);
+                       advance++;
+               }
+       }
+
+       /*
+        * Also, on single-entry ToPA implementations the interrupt will come
+        * before the output reaches its output region's boundary.
+        */
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
+           pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
+               void *head = pt_buffer_region(buf);
+
+               /* everything within this margin needs to be zeroed out */
+               memset(head + buf->output_off, 0,
+                      pt_buffer_region_size(buf) -
+                      buf->output_off);
+               advance++;
+       }
+
+       if (advance)
+               pt_buffer_advance(buf);
+
+       wrmsrl(MSR_IA32_RTIT_STATUS, status);
+}
+
+/**
+ * pt_read_offset() - translate registers into buffer pointers
+ * @buf:       PT buffer.
+ *
+ * Set buffer's output pointers from MSR values.
+ */
+static void pt_read_offset(struct pt_buffer *buf)
+{
+       u64 offset, base_topa;
+
+       rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
+       buf->cur = phys_to_virt(base_topa);
+
+       rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+       /* offset within current output region */
+       buf->output_off = offset >> 32;
+       /* index of current output region within this table */
+       buf->cur_idx = (offset & 0xffffff80) >> 7;
+}
+
+/**
+ * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
+ * @buf:       PT buffer.
+ * @pg:                Page offset in the buffer.
+ *
+ * When advancing to the next output region (ToPA entry), given a page offset
+ * into the buffer, we need to find the offset of the first page in the next
+ * region.
+ */
+static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
+{
+       struct topa_entry *te = buf->topa_index[pg];
+
+       /* one region */
+       if (buf->first == buf->last && buf->first->last == 1)
+               return pg;
+
+       do {
+               pg++;
+               pg &= buf->nr_pages - 1;
+       } while (buf->topa_index[pg] == te);
+
+       return pg;
+}
+
+/**
+ * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
+ * @buf:       PT buffer.
+ * @handle:    Current output handle.
+ *
+ * Place INT and STOP marks to prevent overwriting old data that the consumer
+ * hasn't yet collected, and to wake up the consumer after a certain fraction
+ * of the buffer has filled up. Only needed and sensible for non-snapshot
+ * counters.
+ *
+ * This obviously relies on buf::head to figure out buffer markers, so it has
+ * to be called after pt_buffer_reset_offsets() and before the hardware tracing
+ * is enabled.
+ */
+static int pt_buffer_reset_markers(struct pt_buffer *buf,
+                                  struct perf_output_handle *handle)
+
+{
+       unsigned long head = local64_read(&buf->head);
+       unsigned long idx, npages, wakeup;
+
+       /* can't stop in the middle of an output region */
+       if (buf->output_off + handle->size + 1 <
+           sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
+               return -EINVAL;
+
+
+       /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+               return 0;
+
+       /* clear STOP and INT from current entry */
+       buf->topa_index[buf->stop_pos]->stop = 0;
+       buf->topa_index[buf->intr_pos]->intr = 0;
+
+       /* how many pages till the STOP marker */
+       npages = handle->size >> PAGE_SHIFT;
+
+       /* if it's on a page boundary, fill up one more page */
+       if (!offset_in_page(head + handle->size + 1))
+               npages++;
+
+       idx = (head >> PAGE_SHIFT) + npages;
+       idx &= buf->nr_pages - 1;
+       buf->stop_pos = idx;
+
+       wakeup = handle->wakeup >> PAGE_SHIFT;
+
+       /* in the worst case, wake up the consumer one page before hard stop */
+       idx = (head >> PAGE_SHIFT) + npages - 1;
+       if (idx > wakeup)
+               idx = wakeup;
+
+       idx &= buf->nr_pages - 1;
+       buf->intr_pos = idx;
+
+       buf->topa_index[buf->stop_pos]->stop = 1;
+       buf->topa_index[buf->intr_pos]->intr = 1;
+
+       return 0;
+}
+
+/**
+ * pt_buffer_setup_topa_index() - build topa_index[] table of regions
+ * @buf:       PT buffer.
+ *
+ * topa_index[] references output regions indexed by offset into the
+ * buffer for purposes of quick reverse lookup.
+ */
+static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
+{
+       struct topa *cur = buf->first, *prev = buf->last;
+       struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
+               *te_prev = TOPA_ENTRY(prev, prev->last - 1);
+       int pg = 0, idx = 0;
+
+       while (pg < buf->nr_pages) {
+               int tidx;
+
+               /* pages within one topa entry */
+               for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
+                       buf->topa_index[pg] = te_prev;
+
+               te_prev = te_cur;
+
+               if (idx == cur->last - 1) {
+                       /* advance to next topa table */
+                       idx = 0;
+                       cur = list_entry(cur->list.next, struct topa, list);
+               } else {
+                       idx++;
+               }
+               te_cur = TOPA_ENTRY(cur, idx);
+       }
+
+}
+
+/**
+ * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
+ * @buf:       PT buffer.
+ * @head:      Write pointer (aux_head) from AUX buffer.
+ *
+ * Find the ToPA table and entry corresponding to given @head and set buffer's
+ * "current" pointers accordingly. This is done after we have obtained the
+ * current aux_head position from a successful call to perf_aux_output_begin()
+ * to make sure the hardware is writing to the right place.
+ *
+ * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
+ * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
+ * which are used to determine INT and STOP markers' locations by a subsequent
+ * call to pt_buffer_reset_markers().
+ */
+static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
+{
+       int pg;
+
+       if (buf->snapshot)
+               head &= (buf->nr_pages << PAGE_SHIFT) - 1;
+
+       pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+       pg = pt_topa_next_entry(buf, pg);
+
+       buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
+       buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
+                       (unsigned long)buf->cur) / sizeof(struct topa_entry);
+       buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
+
+       local64_set(&buf->head, head);
+       local_set(&buf->data_size, 0);
+}
+
+/**
+ * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
+ * @buf:       PT buffer.
+ */
+static void pt_buffer_fini_topa(struct pt_buffer *buf)
+{
+       struct topa *topa, *iter;
+
+       list_for_each_entry_safe(topa, iter, &buf->tables, list) {
+               /*
+                * right now, this is in free_aux() path only, so
+                * no need to unlink this table from the list
+                */
+               topa_free(topa);
+       }
+}
+
+/**
+ * pt_buffer_init_topa() - initialize ToPA table for pt buffer
+ * @buf:       PT buffer.
+ * @size:      Total size of all regions within this ToPA.
+ * @gfp:       Allocation flags.
+ */
+static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
+                              gfp_t gfp)
+{
+       struct topa *topa;
+       int err;
+
+       topa = topa_alloc(buf->cpu, gfp);
+       if (!topa)
+               return -ENOMEM;
+
+       topa_insert_table(buf, topa);
+
+       while (buf->nr_pages < nr_pages) {
+               err = topa_insert_pages(buf, gfp);
+               if (err) {
+                       pt_buffer_fini_topa(buf);
+                       return -ENOMEM;
+               }
+       }
+
+       pt_buffer_setup_topa_index(buf);
+
+       /* link last table to the first one, unless we're double buffering */
+       if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
+               TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
+               TOPA_ENTRY(buf->last, -1)->end = 1;
+       }
+
+       pt_topa_dump(buf);
+       return 0;
+}
+
+/**
+ * pt_buffer_setup_aux() - set up topa tables for a PT buffer
+ * @cpu:       Cpu on which to allocate, -1 means current.
+ * @pages:     Array of pointers to buffer pages passed from perf core.
+ * @nr_pages:  Number of pages in the buffer.
+ * @snapshot:  If this is a snapshot/overwrite counter.
+ *
+ * This is a pmu::setup_aux callback that sets up ToPA tables and all the
+ * bookkeeping for an AUX buffer.
+ *
+ * Return:     Our private PT buffer structure.
+ */
+static void *
+pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
+{
+       struct pt_buffer *buf;
+       int node, ret;
+
+       if (!nr_pages)
+               return NULL;
+
+       if (cpu == -1)
+               cpu = raw_smp_processor_id();
+       node = cpu_to_node(cpu);
+
+       buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
+                          GFP_KERNEL, node);
+       if (!buf)
+               return NULL;
+
+       buf->cpu = cpu;
+       buf->snapshot = snapshot;
+       buf->data_pages = pages;
+
+       INIT_LIST_HEAD(&buf->tables);
+
+       ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
+       if (ret) {
+               kfree(buf);
+               return NULL;
+       }
+
+       return buf;
+}
+
+/**
+ * pt_buffer_free_aux() - perf AUX deallocation path callback
+ * @data:      PT buffer.
+ */
+static void pt_buffer_free_aux(void *data)
+{
+       struct pt_buffer *buf = data;
+
+       pt_buffer_fini_topa(buf);
+       kfree(buf);
+}
+
+/**
+ * pt_buffer_is_full() - check if the buffer is full
+ * @buf:       PT buffer.
+ * @pt:                Per-cpu pt handle.
+ *
+ * If the user hasn't read data from the output region that aux_head
+ * points to, the buffer is considered full: the user needs to read at
+ * least this region and update aux_tail to point past it.
+ */
+static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+{
+       if (buf->snapshot)
+               return false;
+
+       if (local_read(&buf->data_size) >= pt->handle.size)
+               return true;
+
+       return false;
+}
+
+/**
+ * intel_pt_interrupt() - PT PMI handler
+ */
+void intel_pt_interrupt(void)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct pt_buffer *buf;
+       struct perf_event *event = pt->handle.event;
+
+       /*
+        * There may be a dangling PT bit in the interrupt status register
+        * after PT has been disabled by pt_event_stop(). Make sure we don't
+        * do anything (particularly, re-enable) for this event here.
+        */
+       if (!ACCESS_ONCE(pt->handle_nmi))
+               return;
+
+       pt_config_start(false);
+
+       if (!event)
+               return;
+
+       buf = perf_get_aux(&pt->handle);
+       if (!buf)
+               return;
+
+       pt_read_offset(buf);
+
+       pt_handle_status(pt);
+
+       pt_update_head(pt);
+
+       perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+                           local_xchg(&buf->lost, 0));
+
+       if (!event->hw.state) {
+               int ret;
+
+               buf = perf_aux_output_begin(&pt->handle, event);
+               if (!buf) {
+                       event->hw.state = PERF_HES_STOPPED;
+                       return;
+               }
+
+               pt_buffer_reset_offsets(buf, pt->handle.head);
+               /* snapshot counters don't use PMI, so it's safe */
+               ret = pt_buffer_reset_markers(buf, &pt->handle);
+               if (ret) {
+                       perf_aux_output_end(&pt->handle, 0, true);
+                       return;
+               }
+
+               pt_config_buffer(buf->cur->table, buf->cur_idx,
+                                buf->output_off);
+               pt_config(event);
+       }
+}
+
+/*
+ * PMU callbacks
+ */
+
+static void pt_event_start(struct perf_event *event, int mode)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+       if (!buf || pt_buffer_is_full(buf, pt)) {
+               event->hw.state = PERF_HES_STOPPED;
+               return;
+       }
+
+       ACCESS_ONCE(pt->handle_nmi) = 1;
+       event->hw.state = 0;
+
+       pt_config_buffer(buf->cur->table, buf->cur_idx,
+                        buf->output_off);
+       pt_config(event);
+}
+
+static void pt_event_stop(struct perf_event *event, int mode)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+       /*
+        * Protect against the PMI racing with disabling wrmsr,
+        * see comment in intel_pt_interrupt().
+        */
+       ACCESS_ONCE(pt->handle_nmi) = 0;
+       pt_config_start(false);
+
+       if (event->hw.state == PERF_HES_STOPPED)
+               return;
+
+       event->hw.state = PERF_HES_STOPPED;
+
+       if (mode & PERF_EF_UPDATE) {
+               struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+               if (!buf)
+                       return;
+
+               if (WARN_ON_ONCE(pt->handle.event != event))
+                       return;
+
+               pt_read_offset(buf);
+
+               pt_handle_status(pt);
+
+               pt_update_head(pt);
+       }
+}
+
+static void pt_event_del(struct perf_event *event, int mode)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct pt_buffer *buf;
+
+       pt_event_stop(event, PERF_EF_UPDATE);
+
+       buf = perf_get_aux(&pt->handle);
+
+       if (buf) {
+               if (buf->snapshot)
+                       pt->handle.head =
+                               local_xchg(&buf->data_size,
+                                          buf->nr_pages << PAGE_SHIFT);
+               perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+                                   local_xchg(&buf->lost, 0));
+       }
+}
+
+static int pt_event_add(struct perf_event *event, int mode)
+{
+       struct pt_buffer *buf;
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct hw_perf_event *hwc = &event->hw;
+       int ret = -EBUSY;
+
+       if (pt->handle.event)
+               goto fail;
+
+       buf = perf_aux_output_begin(&pt->handle, event);
+       ret = -EINVAL;
+       if (!buf)
+               goto fail_stop;
+
+       pt_buffer_reset_offsets(buf, pt->handle.head);
+       if (!buf->snapshot) {
+               ret = pt_buffer_reset_markers(buf, &pt->handle);
+               if (ret)
+                       goto fail_end_stop;
+       }
+
+       if (mode & PERF_EF_START) {
+               pt_event_start(event, 0);
+               ret = -EBUSY;
+               if (hwc->state == PERF_HES_STOPPED)
+                       goto fail_end_stop;
+       } else {
+               hwc->state = PERF_HES_STOPPED;
+       }
+
+       return 0;
+
+fail_end_stop:
+       perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+       hwc->state = PERF_HES_STOPPED;
+fail:
+       return ret;
+}
+
+static void pt_event_read(struct perf_event *event)
+{
+}
+
+static void pt_event_destroy(struct perf_event *event)
+{
+       x86_del_exclusive(x86_lbr_exclusive_pt);
+}
+
+static int pt_event_init(struct perf_event *event)
+{
+       if (event->attr.type != pt_pmu.pmu.type)
+               return -ENOENT;
+
+       if (!pt_event_valid(event))
+               return -EINVAL;
+
+       if (x86_add_exclusive(x86_lbr_exclusive_pt))
+               return -EBUSY;
+
+       event->destroy = pt_event_destroy;
+
+       return 0;
+}
+
+void cpu_emergency_stop_pt(void)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+       if (pt->handle.event)
+               pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
+}
+
+static __init int pt_init(void)
+{
+       int ret, cpu, prior_warn = 0;
+
+       BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
+
+       if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+               return -ENODEV;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               u64 ctl;
+
+               ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
+               if (!ret && (ctl & RTIT_CTL_TRACEEN))
+                       prior_warn++;
+       }
+       put_online_cpus();
+
+       if (prior_warn) {
+               x86_add_exclusive(x86_lbr_exclusive_pt);
+               pr_warn("PT is enabled at boot time, doing nothing\n");
+
+               return -EBUSY;
+       }
+
+       ret = pt_pmu_hw_init();
+       if (ret)
+               return ret;
+
+       if (!pt_cap_get(PT_CAP_topa_output)) {
+               pr_warn("ToPA output is not supported on this CPU\n");
+               return -ENODEV;
+       }
+
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+               pt_pmu.pmu.capabilities =
+                       PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+
+       pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+       pt_pmu.pmu.attr_groups  = pt_attr_groups;
+       pt_pmu.pmu.task_ctx_nr  = perf_sw_context;
+       pt_pmu.pmu.event_init   = pt_event_init;
+       pt_pmu.pmu.add          = pt_event_add;
+       pt_pmu.pmu.del          = pt_event_del;
+       pt_pmu.pmu.start        = pt_event_start;
+       pt_pmu.pmu.stop         = pt_event_stop;
+       pt_pmu.pmu.read         = pt_event_read;
+       pt_pmu.pmu.setup_aux    = pt_buffer_setup_aux;
+       pt_pmu.pmu.free_aux     = pt_buffer_free_aux;
+       ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
+
+       return ret;
+}
+arch_initcall(pt_init);
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
new file mode 100644 (file)
index 0000000..336878a
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#ifndef __INTEL_PT_H__
+#define __INTEL_PT_H__
+
+/*
+ * Single-entry ToPA: when this close to the region boundary, switch
+ * buffers to avoid losing data.
+ */
+#define TOPA_PMI_MARGIN 512
+
+#define TOPA_SHIFT 12
+
+static inline unsigned int sizes(unsigned int tsz)
+{
+       return 1 << (tsz + TOPA_SHIFT);
+};
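+
+/*
+ * For example, sizes(0) is 4KiB and sizes(15), the largest value that fits
+ * in the 4-bit size field of a ToPA entry, is 128MiB.
+ */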
+
+struct topa_entry {
+       u64     end     : 1;
+       u64     rsvd0   : 1;
+       u64     intr    : 1;
+       u64     rsvd1   : 1;
+       u64     stop    : 1;
+       u64     rsvd2   : 1;
+       u64     size    : 4;
+       u64     rsvd3   : 2;
+       u64     base    : 36;
+       u64     rsvd4   : 16;
+};
+
+#define PT_CPUID_LEAVES                2
+#define PT_CPUID_REGS_NUM      4 /* number of registers (eax, ebx, ecx, edx) */
+
+enum pt_capabilities {
+       PT_CAP_max_subleaf = 0,
+       PT_CAP_cr3_filtering,
+       PT_CAP_psb_cyc,
+       PT_CAP_mtc,
+       PT_CAP_topa_output,
+       PT_CAP_topa_multiple_entries,
+       PT_CAP_single_range_output,
+       PT_CAP_payloads_lip,
+       PT_CAP_mtc_periods,
+       PT_CAP_cycle_thresholds,
+       PT_CAP_psb_periods,
+};
+
+struct pt_pmu {
+       struct pmu              pmu;
+       u32                     caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+};
+
+/**
+ * struct pt_buffer - buffer configuration; one buffer per task_struct or
+ *             cpu, depending on perf event configuration
+ * @cpu:       cpu for per-cpu allocation
+ * @tables:    list of ToPA tables in this buffer
+ * @first:     shorthand for first topa table
+ * @last:      shorthand for last topa table
+ * @cur:       current topa table
+ * @nr_pages:  buffer size in pages
+ * @cur_idx:   current output region's index within @cur table
+ * @output_off:        offset within the current output region
+ * @data_size: running total of the amount of data in this buffer
+ * @lost:      if data was lost/truncated
+ * @head:      logical write offset inside the buffer
+ * @snapshot:  if this is for a snapshot/overwrite counter
+ * @stop_pos:  STOP topa entry in the buffer
+ * @intr_pos:  INT topa entry in the buffer
+ * @data_pages:        array of pages from perf
+ * @topa_index:        table of topa entries indexed by page offset
+ */
+struct pt_buffer {
+       int                     cpu;
+       struct list_head        tables;
+       struct topa             *first, *last, *cur;
+       unsigned int            cur_idx;
+       size_t                  output_off;
+       unsigned long           nr_pages;
+       local_t                 data_size;
+       local_t                 lost;
+       local64_t               head;
+       bool                    snapshot;
+       unsigned long           stop_pos, intr_pos;
+       void                    **data_pages;
+       struct topa_entry       *topa_index[0];
+};
+
+/**
+ * struct pt - per-cpu pt context
+ * @handle:    perf output handle
+ * @handle_nmi:        do handle PT PMI on this cpu, there's an active event
+ */
+struct pt {
+       struct perf_output_handle handle;
+       int                     handle_nmi;
+};
+
+#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
new file mode 100644 (file)
index 0000000..b834a3f
--- /dev/null
@@ -0,0 +1,767 @@
+/*
+ * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Copyright (C) 2013 Google, Inc., Stephane Eranian
+ *
+ * Intel RAPL interface is specified in the IA-32 Manual Vol3b
+ * section 14.7.1 (September 2013)
+ *
+ * RAPL provides more controls than just reporting energy consumption;
+ * however, here we only expose the 3 free-running energy consumption
+ * counters (pp0, pkg, dram).
+ *
+ * Each of those counters increments in a power unit defined by the
+ * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
+ * but it can vary.
+ *
+ * Counter to rapl events mappings:
+ *
+ *  pp0 counter: consumption of all physical cores (power plane 0)
+ *       event: rapl_energy_cores
+ *    perf code: 0x1
+ *
+ *  pkg counter: consumption of the whole processor package
+ *       event: rapl_energy_pkg
+ *    perf code: 0x2
+ *
+ * dram counter: consumption of the dram domain (servers only)
+ *       event: rapl_energy_dram
+ *    perf code: 0x3
+ *
+ *  gpu counter: consumption of the builtin-gpu domain (client only)
+ *       event: rapl_energy_gpu
+ *    perf code: 0x4
+ *
+ * We manage those counters as free running (read-only). They may be
+ * used simultaneously by other tools, such as turbostat.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it does not make sense and is not
+ * supported by the RAPL hardware.
+ *
+ * Because we want to avoid floating-point operations in the kernel,
+ * the events are all reported in fixed point arithmetic (32.32).
+ * Tools must adjust the counts to convert them to Watts using
+ * the duration of the measurement. Tools may use a function such as
+ * ldexp(raw_count, -32);
+ */
+ */
+
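+/*
+ * A minimal userspace sketch of that conversion (assuming the raw count
+ * delta and the measurement interval in seconds are already known):
+ *
+ *     double joules = ldexp((double)count, -32);
+ *     double watts  = joules / interval_seconds;
+ */
+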
+#define pr_fmt(fmt) "RAPL PMU: " fmt
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+/*
+ * RAPL energy status counters
+ */
+#define RAPL_IDX_PP0_NRG_STAT  0       /* all cores */
+#define INTEL_RAPL_PP0         0x1     /* pseudo-encoding */
+#define RAPL_IDX_PKG_NRG_STAT  1       /* entire package */
+#define INTEL_RAPL_PKG         0x2     /* pseudo-encoding */
+#define RAPL_IDX_RAM_NRG_STAT  2       /* DRAM */
+#define INTEL_RAPL_RAM         0x3     /* pseudo-encoding */
+#define RAPL_IDX_PP1_NRG_STAT  3       /* gpu */
+#define INTEL_RAPL_PP1         0x4     /* pseudo-encoding */
+
+#define NR_RAPL_DOMAINS         0x4
+static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+       "pp0-core",
+       "package",
+       "dram",
+       "pp1-gpu",
+};
+
+/* Clients have PP0, PKG, PP1 (GPU) */
+#define RAPL_IDX_CLN   (1<<RAPL_IDX_PP0_NRG_STAT|\
+                        1<<RAPL_IDX_PKG_NRG_STAT|\
+                        1<<RAPL_IDX_PP1_NRG_STAT)
+
+/* Servers have PP0, PKG, RAM */
+#define RAPL_IDX_SRV   (1<<RAPL_IDX_PP0_NRG_STAT|\
+                        1<<RAPL_IDX_PKG_NRG_STAT|\
+                        1<<RAPL_IDX_RAM_NRG_STAT)
+
+/* Haswell/Broadwell clients have PP0, PKG, RAM, PP1 */
+#define RAPL_IDX_HSW   (1<<RAPL_IDX_PP0_NRG_STAT|\
+                        1<<RAPL_IDX_PKG_NRG_STAT|\
+                        1<<RAPL_IDX_RAM_NRG_STAT|\
+                        1<<RAPL_IDX_PP1_NRG_STAT)
+
+/* Knights Landing has PKG, RAM */
+#define RAPL_IDX_KNL   (1<<RAPL_IDX_PKG_NRG_STAT|\
+                        1<<RAPL_IDX_RAM_NRG_STAT)
+
+/*
+ * event code: LSB 8 bits, passed in attr->config
+ * any other bit is reserved
+ */
+#define RAPL_EVENT_MASK        0xFFULL
+
+#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)          \
+static ssize_t __rapl_##_var##_show(struct kobject *kobj,      \
+                               struct kobj_attribute *attr,    \
+                               char *page)                     \
+{                                                              \
+       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
+       return sprintf(page, _format "\n");                     \
+}                                                              \
+static struct kobj_attribute format_attr_##_var =              \
+       __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
+
+#define RAPL_CNTR_WIDTH 32
+
+#define RAPL_EVENT_ATTR_STR(_name, v, str)                                     \
+static struct perf_pmu_events_attr event_attr_##v = {                          \
+       .attr           = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),     \
+       .id             = 0,                                                    \
+       .event_str      = str,                                                  \
+};
+
+struct rapl_pmu {
+       raw_spinlock_t          lock;
+       int                     n_active;
+       int                     cpu;
+       struct list_head        active_list;
+       struct pmu              *pmu;
+       ktime_t                 timer_interval;
+       struct hrtimer          hrtimer;
+};
+
+struct rapl_pmus {
+       struct pmu              pmu;
+       unsigned int            maxpkg;
+       struct rapl_pmu         *pmus[];
+};
+
+/* 1/2^hw_unit Joule */
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
+static struct rapl_pmus *rapl_pmus;
+static cpumask_t rapl_cpu_mask;
+static unsigned int rapl_cntr_mask;
+static u64 rapl_timer_ms;
+
+static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+{
+       return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+}
+
+static inline u64 rapl_read_counter(struct perf_event *event)
+{
+       u64 raw;
+       rdmsrl(event->hw.event_base, raw);
+       return raw;
+}
+
+static inline u64 rapl_scale(u64 v, int cfg)
+{
+       if (cfg > NR_RAPL_DOMAINS) {
+               pr_warn("Invalid domain %d, failed to scale data\n", cfg);
+               return v;
+       }
+       /*
+        * scale delta to smallest unit (1/2^32)
+        * users must then scale back: count * 2^-32 to get Joules,
+        * or use ldexp(count, -32).
+        * Watts = Joules/Time delta
+        */
+       return v << (32 - rapl_hw_unit[cfg - 1]);
+}
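As a worked example of the shift above: with the 2^-16 Joule unit quoted for SandyBridge at the top of this file (rapl_hw_unit[] value of 16), the delta is shifted left by 32 - 16 = 16 bits, so one raw tick becomes 2^16 increments of 2^-32 J, i.e. still 2^-16 J. Every domain therefore ends up in the common 2^-32 J unit regardless of its native granularity; the 16 here is only illustrative, the real unit is read from MSR_RAPL_POWER_UNIT at init time.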
+
+static u64 rapl_event_update(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 prev_raw_count, new_raw_count;
+       s64 delta, sdelta;
+       int shift = RAPL_CNTR_WIDTH;
+
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       rdmsrl(event->hw.event_base, new_raw_count);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                           new_raw_count) != prev_raw_count) {
+               cpu_relax();
+               goto again;
+       }
+
+       /*
+        * Now we have the new raw value and have updated the prev
+        * timestamp already. We can now calculate the elapsed delta
+        * (event-)time and add that to the generic event.
+        *
+        * Careful, not all hw sign-extends above the physical width
+        * of the count.
+        */
+       delta = (new_raw_count << shift) - (prev_raw_count << shift);
+       delta >>= shift;
+
+       sdelta = rapl_scale(delta, event->hw.config);
+
+       local64_add(sdelta, &event->count);
+
+       return new_raw_count;
+}
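The shift arithmetic above copes with counters that are narrower than 64 bits: both snapshots are shifted up to bit 63 before subtracting, so a wrap of the 32-bit RAPL counter still yields the correct small positive delta. A standalone sketch of the same idiom, not part of this patch, with made-up values straddling a wrap:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            int shift = 32;                         /* RAPL_CNTR_WIDTH */
            uint64_t prev_raw = 0xfffffff0ULL;      /* just before the 32-bit wrap */
            uint64_t new_raw  = 0x00000010ULL;      /* just after the wrap */

            /* normalize to bit 63, subtract, then shift back down */
            uint64_t diff  = (new_raw << shift) - (prev_raw << shift);
            int64_t  delta = (int64_t)diff >> shift;

            printf("delta = %lld ticks (expected 32)\n", (long long)delta);
            return 0;
    }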
+
+static void rapl_start_hrtimer(struct rapl_pmu *pmu)
+{
+       hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
+                    HRTIMER_MODE_REL_PINNED);
+}
+
+static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
+{
+       struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
+       struct perf_event *event;
+       unsigned long flags;
+
+       if (!pmu->n_active)
+               return HRTIMER_NORESTART;
+
+       raw_spin_lock_irqsave(&pmu->lock, flags);
+
+       list_for_each_entry(event, &pmu->active_list, active_entry)
+               rapl_event_update(event);
+
+       raw_spin_unlock_irqrestore(&pmu->lock, flags);
+
+       hrtimer_forward_now(hrtimer, pmu->timer_interval);
+
+       return HRTIMER_RESTART;
+}
+
+static void rapl_hrtimer_init(struct rapl_pmu *pmu)
+{
+       struct hrtimer *hr = &pmu->hrtimer;
+
+       hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       hr->function = rapl_hrtimer_handle;
+}
+
+static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
+                                  struct perf_event *event)
+{
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       event->hw.state = 0;
+
+       list_add_tail(&event->active_entry, &pmu->active_list);
+
+       local64_set(&event->hw.prev_count, rapl_read_counter(event));
+
+       pmu->n_active++;
+       if (pmu->n_active == 1)
+               rapl_start_hrtimer(pmu);
+}
+
+static void rapl_pmu_event_start(struct perf_event *event, int mode)
+{
+       struct rapl_pmu *pmu = event->pmu_private;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pmu->lock, flags);
+       __rapl_pmu_event_start(pmu, event);
+       raw_spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static void rapl_pmu_event_stop(struct perf_event *event, int mode)
+{
+       struct rapl_pmu *pmu = event->pmu_private;
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pmu->lock, flags);
+
+       /* mark event as deactivated and stopped */
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               WARN_ON_ONCE(pmu->n_active <= 0);
+               pmu->n_active--;
+               if (pmu->n_active == 0)
+                       hrtimer_cancel(&pmu->hrtimer);
+
+               list_del(&event->active_entry);
+
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+       }
+
+       /* check if update of sw counter is necessary */
+       if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               rapl_event_update(event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+
+       raw_spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static int rapl_pmu_event_add(struct perf_event *event, int mode)
+{
+       struct rapl_pmu *pmu = event->pmu_private;
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pmu->lock, flags);
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       if (mode & PERF_EF_START)
+               __rapl_pmu_event_start(pmu, event);
+
+       raw_spin_unlock_irqrestore(&pmu->lock, flags);
+
+       return 0;
+}
+
+static void rapl_pmu_event_del(struct perf_event *event, int flags)
+{
+       rapl_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int rapl_pmu_event_init(struct perf_event *event)
+{
+       u64 cfg = event->attr.config & RAPL_EVENT_MASK;
+       int bit, msr, ret = 0;
+       struct rapl_pmu *pmu;
+
+       /* only look at RAPL events */
+       if (event->attr.type != rapl_pmus->pmu.type)
+               return -ENOENT;
+
+       /* check only supported bits are set */
+       if (event->attr.config & ~RAPL_EVENT_MASK)
+               return -EINVAL;
+
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       /*
+        * check event is known (determines counter)
+        */
+       switch (cfg) {
+       case INTEL_RAPL_PP0:
+               bit = RAPL_IDX_PP0_NRG_STAT;
+               msr = MSR_PP0_ENERGY_STATUS;
+               break;
+       case INTEL_RAPL_PKG:
+               bit = RAPL_IDX_PKG_NRG_STAT;
+               msr = MSR_PKG_ENERGY_STATUS;
+               break;
+       case INTEL_RAPL_RAM:
+               bit = RAPL_IDX_RAM_NRG_STAT;
+               msr = MSR_DRAM_ENERGY_STATUS;
+               break;
+       case INTEL_RAPL_PP1:
+               bit = RAPL_IDX_PP1_NRG_STAT;
+               msr = MSR_PP1_ENERGY_STATUS;
+               break;
+       default:
+               return -EINVAL;
+       }
+       /* check event supported */
+       if (!(rapl_cntr_mask & (1 << bit)))
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       /* must be done before validate_group */
+       pmu = cpu_to_rapl_pmu(event->cpu);
+       event->cpu = pmu->cpu;
+       event->pmu_private = pmu;
+       event->hw.event_base = msr;
+       event->hw.config = cfg;
+       event->hw.idx = bit;
+
+       return ret;
+}
+
+static void rapl_pmu_event_read(struct perf_event *event)
+{
+       rapl_event_update(event);
+}
+
+static ssize_t rapl_get_attr_cpumask(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
+
+static struct attribute *rapl_pmu_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group rapl_pmu_attr_group = {
+       .attrs = rapl_pmu_attrs,
+};
+
+RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
+RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
+RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
+
+RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
+
+/*
+ * we compute in 2^-32 Joule (~0.23 nJ) increments regardless of the MSR unit
+ */
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
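The scale string above is exactly 2^-32 (1/4294967296 = 2.3283064365386962890625e-10), matching the normalization done in rapl_scale(). A tool can therefore multiply the raw count by the advertised .scale and report the advertised .unit (Joules) without knowing the hardware's native energy unit.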
+
+static struct attribute *rapl_events_srv_attr[] = {
+       EVENT_PTR(rapl_cores),
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_ram),
+
+       EVENT_PTR(rapl_cores_unit),
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_ram_unit),
+
+       EVENT_PTR(rapl_cores_scale),
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_ram_scale),
+       NULL,
+};
+
+static struct attribute *rapl_events_cln_attr[] = {
+       EVENT_PTR(rapl_cores),
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_gpu),
+
+       EVENT_PTR(rapl_cores_unit),
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_gpu_unit),
+
+       EVENT_PTR(rapl_cores_scale),
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_gpu_scale),
+       NULL,
+};
+
+static struct attribute *rapl_events_hsw_attr[] = {
+       EVENT_PTR(rapl_cores),
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_gpu),
+       EVENT_PTR(rapl_ram),
+
+       EVENT_PTR(rapl_cores_unit),
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_gpu_unit),
+       EVENT_PTR(rapl_ram_unit),
+
+       EVENT_PTR(rapl_cores_scale),
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_gpu_scale),
+       EVENT_PTR(rapl_ram_scale),
+       NULL,
+};
+
+static struct attribute *rapl_events_knl_attr[] = {
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_ram),
+
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_ram_unit),
+
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_ram_scale),
+       NULL,
+};
+
+static struct attribute_group rapl_pmu_events_group = {
+       .name = "events",
+       .attrs = NULL, /* patched at runtime */
+};
+
+DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+static struct attribute *rapl_formats_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+static struct attribute_group rapl_pmu_format_group = {
+       .name = "format",
+       .attrs = rapl_formats_attr,
+};
+
+const struct attribute_group *rapl_attr_groups[] = {
+       &rapl_pmu_attr_group,
+       &rapl_pmu_format_group,
+       &rapl_pmu_events_group,
+       NULL,
+};
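Once the PMU is registered as "power" later in this file, these three groups are what userspace sees under /sys/bus/event_source/devices/power/: a cpumask file, a format/event file containing "config:0-7", and the events/energy-* entries with their .unit and .scale companions. A typical (illustrative) use is 'perf stat -a -e power/energy-pkg/ sleep 1'; which events are actually present depends on the model checks in rapl_pmu_init() below.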
+
+static void rapl_cpu_exit(int cpu)
+{
+       struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+       int target;
+
+       /* Check if exiting cpu is used for collecting rapl events */
+       if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
+               return;
+
+       pmu->cpu = -1;
+       /* Find a new cpu to collect rapl events */
+       target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+       /* Migrate rapl events to the new target */
+       if (target < nr_cpu_ids) {
+               cpumask_set_cpu(target, &rapl_cpu_mask);
+               pmu->cpu = target;
+               perf_pmu_migrate_context(pmu->pmu, cpu, target);
+       }
+}
+
+static void rapl_cpu_init(int cpu)
+{
+       struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+       int target;
+
+       /*
+        * Check if there is an online cpu in the package which collects rapl
+        * events already.
+        */
+       target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+       if (target < nr_cpu_ids)
+               return;
+
+       cpumask_set_cpu(cpu, &rapl_cpu_mask);
+       pmu->cpu = cpu;
+}
+
+static int rapl_cpu_prepare(int cpu)
+{
+       struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+
+       if (pmu)
+               return 0;
+
+       pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+       if (!pmu)
+               return -ENOMEM;
+
+       raw_spin_lock_init(&pmu->lock);
+       INIT_LIST_HEAD(&pmu->active_list);
+       pmu->pmu = &rapl_pmus->pmu;
+       pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+       pmu->cpu = -1;
+       rapl_hrtimer_init(pmu);
+       rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+       return 0;
+}
+
+static int rapl_cpu_notifier(struct notifier_block *self,
+                            unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               rapl_cpu_prepare(cpu);
+               break;
+
+       case CPU_DOWN_FAILED:
+       case CPU_ONLINE:
+               rapl_cpu_init(cpu);
+               break;
+
+       case CPU_DOWN_PREPARE:
+               rapl_cpu_exit(cpu);
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static int rapl_check_hw_unit(bool apply_quirk)
+{
+       u64 msr_rapl_power_unit_bits;
+       int i;
+
+       /* protect rdmsrl() to handle virtualization */
+       if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+               return -1;
+       for (i = 0; i < NR_RAPL_DOMAINS; i++)
+               rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+       /*
+        * The DRAM domain on HSW servers and KNL has a fixed energy unit
+        * which can differ from the unit in the power unit MSR. See
+        * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+        * of 2. Datasheet, September 2014, Reference Number: 330784-001"
+        */
+       if (apply_quirk)
+               rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+
+       /*
+        * Calculate the timer rate:
+        * Use a reference of 200W for scaling the timeout, so that the
+        * counters cannot overflow between two timer ticks (200W = 200 J/s).
+        * Divide the interval by 2 (hence the 2 * 100) to poll well before
+        * the wrap point; if the hw unit is 2^-32 J we use the 2 ms minimum.
+        */
+       rapl_timer_ms = 2;
+       if (rapl_hw_unit[0] < 32) {
+               rapl_timer_ms = (1000 / (2 * 100));
+               rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+       }
+       return 0;
+}
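Worked numbers for the interval above, assuming the 2^-16 J unit: the 32-bit energy status counter wraps after 2^32 * 2^-16 = 65536 Joules, which takes 65536 / 200 ≈ 328 seconds at the 200 W reference, so the code programs roughly half of that, 1000/(2*100) ms * 2^(32-16-1) = 5 * 32768 = 163840 ms ≈ 164 s. Only a unit as fine as 2^-32 J (hw unit >= 32) falls through to the 2 ms default.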
+
+static void __init rapl_advertise(void)
+{
+       int i;
+
+       pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
+               hweight32(rapl_cntr_mask), rapl_timer_ms);
+
+       for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+               if (rapl_cntr_mask & (1 << i)) {
+                       pr_info("hw unit of domain %s 2^-%d Joules\n",
+                               rapl_domain_names[i], rapl_hw_unit[i]);
+               }
+       }
+}
+
+static int __init rapl_prepare_cpus(void)
+{
+       unsigned int cpu, pkg;
+       int ret;
+
+       for_each_online_cpu(cpu) {
+               pkg = topology_logical_package_id(cpu);
+               if (rapl_pmus->pmus[pkg])
+                       continue;
+
+               ret = rapl_cpu_prepare(cpu);
+               if (ret)
+                       return ret;
+               rapl_cpu_init(cpu);
+       }
+       return 0;
+}
+
+static void __init cleanup_rapl_pmus(void)
+{
+       int i;
+
+       for (i = 0; i < rapl_pmus->maxpkg; i++)
+               kfree(rapl_pmus->pmus + i);
+       kfree(rapl_pmus);
+}
+
+static int __init init_rapl_pmus(void)
+{
+       int maxpkg = topology_max_packages();
+       size_t size;
+
+       size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+       rapl_pmus = kzalloc(size, GFP_KERNEL);
+       if (!rapl_pmus)
+               return -ENOMEM;
+
+       rapl_pmus->maxpkg               = maxpkg;
+       rapl_pmus->pmu.attr_groups      = rapl_attr_groups;
+       rapl_pmus->pmu.task_ctx_nr      = perf_invalid_context;
+       rapl_pmus->pmu.event_init       = rapl_pmu_event_init;
+       rapl_pmus->pmu.add              = rapl_pmu_event_add;
+       rapl_pmus->pmu.del              = rapl_pmu_event_del;
+       rapl_pmus->pmu.start            = rapl_pmu_event_start;
+       rapl_pmus->pmu.stop             = rapl_pmu_event_stop;
+       rapl_pmus->pmu.read             = rapl_pmu_event_read;
+       return 0;
+}
+
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
+       [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
+       [1] = {},
+};
+
+static int __init rapl_pmu_init(void)
+{
+       bool apply_quirk = false;
+       int ret;
+
+       if (!x86_match_cpu(rapl_cpu_match))
+               return -ENODEV;
+
+       switch (boot_cpu_data.x86_model) {
+       case 42: /* Sandy Bridge */
+       case 58: /* Ivy Bridge */
+               rapl_cntr_mask = RAPL_IDX_CLN;
+               rapl_pmu_events_group.attrs = rapl_events_cln_attr;
+               break;
+       case 63: /* Haswell-Server */
+               apply_quirk = true;
+               rapl_cntr_mask = RAPL_IDX_SRV;
+               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+               break;
+       case 60: /* Haswell */
+       case 69: /* Haswell-ULT */
+       case 61: /* Broadwell */
+               rapl_cntr_mask = RAPL_IDX_HSW;
+               rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
+               break;
+       case 45: /* Sandy Bridge-EP */
+       case 62: /* IvyTown */
+               rapl_cntr_mask = RAPL_IDX_SRV;
+               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+               break;
+       case 87: /* Knights Landing */
+               apply_quirk = true;
+               rapl_cntr_mask = RAPL_IDX_KNL;
+               rapl_pmu_events_group.attrs = rapl_events_knl_attr;
+               break;
+       default:
+               return -ENODEV;
+       }
+
+       ret = rapl_check_hw_unit(apply_quirk);
+       if (ret)
+               return ret;
+
+       ret = init_rapl_pmus();
+       if (ret)
+               return ret;
+
+       cpu_notifier_register_begin();
+
+       ret = rapl_prepare_cpus();
+       if (ret)
+               goto out;
+
+       ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+       if (ret)
+               goto out;
+
+       __perf_cpu_notifier(rapl_cpu_notifier);
+       cpu_notifier_register_done();
+       rapl_advertise();
+       return 0;
+
+out:
+       pr_warn("Initialization failed (%d), disabled\n", ret);
+       cleanup_rapl_pmus();
+       cpu_notifier_register_done();
+       return ret;
+}
+device_initcall(rapl_pmu_init);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
new file mode 100644 (file)
index 0000000..7012d18
--- /dev/null
@@ -0,0 +1,1412 @@
+#include "uncore.h"
+
+static struct intel_uncore_type *empty_uncore[] = { NULL, };
+struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
+struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
+
+static bool pcidrv_registered;
+struct pci_driver *uncore_pci_driver;
+/* pci bus to socket mapping */
+DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
+struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
+struct pci_extra_dev *uncore_extra_pci_dev;
+static int max_packages;
+
+/* mask of cpus that collect uncore events */
+static cpumask_t uncore_cpu_mask;
+
+/* constraint for the fixed counter */
+static struct event_constraint uncore_constraint_fixed =
+       EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
+struct event_constraint uncore_constraint_empty =
+       EVENT_CONSTRAINT(0, 0, 0);
+
+static int uncore_pcibus_to_physid(struct pci_bus *bus)
+{
+       struct pci2phy_map *map;
+       int phys_id = -1;
+
+       raw_spin_lock(&pci2phy_map_lock);
+       list_for_each_entry(map, &pci2phy_map_head, list) {
+               if (map->segment == pci_domain_nr(bus)) {
+                       phys_id = map->pbus_to_physid[bus->number];
+                       break;
+               }
+       }
+       raw_spin_unlock(&pci2phy_map_lock);
+
+       return phys_id;
+}
+
+static void uncore_free_pcibus_map(void)
+{
+       struct pci2phy_map *map, *tmp;
+
+       list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
+               list_del(&map->list);
+               kfree(map);
+       }
+}
+
+struct pci2phy_map *__find_pci2phy_map(int segment)
+{
+       struct pci2phy_map *map, *alloc = NULL;
+       int i;
+
+       lockdep_assert_held(&pci2phy_map_lock);
+
+lookup:
+       list_for_each_entry(map, &pci2phy_map_head, list) {
+               if (map->segment == segment)
+                       goto end;
+       }
+
+       if (!alloc) {
+               raw_spin_unlock(&pci2phy_map_lock);
+               alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+               raw_spin_lock(&pci2phy_map_lock);
+
+               if (!alloc)
+                       return NULL;
+
+               goto lookup;
+       }
+
+       map = alloc;
+       alloc = NULL;
+       map->segment = segment;
+       for (i = 0; i < 256; i++)
+               map->pbus_to_physid[i] = -1;
+       list_add_tail(&map->list, &pci2phy_map_head);
+
+end:
+       kfree(alloc);
+       return map;
+}
+
+ssize_t uncore_event_show(struct kobject *kobj,
+                         struct kobj_attribute *attr, char *buf)
+{
+       struct uncore_event_desc *event =
+               container_of(attr, struct uncore_event_desc, attr);
+       return sprintf(buf, "%s", event->config);
+}
+
+struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
+{
+       return pmu->boxes[topology_logical_package_id(cpu)];
+}
+
+u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       u64 count;
+
+       rdmsrl(event->hw.event_base, count);
+
+       return count;
+}
+
+/*
+ * generic get constraint function for shared match/mask registers.
+ */
+struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_extra_reg *er;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+       unsigned long flags;
+       bool ok = false;
+
+       /*
+        * reg->alloc can be set due to existing state, so for fake box we
+        * need to ignore this, otherwise we might fail to allocate proper
+        * fake state for this extra reg constraint.
+        */
+       if (reg1->idx == EXTRA_REG_NONE ||
+           (!uncore_box_is_fake(box) && reg1->alloc))
+               return NULL;
+
+       er = &box->shared_regs[reg1->idx];
+       raw_spin_lock_irqsave(&er->lock, flags);
+       if (!atomic_read(&er->ref) ||
+           (er->config1 == reg1->config && er->config2 == reg2->config)) {
+               atomic_inc(&er->ref);
+               er->config1 = reg1->config;
+               er->config2 = reg2->config;
+               ok = true;
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       if (ok) {
+               if (!uncore_box_is_fake(box))
+                       reg1->alloc = 1;
+               return NULL;
+       }
+
+       return &uncore_constraint_empty;
+}
+
+void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_extra_reg *er;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+       /*
+        * Only put constraint if extra reg was actually allocated. Also
+        * takes care of event which do not use an extra shared reg.
+        *
+        * Also, if this is a fake box we shouldn't touch any event state
+        * (reg->alloc) and we don't care about leaving inconsistent box
+        * state either since it will be thrown out.
+        */
+       if (uncore_box_is_fake(box) || !reg1->alloc)
+               return;
+
+       er = &box->shared_regs[reg1->idx];
+       atomic_dec(&er->ref);
+       reg1->alloc = 0;
+}
+
+u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+       struct intel_uncore_extra_reg *er;
+       unsigned long flags;
+       u64 config;
+
+       er = &box->shared_regs[idx];
+
+       raw_spin_lock_irqsave(&er->lock, flags);
+       config = er->config;
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       return config;
+}
+
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+                                  struct perf_event *event, int idx)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       hwc->idx = idx;
+       hwc->last_tag = ++box->tags[idx];
+
+       if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
+               hwc->event_base = uncore_fixed_ctr(box);
+               hwc->config_base = uncore_fixed_ctl(box);
+               return;
+       }
+
+       hwc->config_base = uncore_event_ctl(box, hwc->idx);
+       hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
+}
+
+void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
+{
+       u64 prev_count, new_count, delta;
+       int shift;
+
+       if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+               shift = 64 - uncore_fixed_ctr_bits(box);
+       else
+               shift = 64 - uncore_perf_ctr_bits(box);
+
+       /* the hrtimer might modify the previous event value */
+again:
+       prev_count = local64_read(&event->hw.prev_count);
+       new_count = uncore_read_counter(box, event);
+       if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
+               goto again;
+
+       delta = (new_count << shift) - (prev_count << shift);
+       delta >>= shift;
+
+       local64_add(delta, &event->count);
+}
+
+/*
+ * The overflow interrupt is unavailable for SandyBridge-EP, is broken
+ * for SandyBridge. So we use hrtimer to periodically poll the counter
+ * to avoid overflow.
+ */
+static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
+{
+       struct intel_uncore_box *box;
+       struct perf_event *event;
+       unsigned long flags;
+       int bit;
+
+       box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
+       if (!box->n_active || box->cpu != smp_processor_id())
+               return HRTIMER_NORESTART;
+       /*
+        * disable local interrupts to prevent uncore_pmu_event_start/stop
+        * from interrupting the update process
+        */
+       local_irq_save(flags);
+
+       /*
+        * handle boxes with an active event list as opposed to active
+        * counters
+        */
+       list_for_each_entry(event, &box->active_list, active_entry) {
+               uncore_perf_event_update(box, event);
+       }
+
+       for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
+               uncore_perf_event_update(box, box->events[bit]);
+
+       local_irq_restore(flags);
+
+       hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
+       return HRTIMER_RESTART;
+}
+
+void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
+{
+       hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
+                     HRTIMER_MODE_REL_PINNED);
+}
+
+void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
+{
+       hrtimer_cancel(&box->hrtimer);
+}
+
+static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
+{
+       hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       box->hrtimer.function = uncore_pmu_hrtimer;
+}
+
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+                                                int node)
+{
+       int i, size, numshared = type->num_shared_regs;
+       struct intel_uncore_box *box;
+
+       size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
+
+       box = kzalloc_node(size, GFP_KERNEL, node);
+       if (!box)
+               return NULL;
+
+       for (i = 0; i < numshared; i++)
+               raw_spin_lock_init(&box->shared_regs[i].lock);
+
+       uncore_pmu_init_hrtimer(box);
+       box->cpu = -1;
+       box->pci_phys_id = -1;
+       box->pkgid = -1;
+
+       /* set default hrtimer timeout */
+       box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
+
+       INIT_LIST_HEAD(&box->active_list);
+
+       return box;
+}
+
+/*
+ * Use the pmu event_init callback, uncore_pmu_event_init,
+ * as a detection point for uncore events.
+ */
+static int uncore_pmu_event_init(struct perf_event *event);
+
+static bool is_uncore_event(struct perf_event *event)
+{
+       return event->pmu->event_init == uncore_pmu_event_init;
+}
+
+static int
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
+                     bool dogrp)
+{
+       struct perf_event *event;
+       int n, max_count;
+
+       max_count = box->pmu->type->num_counters;
+       if (box->pmu->type->fixed_ctl)
+               max_count++;
+
+       if (box->n_events >= max_count)
+               return -EINVAL;
+
+       n = box->n_events;
+
+       if (is_uncore_event(leader)) {
+               box->event_list[n] = leader;
+               n++;
+       }
+
+       if (!dogrp)
+               return n;
+
+       list_for_each_entry(event, &leader->sibling_list, group_entry) {
+               if (!is_uncore_event(event) ||
+                   event->state <= PERF_EVENT_STATE_OFF)
+                       continue;
+
+               if (n >= max_count)
+                       return -EINVAL;
+
+               box->event_list[n] = event;
+               n++;
+       }
+       return n;
+}
+
+static struct event_constraint *
+uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_type *type = box->pmu->type;
+       struct event_constraint *c;
+
+       if (type->ops->get_constraint) {
+               c = type->ops->get_constraint(box, event);
+               if (c)
+                       return c;
+       }
+
+       if (event->attr.config == UNCORE_FIXED_EVENT)
+               return &uncore_constraint_fixed;
+
+       if (type->constraints) {
+               for_each_event_constraint(c, type->constraints) {
+                       if ((event->hw.config & c->cmask) == c->code)
+                               return c;
+               }
+       }
+
+       return &type->unconstrainted;
+}
+
+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+                                       struct perf_event *event)
+{
+       if (box->pmu->type->ops->put_constraint)
+               box->pmu->type->ops->put_constraint(box, event);
+}
+
+static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
+{
+       unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+       struct event_constraint *c;
+       int i, wmin, wmax, ret = 0;
+       struct hw_perf_event *hwc;
+
+       bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
+
+       for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+               c = uncore_get_event_constraint(box, box->event_list[i]);
+               box->event_constraint[i] = c;
+               wmin = min(wmin, c->weight);
+               wmax = max(wmax, c->weight);
+       }
+
+       /* fastpath, try to reuse previous register */
+       for (i = 0; i < n; i++) {
+               hwc = &box->event_list[i]->hw;
+               c = box->event_constraint[i];
+
+               /* never assigned */
+               if (hwc->idx == -1)
+                       break;
+
+               /* constraint still honored */
+               if (!test_bit(hwc->idx, c->idxmsk))
+                       break;
+
+               /* not already used */
+               if (test_bit(hwc->idx, used_mask))
+                       break;
+
+               __set_bit(hwc->idx, used_mask);
+               if (assign)
+                       assign[i] = hwc->idx;
+       }
+       /* slow path */
+       if (i != n)
+               ret = perf_assign_events(box->event_constraint, n,
+                                        wmin, wmax, n, assign);
+
+       if (!assign || ret) {
+               for (i = 0; i < n; i++)
+                       uncore_put_event_constraint(box, box->event_list[i]);
+       }
+       return ret ? -EINVAL : 0;
+}
+
+static void uncore_pmu_event_start(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       int idx = event->hw.idx;
+
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+               return;
+
+       event->hw.state = 0;
+       box->events[idx] = event;
+       box->n_active++;
+       __set_bit(idx, box->active_mask);
+
+       local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
+       uncore_enable_event(box, event);
+
+       if (box->n_active == 1) {
+               uncore_enable_box(box);
+               uncore_pmu_start_hrtimer(box);
+       }
+}
+
+static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
+               uncore_disable_event(box, event);
+               box->n_active--;
+               box->events[hwc->idx] = NULL;
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+
+               if (box->n_active == 0) {
+                       uncore_disable_box(box);
+                       uncore_pmu_cancel_hrtimer(box);
+               }
+       }
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               uncore_perf_event_update(box, event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+}
+
+static int uncore_pmu_event_add(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       struct hw_perf_event *hwc = &event->hw;
+       int assign[UNCORE_PMC_IDX_MAX];
+       int i, n, ret;
+
+       if (!box)
+               return -ENODEV;
+
+       ret = n = uncore_collect_events(box, event, false);
+       if (ret < 0)
+               return ret;
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+       if (!(flags & PERF_EF_START))
+               hwc->state |= PERF_HES_ARCH;
+
+       ret = uncore_assign_events(box, assign, n);
+       if (ret)
+               return ret;
+
+       /* save events moving to new counters */
+       for (i = 0; i < box->n_events; i++) {
+               event = box->event_list[i];
+               hwc = &event->hw;
+
+               if (hwc->idx == assign[i] &&
+                       hwc->last_tag == box->tags[assign[i]])
+                       continue;
+               /*
+                * Ensure we don't accidentally enable a stopped
+                * counter simply because we rescheduled.
+                */
+               if (hwc->state & PERF_HES_STOPPED)
+                       hwc->state |= PERF_HES_ARCH;
+
+               uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+       }
+
+       /* reprogram moved events into new counters */
+       for (i = 0; i < n; i++) {
+               event = box->event_list[i];
+               hwc = &event->hw;
+
+               if (hwc->idx != assign[i] ||
+                       hwc->last_tag != box->tags[assign[i]])
+                       uncore_assign_hw_event(box, event, assign[i]);
+               else if (i < box->n_events)
+                       continue;
+
+               if (hwc->state & PERF_HES_ARCH)
+                       continue;
+
+               uncore_pmu_event_start(event, 0);
+       }
+       box->n_events = n;
+
+       return 0;
+}
+
+static void uncore_pmu_event_del(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       int i;
+
+       uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+
+       for (i = 0; i < box->n_events; i++) {
+               if (event == box->event_list[i]) {
+                       uncore_put_event_constraint(box, event);
+
+                       for (++i; i < box->n_events; i++)
+                               box->event_list[i - 1] = box->event_list[i];
+
+                       --box->n_events;
+                       break;
+               }
+       }
+
+       event->hw.idx = -1;
+       event->hw.last_tag = ~0ULL;
+}
+
+void uncore_pmu_event_read(struct perf_event *event)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       uncore_perf_event_update(box, event);
+}
+
+/*
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
+static int uncore_validate_group(struct intel_uncore_pmu *pmu,
+                               struct perf_event *event)
+{
+       struct perf_event *leader = event->group_leader;
+       struct intel_uncore_box *fake_box;
+       int ret = -EINVAL, n;
+
+       fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
+       if (!fake_box)
+               return -ENOMEM;
+
+       fake_box->pmu = pmu;
+       /*
+        * The event is not yet connected to its
+        * siblings, therefore we must first collect
+        * the existing siblings, then add the new event
+        * before we can simulate the scheduling.
+        */
+       n = uncore_collect_events(fake_box, leader, true);
+       if (n < 0)
+               goto out;
+
+       fake_box->n_events = n;
+       n = uncore_collect_events(fake_box, event, false);
+       if (n < 0)
+               goto out;
+
+       fake_box->n_events = n;
+
+       ret = uncore_assign_events(fake_box, NULL, n);
+out:
+       kfree(fake_box);
+       return ret;
+}
+
+static int uncore_pmu_event_init(struct perf_event *event)
+{
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       struct hw_perf_event *hwc = &event->hw;
+       int ret;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       pmu = uncore_event_to_pmu(event);
+       /* no device found for this pmu */
+       if (pmu->func_id < 0)
+               return -ENOENT;
+
+       /*
+        * The uncore PMU measures at all privilege levels all the time,
+        * so it does not make sense to specify any exclude bits.
+        */
+       if (event->attr.exclude_user || event->attr.exclude_kernel ||
+                       event->attr.exclude_hv || event->attr.exclude_idle)
+               return -EINVAL;
+
+       /* Sampling not supported yet */
+       if (hwc->sample_period)
+               return -EINVAL;
+
+       /*
+        * Place all uncore events for a particular physical package
+        * onto a single cpu
+        */
+       if (event->cpu < 0)
+               return -EINVAL;
+       box = uncore_pmu_to_box(pmu, event->cpu);
+       if (!box || box->cpu < 0)
+               return -EINVAL;
+       event->cpu = box->cpu;
+       event->pmu_private = box;
+
+       event->hw.idx = -1;
+       event->hw.last_tag = ~0ULL;
+       event->hw.extra_reg.idx = EXTRA_REG_NONE;
+       event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
+       if (event->attr.config == UNCORE_FIXED_EVENT) {
+               /* no fixed counter */
+               if (!pmu->type->fixed_ctl)
+                       return -EINVAL;
+               /*
+                * if there is only one fixed counter, only the first pmu
+                * can access the fixed counter
+                */
+               if (pmu->type->single_fixed && pmu->pmu_idx > 0)
+                       return -EINVAL;
+
+               /* fixed counters have event field hardcoded to zero */
+               hwc->config = 0ULL;
+       } else {
+               hwc->config = event->attr.config & pmu->type->event_mask;
+               if (pmu->type->ops->hw_config) {
+                       ret = pmu->type->ops->hw_config(box, event);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       if (event->group_leader != event)
+               ret = uncore_validate_group(pmu, event);
+       else
+               ret = 0;
+
+       return ret;
+}
+
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+       .attrs = uncore_pmu_attrs,
+};
+
+static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
+{
+       int ret;
+
+       if (!pmu->type->pmu) {
+               pmu->pmu = (struct pmu) {
+                       .attr_groups    = pmu->type->attr_groups,
+                       .task_ctx_nr    = perf_invalid_context,
+                       .event_init     = uncore_pmu_event_init,
+                       .add            = uncore_pmu_event_add,
+                       .del            = uncore_pmu_event_del,
+                       .start          = uncore_pmu_event_start,
+                       .stop           = uncore_pmu_event_stop,
+                       .read           = uncore_pmu_event_read,
+               };
+       } else {
+               pmu->pmu = *pmu->type->pmu;
+               pmu->pmu.attr_groups = pmu->type->attr_groups;
+       }
+
+       if (pmu->type->num_boxes == 1) {
+               if (strlen(pmu->type->name) > 0)
+                       sprintf(pmu->name, "uncore_%s", pmu->type->name);
+               else
+                       sprintf(pmu->name, "uncore");
+       } else {
+               sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
+                       pmu->pmu_idx);
+       }
+
+       ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+       if (!ret)
+               pmu->registered = true;
+       return ret;
+}
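The resulting PMUs show up in sysfs under these names, for example /sys/bus/event_source/devices/uncore_imc, or uncore_cbox_0, uncore_cbox_1, ... for multi-box types, and can be addressed from tools as, for instance, 'perf stat -a -e uncore_imc/event=.../' (illustrative only; the available boxes and event encodings depend on the specific uncore driver).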
+
+static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
+{
+       if (!pmu->registered)
+               return;
+       perf_pmu_unregister(&pmu->pmu);
+       pmu->registered = false;
+}
+
+static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+{
+       struct intel_uncore_pmu *pmu = type->pmus;
+       struct intel_uncore_box *box;
+       int i, pkg;
+
+       if (pmu) {
+               pkg = topology_physical_package_id(cpu);
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       box = pmu->boxes[pkg];
+                       if (box)
+                               uncore_box_exit(box);
+               }
+       }
+}
+
+static void __init uncore_exit_boxes(void *dummy)
+{
+       struct intel_uncore_type **types;
+
+       for (types = uncore_msr_uncores; *types; types++)
+               __uncore_exit_boxes(*types, smp_processor_id());
+}
+
+static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
+{
+       int pkg;
+
+       for (pkg = 0; pkg < max_packages; pkg++)
+               kfree(pmu->boxes[pkg]);
+       kfree(pmu->boxes);
+}
+
+static void __init uncore_type_exit(struct intel_uncore_type *type)
+{
+       struct intel_uncore_pmu *pmu = type->pmus;
+       int i;
+
+       if (pmu) {
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       uncore_pmu_unregister(pmu);
+                       uncore_free_boxes(pmu);
+               }
+               kfree(type->pmus);
+               type->pmus = NULL;
+       }
+       kfree(type->events_group);
+       type->events_group = NULL;
+}
+
+static void __init uncore_types_exit(struct intel_uncore_type **types)
+{
+       for (; *types; types++)
+               uncore_type_exit(*types);
+}
+
+static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
+{
+       struct intel_uncore_pmu *pmus;
+       struct attribute_group *attr_group;
+       struct attribute **attrs;
+       size_t size;
+       int i, j;
+
+       pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
+       if (!pmus)
+               return -ENOMEM;
+
+       size = max_packages * sizeof(struct intel_uncore_box *);
+
+       for (i = 0; i < type->num_boxes; i++) {
+               pmus[i].func_id = setid ? i : -1;
+               pmus[i].pmu_idx = i;
+               pmus[i].type    = type;
+               pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
+               if (!pmus[i].boxes)
+                       return -ENOMEM;
+       }
+
+       type->pmus = pmus;
+       type->unconstrainted = (struct event_constraint)
+               __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
+                               0, type->num_counters, 0, 0);
+
+       if (type->event_descs) {
+               for (i = 0; type->event_descs[i].attr.attr.name; i++);
+
+               attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
+                                       sizeof(*attr_group), GFP_KERNEL);
+               if (!attr_group)
+                       return -ENOMEM;
+
+               attrs = (struct attribute **)(attr_group + 1);
+               attr_group->name = "events";
+               attr_group->attrs = attrs;
+
+               for (j = 0; j < i; j++)
+                       attrs[j] = &type->event_descs[j].attr.attr;
+
+               type->events_group = attr_group;
+       }
+
+       type->pmu_group = &uncore_pmu_attr_group;
+       return 0;
+}
+
+static int __init
+uncore_types_init(struct intel_uncore_type **types, bool setid)
+{
+       int ret;
+
+       for (; *types; types++) {
+               ret = uncore_type_init(*types, setid);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+/*
+ * add a pci uncore device
+ */
+static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct intel_uncore_type *type;
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       int phys_id, pkg, ret;
+
+       phys_id = uncore_pcibus_to_physid(pdev->bus);
+       if (phys_id < 0)
+               return -ENODEV;
+
+       pkg = topology_phys_to_logical_pkg(phys_id);
+       if (WARN_ON_ONCE(pkg < 0))
+               return -EINVAL;
+
+       if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
+               int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
+
+               uncore_extra_pci_dev[pkg].dev[idx] = pdev;
+               pci_set_drvdata(pdev, NULL);
+               return 0;
+       }
+
+       type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+       /*
+        * For performance monitoring units with multiple boxes,
+        * each box has a different function id.
+        */
+       pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+       /*
+        * Knights Landing uses a common PCI device ID for multiple instances
+        * of an uncore PMU device type. There is only one entry per device
+        * type in the knl_uncore_pci_ids table despite multiple devices being
+        * present for some device types. Hence the PCI device idx is 0 for
+        * all devices, so increment the pmu pointer to point to an unused
+        * array element.
+        */
+       if (boot_cpu_data.x86_model == 87) {
+               while (pmu->func_id >= 0)
+                       pmu++;
+       }
+
+       if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+               return -EINVAL;
+
+       box = uncore_alloc_box(type, NUMA_NO_NODE);
+       if (!box)
+               return -ENOMEM;
+
+       if (pmu->func_id < 0)
+               pmu->func_id = pdev->devfn;
+       else
+               WARN_ON_ONCE(pmu->func_id != pdev->devfn);
+
+       atomic_inc(&box->refcnt);
+       box->pci_phys_id = phys_id;
+       box->pkgid = pkg;
+       box->pci_dev = pdev;
+       box->pmu = pmu;
+       uncore_box_init(box);
+       pci_set_drvdata(pdev, box);
+
+       pmu->boxes[pkg] = box;
+       if (atomic_inc_return(&pmu->activeboxes) > 1)
+               return 0;
+
+       /* First active box registers the pmu */
+       ret = uncore_pmu_register(pmu);
+       if (ret) {
+               pci_set_drvdata(pdev, NULL);
+               pmu->boxes[pkg] = NULL;
+               uncore_box_exit(box);
+               kfree(box);
+       }
+       return ret;
+}
+
+static void uncore_pci_remove(struct pci_dev *pdev)
+{
+       struct intel_uncore_box *box = pci_get_drvdata(pdev);
+       struct intel_uncore_pmu *pmu;
+       int i, phys_id, pkg;
+
+       phys_id = uncore_pcibus_to_physid(pdev->bus);
+       pkg = topology_phys_to_logical_pkg(phys_id);
+
+       box = pci_get_drvdata(pdev);
+       if (!box) {
+               for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
+                       if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
+                               uncore_extra_pci_dev[pkg].dev[i] = NULL;
+                               break;
+                       }
+               }
+               WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
+               return;
+       }
+
+       pmu = box->pmu;
+       if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
+               return;
+
+       pci_set_drvdata(pdev, NULL);
+       pmu->boxes[pkg] = NULL;
+       if (atomic_dec_return(&pmu->activeboxes) == 0)
+               uncore_pmu_unregister(pmu);
+       uncore_box_exit(box);
+       kfree(box);
+}
+
+static int __init uncore_pci_init(void)
+{
+       size_t size;
+       int ret;
+
+       switch (boot_cpu_data.x86_model) {
+       case 45: /* Sandy Bridge-EP */
+               ret = snbep_uncore_pci_init();
+               break;
+       case 62: /* Ivy Bridge-EP */
+               ret = ivbep_uncore_pci_init();
+               break;
+       case 63: /* Haswell-EP */
+               ret = hswep_uncore_pci_init();
+               break;
+       case 79: /* BDX-EP */
+       case 86: /* BDX-DE */
+               ret = bdx_uncore_pci_init();
+               break;
+       case 42: /* Sandy Bridge */
+               ret = snb_uncore_pci_init();
+               break;
+       case 58: /* Ivy Bridge */
+               ret = ivb_uncore_pci_init();
+               break;
+       case 60: /* Haswell */
+       case 69: /* Haswell-ULT */
+               ret = hsw_uncore_pci_init();
+               break;
+       case 61: /* Broadwell */
+               ret = bdw_uncore_pci_init();
+               break;
+       case 87: /* Knights Landing */
+               ret = knl_uncore_pci_init();
+               break;
+       case 94: /* SkyLake */
+               ret = skl_uncore_pci_init();
+               break;
+       default:
+               return -ENODEV;
+       }
+
+       if (ret)
+               return ret;
+
+       size = max_packages * sizeof(struct pci_extra_dev);
+       uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
+       if (!uncore_extra_pci_dev) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       ret = uncore_types_init(uncore_pci_uncores, false);
+       if (ret)
+               goto errtype;
+
+       uncore_pci_driver->probe = uncore_pci_probe;
+       uncore_pci_driver->remove = uncore_pci_remove;
+
+       ret = pci_register_driver(uncore_pci_driver);
+       if (ret)
+               goto errtype;
+
+       pcidrv_registered = true;
+       return 0;
+
+errtype:
+       uncore_types_exit(uncore_pci_uncores);
+       kfree(uncore_extra_pci_dev);
+       uncore_extra_pci_dev = NULL;
+       uncore_free_pcibus_map();
+err:
+       uncore_pci_uncores = empty_uncore;
+       return ret;
+}
+
+static void __init uncore_pci_exit(void)
+{
+       if (pcidrv_registered) {
+               pcidrv_registered = false;
+               pci_unregister_driver(uncore_pci_driver);
+               uncore_types_exit(uncore_pci_uncores);
+               kfree(uncore_extra_pci_dev);
+               uncore_free_pcibus_map();
+       }
+}
+
+static void uncore_cpu_dying(int cpu)
+{
+       struct intel_uncore_type *type, **types = uncore_msr_uncores;
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       int i, pkg;
+
+       pkg = topology_logical_package_id(cpu);
+       for (; *types; types++) {
+               type = *types;
+               pmu = type->pmus;
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       box = pmu->boxes[pkg];
+                       if (box && atomic_dec_return(&box->refcnt) == 0)
+                               uncore_box_exit(box);
+               }
+       }
+}
+
+static void uncore_cpu_starting(int cpu, bool init)
+{
+       struct intel_uncore_type *type, **types = uncore_msr_uncores;
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       int i, pkg, ncpus = 1;
+
+       if (init) {
+               /*
+                * On init we get the number of online cpus in the package
+                * and set refcount for all of them.
+                */
+               ncpus = cpumask_weight(topology_core_cpumask(cpu));
+       }
+
+       pkg = topology_logical_package_id(cpu);
+       for (; *types; types++) {
+               type = *types;
+               pmu = type->pmus;
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       box = pmu->boxes[pkg];
+                       if (!box)
+                               continue;
+                       /* The first cpu on a package activates the box */
+                       if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
+                               uncore_box_init(box);
+               }
+       }
+}
+
+static int uncore_cpu_prepare(int cpu)
+{
+       struct intel_uncore_type *type, **types = uncore_msr_uncores;
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       int i, pkg;
+
+       pkg = topology_logical_package_id(cpu);
+       for (; *types; types++) {
+               type = *types;
+               pmu = type->pmus;
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       if (pmu->boxes[pkg])
+                               continue;
+                       /* First cpu of a package allocates the box */
+                       box = uncore_alloc_box(type, cpu_to_node(cpu));
+                       if (!box)
+                               return -ENOMEM;
+                       box->pmu = pmu;
+                       box->pkgid = pkg;
+                       pmu->boxes[pkg] = box;
+               }
+       }
+       return 0;
+}
+
+static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
+                                  int new_cpu)
+{
+       struct intel_uncore_pmu *pmu = type->pmus;
+       struct intel_uncore_box *box;
+       int i, pkg;
+
+       pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+       for (i = 0; i < type->num_boxes; i++, pmu++) {
+               box = pmu->boxes[pkg];
+               if (!box)
+                       continue;
+
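+               /* A negative old_cpu means the box had no collector cpu yet */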
+               if (old_cpu < 0) {
+                       WARN_ON_ONCE(box->cpu != -1);
+                       box->cpu = new_cpu;
+                       continue;
+               }
+
+               WARN_ON_ONCE(box->cpu != old_cpu);
+               box->cpu = -1;
+               if (new_cpu < 0)
+                       continue;
+
+               uncore_pmu_cancel_hrtimer(box);
+               perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
+               box->cpu = new_cpu;
+       }
+}
+
+static void uncore_change_context(struct intel_uncore_type **uncores,
+                                 int old_cpu, int new_cpu)
+{
+       for (; *uncores; uncores++)
+               uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
+}
+
+static void uncore_event_exit_cpu(int cpu)
+{
+       int target;
+
+       /* Check if exiting cpu is used for collecting uncore events */
+       if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
+               return;
+
+       /* Find a new cpu to collect uncore events */
+       target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+       /* Migrate uncore events to the new target */
+       if (target < nr_cpu_ids)
+               cpumask_set_cpu(target, &uncore_cpu_mask);
+       else
+               target = -1;
+
+       uncore_change_context(uncore_msr_uncores, cpu, target);
+       uncore_change_context(uncore_pci_uncores, cpu, target);
+}
+
+static void uncore_event_init_cpu(int cpu)
+{
+       int target;
+
+       /*
+        * Check if there is an online cpu in the package
+        * which collects uncore events already.
+        */
+       target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+       if (target < nr_cpu_ids)
+               return;
+
+       cpumask_set_cpu(cpu, &uncore_cpu_mask);
+
+       uncore_change_context(uncore_msr_uncores, -1, cpu);
+       uncore_change_context(uncore_pci_uncores, -1, cpu);
+}
+
+static int uncore_cpu_notifier(struct notifier_block *self,
+                              unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               return notifier_from_errno(uncore_cpu_prepare(cpu));
+
+       case CPU_STARTING:
+               uncore_cpu_starting(cpu, false);
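+               /* fall through: a starting cpu may also take over event collection */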
+       case CPU_DOWN_FAILED:
+               uncore_event_init_cpu(cpu);
+               break;
+
+       case CPU_UP_CANCELED:
+       case CPU_DYING:
+               uncore_cpu_dying(cpu);
+               break;
+
+       case CPU_DOWN_PREPARE:
+               uncore_event_exit_cpu(cpu);
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block uncore_cpu_nb = {
+       .notifier_call  = uncore_cpu_notifier,
+       /*
+        * To migrate uncore events, our notifier must be executed
+        * before the perf core notifier.
+        */
+       .priority       = CPU_PRI_PERF + 1,
+};
+
+static int __init type_pmu_register(struct intel_uncore_type *type)
+{
+       int i, ret;
+
+       for (i = 0; i < type->num_boxes; i++) {
+               ret = uncore_pmu_register(&type->pmus[i]);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+static int __init uncore_msr_pmus_register(void)
+{
+       struct intel_uncore_type **types = uncore_msr_uncores;
+       int ret;
+
+       for (; *types; types++) {
+               ret = type_pmu_register(*types);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+static int __init uncore_cpu_init(void)
+{
+       int ret;
+
+       switch (boot_cpu_data.x86_model) {
+       case 26: /* Nehalem */
+       case 30:
+       case 37: /* Westmere */
+       case 44:
+               nhm_uncore_cpu_init();
+               break;
+       case 42: /* Sandy Bridge */
+       case 58: /* Ivy Bridge */
+       case 60: /* Haswell */
+       case 69: /* Haswell */
+       case 70: /* Haswell */
+       case 61: /* Broadwell */
+       case 71: /* Broadwell */
+               snb_uncore_cpu_init();
+               break;
+       case 45: /* Sandy Bridge-EP */
+               snbep_uncore_cpu_init();
+               break;
+       case 46: /* Nehalem-EX */
+       case 47: /* Westmere-EX aka. Xeon E7 */
+               nhmex_uncore_cpu_init();
+               break;
+       case 62: /* Ivy Bridge-EP */
+               ivbep_uncore_cpu_init();
+               break;
+       case 63: /* Haswell-EP */
+               hswep_uncore_cpu_init();
+               break;
+       case 79: /* BDX-EP */
+       case 86: /* BDX-DE */
+               bdx_uncore_cpu_init();
+               break;
+       case 87: /* Knights Landing */
+               knl_uncore_cpu_init();
+               break;
+       default:
+               return -ENODEV;
+       }
+
+       ret = uncore_types_init(uncore_msr_uncores, true);
+       if (ret)
+               goto err;
+
+       ret = uncore_msr_pmus_register();
+       if (ret)
+               goto err;
+       return 0;
+err:
+       uncore_types_exit(uncore_msr_uncores);
+       uncore_msr_uncores = empty_uncore;
+       return ret;
+}
+
+static void __init uncore_cpu_setup(void *dummy)
+{
+       uncore_cpu_starting(smp_processor_id(), true);
+}
+
+/*
+ * Lazy: use a static __initdata bitmap instead of allocating the few bytes
+ * needed in the normal case.
+ */
+static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
+
+static int __init uncore_cpumask_init(bool msr)
+{
+       unsigned int cpu;
+
+       for_each_online_cpu(cpu) {
+               unsigned int pkg = topology_logical_package_id(cpu);
+               int ret;
+
+               if (test_and_set_bit(pkg, packages))
+                       continue;
+               /*
+                * The first online cpu of each package allocates and takes
+                * the refcounts for all other online cpus in that package.
+                * If msrs are not enabled, no allocation is required.
+                */
+               if (msr) {
+                       ret = uncore_cpu_prepare(cpu);
+                       if (ret)
+                               return ret;
+               }
+               uncore_event_init_cpu(cpu);
+               smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
+       }
+       __register_cpu_notifier(&uncore_cpu_nb);
+       return 0;
+}
+
+static int __init intel_uncore_init(void)
+{
+       int pret, cret, ret;
+
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return -ENODEV;
+
+       if (cpu_has_hypervisor)
+               return -ENODEV;
+
+       max_packages = topology_max_packages();
+
+       pret = uncore_pci_init();
+       cret = uncore_cpu_init();
+
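+       /* Only bail out if both the PCI and the MSR parts failed to initialize */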
+       if (cret && pret)
+               return -ENODEV;
+
+       cpu_notifier_register_begin();
+       ret = uncore_cpumask_init(!cret);
+       if (ret)
+               goto err;
+       cpu_notifier_register_done();
+       return 0;
+
+err:
+       /* Undo box->init_box() */
+       on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
+       uncore_types_exit(uncore_msr_uncores);
+       uncore_pci_exit();
+       cpu_notifier_register_done();
+       return ret;
+}
+device_initcall(intel_uncore_init);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
new file mode 100644 (file)
index 0000000..79766b9
--- /dev/null
@@ -0,0 +1,378 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <asm/apicdef.h>
+
+#include <linux/perf_event.h>
+#include "../perf_event.h"
+
+#define UNCORE_PMU_NAME_LEN            32
+#define UNCORE_PMU_HRTIMER_INTERVAL    (60LL * NSEC_PER_SEC)
+#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
+
+#define UNCORE_FIXED_EVENT             0xff
+#define UNCORE_PMC_IDX_MAX_GENERIC     8
+#define UNCORE_PMC_IDX_FIXED           UNCORE_PMC_IDX_MAX_GENERIC
+#define UNCORE_PMC_IDX_MAX             (UNCORE_PMC_IDX_FIXED + 1)
+
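+/* Pack/unpack an (uncore type, box index) pair: type in bits 8-15, index in bits 0-7 */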
+#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_TYPE(data)      ((data >> 8) & 0xff)
+#define UNCORE_PCI_DEV_IDX(data)       (data & 0xff)
+#define UNCORE_EXTRA_PCI_DEV           0xff
+#define UNCORE_EXTRA_PCI_DEV_MAX       3
+
+#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+
+struct pci_extra_dev {
+       struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
+};
+
+struct intel_uncore_ops;
+struct intel_uncore_pmu;
+struct intel_uncore_box;
+struct uncore_event_desc;
+
+struct intel_uncore_type {
+       const char *name;
+       int num_counters;
+       int num_boxes;
+       int perf_ctr_bits;
+       int fixed_ctr_bits;
+       unsigned perf_ctr;
+       unsigned event_ctl;
+       unsigned event_mask;
+       unsigned fixed_ctr;
+       unsigned fixed_ctl;
+       unsigned box_ctl;
+       unsigned msr_offset;
+       unsigned num_shared_regs:8;
+       unsigned single_fixed:1;
+       unsigned pair_ctr_ctl:1;
+       unsigned *msr_offsets;
+       struct event_constraint unconstrainted;
+       struct event_constraint *constraints;
+       struct intel_uncore_pmu *pmus;
+       struct intel_uncore_ops *ops;
+       struct uncore_event_desc *event_descs;
+       const struct attribute_group *attr_groups[4];
+       struct pmu *pmu; /* for custom pmu ops */
+};
+
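+/* Named aliases for the fixed slots of intel_uncore_type::attr_groups[] */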
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
+
+struct intel_uncore_ops {
+       void (*init_box)(struct intel_uncore_box *);
+       void (*exit_box)(struct intel_uncore_box *);
+       void (*disable_box)(struct intel_uncore_box *);
+       void (*enable_box)(struct intel_uncore_box *);
+       void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
+       void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
+       u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
+       int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
+       struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
+                                                  struct perf_event *);
+       void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
+};
+
+struct intel_uncore_pmu {
+       struct pmu                      pmu;
+       char                            name[UNCORE_PMU_NAME_LEN];
+       int                             pmu_idx;
+       int                             func_id;
+       bool                            registered;
+       atomic_t                        activeboxes;
+       struct intel_uncore_type        *type;
+       struct intel_uncore_box         **boxes;
+};
+
+struct intel_uncore_extra_reg {
+       raw_spinlock_t lock;
+       u64 config, config1, config2;
+       atomic_t ref;
+};
+
+struct intel_uncore_box {
+       int pci_phys_id;
+       int pkgid;
+       int n_active;   /* number of active events */
+       int n_events;
+       int cpu;        /* cpu to collect events */
+       unsigned long flags;
+       atomic_t refcnt;
+       struct perf_event *events[UNCORE_PMC_IDX_MAX];
+       struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+       struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
+       unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+       u64 tags[UNCORE_PMC_IDX_MAX];
+       struct pci_dev *pci_dev;
+       struct intel_uncore_pmu *pmu;
+       u64 hrtimer_duration; /* hrtimer timeout for this box */
+       struct hrtimer hrtimer;
+       struct list_head list;
+       struct list_head active_list;
+       void *io_addr;
+       struct intel_uncore_extra_reg shared_regs[0];
+};
+
+#define UNCORE_BOX_FLAG_INITIATED      0
+
+struct uncore_event_desc {
+       struct kobj_attribute attr;
+       const char *config;
+};
+
+struct pci2phy_map {
+       struct list_head list;
+       int segment;
+       int pbus_to_physid[256];
+};
+
+struct pci2phy_map *__find_pci2phy_map(int segment);
+
+ssize_t uncore_event_show(struct kobject *kobj,
+                         struct kobj_attribute *attr, char *buf);
+
+#define INTEL_UNCORE_EVENT_DESC(_name, _config)                        \
+{                                                              \
+       .attr   = __ATTR(_name, 0444, uncore_event_show, NULL), \
+       .config = _config,                                      \
+}
+
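+/* Emit a read-only sysfs attribute <_name> whose show() simply prints <_format> */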
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)                        \
+static ssize_t __uncore_##_var##_show(struct kobject *kobj,            \
+                               struct kobj_attribute *attr,            \
+                               char *page)                             \
+{                                                                      \
+       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                     \
+       return sprintf(page, _format "\n");                             \
+}                                                                      \
+static struct kobj_attribute format_attr_##_var =                      \
+       __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
+{
+       return box->pmu->type->box_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
+{
+       return box->pmu->type->fixed_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
+{
+       return box->pmu->type->fixed_ctr;
+}
+
+static inline
+unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
+{
+       return idx * 4 + box->pmu->type->event_ctl;
+}
+
+static inline
+unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+       return idx * 8 + box->pmu->type->perf_ctr;
+}
+
+static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
+{
+       struct intel_uncore_pmu *pmu = box->pmu;
+       return pmu->type->msr_offsets ?
+               pmu->type->msr_offsets[pmu->pmu_idx] :
+               pmu->type->msr_offset * pmu->pmu_idx;
+}
+
+static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
+{
+       if (!box->pmu->type->box_ctl)
+               return 0;
+       return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
+}
+
+static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
+{
+       if (!box->pmu->type->fixed_ctl)
+               return 0;
+       return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
+}
+
+static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
+{
+       return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
+}
+
+static inline
+unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
+{
+       return box->pmu->type->event_ctl +
+               (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+               uncore_msr_box_offset(box);
+}
+
+static inline
+unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+       return box->pmu->type->perf_ctr +
+               (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+               uncore_msr_box_offset(box);
+}
+
+static inline
+unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
+{
+       if (box->pci_dev)
+               return uncore_pci_fixed_ctl(box);
+       else
+               return uncore_msr_fixed_ctl(box);
+}
+
+static inline
+unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
+{
+       if (box->pci_dev)
+               return uncore_pci_fixed_ctr(box);
+       else
+               return uncore_msr_fixed_ctr(box);
+}
+
+static inline
+unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
+{
+       if (box->pci_dev)
+               return uncore_pci_event_ctl(box, idx);
+       else
+               return uncore_msr_event_ctl(box, idx);
+}
+
+static inline
+unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+       if (box->pci_dev)
+               return uncore_pci_perf_ctr(box, idx);
+       else
+               return uncore_msr_perf_ctr(box, idx);
+}
+
+static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
+{
+       return box->pmu->type->perf_ctr_bits;
+}
+
+static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
+{
+       return box->pmu->type->fixed_ctr_bits;
+}
+
+static inline int uncore_num_counters(struct intel_uncore_box *box)
+{
+       return box->pmu->type->num_counters;
+}
+
+static inline void uncore_disable_box(struct intel_uncore_box *box)
+{
+       if (box->pmu->type->ops->disable_box)
+               box->pmu->type->ops->disable_box(box);
+}
+
+static inline void uncore_enable_box(struct intel_uncore_box *box)
+{
+       if (box->pmu->type->ops->enable_box)
+               box->pmu->type->ops->enable_box(box);
+}
+
+static inline void uncore_disable_event(struct intel_uncore_box *box,
+                               struct perf_event *event)
+{
+       box->pmu->type->ops->disable_event(box, event);
+}
+
+static inline void uncore_enable_event(struct intel_uncore_box *box,
+                               struct perf_event *event)
+{
+       box->pmu->type->ops->enable_event(box, event);
+}
+
+static inline u64 uncore_read_counter(struct intel_uncore_box *box,
+                               struct perf_event *event)
+{
+       return box->pmu->type->ops->read_counter(box, event);
+}
+
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+       if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+               if (box->pmu->type->ops->init_box)
+                       box->pmu->type->ops->init_box(box);
+       }
+}
+
+static inline void uncore_box_exit(struct intel_uncore_box *box)
+{
+       if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+               if (box->pmu->type->ops->exit_box)
+                       box->pmu->type->ops->exit_box(box);
+       }
+}
+
+static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+{
+       return (box->pkgid < 0);
+}
+
+static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+       return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+       return event->pmu_private;
+}
+
+struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
+u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_event_read(struct perf_event *event);
+void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
+struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
+u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
+
+extern struct intel_uncore_type **uncore_msr_uncores;
+extern struct intel_uncore_type **uncore_pci_uncores;
+extern struct pci_driver *uncore_pci_driver;
+extern raw_spinlock_t pci2phy_map_lock;
+extern struct list_head pci2phy_map_head;
+extern struct pci_extra_dev *uncore_extra_pci_dev;
+extern struct event_constraint uncore_constraint_empty;
+
+/* perf_event_intel_uncore_snb.c */
+int snb_uncore_pci_init(void);
+int ivb_uncore_pci_init(void);
+int hsw_uncore_pci_init(void);
+int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
+void snb_uncore_cpu_init(void);
+void nhm_uncore_cpu_init(void);
+int snb_pci2phy_map_init(int devid);
+
+/* perf_event_intel_uncore_snbep.c */
+int snbep_uncore_pci_init(void);
+void snbep_uncore_cpu_init(void);
+int ivbep_uncore_pci_init(void);
+void ivbep_uncore_cpu_init(void);
+int hswep_uncore_pci_init(void);
+void hswep_uncore_cpu_init(void);
+int bdx_uncore_pci_init(void);
+void bdx_uncore_cpu_init(void);
+int knl_uncore_pci_init(void);
+void knl_uncore_cpu_init(void);
+
+/* perf_event_intel_uncore_nhmex.c */
+void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
new file mode 100644 (file)
index 0000000..cda5693
--- /dev/null
@@ -0,0 +1,1227 @@
+/* Nehalem-EX/Westmere-EX uncore support */
+#include "uncore.h"
+
+/* NHM-EX event control */
+#define NHMEX_PMON_CTL_EV_SEL_MASK     0x000000ff
+#define NHMEX_PMON_CTL_UMASK_MASK      0x0000ff00
+#define NHMEX_PMON_CTL_EN_BIT0         (1 << 0)
+#define NHMEX_PMON_CTL_EDGE_DET                (1 << 18)
+#define NHMEX_PMON_CTL_PMI_EN          (1 << 20)
+#define NHMEX_PMON_CTL_EN_BIT22                (1 << 22)
+#define NHMEX_PMON_CTL_INVERT          (1 << 23)
+#define NHMEX_PMON_CTL_TRESH_MASK      0xff000000
+#define NHMEX_PMON_RAW_EVENT_MASK      (NHMEX_PMON_CTL_EV_SEL_MASK | \
+                                        NHMEX_PMON_CTL_UMASK_MASK | \
+                                        NHMEX_PMON_CTL_EDGE_DET | \
+                                        NHMEX_PMON_CTL_INVERT | \
+                                        NHMEX_PMON_CTL_TRESH_MASK)
+
+/* NHM-EX Ubox */
+#define NHMEX_U_MSR_PMON_GLOBAL_CTL            0xc00
+#define NHMEX_U_MSR_PMON_CTR                   0xc11
+#define NHMEX_U_MSR_PMON_EV_SEL                        0xc10
+
+#define NHMEX_U_PMON_GLOBAL_EN                 (1 << 0)
+#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL       0x0000001e
+#define NHMEX_U_PMON_GLOBAL_EN_ALL             (1 << 28)
+#define NHMEX_U_PMON_GLOBAL_RST_ALL            (1 << 29)
+#define NHMEX_U_PMON_GLOBAL_FRZ_ALL            (1 << 31)
+
+#define NHMEX_U_PMON_RAW_EVENT_MASK            \
+               (NHMEX_PMON_CTL_EV_SEL_MASK |   \
+                NHMEX_PMON_CTL_EDGE_DET)
+
+/* NHM-EX Cbox */
+#define NHMEX_C0_MSR_PMON_GLOBAL_CTL           0xd00
+#define NHMEX_C0_MSR_PMON_CTR0                 0xd11
+#define NHMEX_C0_MSR_PMON_EV_SEL0              0xd10
+#define NHMEX_C_MSR_OFFSET                     0x20
+
+/* NHM-EX Bbox */
+#define NHMEX_B0_MSR_PMON_GLOBAL_CTL           0xc20
+#define NHMEX_B0_MSR_PMON_CTR0                 0xc31
+#define NHMEX_B0_MSR_PMON_CTL0                 0xc30
+#define NHMEX_B_MSR_OFFSET                     0x40
+#define NHMEX_B0_MSR_MATCH                     0xe45
+#define NHMEX_B0_MSR_MASK                      0xe46
+#define NHMEX_B1_MSR_MATCH                     0xe4d
+#define NHMEX_B1_MSR_MASK                      0xe4e
+
+#define NHMEX_B_PMON_CTL_EN                    (1 << 0)
+#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT          1
+#define NHMEX_B_PMON_CTL_EV_SEL_MASK           \
+               (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_B_PMON_CTR_SHIFT         6
+#define NHMEX_B_PMON_CTR_MASK          \
+               (0x3 << NHMEX_B_PMON_CTR_SHIFT)
+#define NHMEX_B_PMON_RAW_EVENT_MASK            \
+               (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
+                NHMEX_B_PMON_CTR_MASK)
+
+/* NHM-EX Sbox */
+#define NHMEX_S0_MSR_PMON_GLOBAL_CTL           0xc40
+#define NHMEX_S0_MSR_PMON_CTR0                 0xc51
+#define NHMEX_S0_MSR_PMON_CTL0                 0xc50
+#define NHMEX_S_MSR_OFFSET                     0x80
+#define NHMEX_S0_MSR_MM_CFG                    0xe48
+#define NHMEX_S0_MSR_MATCH                     0xe49
+#define NHMEX_S0_MSR_MASK                      0xe4a
+#define NHMEX_S1_MSR_MM_CFG                    0xe58
+#define NHMEX_S1_MSR_MATCH                     0xe59
+#define NHMEX_S1_MSR_MASK                      0xe5a
+
+#define NHMEX_S_PMON_MM_CFG_EN                 (0x1ULL << 63)
+#define NHMEX_S_EVENT_TO_R_PROG_EV             0
+
+/* NHM-EX Mbox */
+#define NHMEX_M0_MSR_GLOBAL_CTL                        0xca0
+#define NHMEX_M0_MSR_PMU_DSP                   0xca5
+#define NHMEX_M0_MSR_PMU_ISS                   0xca6
+#define NHMEX_M0_MSR_PMU_MAP                   0xca7
+#define NHMEX_M0_MSR_PMU_MSC_THR               0xca8
+#define NHMEX_M0_MSR_PMU_PGT                   0xca9
+#define NHMEX_M0_MSR_PMU_PLD                   0xcaa
+#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC           0xcab
+#define NHMEX_M0_MSR_PMU_CTL0                  0xcb0
+#define NHMEX_M0_MSR_PMU_CNT0                  0xcb1
+#define NHMEX_M_MSR_OFFSET                     0x40
+#define NHMEX_M0_MSR_PMU_MM_CFG                        0xe54
+#define NHMEX_M1_MSR_PMU_MM_CFG                        0xe5c
+
+#define NHMEX_M_PMON_MM_CFG_EN                 (1ULL << 63)
+#define NHMEX_M_PMON_ADDR_MATCH_MASK           0x3ffffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_MASK            0x7ffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_SHIFT           34
+
+#define NHMEX_M_PMON_CTL_EN                    (1 << 0)
+#define NHMEX_M_PMON_CTL_PMI_EN                        (1 << 1)
+#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT      2
+#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK       \
+       (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT    4
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK     \
+       (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_WRAP_MODE             (1 << 6)
+#define NHMEX_M_PMON_CTL_FLAG_MODE             (1 << 7)
+#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT         9
+#define NHMEX_M_PMON_CTL_INC_SEL_MASK          \
+       (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT    19
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK     \
+       (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
+#define NHMEX_M_PMON_RAW_EVENT_MASK                    \
+               (NHMEX_M_PMON_CTL_COUNT_MODE_MASK |     \
+                NHMEX_M_PMON_CTL_STORAGE_MODE_MASK |   \
+                NHMEX_M_PMON_CTL_WRAP_MODE |           \
+                NHMEX_M_PMON_CTL_FLAG_MODE |           \
+                NHMEX_M_PMON_CTL_INC_SEL_MASK |        \
+                NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
+
+#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK          (((1 << 11) - 1) | (1 << 23))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
+
+#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK          (((1 << 12) - 1) | (1 << 24))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
+
+/*
+ * Use bits 9~13 to select the event if the 7th bit is not set,
+ * otherwise use bits 19~21 to select the event.
+ */
+#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
+                               NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+                          NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
+                               NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_EXTAR_REG(c, r) \
+               EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+                               MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
+#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
+               EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+                               MBOX_SET_FLAG_SEL_MASK, \
+                               (u64)-1, NHMEX_M_##r)
+
+/* NHM-EX Rbox */
+#define NHMEX_R_MSR_GLOBAL_CTL                 0xe00
+#define NHMEX_R_MSR_PMON_CTL0                  0xe10
+#define NHMEX_R_MSR_PMON_CNT0                  0xe11
+#define NHMEX_R_MSR_OFFSET                     0x20
+
+#define NHMEX_R_MSR_PORTN_QLX_CFG(n)           \
+               ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n)                (0xe04 + (n))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n)                (0xe24 + (n))
+#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n)                \
+               (((n) < 4 ? 0 : 0x10) + (n) * 4)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n)   \
+               (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n)    \
+               (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n)     \
+               (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n)   \
+               (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n)    \
+               (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n)     \
+               (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
+
+#define NHMEX_R_PMON_CTL_EN                    (1 << 0)
+#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT          1
+#define NHMEX_R_PMON_CTL_EV_SEL_MASK           \
+               (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_R_PMON_CTL_PMI_EN                        (1 << 6)
+#define NHMEX_R_PMON_RAW_EVENT_MASK            NHMEX_R_PMON_CTL_EV_SEL_MASK
+
+/* NHM-EX Wbox */
+#define NHMEX_W_MSR_GLOBAL_CTL                 0xc80
+#define NHMEX_W_MSR_PMON_CNT0                  0xc90
+#define NHMEX_W_MSR_PMON_EVT_SEL0              0xc91
+#define NHMEX_W_MSR_PMON_FIXED_CTR             0x394
+#define NHMEX_W_MSR_PMON_FIXED_CTL             0x395
+
+#define NHMEX_W_PMON_GLOBAL_FIXED_EN           (1ULL << 31)
+
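+/* Extract the i-th n-bit wide field from x */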
+#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
+                               ((1ULL << (n)) - 1)))
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
+DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
+
+static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+       wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
+}
+
+static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+       wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
+}
+
+static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+       u64 config;
+
+       if (msr) {
+               rdmsrl(msr, config);
+               config &= ~((1ULL << uncore_num_counters(box)) - 1);
+               /* WBox has a fixed counter */
+               if (uncore_msr_fixed_ctl(box))
+                       config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
+               wrmsrl(msr, config);
+       }
+}
+
+static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+       u64 config;
+
+       if (msr) {
+               rdmsrl(msr, config);
+               config |= (1ULL << uncore_num_counters(box)) - 1;
+               /* WBox has a fixed counter */
+               if (uncore_msr_fixed_ctl(box))
+                       config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
+               wrmsrl(msr, config);
+       }
+}
+
+static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       wrmsrl(event->hw.config_base, 0);
+}
+
+static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
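+       /* If bit 0 is part of this box's event encoding, enable via bit 22; otherwise bit 0 is the enable bit */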
+       if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+               wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
+       else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
+               wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+       else
+               wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+#define NHMEX_UNCORE_OPS_COMMON_INIT()                         \
+       .init_box       = nhmex_uncore_msr_init_box,            \
+       .exit_box       = nhmex_uncore_msr_exit_box,            \
+       .disable_box    = nhmex_uncore_msr_disable_box,         \
+       .enable_box     = nhmex_uncore_msr_enable_box,          \
+       .disable_event  = nhmex_uncore_msr_disable_event,       \
+       .read_counter   = uncore_msr_read_counter
+
+static struct intel_uncore_ops nhmex_uncore_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event   = nhmex_uncore_msr_enable_event,
+};
+
+static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_edge.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_ubox_format_group = {
+       .name           = "format",
+       .attrs          = nhmex_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_ubox = {
+       .name           = "ubox",
+       .num_counters   = 1,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .event_ctl      = NHMEX_U_MSR_PMON_EV_SEL,
+       .perf_ctr       = NHMEX_U_MSR_PMON_CTR,
+       .event_mask     = NHMEX_U_PMON_RAW_EVENT_MASK,
+       .box_ctl        = NHMEX_U_MSR_PMON_GLOBAL_CTL,
+       .ops            = &nhmex_uncore_ops,
+       .format_group   = &nhmex_uncore_ubox_format_group
+};
+
+static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_cbox_format_group = {
+       .name = "format",
+       .attrs = nhmex_uncore_cbox_formats_attr,
+};
+
+/* msr offset for each instance of cbox */
+static unsigned nhmex_cbox_msr_offsets[] = {
+       0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
+};
+
+static struct intel_uncore_type nhmex_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 6,
+       .num_boxes              = 10,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_C0_MSR_PMON_EV_SEL0,
+       .perf_ctr               = NHMEX_C0_MSR_PMON_CTR0,
+       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
+       .msr_offsets            = nhmex_cbox_msr_offsets,
+       .pair_ctr_ctl           = 1,
+       .ops                    = &nhmex_uncore_ops,
+       .format_group           = &nhmex_uncore_cbox_format_group
+};
+
+static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type nhmex_uncore_wbox = {
+       .name                   = "wbox",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_W_MSR_PMON_CNT0,
+       .perf_ctr               = NHMEX_W_MSR_PMON_EVT_SEL0,
+       .fixed_ctr              = NHMEX_W_MSR_PMON_FIXED_CTR,
+       .fixed_ctl              = NHMEX_W_MSR_PMON_FIXED_CTL,
+       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_W_MSR_GLOBAL_CTL,
+       .pair_ctr_ctl           = 1,
+       .event_descs            = nhmex_uncore_wbox_events,
+       .ops                    = &nhmex_uncore_ops,
+       .format_group           = &nhmex_uncore_cbox_format_group
+};
+
+static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+       int ctr, ev_sel;
+
+       ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
+               NHMEX_B_PMON_CTR_SHIFT;
+       ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
+                 NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
+
+       /* events that do not use the match/mask registers */
+       if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
+           (ctr == 2 && ev_sel != 0x4) || ctr == 3)
+               return 0;
+
+       if (box->pmu->pmu_idx == 0)
+               reg1->reg = NHMEX_B0_MSR_MATCH;
+       else
+               reg1->reg = NHMEX_B1_MSR_MATCH;
+       reg1->idx = 0;
+       reg1->config = event->attr.config1;
+       reg2->config = event->attr.config2;
+       return 0;
+}
+
+static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               wrmsrl(reg1->reg, reg1->config);
+               wrmsrl(reg1->reg + 1, reg2->config);
+       }
+       wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+               (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
+}
+
+/*
+ * The Bbox has 4 counters, but each counter monitors different events.
+ * Use bits 6-7 in the event config to select the counter.
+ */
+static struct event_constraint nhmex_uncore_bbox_constraints[] = {
+       EVENT_CONSTRAINT(0, 1, 0xc0),
+       EVENT_CONSTRAINT(0x40, 2, 0xc0),
+       EVENT_CONSTRAINT(0x80, 4, 0xc0),
+       EVENT_CONSTRAINT(0xc0, 8, 0xc0),
+       EVENT_CONSTRAINT_END,
+};
+
+static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
+       &format_attr_event5.attr,
+       &format_attr_counter.attr,
+       &format_attr_match.attr,
+       &format_attr_mask.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_bbox_format_group = {
+       .name = "format",
+       .attrs = nhmex_uncore_bbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event           = nhmex_bbox_msr_enable_event,
+       .hw_config              = nhmex_bbox_hw_config,
+       .get_constraint         = uncore_get_constraint,
+       .put_constraint         = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_bbox = {
+       .name                   = "bbox",
+       .num_counters           = 4,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_B0_MSR_PMON_CTL0,
+       .perf_ctr               = NHMEX_B0_MSR_PMON_CTR0,
+       .event_mask             = NHMEX_B_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
+       .msr_offset             = NHMEX_B_MSR_OFFSET,
+       .pair_ctr_ctl           = 1,
+       .num_shared_regs        = 1,
+       .constraints            = nhmex_uncore_bbox_constraints,
+       .ops                    = &nhmex_uncore_bbox_ops,
+       .format_group           = &nhmex_uncore_bbox_format_group
+};
+
+static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       /* only the TO_R_PROG_EV event uses the match/mask register */
+       if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
+           NHMEX_S_EVENT_TO_R_PROG_EV)
+               return 0;
+
+       if (box->pmu->pmu_idx == 0)
+               reg1->reg = NHMEX_S0_MSR_MM_CFG;
+       else
+               reg1->reg = NHMEX_S1_MSR_MM_CFG;
+       reg1->idx = 0;
+       reg1->config = event->attr.config1;
+       reg2->config = event->attr.config2;
+       return 0;
+}
+
+static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               wrmsrl(reg1->reg, 0);
+               wrmsrl(reg1->reg + 1, reg1->config);
+               wrmsrl(reg1->reg + 2, reg2->config);
+               wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
+       }
+       wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+}
+
+static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_match.attr,
+       &format_attr_mask.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_sbox_format_group = {
+       .name                   = "format",
+       .attrs                  = nhmex_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event           = nhmex_sbox_msr_enable_event,
+       .hw_config              = nhmex_sbox_hw_config,
+       .get_constraint         = uncore_get_constraint,
+       .put_constraint         = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_sbox = {
+       .name                   = "sbox",
+       .num_counters           = 4,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_S0_MSR_PMON_CTL0,
+       .perf_ctr               = NHMEX_S0_MSR_PMON_CTR0,
+       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
+       .msr_offset             = NHMEX_S_MSR_OFFSET,
+       .pair_ctr_ctl           = 1,
+       .num_shared_regs        = 1,
+       .ops                    = &nhmex_uncore_sbox_ops,
+       .format_group           = &nhmex_uncore_sbox_format_group
+};
+
+enum {
+       EXTRA_REG_NHMEX_M_FILTER,
+       EXTRA_REG_NHMEX_M_DSP,
+       EXTRA_REG_NHMEX_M_ISS,
+       EXTRA_REG_NHMEX_M_MAP,
+       EXTRA_REG_NHMEX_M_MSC_THR,
+       EXTRA_REG_NHMEX_M_PGT,
+       EXTRA_REG_NHMEX_M_PLD,
+       EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
+};
+
+static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
+       MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
+       MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
+       MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
+       MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
+       /* event 0xa uses two extra registers */
+       MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
+       MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
+       MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
+       /* events 0xd ~ 0x10 use the same extra register */
+       MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
+       MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
+       MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
+       MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
+       MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
+       MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
+       MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
+       MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
+       MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
+       EVENT_EXTRA_END
+};
+
+/* Nehalem-EX or Westmere-EX? */
+static bool uncore_nhmex;
+
+static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
+{
+       struct intel_uncore_extra_reg *er;
+       unsigned long flags;
+       bool ret = false;
+       u64 mask;
+
+       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+               er = &box->shared_regs[idx];
+               raw_spin_lock_irqsave(&er->lock, flags);
+               if (!atomic_read(&er->ref) || er->config == config) {
+                       atomic_inc(&er->ref);
+                       er->config = config;
+                       ret = true;
+               }
+               raw_spin_unlock_irqrestore(&er->lock, flags);
+
+               return ret;
+       }
+       /*
+        * The ZDP_CTL_FVC MSR has 4 fields which are used to control
+        * events 0xd ~ 0x10. Besides these 4 fields, there are additional
+        * fields which are shared.
+        */
+       idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+       if (WARN_ON_ONCE(idx >= 4))
+               return false;
+
+       /* mask of the shared fields */
+       if (uncore_nhmex)
+               mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+       else
+               mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
+       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+
+       raw_spin_lock_irqsave(&er->lock, flags);
+       /* add mask of the non-shared field if it's in use */
+       if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
+               if (uncore_nhmex)
+                       mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+               else
+                       mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+       }
+
+       if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
+               atomic_add(1 << (idx * 8), &er->ref);
+               if (uncore_nhmex)
+                       mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
+                               NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+               else
+                       mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
+                               WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+               er->config &= ~mask;
+               er->config |= (config & mask);
+               ret = true;
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       return ret;
+}
+
+static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
+{
+       struct intel_uncore_extra_reg *er;
+
+       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+               er = &box->shared_regs[idx];
+               atomic_dec(&er->ref);
+               return;
+       }
+
+       idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+       atomic_sub(1 << (idx * 8), &er->ref);
+}
+
+static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+       u64 config = reg1->config;
+
+       /* get the non-shared control bits and shift them */
+       idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+       if (uncore_nhmex)
+               config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+       else
+               config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+       if (new_idx > orig_idx) {
+               idx = new_idx - orig_idx;
+               config <<= 3 * idx;
+       } else {
+               idx = orig_idx - new_idx;
+               config >>= 3 * idx;
+       }
+
+       /* add the shared control bits back */
+       if (uncore_nhmex)
+               config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+       else
+               config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+       config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+       if (modify) {
+               /* adjust the main event selector */
+               if (new_idx > orig_idx)
+                       hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+               else
+                       hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+               reg1->config = config;
+               reg1->idx = ~0xff | new_idx;
+       }
+       return config;
+}
+
+static struct event_constraint *
+nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+       int i, idx[2], alloc = 0;
+       u64 config1 = reg1->config;
+
+       idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
+       idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
+again:
+       for (i = 0; i < 2; i++) {
+               if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+                       idx[i] = 0xff;
+
+               if (idx[i] == 0xff)
+                       continue;
+
+               if (!nhmex_mbox_get_shared_reg(box, idx[i],
+                               __BITS_VALUE(config1, i, 32)))
+                       goto fail;
+               alloc |= (0x1 << i);
+       }
+
+       /* for the match/mask registers */
+       if (reg2->idx != EXTRA_REG_NONE &&
+           (uncore_box_is_fake(box) || !reg2->alloc) &&
+           !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
+               goto fail;
+
+       /*
+        * If it's a fake box -- as per validate_{group,event}() -- we
+        * shouldn't touch event state. We can avoid doing so since both
+        * will only call get_event_constraints() once on each event;
+        * this avoids the need for reg->alloc.
+        */
+       if (!uncore_box_is_fake(box)) {
+               if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
+                       nhmex_mbox_alter_er(event, idx[0], true);
+               reg1->alloc |= alloc;
+               if (reg2->idx != EXTRA_REG_NONE)
+                       reg2->alloc = 1;
+       }
+       return NULL;
+fail:
+       if (idx[0] != 0xff && !(alloc & 0x1) &&
+           idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+               /*
+                * Events 0xd ~ 0x10 are functionally identical, but are
+                * controlled by different fields in the ZDP_CTL_FVC
+                * register. If we failed to take one field, try the
+                * other 3 choices.
+                */
+               BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
+               idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+               idx[0] = (idx[0] + 1) % 4;
+               idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+               if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
+                       config1 = nhmex_mbox_alter_er(event, idx[0], false);
+                       goto again;
+               }
+       }
+
+       if (alloc & 0x1)
+               nhmex_mbox_put_shared_reg(box, idx[0]);
+       if (alloc & 0x2)
+               nhmex_mbox_put_shared_reg(box, idx[1]);
+       return &uncore_constraint_empty;
+}
+
+static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+       if (uncore_box_is_fake(box))
+               return;
+
+       if (reg1->alloc & 0x1)
+               nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
+       if (reg1->alloc & 0x2)
+               nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
+       reg1->alloc = 0;
+
+       if (reg2->alloc) {
+               nhmex_mbox_put_shared_reg(box, reg2->idx);
+               reg2->alloc = 0;
+       }
+}
+
+static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
+{
+       if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+               return er->idx;
+       return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
+}
+
+static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_type *type = box->pmu->type;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+       struct extra_reg *er;
+       unsigned msr;
+       int reg_idx = 0;
+       /*
+        * The mbox events may require 2 extra MSRs at most. But only
+        * the lower 32 bits in these MSRs are significant, so we can use
+        * config1 to pass the config for both MSRs.
+        */
+       for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               if (event->attr.config1 & ~er->valid_mask)
+                       return -EINVAL;
+
+               msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
+               if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
+                       return -EINVAL;
+
+               /* always use bits 32~63 to pass the PLD config */
+               if (er->idx == EXTRA_REG_NHMEX_M_PLD)
+                       reg_idx = 1;
+               else if (WARN_ON_ONCE(reg_idx > 0))
+                       return -EINVAL;
+
+               reg1->idx &= ~(0xff << (reg_idx * 8));
+               reg1->reg &= ~(0xffff << (reg_idx * 16));
+               reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
+               reg1->reg |= msr << (reg_idx * 16);
+               reg1->config = event->attr.config1;
+               reg_idx++;
+       }
+       /*
+        * The mbox only provides the ability to perform address matching
+        * for the PLD events.
+        */
+       if (reg_idx == 2) {
+               reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
+               if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
+                       reg2->config = event->attr.config2;
+               else
+                       reg2->config = ~0ULL;
+               if (box->pmu->pmu_idx == 0)
+                       reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
+               else
+                       reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
+       }
+       return 0;
+}
+
+static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+       struct intel_uncore_extra_reg *er;
+       unsigned long flags;
+       u64 config;
+
+       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+               return box->shared_regs[idx].config;
+
+       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+       raw_spin_lock_irqsave(&er->lock, flags);
+       config = er->config;
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+       return config;
+}
+
+static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+       int idx;
+
+       idx = __BITS_VALUE(reg1->idx, 0, 8);
+       if (idx != 0xff)
+               wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
+                       nhmex_mbox_shared_reg_config(box, idx));
+       idx = __BITS_VALUE(reg1->idx, 1, 8);
+       if (idx != 0xff)
+               wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
+                       nhmex_mbox_shared_reg_config(box, idx));
+
+       if (reg2->idx != EXTRA_REG_NONE) {
+               wrmsrl(reg2->reg, 0);
+               if (reg2->config != ~0ULL) {
+                       wrmsrl(reg2->reg + 1,
+                               reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
+                       wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+                               (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
+                       wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+               }
+       }
+
+       wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(count_mode,          count_mode,     "config:2-3");
+DEFINE_UNCORE_FORMAT_ATTR(storage_mode,                storage_mode,   "config:4-5");
+DEFINE_UNCORE_FORMAT_ATTR(wrap_mode,           wrap_mode,      "config:6");
+DEFINE_UNCORE_FORMAT_ATTR(flag_mode,           flag_mode,      "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel,             inc_sel,        "config:9-13");
+DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel,                set_flag_sel,   "config:19-21");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en,       filter_cfg_en,  "config2:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_match,                filter_match,   "config2:0-33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_mask,         filter_mask,    "config2:34-61");
+DEFINE_UNCORE_FORMAT_ATTR(dsp,                 dsp,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(thr,                 thr,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(fvc,                 fvc,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pgt,                 pgt,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(map,                 map,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(iss,                 iss,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pld,                 pld,            "config1:32-63");
+
+static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
+       &format_attr_count_mode.attr,
+       &format_attr_storage_mode.attr,
+       &format_attr_wrap_mode.attr,
+       &format_attr_flag_mode.attr,
+       &format_attr_inc_sel.attr,
+       &format_attr_set_flag_sel.attr,
+       &format_attr_filter_cfg_en.attr,
+       &format_attr_filter_match.attr,
+       &format_attr_filter_mask.attr,
+       &format_attr_dsp.attr,
+       &format_attr_thr.attr,
+       &format_attr_fvc.attr,
+       &format_attr_pgt.attr,
+       &format_attr_map.attr,
+       &format_attr_iss.attr,
+       &format_attr_pld.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_mbox_format_group = {
+       .name           = "format",
+       .attrs          = nhmex_uncore_mbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
+       INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
+       INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
+       { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
+       INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
+       INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event   = nhmex_mbox_msr_enable_event,
+       .hw_config      = nhmex_mbox_hw_config,
+       .get_constraint = nhmex_mbox_get_constraint,
+       .put_constraint = nhmex_mbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_mbox = {
+       .name                   = "mbox",
+       .num_counters           = 6,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_M0_MSR_PMU_CTL0,
+       .perf_ctr               = NHMEX_M0_MSR_PMU_CNT0,
+       .event_mask             = NHMEX_M_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_M0_MSR_GLOBAL_CTL,
+       .msr_offset             = NHMEX_M_MSR_OFFSET,
+       .pair_ctr_ctl           = 1,
+       .num_shared_regs        = 8,
+       .event_descs            = nhmex_uncore_mbox_events,
+       .ops                    = &nhmex_uncore_mbox_ops,
+       .format_group           = &nhmex_uncore_mbox_format_group,
+};
+
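+/*
+ * Rbox events come in pairs that program the same thing through different
+ * extra registers. This helper switches an event to its sibling: it toggles
+ * the low bit of the event selector and, for the QLX config events, moves
+ * the extra register config between its low and high byte.
+ */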
+static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+       /* adjust the main event selector and extra register index */
+       if (reg1->idx % 2) {
+               reg1->idx--;
+               hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+       } else {
+               reg1->idx++;
+               hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+       }
+
+       /* adjust extra register config */
+       switch (reg1->idx % 6) {
+       case 2:
+               /* shift the 8~15 bits to the 0~7 bits */
+               reg1->config >>= 8;
+               break;
+       case 3:
+               /* shift the 0~7 bits to the 8~15 bits */
+               reg1->config <<= 8;
+               break;
+       }
+}
+
+/*
+ * Each rbox has 4 event sets which monitor QPI ports 0~3 or 4~7.
+ * An event set consists of 6 events; the 3rd and 4th events in
+ * an event set use the same extra register, so an event set uses
+ * 5 extra registers.
+ */
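+/*
+ * For example, reg1->idx == 9 (the 4th event of the 2nd set) gives
+ * idx = 9 % 6 = 3 and er_idx = (3 - 1) + (9 / 6) * 5 = 7, so it shares
+ * box->shared_regs[7] with the 3rd event of that set (reg1->idx == 8).
+ */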
+static struct event_constraint *
+nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+       struct intel_uncore_extra_reg *er;
+       unsigned long flags;
+       int idx, er_idx;
+       u64 config1;
+       bool ok = false;
+
+       if (!uncore_box_is_fake(box) && reg1->alloc)
+               return NULL;
+
+       idx = reg1->idx % 6;
+       config1 = reg1->config;
+again:
+       er_idx = idx;
+       /* the 3rd and 4th events use the same extra register */
+       if (er_idx > 2)
+               er_idx--;
+       er_idx += (reg1->idx / 6) * 5;
+
+       er = &box->shared_regs[er_idx];
+       raw_spin_lock_irqsave(&er->lock, flags);
+       if (idx < 2) {
+               if (!atomic_read(&er->ref) || er->config == reg1->config) {
+                       atomic_inc(&er->ref);
+                       er->config = reg1->config;
+                       ok = true;
+               }
+       } else if (idx == 2 || idx == 3) {
+               /*
+                * These two events use different fields of an extra register:
+                * the 0~7 bits and the 8~15 bits respectively.
+                */
+               u64 mask = 0xff << ((idx - 2) * 8);
+               if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
+                               !((er->config ^ config1) & mask)) {
+                       atomic_add(1 << ((idx - 2) * 8), &er->ref);
+                       er->config &= ~mask;
+                       er->config |= config1 & mask;
+                       ok = true;
+               }
+       } else {
+               if (!atomic_read(&er->ref) ||
+                               (er->config == (hwc->config >> 32) &&
+                                er->config1 == reg1->config &&
+                                er->config2 == reg2->config)) {
+                       atomic_inc(&er->ref);
+                       er->config = (hwc->config >> 32);
+                       er->config1 = reg1->config;
+                       er->config2 = reg2->config;
+                       ok = true;
+               }
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       if (!ok) {
+               /*
+                * The Rbox events always come in pairs. The paired
+                * events are functionally identical, but use different
+                * extra registers. If we fail to take an extra
+                * register, try the alternative.
+                */
+               idx ^= 1;
+               if (idx != reg1->idx % 6) {
+                       if (idx == 2)
+                               config1 >>= 8;
+                       else if (idx == 3)
+                               config1 <<= 8;
+                       goto again;
+               }
+       } else {
+               if (!uncore_box_is_fake(box)) {
+                       if (idx != reg1->idx % 6)
+                               nhmex_rbox_alter_er(box, event);
+                       reg1->alloc = 1;
+               }
+               return NULL;
+       }
+       return &uncore_constraint_empty;
+}
+
+static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_extra_reg *er;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       int idx, er_idx;
+
+       if (uncore_box_is_fake(box) || !reg1->alloc)
+               return;
+
+       idx = reg1->idx % 6;
+       er_idx = idx;
+       if (er_idx > 2)
+               er_idx--;
+       er_idx += (reg1->idx / 6) * 5;
+
+       er = &box->shared_regs[er_idx];
+       if (idx == 2 || idx == 3)
+               atomic_sub(1 << ((idx - 2) * 8), &er->ref);
+       else
+               atomic_dec(&er->ref);
+
+       reg1->alloc = 0;
+}
+
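+/* Event selectors 0x0~0x17 cover the 4 event sets of 6 events each. */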
+static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+       int idx;
+
+       idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
+               NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+       if (idx >= 0x18)
+               return -EINVAL;
+
+       reg1->idx = idx;
+       reg1->config = event->attr.config1;
+
+       switch (idx % 6) {
+       case 4:
+       case 5:
+               hwc->config |= event->attr.config & (~0ULL << 32);
+               reg2->config = event->attr.config2;
+               break;
+       }
+       return 0;
+}
+
+static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+       int idx, port;
+
+       idx = reg1->idx;
+       port = idx / 6 + box->pmu->pmu_idx * 4;
+
+       switch (idx % 6) {
+       case 0:
+               wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
+               break;
+       case 1:
+               wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
+               break;
+       case 2:
+       case 3:
+               wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
+                       uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
+               break;
+       case 4:
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
+                       hwc->config >> 32);
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
+               break;
+       case 5:
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
+                       hwc->config >> 32);
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
+               break;
+       }
+
+       wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+               (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
+DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
+
+static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
+       &format_attr_event5.attr,
+       &format_attr_xbr_mm_cfg.attr,
+       &format_attr_xbr_match.attr,
+       &format_attr_xbr_mask.attr,
+       &format_attr_qlx_cfg.attr,
+       &format_attr_iperf_cfg.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_rbox_format_group = {
+       .name = "format",
+       .attrs = nhmex_uncore_rbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
+       INTEL_UNCORE_EVENT_DESC(qpi0_flit_send,         "event=0x0,iperf_cfg=0x80000000"),
+       INTEL_UNCORE_EVENT_DESC(qpi1_flit_send,         "event=0x6,iperf_cfg=0x80000000"),
+       INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt,         "event=0x0,iperf_cfg=0x40000000"),
+       INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt,         "event=0x6,iperf_cfg=0x40000000"),
+       INTEL_UNCORE_EVENT_DESC(qpi0_data_response,     "event=0x0,iperf_cfg=0xc4"),
+       INTEL_UNCORE_EVENT_DESC(qpi1_data_response,     "event=0x6,iperf_cfg=0xc4"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event           = nhmex_rbox_msr_enable_event,
+       .hw_config              = nhmex_rbox_hw_config,
+       .get_constraint         = nhmex_rbox_get_constraint,
+       .put_constraint         = nhmex_rbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_rbox = {
+       .name                   = "rbox",
+       .num_counters           = 8,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_R_MSR_PMON_CTL0,
+       .perf_ctr               = NHMEX_R_MSR_PMON_CNT0,
+       .event_mask             = NHMEX_R_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_R_MSR_GLOBAL_CTL,
+       .msr_offset             = NHMEX_R_MSR_OFFSET,
+       .pair_ctr_ctl           = 1,
+       .num_shared_regs        = 20,
+       .event_descs            = nhmex_uncore_rbox_events,
+       .ops                    = &nhmex_uncore_rbox_ops,
+       .format_group           = &nhmex_uncore_rbox_format_group
+};
+
+static struct intel_uncore_type *nhmex_msr_uncores[] = {
+       &nhmex_uncore_ubox,
+       &nhmex_uncore_cbox,
+       &nhmex_uncore_bbox,
+       &nhmex_uncore_sbox,
+       &nhmex_uncore_mbox,
+       &nhmex_uncore_rbox,
+       &nhmex_uncore_wbox,
+       NULL,
+};
+
+void nhmex_uncore_cpu_init(void)
+{
+       if (boot_cpu_data.x86_model == 46)
+               uncore_nhmex = true;
+       else
+               nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
+       if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+       uncore_msr_uncores = nhmex_msr_uncores;
+}
+/* end of Nehalem-EX uncore support */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
new file mode 100644
index 0000000..96531d2
--- /dev/null
@@ -0,0 +1,731 @@
+/* Nehalem/SandyBridge/Haswell uncore support */
+#include "uncore.h"
+
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC    0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC    0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
+
+/* SNB event control */
+#define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
+#define SNB_UNC_CTL_UMASK_MASK                 0x0000ff00
+#define SNB_UNC_CTL_EDGE_DET                   (1 << 18)
+#define SNB_UNC_CTL_EN                         (1 << 22)
+#define SNB_UNC_CTL_INVERT                     (1 << 23)
+#define SNB_UNC_CTL_CMASK_MASK                 0x1f000000
+#define NHM_UNC_CTL_CMASK_MASK                 0xff000000
+#define NHM_UNC_FIXED_CTR_CTL_EN               (1 << 0)
+
+#define SNB_UNC_RAW_EVENT_MASK                 (SNB_UNC_CTL_EV_SEL_MASK | \
+                                                SNB_UNC_CTL_UMASK_MASK | \
+                                                SNB_UNC_CTL_EDGE_DET | \
+                                                SNB_UNC_CTL_INVERT | \
+                                                SNB_UNC_CTL_CMASK_MASK)
+
+#define NHM_UNC_RAW_EVENT_MASK                 (SNB_UNC_CTL_EV_SEL_MASK | \
+                                                SNB_UNC_CTL_UMASK_MASK | \
+                                                SNB_UNC_CTL_EDGE_DET | \
+                                                SNB_UNC_CTL_INVERT | \
+                                                NHM_UNC_CTL_CMASK_MASK)
+
+/* SNB global control register */
+#define SNB_UNC_PERF_GLOBAL_CTL                 0x391
+#define SNB_UNC_FIXED_CTR_CTRL                  0x394
+#define SNB_UNC_FIXED_CTR                       0x395
+
+/* SNB uncore global control */
+#define SNB_UNC_GLOBAL_CTL_CORE_ALL             ((1 << 4) - 1)
+#define SNB_UNC_GLOBAL_CTL_EN                   (1 << 29)
+
+/* SNB Cbo register */
+#define SNB_UNC_CBO_0_PERFEVTSEL0               0x700
+#define SNB_UNC_CBO_0_PER_CTR0                  0x706
+#define SNB_UNC_CBO_MSR_OFFSET                  0x10
+
+/* SNB ARB register */
+#define SNB_UNC_ARB_PER_CTR0                   0x3b0
+#define SNB_UNC_ARB_PERFEVTSEL0                        0x3b2
+#define SNB_UNC_ARB_MSR_OFFSET                 0x10
+
+/* NHM global control register */
+#define NHM_UNC_PERF_GLOBAL_CTL                 0x391
+#define NHM_UNC_FIXED_CTR                       0x394
+#define NHM_UNC_FIXED_CTR_CTRL                  0x395
+
+/* NHM uncore global control */
+#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL            ((1ULL << 8) - 1)
+#define NHM_UNC_GLOBAL_CTL_EN_FC                (1ULL << 32)
+
+/* NHM uncore register */
+#define NHM_UNC_PERFEVTSEL0                     0x3c0
+#define NHM_UNC_UNCORE_PMC0                     0x3b0
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
+
+/* Sandy Bridge uncore support */
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+               wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+       else
+               wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
+}
+
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       wrmsrl(event->hw.config_base, 0);
+}
+
+static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+       if (box->pmu->pmu_idx == 0) {
+               wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+                       SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+       }
+}
+
+static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+       if (box->pmu->pmu_idx == 0)
+               wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static struct uncore_event_desc snb_uncore_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+       { /* end: all zeroes */ },
+};
+
+static struct attribute *snb_uncore_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask5.attr,
+       NULL,
+};
+
+static struct attribute_group snb_uncore_format_group = {
+       .name           = "format",
+       .attrs          = snb_uncore_formats_attr,
+};
+
+static struct intel_uncore_ops snb_uncore_msr_ops = {
+       .init_box       = snb_uncore_msr_init_box,
+       .exit_box       = snb_uncore_msr_exit_box,
+       .disable_event  = snb_uncore_msr_disable_event,
+       .enable_event   = snb_uncore_msr_enable_event,
+       .read_counter   = uncore_msr_read_counter,
+};
+
+static struct event_constraint snb_uncore_arb_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snb_uncore_cbox = {
+       .name           = "cbox",
+       .num_counters   = 2,
+       .num_boxes      = 4,
+       .perf_ctr_bits  = 44,
+       .fixed_ctr_bits = 48,
+       .perf_ctr       = SNB_UNC_CBO_0_PER_CTR0,
+       .event_ctl      = SNB_UNC_CBO_0_PERFEVTSEL0,
+       .fixed_ctr      = SNB_UNC_FIXED_CTR,
+       .fixed_ctl      = SNB_UNC_FIXED_CTR_CTRL,
+       .single_fixed   = 1,
+       .event_mask     = SNB_UNC_RAW_EVENT_MASK,
+       .msr_offset     = SNB_UNC_CBO_MSR_OFFSET,
+       .ops            = &snb_uncore_msr_ops,
+       .format_group   = &snb_uncore_format_group,
+       .event_descs    = snb_uncore_events,
+};
+
+static struct intel_uncore_type snb_uncore_arb = {
+       .name           = "arb",
+       .num_counters   = 2,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .perf_ctr       = SNB_UNC_ARB_PER_CTR0,
+       .event_ctl      = SNB_UNC_ARB_PERFEVTSEL0,
+       .event_mask     = SNB_UNC_RAW_EVENT_MASK,
+       .msr_offset     = SNB_UNC_ARB_MSR_OFFSET,
+       .constraints    = snb_uncore_arb_constraints,
+       .ops            = &snb_uncore_msr_ops,
+       .format_group   = &snb_uncore_format_group,
+};
+
+static struct intel_uncore_type *snb_msr_uncores[] = {
+       &snb_uncore_cbox,
+       &snb_uncore_arb,
+       NULL,
+};
+
+void snb_uncore_cpu_init(void)
+{
+       uncore_msr_uncores = snb_msr_uncores;
+       if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+}
+
+enum {
+       SNB_PCI_UNCORE_IMC,
+};
+
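+/*
+ * The IMC exposes two free running counters. The scale value below is
+ * 64 / 2^20, i.e. with one count per 64-byte line the scaled result is
+ * reported in MiB.
+ */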
+static struct uncore_event_desc snb_uncore_imc_events[] = {
+       INTEL_UNCORE_EVENT_DESC(data_reads,  "event=0x01"),
+       INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
+
+       INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
+       INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
+
+       { /* end: all zeroes */ },
+};
+
+#define SNB_UNCORE_PCI_IMC_EVENT_MASK          0xff
+#define SNB_UNCORE_PCI_IMC_BAR_OFFSET          0x48
+
+/* page size multiple covering all config regs */
+#define SNB_UNCORE_PCI_IMC_MAP_SIZE            0x6000
+
+#define SNB_UNCORE_PCI_IMC_DATA_READS          0x1
+#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE     0x5050
+#define SNB_UNCORE_PCI_IMC_DATA_WRITES         0x2
+#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE    0x5054
+#define SNB_UNCORE_PCI_IMC_CTR_BASE            SNB_UNCORE_PCI_IMC_DATA_READS_BASE
+
+static struct attribute *snb_uncore_imc_formats_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+static struct attribute_group snb_uncore_imc_format_group = {
+       .name = "format",
+       .attrs = snb_uncore_imc_formats_attr,
+};
+
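+/*
+ * The client IMC counters are not accessed through MSRs or PCI config
+ * space: init_box() reads the BAR at SNB_UNCORE_PCI_IMC_BAR_OFFSET,
+ * ioremap()s it, and the counters are then read from that MMIO window.
+ */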
+static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
+       resource_size_t addr;
+       u32 pci_dword;
+
+       pci_read_config_dword(pdev, where, &pci_dword);
+       addr = pci_dword;
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+       pci_read_config_dword(pdev, where + 4, &pci_dword);
+       addr |= ((resource_size_t)pci_dword << 32);
+#endif
+
+       addr &= ~(PAGE_SIZE - 1);
+
+       box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
+       box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
+}
+
+static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
+{
+       iounmap(box->io_addr);
+}
+
+static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
+{}
+
+static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
+{}
+
+static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{}
+
+static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{}
+
+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
+}
+
+/*
+ * Custom event_init() function because we define our own fixed, free
+ * running counters, so we do not want to conflict with the generic uncore
+ * logic. This also simplifies processing.
+ */
+static int snb_uncore_imc_event_init(struct perf_event *event)
+{
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
+       int idx, base;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       pmu = uncore_event_to_pmu(event);
+       /* no device found for this pmu */
+       if (pmu->func_id < 0)
+               return -ENOENT;
+
+       /* Sampling not supported yet */
+       if (hwc->sample_period)
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       /*
+        * Place all uncore events for a particular physical package
+        * onto a single cpu
+        */
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       /* check only supported bits are set */
+       if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
+               return -EINVAL;
+
+       box = uncore_pmu_to_box(pmu, event->cpu);
+       if (!box || box->cpu < 0)
+               return -EINVAL;
+
+       event->cpu = box->cpu;
+       event->pmu_private = box;
+
+       event->hw.idx = -1;
+       event->hw.last_tag = ~0ULL;
+       event->hw.extra_reg.idx = EXTRA_REG_NONE;
+       event->hw.branch_reg.idx = EXTRA_REG_NONE;
+       /*
+        * check event is known (whitelist, determines counter)
+        */
+       switch (cfg) {
+       case SNB_UNCORE_PCI_IMC_DATA_READS:
+               base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
+               idx = UNCORE_PMC_IDX_FIXED;
+               break;
+       case SNB_UNCORE_PCI_IMC_DATA_WRITES:
+               base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
+               idx = UNCORE_PMC_IDX_FIXED + 1;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* must be done before validate_group */
+       event->hw.event_base = base;
+       event->hw.config = cfg;
+       event->hw.idx = idx;
+
+       /* no group validation needed, we have free running counters */
+
+       return 0;
+}
+
+static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return 0;
+}
+
+static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       u64 count;
+
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       event->hw.state = 0;
+       box->n_active++;
+
+       list_add_tail(&event->active_entry, &box->active_list);
+
+       count = snb_uncore_imc_read_counter(box, event);
+       local64_set(&event->hw.prev_count, count);
+
+       if (box->n_active == 1)
+               uncore_pmu_start_hrtimer(box);
+}
+
+static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               box->n_active--;
+
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+
+               list_del(&event->active_entry);
+
+               if (box->n_active == 0)
+                       uncore_pmu_cancel_hrtimer(box);
+       }
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               uncore_perf_event_update(box, event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+}
+
+static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (!box)
+               return -ENODEV;
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+       if (!(flags & PERF_EF_START))
+               hwc->state |= PERF_HES_ARCH;
+
+       snb_uncore_imc_event_start(event, 0);
+
+       box->n_events++;
+
+       return 0;
+}
+
+static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       int i;
+
+       snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
+
+       for (i = 0; i < box->n_events; i++) {
+               if (event == box->event_list[i]) {
+                       --box->n_events;
+                       break;
+               }
+       }
+}
+
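+/*
+ * Client parts only have a single package, so the bus carrying the IMC
+ * device is simply mapped to physical package id 0.
+ */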
+int snb_pci2phy_map_init(int devid)
+{
+       struct pci_dev *dev = NULL;
+       struct pci2phy_map *map;
+       int bus, segment;
+
+       dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
+       if (!dev)
+               return -ENOTTY;
+
+       bus = dev->bus->number;
+       segment = pci_domain_nr(dev->bus);
+
+       raw_spin_lock(&pci2phy_map_lock);
+       map = __find_pci2phy_map(segment);
+       if (!map) {
+               raw_spin_unlock(&pci2phy_map_lock);
+               pci_dev_put(dev);
+               return -ENOMEM;
+       }
+       map->pbus_to_physid[bus] = 0;
+       raw_spin_unlock(&pci2phy_map_lock);
+
+       pci_dev_put(dev);
+
+       return 0;
+}
+
+static struct pmu snb_uncore_imc_pmu = {
+       .task_ctx_nr    = perf_invalid_context,
+       .event_init     = snb_uncore_imc_event_init,
+       .add            = snb_uncore_imc_event_add,
+       .del            = snb_uncore_imc_event_del,
+       .start          = snb_uncore_imc_event_start,
+       .stop           = snb_uncore_imc_event_stop,
+       .read           = uncore_pmu_event_read,
+};
+
+static struct intel_uncore_ops snb_uncore_imc_ops = {
+       .init_box       = snb_uncore_imc_init_box,
+       .exit_box       = snb_uncore_imc_exit_box,
+       .enable_box     = snb_uncore_imc_enable_box,
+       .disable_box    = snb_uncore_imc_disable_box,
+       .disable_event  = snb_uncore_imc_disable_event,
+       .enable_event   = snb_uncore_imc_enable_event,
+       .hw_config      = snb_uncore_imc_hw_config,
+       .read_counter   = snb_uncore_imc_read_counter,
+};
+
+static struct intel_uncore_type snb_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 2,
+       .num_boxes      = 1,
+       .fixed_ctr_bits = 32,
+       .fixed_ctr      = SNB_UNCORE_PCI_IMC_CTR_BASE,
+       .event_descs    = snb_uncore_imc_events,
+       .format_group   = &snb_uncore_imc_format_group,
+       .perf_ctr       = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
+       .event_mask     = SNB_UNCORE_PCI_IMC_EVENT_MASK,
+       .ops            = &snb_uncore_imc_ops,
+       .pmu            = &snb_uncore_imc_pmu,
+};
+
+static struct intel_uncore_type *snb_pci_uncores[] = {
+       [SNB_PCI_UNCORE_IMC]    = &snb_uncore_imc,
+       NULL,
+};
+
+static const struct pci_device_id snb_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id ivb_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id hsw_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id bdw_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static struct pci_driver snb_uncore_pci_driver = {
+       .name           = "snb_uncore",
+       .id_table       = snb_uncore_pci_ids,
+};
+
+static struct pci_driver ivb_uncore_pci_driver = {
+       .name           = "ivb_uncore",
+       .id_table       = ivb_uncore_pci_ids,
+};
+
+static struct pci_driver hsw_uncore_pci_driver = {
+       .name           = "hsw_uncore",
+       .id_table       = hsw_uncore_pci_ids,
+};
+
+static struct pci_driver bdw_uncore_pci_driver = {
+       .name           = "bdw_uncore",
+       .id_table       = bdw_uncore_pci_ids,
+};
+
+static struct pci_driver skl_uncore_pci_driver = {
+       .name           = "skl_uncore",
+       .id_table       = skl_uncore_pci_ids,
+};
+
+struct imc_uncore_pci_dev {
+       __u32 pci_id;
+       struct pci_driver *driver;
+};
+#define IMC_DEV(a, d) \
+       { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
+
+static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
+       IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
+       IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
+       IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
+       IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
+       IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
+       IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
+       {  /* end marker */ }
+};
+
+
+#define for_each_imc_pci_id(x, t) \
+       for (x = (t); (x)->pci_id; x++)
+
+static struct pci_driver *imc_uncore_find_dev(void)
+{
+       const struct imc_uncore_pci_dev *p;
+       int ret;
+
+       for_each_imc_pci_id(p, desktop_imc_pci_ids) {
+               ret = snb_pci2phy_map_init(p->pci_id);
+               if (ret == 0)
+                       return p->driver;
+       }
+       return NULL;
+}
+
+static int imc_uncore_pci_init(void)
+{
+       struct pci_driver *imc_drv = imc_uncore_find_dev();
+
+       if (!imc_drv)
+               return -ENODEV;
+
+       uncore_pci_uncores = snb_pci_uncores;
+       uncore_pci_driver = imc_drv;
+
+       return 0;
+}
+
+int snb_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
+int ivb_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+int hsw_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
+int bdw_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
+int skl_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
+/* end of Sandy Bridge uncore support */
+
+/* Nehalem uncore support */
+static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+}
+
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+               wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+       else
+               wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
+}
+
+static struct attribute *nhm_uncore_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask8.attr,
+       NULL,
+};
+
+static struct attribute_group nhm_uncore_format_group = {
+       .name = "format",
+       .attrs = nhm_uncore_formats_attr,
+};
+
+static struct uncore_event_desc nhm_uncore_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"),
+       INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
+       INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhm_uncore_msr_ops = {
+       .disable_box    = nhm_uncore_msr_disable_box,
+       .enable_box     = nhm_uncore_msr_enable_box,
+       .disable_event  = snb_uncore_msr_disable_event,
+       .enable_event   = nhm_uncore_msr_enable_event,
+       .read_counter   = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type nhm_uncore = {
+       .name           = "",
+       .num_counters   = 8,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .event_ctl      = NHM_UNC_PERFEVTSEL0,
+       .perf_ctr       = NHM_UNC_UNCORE_PMC0,
+       .fixed_ctr      = NHM_UNC_FIXED_CTR,
+       .fixed_ctl      = NHM_UNC_FIXED_CTR_CTRL,
+       .event_mask     = NHM_UNC_RAW_EVENT_MASK,
+       .event_descs    = nhm_uncore_events,
+       .ops            = &nhm_uncore_msr_ops,
+       .format_group   = &nhm_uncore_format_group,
+};
+
+static struct intel_uncore_type *nhm_msr_uncores[] = {
+       &nhm_uncore,
+       NULL,
+};
+
+void nhm_uncore_cpu_init(void)
+{
+       uncore_msr_uncores = nhm_msr_uncores;
+}
+
+/* end of Nehalem uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
new file mode 100644
index 0000000..93f6bd9
--- /dev/null
@@ -0,0 +1,3135 @@
+/* SandyBridge-EP/IvyTown uncore support */
+#include "uncore.h"
+
+/* SNB-EP Box level control */
+#define SNBEP_PMON_BOX_CTL_RST_CTRL    (1 << 0)
+#define SNBEP_PMON_BOX_CTL_RST_CTRS    (1 << 1)
+#define SNBEP_PMON_BOX_CTL_FRZ         (1 << 8)
+#define SNBEP_PMON_BOX_CTL_FRZ_EN      (1 << 16)
+#define SNBEP_PMON_BOX_CTL_INT         (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+                                        SNBEP_PMON_BOX_CTL_RST_CTRS | \
+                                        SNBEP_PMON_BOX_CTL_FRZ_EN)
+/* SNB-EP event control */
+#define SNBEP_PMON_CTL_EV_SEL_MASK     0x000000ff
+#define SNBEP_PMON_CTL_UMASK_MASK      0x0000ff00
+#define SNBEP_PMON_CTL_RST             (1 << 17)
+#define SNBEP_PMON_CTL_EDGE_DET                (1 << 18)
+#define SNBEP_PMON_CTL_EV_SEL_EXT      (1 << 21)
+#define SNBEP_PMON_CTL_EN              (1 << 22)
+#define SNBEP_PMON_CTL_INVERT          (1 << 23)
+#define SNBEP_PMON_CTL_TRESH_MASK      0xff000000
+#define SNBEP_PMON_RAW_EVENT_MASK      (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                        SNBEP_PMON_CTL_UMASK_MASK | \
+                                        SNBEP_PMON_CTL_EDGE_DET | \
+                                        SNBEP_PMON_CTL_INVERT | \
+                                        SNBEP_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP Ubox event control */
+#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK                0x1f000000
+#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK                \
+                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                SNBEP_PMON_CTL_UMASK_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_PMON_CTL_INVERT | \
+                                SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+
+#define SNBEP_CBO_PMON_CTL_TID_EN              (1 << 19)
+#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK      (SNBEP_PMON_RAW_EVENT_MASK | \
+                                                SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* SNB-EP PCU event control */
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK    0x0000c000
+#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK      0x1f000000
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT      (1 << 30)
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET    (1 << 31)
+#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK      \
+                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT | \
+                                SNBEP_PMON_CTL_INVERT | \
+                                SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK      \
+                               (SNBEP_PMON_RAW_EVENT_MASK | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT)
+
+/* SNB-EP pci control register */
+#define SNBEP_PCI_PMON_BOX_CTL                 0xf4
+#define SNBEP_PCI_PMON_CTL0                    0xd8
+/* SNB-EP pci counter register */
+#define SNBEP_PCI_PMON_CTR0                    0xa0
+
+/* SNB-EP home agent register */
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0       0x40
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1       0x44
+#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH      0x48
+/* SNB-EP memory controller register */
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL                0xf0
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR                0xd0
+/* SNB-EP QPI register */
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0         0x228
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1         0x22c
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0          0x238
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1          0x23c
+
+/* SNB-EP Ubox register */
+#define SNBEP_U_MSR_PMON_CTR0                  0xc16
+#define SNBEP_U_MSR_PMON_CTL0                  0xc10
+
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL                0xc08
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR                0xc09
+
+/* SNB-EP Cbo register */
+#define SNBEP_C0_MSR_PMON_CTR0                 0xd16
+#define SNBEP_C0_MSR_PMON_CTL0                 0xd10
+#define SNBEP_C0_MSR_PMON_BOX_CTL              0xd04
+#define SNBEP_C0_MSR_PMON_BOX_FILTER           0xd14
+#define SNBEP_CBO_MSR_OFFSET                   0x20
+
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID      0x1f
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID      0x3fc00
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE    0x7c0000
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC      0xff800000
+
+#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) {   \
+       .event = (e),                           \
+       .msr = SNBEP_C0_MSR_PMON_BOX_FILTER,    \
+       .config_mask = (m),                     \
+       .idx = (i)                              \
+}
+
+/* SNB-EP PCU register */
+#define SNBEP_PCU_MSR_PMON_CTR0                        0xc36
+#define SNBEP_PCU_MSR_PMON_CTL0                        0xc30
+#define SNBEP_PCU_MSR_PMON_BOX_CTL             0xc24
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER          0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK     0xffffffff
+#define SNBEP_PCU_MSR_CORE_C3_CTR              0x3fc
+#define SNBEP_PCU_MSR_CORE_C6_CTR              0x3fd
+
+/* IVBEP event control */
+#define IVBEP_PMON_BOX_CTL_INT         (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+                                        SNBEP_PMON_BOX_CTL_RST_CTRS)
+#define IVBEP_PMON_RAW_EVENT_MASK              (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                        SNBEP_PMON_CTL_UMASK_MASK | \
+                                        SNBEP_PMON_CTL_EDGE_DET | \
+                                        SNBEP_PMON_CTL_TRESH_MASK)
+/* IVBEP Ubox */
+#define IVBEP_U_MSR_PMON_GLOBAL_CTL            0xc00
+#define IVBEP_U_PMON_GLOBAL_FRZ_ALL            (1 << 31)
+#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL          (1 << 29)
+
+#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK        \
+                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                SNBEP_PMON_CTL_UMASK_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+/* IVBEP Cbo */
+#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK              (IVBEP_PMON_RAW_EVENT_MASK | \
+                                                SNBEP_CBO_PMON_CTL_TID_EN)
+
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID              (0x1fULL << 0)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK     (0xfULL << 5)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE    (0x3fULL << 17)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID              (0xffffULL << 32)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC              (0x1ffULL << 52)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6               (0x1ULL << 61)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC               (0x1ULL << 62)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC     (0x1ULL << 63)
+
+/* IVBEP home agent */
+#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST                (1 << 16)
+#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK               \
+                               (IVBEP_PMON_RAW_EVENT_MASK | \
+                                IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST)
+/* IVBEP PCU */
+#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK      \
+                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+/* IVBEP QPI */
+#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK      \
+                               (IVBEP_PMON_RAW_EVENT_MASK | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT)
+
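+/*
+ * Extract the i-th n-bit wide field of x, e.g. __BITS_VALUE(x, 1, 8)
+ * returns bits 8~15 of x.
+ */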
+#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
+                               ((1ULL << (n)) - 1)))
+
+/* Haswell-EP Ubox */
+#define HSWEP_U_MSR_PMON_CTR0                  0x709
+#define HSWEP_U_MSR_PMON_CTL0                  0x705
+#define HSWEP_U_MSR_PMON_FILTER                        0x707
+
+#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL                0x703
+#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR                0x704
+
+#define HSWEP_U_MSR_PMON_BOX_FILTER_TID                (0x1 << 0)
+#define HSWEP_U_MSR_PMON_BOX_FILTER_CID                (0x1fULL << 1)
+#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \
+                                       (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \
+                                        HSWEP_U_MSR_PMON_BOX_FILTER_CID)
+
+/* Haswell-EP CBo */
+#define HSWEP_C0_MSR_PMON_CTR0                 0xe08
+#define HSWEP_C0_MSR_PMON_CTL0                 0xe01
+#define HSWEP_C0_MSR_PMON_BOX_CTL                      0xe00
+#define HSWEP_C0_MSR_PMON_BOX_FILTER0          0xe05
+#define HSWEP_CBO_MSR_OFFSET                   0x10
+
+
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID              (0x3fULL << 0)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK     (0xfULL << 6)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE    (0x7fULL << 17)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID              (0xffffULL << 32)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC              (0x1ffULL << 52)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6               (0x1ULL << 61)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC               (0x1ULL << 62)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC     (0x1ULL << 63)
+
+
+/* Haswell-EP Sbox */
+#define HSWEP_S0_MSR_PMON_CTR0                 0x726
+#define HSWEP_S0_MSR_PMON_CTL0                 0x721
+#define HSWEP_S0_MSR_PMON_BOX_CTL                      0x720
+#define HSWEP_SBOX_MSR_OFFSET                  0xa
+#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK                (SNBEP_PMON_RAW_EVENT_MASK | \
+                                                SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* Haswell-EP PCU */
+#define HSWEP_PCU_MSR_PMON_CTR0                        0x717
+#define HSWEP_PCU_MSR_PMON_CTL0                        0x711
+#define HSWEP_PCU_MSR_PMON_BOX_CTL             0x710
+#define HSWEP_PCU_MSR_PMON_BOX_FILTER          0x715
+
+/* KNL Ubox */
+#define KNL_U_MSR_PMON_RAW_EVENT_MASK \
+                                       (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \
+                                               SNBEP_CBO_PMON_CTL_TID_EN)
+/* KNL CHA */
+#define KNL_CHA_MSR_OFFSET                     0xc
+#define KNL_CHA_MSR_PMON_CTL_QOR               (1 << 16)
+#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \
+                                       (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \
+                                        KNL_CHA_MSR_PMON_CTL_QOR)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_TID                0x1ff
+#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE      (7 << 18)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_OP         (0xfffffe2aULL << 32)
+
+/* KNL EDC/MC UCLK */
+#define KNL_UCLK_MSR_PMON_CTR0_LOW             0x400
+#define KNL_UCLK_MSR_PMON_CTL0                 0x420
+#define KNL_UCLK_MSR_PMON_BOX_CTL              0x430
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW       0x44c
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL       0x454
+#define KNL_PMON_FIXED_CTL_EN                  0x1
+
+/* KNL EDC */
+#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW                0xa00
+#define KNL_EDC0_ECLK_MSR_PMON_CTL0            0xa20
+#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL         0xa30
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW  0xa3c
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL  0xa44
+
+/* KNL MC */
+#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW          0xb00
+#define KNL_MC0_CH0_MSR_PMON_CTL0              0xb20
+#define KNL_MC0_CH0_MSR_PMON_BOX_CTL           0xb30
+#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW         0xb3c
+#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL         0xb44
+
+/* KNL IRP */
+#define KNL_IRP_PCI_PMON_BOX_CTL               0xf0
+#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK                (SNBEP_PMON_RAW_EVENT_MASK | \
+                                                KNL_CHA_MSR_PMON_CTL_QOR)
+/* KNL PCU */
+#define KNL_PCU_PMON_CTL_EV_SEL_MASK           0x0000007f
+#define KNL_PCU_PMON_CTL_USE_OCC_CTR           (1 << 7)
+#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK                0x3f000000
+#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK        \
+                               (KNL_PCU_PMON_CTL_EV_SEL_MASK | \
+                                KNL_PCU_PMON_CTL_USE_OCC_CTR | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_CBO_PMON_CTL_TID_EN | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT | \
+                                SNBEP_PMON_CTL_INVERT | \
+                                KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
+DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
+DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
+DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
+DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
+DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
+DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
+DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
+DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
+DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
+
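+/*
+ * Box-level disable/enable on SNB-EP freezes/unfreezes all counters in
+ * the box by toggling the FRZ bit of the per-box control register.
+ */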
+static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int box_ctl = uncore_pci_box_ctl(box);
+       u32 config = 0;
+
+       if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+               config |= SNBEP_PMON_BOX_CTL_FRZ;
+               pci_write_config_dword(pdev, box_ctl, config);
+       }
+}
+
+static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int box_ctl = uncore_pci_box_ctl(box);
+       u32 config = 0;
+
+       if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+               config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+               pci_write_config_dword(pdev, box_ctl, config);
+       }
+}
+
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       pci_write_config_dword(pdev, hwc->config_base, hwc->config);
+}
+
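+/*
+ * The PCI PMON counters are wider than 32 bits and are read as two
+ * 32-bit config space accesses filling the low and high halves of the
+ * 64-bit count.
+ */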
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 count = 0;
+
+       pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
+       pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
+
+       return count;
+}
+
+static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int box_ctl = uncore_pci_box_ctl(box);
+
+       pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+       u64 config;
+       unsigned msr;
+
+       msr = uncore_msr_box_ctl(box);
+       if (msr) {
+               rdmsrl(msr, config);
+               config |= SNBEP_PMON_BOX_CTL_FRZ;
+               wrmsrl(msr, config);
+       }
+}
+
+static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       u64 config;
+       unsigned msr;
+
+       msr = uncore_msr_box_ctl(box);
+       if (msr) {
+               rdmsrl(msr, config);
+               config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+               wrmsrl(msr, config);
+       }
+}
+
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE)
+               wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
+
+       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
+                                       struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       wrmsrl(hwc->config_base, hwc->config);
+}
+
+static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+
+       if (msr)
+               wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
+}
+
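+/*
+ * sysfs "format" attributes: they describe how the perf_event_attr
+ * config/config1/config2 bits map onto the event select, umask, threshold
+ * and filter fields of each box type.
+ */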
+static struct attribute *snbep_uncore_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute *snbep_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       NULL,
+};
+
+static struct attribute *snbep_uncore_cbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_filter_tid.attr,
+       &format_attr_filter_nid.attr,
+       &format_attr_filter_state.attr,
+       &format_attr_filter_opc.attr,
+       NULL,
+};
+
+static struct attribute *snbep_uncore_pcu_formats_attr[] = {
+       &format_attr_event_ext.attr,
+       &format_attr_occ_sel.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       &format_attr_occ_invert.attr,
+       &format_attr_occ_edge.attr,
+       &format_attr_filter_band0.attr,
+       &format_attr_filter_band1.attr,
+       &format_attr_filter_band2.attr,
+       &format_attr_filter_band3.attr,
+       NULL,
+};
+
+static struct attribute *snbep_uncore_qpi_formats_attr[] = {
+       &format_attr_event_ext.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_match_rds.attr,
+       &format_attr_match_rnid30.attr,
+       &format_attr_match_rnid4.attr,
+       &format_attr_match_dnid.attr,
+       &format_attr_match_mc.attr,
+       &format_attr_match_opc.attr,
+       &format_attr_match_vnw.attr,
+       &format_attr_match0.attr,
+       &format_attr_match1.attr,
+       &format_attr_mask_rds.attr,
+       &format_attr_mask_rnid30.attr,
+       &format_attr_mask_rnid4.attr,
+       &format_attr_mask_dnid.attr,
+       &format_attr_mask_mc.attr,
+       &format_attr_mask_opc.attr,
+       &format_attr_mask_vnw.attr,
+       &format_attr_mask0.attr,
+       &format_attr_mask1.attr,
+       NULL,
+};
+
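+/*
+ * Predefined events exposed via sysfs.  The .scale/.unit entries make perf
+ * report CAS counts as MiB of 64-byte lines (64 / 2^20 = 6.103515625e-5).
+ */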
+static struct uncore_event_desc snbep_uncore_imc_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0x00"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+       { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc snbep_uncore_qpi_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"),
+       INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
+       INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x102,umask=0x08"),
+       INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x103,umask=0x04"),
+       { /* end: all zeroes */ },
+};
+
+static struct attribute_group snbep_uncore_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_ubox_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_cbox_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_pcu_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_qpi_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_qpi_formats_attr,
+};
+
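+/*
+ * Common MSR/PCI counter access callbacks, shared via these init macros by
+ * the box types below and reused by the IvyTown code further down.
+ */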
+#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT()                   \
+       .disable_box    = snbep_uncore_msr_disable_box,         \
+       .enable_box     = snbep_uncore_msr_enable_box,          \
+       .disable_event  = snbep_uncore_msr_disable_event,       \
+       .enable_event   = snbep_uncore_msr_enable_event,        \
+       .read_counter   = uncore_msr_read_counter
+
+#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT()                     \
+       __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),                   \
+       .init_box       = snbep_uncore_msr_init_box             \
+
+static struct intel_uncore_ops snbep_uncore_msr_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+};
+
+#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT()                     \
+       .init_box       = snbep_uncore_pci_init_box,            \
+       .disable_box    = snbep_uncore_pci_disable_box,         \
+       .enable_box     = snbep_uncore_pci_enable_box,          \
+       .disable_event  = snbep_uncore_pci_disable_event,       \
+       .read_counter   = snbep_uncore_pci_read_counter
+
+static struct intel_uncore_ops snbep_uncore_pci_ops = {
+       SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+       .enable_event   = snbep_uncore_pci_enable_event,
+};
+
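+/*
+ * Per-box event constraints: each entry maps an event code to the bitmask of
+ * counters that are allowed to count it.
+ */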
+static struct event_constraint snbep_uncore_cbox_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
+       UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
+       UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
+       UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
+       EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
+       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2a, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snbep_uncore_ubox = {
+       .name           = "ubox",
+       .num_counters   = 2,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .fixed_ctr_bits = 48,
+       .perf_ctr       = SNBEP_U_MSR_PMON_CTR0,
+       .event_ctl      = SNBEP_U_MSR_PMON_CTL0,
+       .event_mask     = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .ops            = &snbep_uncore_msr_ops,
+       .format_group   = &snbep_uncore_ubox_format_group,
+};
+
+static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
+       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2),
+       EVENT_EXTRA_END
+};
+
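+/*
+ * The C-box filter register is shared by all events in a box.  Each filter
+ * field carries a 6-bit reference count packed into er->ref; a field may be
+ * claimed only while it is unused or already programmed with the same value.
+ */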
+static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+       int i;
+
+       if (uncore_box_is_fake(box))
+               return;
+
+       for (i = 0; i < 5; i++) {
+               if (reg1->alloc & (0x1 << i))
+                       atomic_sub(1 << (i * 6), &er->ref);
+       }
+       reg1->alloc = 0;
+}
+
+static struct event_constraint *
+__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event,
+                           u64 (*cbox_filter_mask)(int fields))
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+       int i, alloc = 0;
+       unsigned long flags;
+       u64 mask;
+
+       if (reg1->idx == EXTRA_REG_NONE)
+               return NULL;
+
+       raw_spin_lock_irqsave(&er->lock, flags);
+       for (i = 0; i < 5; i++) {
+               if (!(reg1->idx & (0x1 << i)))
+                       continue;
+               if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+                       continue;
+
+               mask = cbox_filter_mask(0x1 << i);
+               if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) ||
+                   !((reg1->config ^ er->config) & mask)) {
+                       atomic_add(1 << (i * 6), &er->ref);
+                       er->config &= ~mask;
+                       er->config |= reg1->config & mask;
+                       alloc |= (0x1 << i);
+               } else {
+                       break;
+               }
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+       if (i < 5)
+               goto fail;
+
+       if (!uncore_box_is_fake(box))
+               reg1->alloc |= alloc;
+
+       return NULL;
+fail:
+       for (; i >= 0; i--) {
+               if (alloc & (0x1 << i))
+                       atomic_sub(1 << (i * 6), &er->ref);
+       }
+       return &uncore_constraint_empty;
+}
+
+static u64 snbep_cbox_filter_mask(int fields)
+{
+       u64 mask = 0;
+
+       if (fields & 0x1)
+               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID;
+       if (fields & 0x2)
+               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID;
+       if (fields & 0x4)
+               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+       if (fields & 0x8)
+               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+
+       return mask;
+}
+
+static struct event_constraint *
+snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask);
+}
+
+static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct extra_reg *er;
+       int idx = 0;
+
+       for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               idx |= er->idx;
+       }
+
+       if (idx) {
+               reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+                       SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+               reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx);
+               reg1->idx = idx;
+       }
+       return 0;
+}
+
+static struct intel_uncore_ops snbep_uncore_cbox_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = snbep_cbox_hw_config,
+       .get_constraint         = snbep_cbox_get_constraint,
+       .put_constraint         = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type snbep_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 4,
+       .num_boxes              = 8,
+       .perf_ctr_bits          = 44,
+       .event_ctl              = SNBEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = SNBEP_C0_MSR_PMON_CTR0,
+       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = SNBEP_CBO_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = snbep_uncore_cbox_constraints,
+       .ops                    = &snbep_uncore_cbox_ops,
+       .format_group           = &snbep_uncore_cbox_format_group,
+};
+
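+/*
+ * PCU occupancy events 0xb-0xe each consume one 8-bit band in the shared
+ * filter register.  On a conflict the event can migrate to a free band,
+ * which shifts both its event select and its filter byte accordingly.
+ */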
+static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       u64 config = reg1->config;
+
+       if (new_idx > reg1->idx)
+               config <<= 8 * (new_idx - reg1->idx);
+       else
+               config >>= 8 * (reg1->idx - new_idx);
+
+       if (modify) {
+               hwc->config += new_idx - reg1->idx;
+               reg1->config = config;
+               reg1->idx = new_idx;
+       }
+       return config;
+}
+
+static struct event_constraint *
+snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+       unsigned long flags;
+       int idx = reg1->idx;
+       u64 mask, config1 = reg1->config;
+       bool ok = false;
+
+       if (reg1->idx == EXTRA_REG_NONE ||
+           (!uncore_box_is_fake(box) && reg1->alloc))
+               return NULL;
+again:
+       mask = 0xffULL << (idx * 8);
+       raw_spin_lock_irqsave(&er->lock, flags);
+       if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
+           !((config1 ^ er->config) & mask)) {
+               atomic_add(1 << (idx * 8), &er->ref);
+               er->config &= ~mask;
+               er->config |= config1 & mask;
+               ok = true;
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       if (!ok) {
+               idx = (idx + 1) % 4;
+               if (idx != reg1->idx) {
+                       config1 = snbep_pcu_alter_er(event, idx, false);
+                       goto again;
+               }
+               return &uncore_constraint_empty;
+       }
+
+       if (!uncore_box_is_fake(box)) {
+               if (idx != reg1->idx)
+                       snbep_pcu_alter_er(event, idx, true);
+               reg1->alloc = 1;
+       }
+       return NULL;
+}
+
+static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+
+       if (uncore_box_is_fake(box) || !reg1->alloc)
+               return;
+
+       atomic_sub(1 << (reg1->idx * 8), &er->ref);
+       reg1->alloc = 0;
+}
+
+static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+       if (ev_sel >= 0xb && ev_sel <= 0xe) {
+               reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+               reg1->idx = ev_sel - 0xb;
+               reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
+       }
+       return 0;
+}
+
+static struct intel_uncore_ops snbep_uncore_pcu_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = snbep_pcu_hw_config,
+       .get_constraint         = snbep_pcu_get_constraint,
+       .put_constraint         = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type snbep_uncore_pcu = {
+       .name                   = "pcu",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCU_MSR_PMON_CTR0,
+       .event_ctl              = SNBEP_PCU_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCU_MSR_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &snbep_uncore_pcu_ops,
+       .format_group           = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *snbep_msr_uncores[] = {
+       &snbep_uncore_ubox,
+       &snbep_uncore_cbox,
+       &snbep_uncore_pcu,
+       NULL,
+};
+
+void snbep_uncore_cpu_init(void)
+{
+       if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+       uncore_msr_uncores = snbep_msr_uncores;
+}
+
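+/*
+ * Indices into uncore_extra_pci_dev[]: auxiliary PCI devices the driver must
+ * program (such as the per-port QPI filter devices) but which are not PMON
+ * boxes themselves.
+ */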
+enum {
+       SNBEP_PCI_QPI_PORT0_FILTER,
+       SNBEP_PCI_QPI_PORT1_FILTER,
+       HSWEP_PCI_PCU_3,
+};
+
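+/*
+ * QPI event 0x38 takes 64-bit packet match/mask values from config1/config2;
+ * they are programmed into the companion "filter" PCI device of the port.
+ */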
+static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
+               reg1->idx = 0;
+               reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
+               reg1->config = event->attr.config1;
+               reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
+               reg2->config = event->attr.config2;
+       }
+       return 0;
+}
+
+static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
+               int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+               struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+
+               if (filter_pdev) {
+                       pci_write_config_dword(filter_pdev, reg1->reg,
+                                               (u32)reg1->config);
+                       pci_write_config_dword(filter_pdev, reg1->reg + 4,
+                                               (u32)(reg1->config >> 32));
+                       pci_write_config_dword(filter_pdev, reg2->reg,
+                                               (u32)reg2->config);
+                       pci_write_config_dword(filter_pdev, reg2->reg + 4,
+                                               (u32)(reg2->config >> 32));
+               }
+       }
+
+       pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops snbep_uncore_qpi_ops = {
+       SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+       .enable_event           = snbep_qpi_enable_event,
+       .hw_config              = snbep_qpi_hw_config,
+       .get_constraint         = uncore_get_constraint,
+       .put_constraint         = uncore_put_constraint,
+};
+
+#define SNBEP_UNCORE_PCI_COMMON_INIT()                         \
+       .perf_ctr       = SNBEP_PCI_PMON_CTR0,                  \
+       .event_ctl      = SNBEP_PCI_PMON_CTL0,                  \
+       .event_mask     = SNBEP_PMON_RAW_EVENT_MASK,            \
+       .box_ctl        = SNBEP_PCI_PMON_BOX_CTL,               \
+       .ops            = &snbep_uncore_pci_ops,                \
+       .format_group   = &snbep_uncore_format_group
+
+static struct intel_uncore_type snbep_uncore_ha = {
+       .name           = "ha",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 4,
+       .num_boxes      = 4,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+       .event_descs    = snbep_uncore_imc_events,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_qpi = {
+       .name                   = "qpi",
+       .num_counters           = 4,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &snbep_uncore_qpi_ops,
+       .event_descs            = snbep_uncore_qpi_events,
+       .format_group           = &snbep_uncore_qpi_format_group,
+};
+
+static struct intel_uncore_type snbep_uncore_r2pcie = {
+       .name           = "r2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .constraints    = snbep_uncore_r2pcie_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_r3qpi = {
+       .name           = "r3qpi",
+       .num_counters   = 3,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 44,
+       .constraints    = snbep_uncore_r3qpi_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+       SNBEP_PCI_UNCORE_HA,
+       SNBEP_PCI_UNCORE_IMC,
+       SNBEP_PCI_UNCORE_QPI,
+       SNBEP_PCI_UNCORE_R2PCIE,
+       SNBEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *snbep_pci_uncores[] = {
+       [SNBEP_PCI_UNCORE_HA]           = &snbep_uncore_ha,
+       [SNBEP_PCI_UNCORE_IMC]          = &snbep_uncore_imc,
+       [SNBEP_PCI_UNCORE_QPI]          = &snbep_uncore_qpi,
+       [SNBEP_PCI_UNCORE_R2PCIE]       = &snbep_uncore_r2pcie,
+       [SNBEP_PCI_UNCORE_R3QPI]        = &snbep_uncore_r3qpi,
+       NULL,
+};
+
+static const struct pci_device_id snbep_uncore_pci_ids[] = {
+       { /* Home Agent */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
+       },
+       { /* MC Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
+       },
+       { /* MC Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
+       },
+       { /* MC Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
+       },
+       { /* MC Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
+       },
+       { /* QPI Port 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
+       },
+       { /* QPI Port 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
+       },
+       { /* R2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
+       },
+       { /* R3QPI Link 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
+       },
+       { /* R3QPI Link 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
+       },
+       { /* QPI Port 0 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT0_FILTER),
+       },
+       { /* QPI Port 1 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT1_FILTER),
+       },
+       { /* end: all zeroes */ }
+};
+
+static struct pci_driver snbep_uncore_pci_driver = {
+       .name           = "snbep_uncore",
+       .id_table       = snbep_uncore_pci_ids,
+};
+
+/*
+ * Build the PCI bus number -> physical package id mapping.  Each socket's
+ * UBOX device exposes the local Node ID (config offset 0x40) and the Node ID
+ * mapping register (offset 0x54) needed to translate it.
+ */
+static int snbep_pci2phy_map_init(int devid)
+{
+       struct pci_dev *ubox_dev = NULL;
+       int i, bus, nodeid, segment;
+       struct pci2phy_map *map;
+       int err = 0;
+       u32 config = 0;
+
+       while (1) {
+               /* find the UBOX device */
+               ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev);
+               if (!ubox_dev)
+                       break;
+               bus = ubox_dev->bus->number;
+               /* get the Node ID of the local register */
+               err = pci_read_config_dword(ubox_dev, 0x40, &config);
+               if (err)
+                       break;
+               nodeid = config;
+               /* get the Node ID mapping */
+               err = pci_read_config_dword(ubox_dev, 0x54, &config);
+               if (err)
+                       break;
+
+               segment = pci_domain_nr(ubox_dev->bus);
+               raw_spin_lock(&pci2phy_map_lock);
+               map = __find_pci2phy_map(segment);
+               if (!map) {
+                       raw_spin_unlock(&pci2phy_map_lock);
+                       err = -ENOMEM;
+                       break;
+               }
+
+               /*
+                * Each 3-bit field in the Node ID mapping register maps to a
+                * particular node; the index of the field that matches the
+                * local Node ID is this bus's physical package id.
+                */
+               for (i = 0; i < 8; i++) {
+                       if (nodeid == ((config >> (3 * i)) & 0x7)) {
+                               map->pbus_to_physid[bus] = i;
+                               break;
+                       }
+               }
+               raw_spin_unlock(&pci2phy_map_lock);
+       }
+
+       if (!err) {
+               /*
+                * For PCI buses with no UBOX device, reuse the mapping of the
+                * next higher-numbered bus that does have one (the scan runs
+                * from bus 255 down to 0).
+                */
+               raw_spin_lock(&pci2phy_map_lock);
+               list_for_each_entry(map, &pci2phy_map_head, list) {
+                       i = -1;
+                       for (bus = 255; bus >= 0; bus--) {
+                               if (map->pbus_to_physid[bus] >= 0)
+                                       i = map->pbus_to_physid[bus];
+                               else
+                                       map->pbus_to_physid[bus] = i;
+                       }
+               }
+               raw_spin_unlock(&pci2phy_map_lock);
+       }
+
+       pci_dev_put(ubox_dev);
+
+       return err ? pcibios_err_to_errno(err) : 0;
+}
+
+int snbep_uncore_pci_init(void)
+{
+       int ret = snbep_pci2phy_map_init(0x3ce0);
+       if (ret)
+               return ret;
+       uncore_pci_uncores = snbep_pci_uncores;
+       uncore_pci_driver = &snbep_uncore_pci_driver;
+       return 0;
+}
+/* end of Sandy Bridge-EP uncore support */
+
+/* IvyTown uncore support */
+static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+       if (msr)
+               wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+
+       pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
+}
+
+#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT()                     \
+       .init_box       = ivbep_uncore_msr_init_box,            \
+       .disable_box    = snbep_uncore_msr_disable_box,         \
+       .enable_box     = snbep_uncore_msr_enable_box,          \
+       .disable_event  = snbep_uncore_msr_disable_event,       \
+       .enable_event   = snbep_uncore_msr_enable_event,        \
+       .read_counter   = uncore_msr_read_counter
+
+static struct intel_uncore_ops ivbep_uncore_msr_ops = {
+       IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+};
+
+static struct intel_uncore_ops ivbep_uncore_pci_ops = {
+       .init_box       = ivbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = snbep_uncore_pci_enable_box,
+       .disable_event  = snbep_uncore_pci_disable_event,
+       .enable_event   = snbep_uncore_pci_enable_event,
+       .read_counter   = snbep_uncore_pci_read_counter,
+};
+
+#define IVBEP_UNCORE_PCI_COMMON_INIT()                         \
+       .perf_ctr       = SNBEP_PCI_PMON_CTR0,                  \
+       .event_ctl      = SNBEP_PCI_PMON_CTL0,                  \
+       .event_mask     = IVBEP_PMON_RAW_EVENT_MASK,            \
+       .box_ctl        = SNBEP_PCI_PMON_BOX_CTL,               \
+       .ops            = &ivbep_uncore_pci_ops,                        \
+       .format_group   = &ivbep_uncore_format_group
+
+static struct attribute *ivbep_uncore_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute *ivbep_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       NULL,
+};
+
+static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_filter_tid.attr,
+       &format_attr_filter_link.attr,
+       &format_attr_filter_state2.attr,
+       &format_attr_filter_nid2.attr,
+       &format_attr_filter_opc2.attr,
+       &format_attr_filter_nc.attr,
+       &format_attr_filter_c6.attr,
+       &format_attr_filter_isoc.attr,
+       NULL,
+};
+
+static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
+       &format_attr_event_ext.attr,
+       &format_attr_occ_sel.attr,
+       &format_attr_edge.attr,
+       &format_attr_thresh5.attr,
+       &format_attr_occ_invert.attr,
+       &format_attr_occ_edge.attr,
+       &format_attr_filter_band0.attr,
+       &format_attr_filter_band1.attr,
+       &format_attr_filter_band2.attr,
+       &format_attr_filter_band3.attr,
+       NULL,
+};
+
+static struct attribute *ivbep_uncore_qpi_formats_attr[] = {
+       &format_attr_event_ext.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_match_rds.attr,
+       &format_attr_match_rnid30.attr,
+       &format_attr_match_rnid4.attr,
+       &format_attr_match_dnid.attr,
+       &format_attr_match_mc.attr,
+       &format_attr_match_opc.attr,
+       &format_attr_match_vnw.attr,
+       &format_attr_match0.attr,
+       &format_attr_match1.attr,
+       &format_attr_mask_rds.attr,
+       &format_attr_mask_rnid30.attr,
+       &format_attr_mask_rnid4.attr,
+       &format_attr_mask_dnid.attr,
+       &format_attr_mask_mc.attr,
+       &format_attr_mask_opc.attr,
+       &format_attr_mask_vnw.attr,
+       &format_attr_mask0.attr,
+       &format_attr_mask1.attr,
+       NULL,
+};
+
+static struct attribute_group ivbep_uncore_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_ubox_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_cbox_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_pcu_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_qpi_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_qpi_formats_attr,
+};
+
+static struct intel_uncore_type ivbep_uncore_ubox = {
+       .name           = "ubox",
+       .num_counters   = 2,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .fixed_ctr_bits = 48,
+       .perf_ctr       = SNBEP_U_MSR_PMON_CTR0,
+       .event_ctl      = SNBEP_U_MSR_PMON_CTL0,
+       .event_mask     = IVBEP_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .ops            = &ivbep_uncore_msr_ops,
+       .format_group   = &ivbep_uncore_ubox_format_group,
+};
+
+static struct extra_reg ivbep_uncore_cbox_extra_regs[] = {
+       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
+       EVENT_EXTRA_END
+};
+
+static u64 ivbep_cbox_filter_mask(int fields)
+{
+       u64 mask = 0;
+
+       if (fields & 0x1)
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID;
+       if (fields & 0x2)
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK;
+       if (fields & 0x4)
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+       if (fields & 0x8)
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID;
+       if (fields & 0x10) {
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC;
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6;
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
+       }
+
+       return mask;
+}
+
+static struct event_constraint *
+ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask);
+}
+
+static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct extra_reg *er;
+       int idx = 0;
+
+       for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               idx |= er->idx;
+       }
+
+       if (idx) {
+               reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+                       SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+               reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx);
+               reg1->idx = idx;
+       }
+       return 0;
+}
+
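+/*
+ * IvyTown C-boxes have two filter registers; the 64-bit shared filter value
+ * is written as its low and high halves (FILTER1 sits 6 MSRs above FILTER0).
+ */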
+static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               u64 filter = uncore_shared_reg_config(box, 0);
+               wrmsrl(reg1->reg, filter & 0xffffffff);
+               wrmsrl(reg1->reg + 6, filter >> 32);
+       }
+
+       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops ivbep_uncore_cbox_ops = {
+       .init_box               = ivbep_uncore_msr_init_box,
+       .disable_box            = snbep_uncore_msr_disable_box,
+       .enable_box             = snbep_uncore_msr_enable_box,
+       .disable_event          = snbep_uncore_msr_disable_event,
+       .enable_event           = ivbep_cbox_enable_event,
+       .read_counter           = uncore_msr_read_counter,
+       .hw_config              = ivbep_cbox_hw_config,
+       .get_constraint         = ivbep_cbox_get_constraint,
+       .put_constraint         = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 4,
+       .num_boxes              = 15,
+       .perf_ctr_bits          = 44,
+       .event_ctl              = SNBEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = SNBEP_C0_MSR_PMON_CTR0,
+       .event_mask             = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = SNBEP_CBO_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = snbep_uncore_cbox_constraints,
+       .ops                    = &ivbep_uncore_cbox_ops,
+       .format_group           = &ivbep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_ops ivbep_uncore_pcu_ops = {
+       IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = snbep_pcu_hw_config,
+       .get_constraint         = snbep_pcu_get_constraint,
+       .put_constraint         = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_pcu = {
+       .name                   = "pcu",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCU_MSR_PMON_CTR0,
+       .event_ctl              = SNBEP_PCU_MSR_PMON_CTL0,
+       .event_mask             = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCU_MSR_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &ivbep_uncore_pcu_ops,
+       .format_group           = &ivbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *ivbep_msr_uncores[] = {
+       &ivbep_uncore_ubox,
+       &ivbep_uncore_cbox,
+       &ivbep_uncore_pcu,
+       NULL,
+};
+
+void ivbep_uncore_cpu_init(void)
+{
+       if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+       uncore_msr_uncores = ivbep_msr_uncores;
+}
+
+static struct intel_uncore_type ivbep_uncore_ha = {
+       .name           = "ha",
+       .num_counters   = 4,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 48,
+       IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type ivbep_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 4,
+       .num_boxes      = 8,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+       .event_descs    = snbep_uncore_imc_events,
+       IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+/* counter/control registers in IRP boxes are irregularly spaced */
+static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
+static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
+
+static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx],
+                              hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config);
+}
+
+static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 count = 0;
+
+       pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+       pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+       return count;
+}
+
+static struct intel_uncore_ops ivbep_uncore_irp_ops = {
+       .init_box       = ivbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = snbep_uncore_pci_enable_box,
+       .disable_event  = ivbep_uncore_irp_disable_event,
+       .enable_event   = ivbep_uncore_irp_enable_event,
+       .read_counter   = ivbep_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type ivbep_uncore_irp = {
+       .name                   = "irp",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .event_mask             = IVBEP_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .ops                    = &ivbep_uncore_irp_ops,
+       .format_group           = &ivbep_uncore_format_group,
+};
+
+static struct intel_uncore_ops ivbep_uncore_qpi_ops = {
+       .init_box       = ivbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = snbep_uncore_pci_enable_box,
+       .disable_event  = snbep_uncore_pci_disable_event,
+       .enable_event   = snbep_qpi_enable_event,
+       .read_counter   = snbep_uncore_pci_read_counter,
+       .hw_config      = snbep_qpi_hw_config,
+       .get_constraint = uncore_get_constraint,
+       .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_qpi = {
+       .name                   = "qpi",
+       .num_counters           = 4,
+       .num_boxes              = 3,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &ivbep_uncore_qpi_ops,
+       .format_group           = &ivbep_uncore_qpi_format_group,
+};
+
+static struct intel_uncore_type ivbep_uncore_r2pcie = {
+       .name           = "r2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .constraints    = snbep_uncore_r2pcie_constraints,
+       IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type ivbep_uncore_r3qpi = {
+       .name           = "r3qpi",
+       .num_counters   = 3,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 44,
+       .constraints    = snbep_uncore_r3qpi_constraints,
+       IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+       IVBEP_PCI_UNCORE_HA,
+       IVBEP_PCI_UNCORE_IMC,
+       IVBEP_PCI_UNCORE_IRP,
+       IVBEP_PCI_UNCORE_QPI,
+       IVBEP_PCI_UNCORE_R2PCIE,
+       IVBEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *ivbep_pci_uncores[] = {
+       [IVBEP_PCI_UNCORE_HA]   = &ivbep_uncore_ha,
+       [IVBEP_PCI_UNCORE_IMC]  = &ivbep_uncore_imc,
+       [IVBEP_PCI_UNCORE_IRP]  = &ivbep_uncore_irp,
+       [IVBEP_PCI_UNCORE_QPI]  = &ivbep_uncore_qpi,
+       [IVBEP_PCI_UNCORE_R2PCIE]       = &ivbep_uncore_r2pcie,
+       [IVBEP_PCI_UNCORE_R3QPI]        = &ivbep_uncore_r3qpi,
+       NULL,
+};
+
+static const struct pci_device_id ivbep_uncore_pci_ids[] = {
+       { /* Home Agent 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0),
+       },
+       { /* Home Agent 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1),
+       },
+       { /* MC0 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0),
+       },
+       { /* MC0 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1),
+       },
+       { /* MC0 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2),
+       },
+       { /* MC0 Channel 4 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3),
+       },
+       { /* MC1 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4),
+       },
+       { /* MC1 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5),
+       },
+       { /* MC1 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6),
+       },
+       { /* MC1 Channel 4 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7),
+       },
+       { /* IRP */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0),
+       },
+       { /* QPI0 Port 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0),
+       },
+       { /* QPI0 Port 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1),
+       },
+       { /* QPI1 Port 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2),
+       },
+       { /* R2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0),
+       },
+       { /* R3QPI0 Link 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0),
+       },
+       { /* R3QPI0 Link 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1),
+       },
+       { /* R3QPI1 Link 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2),
+       },
+       { /* QPI Port 0 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT0_FILTER),
+       },
+       { /* QPI Port 1 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT1_FILTER),
+       },
+       { /* end: all zeroes */ }
+};
+
+static struct pci_driver ivbep_uncore_pci_driver = {
+       .name           = "ivbep_uncore",
+       .id_table       = ivbep_uncore_pci_ids,
+};
+
+int ivbep_uncore_pci_init(void)
+{
+       int ret = snbep_pci2phy_map_init(0x0e1e);
+       if (ret)
+               return ret;
+       uncore_pci_uncores = ivbep_pci_uncores;
+       uncore_pci_driver = &ivbep_uncore_pci_driver;
+       return 0;
+}
+/* end of IvyTown uncore support */
+
+/* KNL uncore support */
+static struct attribute *knl_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       NULL,
+};
+
+static struct attribute_group knl_uncore_ubox_format_group = {
+       .name = "format",
+       .attrs = knl_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_ubox = {
+       .name                   = "ubox",
+       .num_counters           = 2,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
+       .event_mask             = KNL_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .ops                    = &snbep_uncore_msr_ops,
+       .format_group           = &knl_uncore_ubox_format_group,
+};
+
+static struct attribute *knl_uncore_cha_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_qor.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_filter_tid4.attr,
+       &format_attr_filter_link3.attr,
+       &format_attr_filter_state4.attr,
+       &format_attr_filter_local.attr,
+       &format_attr_filter_all_op.attr,
+       &format_attr_filter_nnm.attr,
+       &format_attr_filter_opc3.attr,
+       &format_attr_filter_nc.attr,
+       &format_attr_filter_isoc.attr,
+       NULL,
+};
+
+static struct attribute_group knl_uncore_cha_format_group = {
+       .name = "format",
+       .attrs = knl_uncore_cha_formats_attr,
+};
+
+static struct event_constraint knl_uncore_cha_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg knl_uncore_cha_extra_regs[] = {
+       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4),
+       EVENT_EXTRA_END
+};
+
+static u64 knl_cha_filter_mask(int fields)
+{
+       u64 mask = 0;
+
+       if (fields & 0x1)
+               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID;
+       if (fields & 0x2)
+               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE;
+       if (fields & 0x4)
+               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP;
+       return mask;
+}
+
+static struct event_constraint *
+knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask);
+}
+
+static int knl_cha_hw_config(struct intel_uncore_box *box,
+                            struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct extra_reg *er;
+       int idx = 0;
+
+       for (er = knl_uncore_cha_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               idx |= er->idx;
+       }
+
+       if (idx) {
+               reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+                           KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
+               reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
+               reg1->idx = idx;
+       }
+       return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+                                   struct perf_event *event);
+
+static struct intel_uncore_ops knl_uncore_cha_ops = {
+       .init_box               = snbep_uncore_msr_init_box,
+       .disable_box            = snbep_uncore_msr_disable_box,
+       .enable_box             = snbep_uncore_msr_enable_box,
+       .disable_event          = snbep_uncore_msr_disable_event,
+       .enable_event           = hswep_cbox_enable_event,
+       .read_counter           = uncore_msr_read_counter,
+       .hw_config              = knl_cha_hw_config,
+       .get_constraint         = knl_cha_get_constraint,
+       .put_constraint         = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type knl_uncore_cha = {
+       .name                   = "cha",
+       .num_counters           = 4,
+       .num_boxes              = 38,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
+       .event_mask             = KNL_CHA_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = KNL_CHA_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = knl_uncore_cha_constraints,
+       .ops                    = &knl_uncore_cha_ops,
+       .format_group           = &knl_uncore_cha_format_group,
+};
+
+static struct attribute *knl_uncore_pcu_formats_attr[] = {
+       &format_attr_event2.attr,
+       &format_attr_use_occ_ctr.attr,
+       &format_attr_occ_sel.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh6.attr,
+       &format_attr_occ_invert.attr,
+       &format_attr_occ_edge_det.attr,
+       NULL,
+};
+
+static struct attribute_group knl_uncore_pcu_format_group = {
+       .name = "format",
+       .attrs = knl_uncore_pcu_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_pcu = {
+       .name                   = "pcu",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = HSWEP_PCU_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_PCU_MSR_PMON_CTL0,
+       .event_mask             = KNL_PCU_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_PCU_MSR_PMON_BOX_CTL,
+       .ops                    = &snbep_uncore_msr_ops,
+       .format_group           = &knl_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *knl_msr_uncores[] = {
+       &knl_uncore_ubox,
+       &knl_uncore_cha,
+       &knl_uncore_pcu,
+       NULL,
+};
+
+void knl_uncore_cpu_init(void)
+{
+       uncore_msr_uncores = knl_msr_uncores;
+}
+
+static void knl_uncore_imc_enable_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int box_ctl = uncore_pci_box_ctl(box);
+
+       pci_write_config_dword(pdev, box_ctl, 0);
+}
+
+static void knl_uncore_imc_enable_event(struct intel_uncore_box *box,
+                                       struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK)
+                                                       == UNCORE_FIXED_EVENT)
+               pci_write_config_dword(pdev, hwc->config_base,
+                                      hwc->config | KNL_PMON_FIXED_CTL_EN);
+       else
+               pci_write_config_dword(pdev, hwc->config_base,
+                                      hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops knl_uncore_imc_ops = {
+       .init_box       = snbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = knl_uncore_imc_enable_box,
+       .read_counter   = snbep_uncore_pci_read_counter,
+       .enable_event   = knl_uncore_imc_enable_event,
+       .disable_event  = snbep_uncore_pci_disable_event,
+};
+
+static struct intel_uncore_type knl_uncore_imc_uclk = {
+       .name                   = "imc_uclk",
+       .num_counters           = 4,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = KNL_UCLK_MSR_PMON_CTR0_LOW,
+       .event_ctl              = KNL_UCLK_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+       .fixed_ctl              = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+       .box_ctl                = KNL_UCLK_MSR_PMON_BOX_CTL,
+       .ops                    = &knl_uncore_imc_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_imc_dclk = {
+       .name                   = "imc",
+       .num_counters           = 4,
+       .num_boxes              = 6,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = KNL_MC0_CH0_MSR_PMON_CTR0_LOW,
+       .event_ctl              = KNL_MC0_CH0_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = KNL_MC0_CH0_MSR_PMON_FIXED_LOW,
+       .fixed_ctl              = KNL_MC0_CH0_MSR_PMON_FIXED_CTL,
+       .box_ctl                = KNL_MC0_CH0_MSR_PMON_BOX_CTL,
+       .ops                    = &knl_uncore_imc_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_uclk = {
+       .name                   = "edc_uclk",
+       .num_counters           = 4,
+       .num_boxes              = 8,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = KNL_UCLK_MSR_PMON_CTR0_LOW,
+       .event_ctl              = KNL_UCLK_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+       .fixed_ctl              = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+       .box_ctl                = KNL_UCLK_MSR_PMON_BOX_CTL,
+       .ops                    = &knl_uncore_imc_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_eclk = {
+       .name                   = "edc_eclk",
+       .num_counters           = 4,
+       .num_boxes              = 8,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW,
+       .event_ctl              = KNL_EDC0_ECLK_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW,
+       .fixed_ctl              = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL,
+       .box_ctl                = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL,
+       .ops                    = &knl_uncore_imc_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct event_constraint knl_uncore_m2pcie_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type knl_uncore_m2pcie = {
+       .name           = "m2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .constraints    = knl_uncore_m2pcie_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct attribute *knl_uncore_irp_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_qor.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute_group knl_uncore_irp_format_group = {
+       .name = "format",
+       .attrs = knl_uncore_irp_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_irp = {
+       .name                   = "irp",
+       .num_counters           = 2,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = KNL_IRP_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = KNL_IRP_PCI_PMON_BOX_CTL,
+       .ops                    = &snbep_uncore_pci_ops,
+       .format_group           = &knl_uncore_irp_format_group,
+};
+
+enum {
+       KNL_PCI_UNCORE_MC_UCLK,
+       KNL_PCI_UNCORE_MC_DCLK,
+       KNL_PCI_UNCORE_EDC_UCLK,
+       KNL_PCI_UNCORE_EDC_ECLK,
+       KNL_PCI_UNCORE_M2PCIE,
+       KNL_PCI_UNCORE_IRP,
+};
+
+static struct intel_uncore_type *knl_pci_uncores[] = {
+       [KNL_PCI_UNCORE_MC_UCLK]        = &knl_uncore_imc_uclk,
+       [KNL_PCI_UNCORE_MC_DCLK]        = &knl_uncore_imc_dclk,
+       [KNL_PCI_UNCORE_EDC_UCLK]       = &knl_uncore_edc_uclk,
+       [KNL_PCI_UNCORE_EDC_ECLK]       = &knl_uncore_edc_eclk,
+       [KNL_PCI_UNCORE_M2PCIE]         = &knl_uncore_m2pcie,
+       [KNL_PCI_UNCORE_IRP]            = &knl_uncore_irp,
+       NULL,
+};
+
+/*
+ * KNL uses a common PCI device ID for multiple instances of an Uncore PMU
+ * device type. Prior to KNL, each instance of a PMU device type had a unique
+ * device ID.
+ *
+ *     PCI Device ID   Uncore PMU Devices
+ *     ----------------------------------
+ *     0x7841          MC0 UClk, MC1 UClk
+ *     0x7843          MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2,
+ *                     MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2
+ *     0x7833          EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk,
+ *                     EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk
+ *     0x7835          EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk,
+ *                     EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk
+ *     0x7817          M2PCIe
+ *     0x7814          IRP
+ */
+
+static const struct pci_device_id knl_uncore_pci_ids[] = {
+       { /* MC UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+       },
+       { /* MC DClk Channel */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+       },
+       { /* EDC UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+       },
+       { /* EDC EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+       },
+       { /* M2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0),
+       },
+       { /* IRP */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0),
+       },
+       { /* end: all zeroes */ }
+};
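
A brief aside on the .driver_data values used throughout these ID tables: UNCORE_PCI_DEV_DATA(type, idx) appears to pack the uncore-type index and a device index into one word so the common uncore PCI probe code can recover both from pci_device_id::driver_data. The sketch below shows that assumed packing with hypothetical TOY_* macros; on KNL, where several boxes share one device ID, telling the instances apart is left to the probe code rather than to distinct table entries.

#include <stdio.h>

/* Assumed layout: uncore type index in the high byte, box/instance index in
 * the low byte.  A sketch of the idea, not a verbatim copy of the kernel macro. */
#define TOY_PCI_DEV_DATA(type, idx)     (((type) << 8) | (idx))
#define TOY_PCI_DEV_TYPE(data)          (((data) >> 8) & 0xff)
#define TOY_PCI_DEV_IDX(data)           ((data) & 0xff)

int main(void)
{
        /* two hypothetical MC UClk boxes that would share PCI ID 0x7841 */
        unsigned long mc0 = TOY_PCI_DEV_DATA(0, 0);     /* type 0 (MC UClk), box 0 */
        unsigned long mc1 = TOY_PCI_DEV_DATA(0, 1);     /* type 0 (MC UClk), box 1 */

        printf("mc0: type %lu idx %lu\n", TOY_PCI_DEV_TYPE(mc0), TOY_PCI_DEV_IDX(mc0));
        printf("mc1: type %lu idx %lu\n", TOY_PCI_DEV_TYPE(mc1), TOY_PCI_DEV_IDX(mc1));
        return 0;
}
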
+
+static struct pci_driver knl_uncore_pci_driver = {
+       .name           = "knl_uncore",
+       .id_table       = knl_uncore_pci_ids,
+};
+
+int knl_uncore_pci_init(void)
+{
+       int ret;
+
+       /* All KNL PCI based PMON units are on the same PCI bus except IRP */
+       ret = snb_pci2phy_map_init(0x7814); /* IRP */
+       if (ret)
+               return ret;
+       ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */
+       if (ret)
+               return ret;
+       uncore_pci_uncores = knl_pci_uncores;
+       uncore_pci_driver = &knl_uncore_pci_driver;
+       return 0;
+}
+
+/* end of KNL uncore support */
+
+/* Haswell-EP uncore support */
+static struct attribute *hswep_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       &format_attr_filter_tid2.attr,
+       &format_attr_filter_cid.attr,
+       NULL,
+};
+
+static struct attribute_group hswep_uncore_ubox_format_group = {
+       .name = "format",
+       .attrs = hswep_uncore_ubox_formats_attr,
+};
+
+static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       reg1->reg = HSWEP_U_MSR_PMON_FILTER;
+       reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK;
+       reg1->idx = 0;
+       return 0;
+}
+
+static struct intel_uncore_ops hswep_uncore_ubox_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = hswep_ubox_hw_config,
+       .get_constraint         = uncore_get_constraint,
+       .put_constraint         = uncore_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_ubox = {
+       .name                   = "ubox",
+       .num_counters           = 2,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 44,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &hswep_uncore_ubox_ops,
+       .format_group           = &hswep_uncore_ubox_format_group,
+};
+
+static struct attribute *hswep_uncore_cbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_filter_tid3.attr,
+       &format_attr_filter_link2.attr,
+       &format_attr_filter_state3.attr,
+       &format_attr_filter_nid2.attr,
+       &format_attr_filter_opc2.attr,
+       &format_attr_filter_nc.attr,
+       &format_attr_filter_c6.attr,
+       &format_attr_filter_isoc.attr,
+       NULL,
+};
+
+static struct attribute_group hswep_uncore_cbox_format_group = {
+       .name = "format",
+       .attrs = hswep_uncore_cbox_formats_attr,
+};
+
+static struct event_constraint hswep_uncore_cbox_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg hswep_uncore_cbox_extra_regs[] = {
+       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
+       EVENT_EXTRA_END
+};
+
+static u64 hswep_cbox_filter_mask(int fields)
+{
+       u64 mask = 0;
+       if (fields & 0x1)
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID;
+       if (fields & 0x2)
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK;
+       if (fields & 0x4)
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+       if (fields & 0x8)
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID;
+       if (fields & 0x10) {
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC;
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6;
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
+       }
+       return mask;
+}
+
+static struct event_constraint *
+hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask);
+}
+
+static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct extra_reg *er;
+       int idx = 0;
+
+       for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               idx |= er->idx;
+       }
+
+       if (idx) {
+               reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+                           HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+               reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx);
+               reg1->idx = idx;
+       }
+       return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+                                 struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               u64 filter = uncore_shared_reg_config(box, 0);
+               wrmsrl(reg1->reg, filter & 0xffffffff);
+               wrmsrl(reg1->reg + 1, filter >> 32);
+       }
+
+       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops hswep_uncore_cbox_ops = {
+       .init_box               = snbep_uncore_msr_init_box,
+       .disable_box            = snbep_uncore_msr_disable_box,
+       .enable_box             = snbep_uncore_msr_enable_box,
+       .disable_event          = snbep_uncore_msr_disable_event,
+       .enable_event           = hswep_cbox_enable_event,
+       .read_counter           = uncore_msr_read_counter,
+       .hw_config              = hswep_cbox_hw_config,
+       .get_constraint         = hswep_cbox_get_constraint,
+       .put_constraint         = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 4,
+       .num_boxes              = 18,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
+       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_CBO_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = hswep_uncore_cbox_constraints,
+       .ops                    = &hswep_uncore_cbox_ops,
+       .format_group           = &hswep_uncore_cbox_format_group,
+};
+
+/*
+ * Write SBOX Initialization register bit by bit to avoid spurious #GPs
+ */
+static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+
+       if (msr) {
+               u64 init = SNBEP_PMON_BOX_CTL_INT;
+               u64 flags = 0;
+               int i;
+
+               for_each_set_bit(i, (unsigned long *)&init, 64) {
+                       flags |= (1ULL << i);
+                       wrmsrl(msr, flags);
+               }
+       }
+}
+
+static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
+       __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .init_box               = hswep_uncore_sbox_msr_init_box
+};
+
+static struct attribute *hswep_uncore_sbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute_group hswep_uncore_sbox_format_group = {
+       .name = "format",
+       .attrs = hswep_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_type hswep_uncore_sbox = {
+       .name                   = "sbox",
+       .num_counters           = 4,
+       .num_boxes              = 4,
+       .perf_ctr_bits          = 44,
+       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
+       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
+       .ops                    = &hswep_uncore_sbox_msr_ops,
+       .format_group           = &hswep_uncore_sbox_format_group,
+};
+
+static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+       if (ev_sel >= 0xb && ev_sel <= 0xe) {
+               reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER;
+               reg1->idx = ev_sel - 0xb;
+               reg1->config = event->attr.config1 & (0xff << reg1->idx);
+       }
+       return 0;
+}
+
+static struct intel_uncore_ops hswep_uncore_pcu_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = hswep_pcu_hw_config,
+       .get_constraint         = snbep_pcu_get_constraint,
+       .put_constraint         = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_pcu = {
+       .name                   = "pcu",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = HSWEP_PCU_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_PCU_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_PCU_MSR_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &hswep_uncore_pcu_ops,
+       .format_group           = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *hswep_msr_uncores[] = {
+       &hswep_uncore_ubox,
+       &hswep_uncore_cbox,
+       &hswep_uncore_sbox,
+       &hswep_uncore_pcu,
+       NULL,
+};
+
+void hswep_uncore_cpu_init(void)
+{
+       int pkg = topology_phys_to_logical_pkg(0);
+
+       if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+
+       /* Detect 6-8 core systems with only two SBOXes */
+       if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
+               u32 capid4;
+
+               pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3],
+                                     0x94, &capid4);
+               if (((capid4 >> 6) & 0x3) == 0)
+                       hswep_uncore_sbox.num_boxes = 2;
+       }
+
+       uncore_msr_uncores = hswep_msr_uncores;
+}
+
+static struct intel_uncore_type hswep_uncore_ha = {
+       .name           = "ha",
+       .num_counters   = 5,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 48,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct uncore_event_desc hswep_uncore_imc_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0x00,umask=0x00"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+       { /* end: all zeroes */ },
+};
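
The cas_count_read/cas_count_write scale of 6.103515625e-5 with unit MiB follows from the assumption that each counted CAS transaction transfers one 64-byte cache line: 64 / 2^20 = 6.103515625e-5 MiB per event. A one-line check of that arithmetic:

#include <stdio.h>

int main(void)
{
        /* one CAS moves a 64-byte cache line; the scale turns raw counts into MiB */
        printf("%.12g\n", 64.0 / (1024.0 * 1024.0));    /* prints 6.103515625e-05 */
        return 0;
}
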
+
+static struct intel_uncore_type hswep_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 5,
+       .num_boxes      = 8,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+       .event_descs    = hswep_uncore_imc_events,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
+
+static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 count = 0;
+
+       pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+       pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+       return count;
+}
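
hswep_uncore_irp_read_counter fills the low and high halves of count in place through the (u32 *)&count pointer arithmetic, which works on x86 because the layout is little-endian. For clarity, a layout-independent way to assemble the same 64-bit value from two 32-bit config-space reads is sketched below, stand-alone and with hypothetical raw dwords in place of real PCI reads:

#include <stdio.h>
#include <stdint.h>

/* Combine the low and high dwords of a split 64-bit counter. */
static uint64_t combine(uint32_t lo, uint32_t hi)
{
        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        /* hypothetical dwords as two successive config-space reads would return them */
        printf("0x%llx\n", (unsigned long long)combine(0x89abcdefu, 0x01234567u));
        return 0;
}
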
+
+static struct intel_uncore_ops hswep_uncore_irp_ops = {
+       .init_box       = snbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = snbep_uncore_pci_enable_box,
+       .disable_event  = ivbep_uncore_irp_disable_event,
+       .enable_event   = ivbep_uncore_irp_enable_event,
+       .read_counter   = hswep_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type hswep_uncore_irp = {
+       .name                   = "irp",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .ops                    = &hswep_uncore_irp_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type hswep_uncore_qpi = {
+       .name                   = "qpi",
+       .num_counters           = 5,
+       .num_boxes              = 3,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &snbep_uncore_qpi_ops,
+       .format_group           = &snbep_uncore_qpi_format_group,
+};
+
+static struct event_constraint hswep_uncore_r2pcie_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x24, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x27, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2a, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type hswep_uncore_r2pcie = {
+       .name           = "r2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .constraints    = hswep_uncore_r2pcie_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x01, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type hswep_uncore_r3qpi = {
+       .name           = "r3qpi",
+       .num_counters   = 4,
+       .num_boxes      = 3,
+       .perf_ctr_bits  = 44,
+       .constraints    = hswep_uncore_r3qpi_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+       HSWEP_PCI_UNCORE_HA,
+       HSWEP_PCI_UNCORE_IMC,
+       HSWEP_PCI_UNCORE_IRP,
+       HSWEP_PCI_UNCORE_QPI,
+       HSWEP_PCI_UNCORE_R2PCIE,
+       HSWEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *hswep_pci_uncores[] = {
+       [HSWEP_PCI_UNCORE_HA]   = &hswep_uncore_ha,
+       [HSWEP_PCI_UNCORE_IMC]  = &hswep_uncore_imc,
+       [HSWEP_PCI_UNCORE_IRP]  = &hswep_uncore_irp,
+       [HSWEP_PCI_UNCORE_QPI]  = &hswep_uncore_qpi,
+       [HSWEP_PCI_UNCORE_R2PCIE]       = &hswep_uncore_r2pcie,
+       [HSWEP_PCI_UNCORE_R3QPI]        = &hswep_uncore_r3qpi,
+       NULL,
+};
+
+static const struct pci_device_id hswep_uncore_pci_ids[] = {
+       { /* Home Agent 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
+       },
+       { /* Home Agent 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1),
+       },
+       { /* MC0 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0),
+       },
+       { /* MC0 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1),
+       },
+       { /* MC0 Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2),
+       },
+       { /* MC0 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3),
+       },
+       { /* MC1 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4),
+       },
+       { /* MC1 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5),
+       },
+       { /* MC1 Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6),
+       },
+       { /* MC1 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7),
+       },
+       { /* IRP */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0),
+       },
+       { /* QPI0 Port 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0),
+       },
+       { /* QPI0 Port 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1),
+       },
+       { /* QPI1 Port 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2),
+       },
+       { /* R2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0),
+       },
+       { /* R3QPI0 Link 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0),
+       },
+       { /* R3QPI0 Link 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1),
+       },
+       { /* R3QPI1 Link 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2),
+       },
+       { /* QPI Port 0 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT0_FILTER),
+       },
+       { /* QPI Port 1 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT1_FILTER),
+       },
+       { /* PCU.3 (for Capability registers) */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  HSWEP_PCI_PCU_3),
+       },
+       { /* end: all zeroes */ }
+};
+
+static struct pci_driver hswep_uncore_pci_driver = {
+       .name           = "hswep_uncore",
+       .id_table       = hswep_uncore_pci_ids,
+};
+
+int hswep_uncore_pci_init(void)
+{
+       int ret = snbep_pci2phy_map_init(0x2f1e);
+       if (ret)
+               return ret;
+       uncore_pci_uncores = hswep_pci_uncores;
+       uncore_pci_driver = &hswep_uncore_pci_driver;
+       return 0;
+}
+/* end of Haswell-EP uncore support */
+
+/* BDX uncore support */
+
+static struct intel_uncore_type bdx_uncore_ubox = {
+       .name                   = "ubox",
+       .num_counters           = 2,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &ivbep_uncore_msr_ops,
+       .format_group           = &ivbep_uncore_ubox_format_group,
+};
+
+static struct event_constraint bdx_uncore_cbox_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 4,
+       .num_boxes              = 24,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
+       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_CBO_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = bdx_uncore_cbox_constraints,
+       .ops                    = &hswep_uncore_cbox_ops,
+       .format_group           = &hswep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_type bdx_uncore_sbox = {
+       .name                   = "sbox",
+       .num_counters           = 4,
+       .num_boxes              = 4,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
+       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
+       .ops                    = &hswep_uncore_sbox_msr_ops,
+       .format_group           = &hswep_uncore_sbox_format_group,
+};
+
+#define BDX_MSR_UNCORE_SBOX    3
+
+static struct intel_uncore_type *bdx_msr_uncores[] = {
+       &bdx_uncore_ubox,
+       &bdx_uncore_cbox,
+       &hswep_uncore_pcu,
+       &bdx_uncore_sbox,
+       NULL,
+};
+
+void bdx_uncore_cpu_init(void)
+{
+       if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+       uncore_msr_uncores = bdx_msr_uncores;
+
+       /* BDX-DE doesn't have SBOX */
+       if (boot_cpu_data.x86_model == 86)
+               uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+}
+
+static struct intel_uncore_type bdx_uncore_ha = {
+       .name           = "ha",
+       .num_counters   = 4,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 48,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type bdx_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 5,
+       .num_boxes      = 8,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+       .event_descs    = hswep_uncore_imc_events,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type bdx_uncore_irp = {
+       .name                   = "irp",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .ops                    = &hswep_uncore_irp_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type bdx_uncore_qpi = {
+       .name                   = "qpi",
+       .num_counters           = 4,
+       .num_boxes              = 3,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &snbep_uncore_qpi_ops,
+       .format_group           = &snbep_uncore_qpi_format_group,
+};
+
+static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r2pcie = {
+       .name           = "r2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .constraints    = bdx_uncore_r2pcie_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct event_constraint bdx_uncore_r3qpi_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x01, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r3qpi = {
+       .name           = "r3qpi",
+       .num_counters   = 3,
+       .num_boxes      = 3,
+       .perf_ctr_bits  = 48,
+       .constraints    = bdx_uncore_r3qpi_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+       BDX_PCI_UNCORE_HA,
+       BDX_PCI_UNCORE_IMC,
+       BDX_PCI_UNCORE_IRP,
+       BDX_PCI_UNCORE_QPI,
+       BDX_PCI_UNCORE_R2PCIE,
+       BDX_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *bdx_pci_uncores[] = {
+       [BDX_PCI_UNCORE_HA]     = &bdx_uncore_ha,
+       [BDX_PCI_UNCORE_IMC]    = &bdx_uncore_imc,
+       [BDX_PCI_UNCORE_IRP]    = &bdx_uncore_irp,
+       [BDX_PCI_UNCORE_QPI]    = &bdx_uncore_qpi,
+       [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
+       [BDX_PCI_UNCORE_R3QPI]  = &bdx_uncore_r3qpi,
+       NULL,
+};
+
+static const struct pci_device_id bdx_uncore_pci_ids[] = {
+       { /* Home Agent 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
+       },
+       { /* Home Agent 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1),
+       },
+       { /* MC0 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
+       },
+       { /* MC0 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
+       },
+       { /* MC0 Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2),
+       },
+       { /* MC0 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3),
+       },
+       { /* MC1 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4),
+       },
+       { /* MC1 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5),
+       },
+       { /* MC1 Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6),
+       },
+       { /* MC1 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7),
+       },
+       { /* IRP */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
+       },
+       { /* QPI0 Port 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0),
+       },
+       { /* QPI0 Port 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1),
+       },
+       { /* QPI1 Port 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2),
+       },
+       { /* R2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
+       },
+       { /* R3QPI0 Link 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0),
+       },
+       { /* R3QPI0 Link 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1),
+       },
+       { /* R3QPI1 Link 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2),
+       },
+       { /* QPI Port 0 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
+       },
+       { /* QPI Port 1 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
+       },
+       { /* QPI Port 2 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
+       },
+       { /* end: all zeroes */ }
+};
+
+static struct pci_driver bdx_uncore_pci_driver = {
+       .name           = "bdx_uncore",
+       .id_table       = bdx_uncore_pci_ids,
+};
+
+int bdx_uncore_pci_init(void)
+{
+       int ret = snbep_pci2phy_map_init(0x6f1e);
+
+       if (ret)
+               return ret;
+       uncore_pci_uncores = bdx_pci_uncores;
+       uncore_pci_driver = &bdx_uncore_pci_driver;
+       return 0;
+}
+
+/* end of BDX uncore support */
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
new file mode 100644 (file)
index 0000000..ec863b9
--- /dev/null
@@ -0,0 +1,241 @@
+#include <linux/perf_event.h>
+
+enum perf_msr_id {
+       PERF_MSR_TSC                    = 0,
+       PERF_MSR_APERF                  = 1,
+       PERF_MSR_MPERF                  = 2,
+       PERF_MSR_PPERF                  = 3,
+       PERF_MSR_SMI                    = 4,
+
+       PERF_MSR_EVENT_MAX,
+};
+
+static bool test_aperfmperf(int idx)
+{
+       return boot_cpu_has(X86_FEATURE_APERFMPERF);
+}
+
+static bool test_intel(int idx)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+           boot_cpu_data.x86 != 6)
+               return false;
+
+       switch (boot_cpu_data.x86_model) {
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 69: /* 22nm Haswell ULT */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case 76: /* 14nm Atom "Airmont"                   */
+               if (idx == PERF_MSR_SMI)
+                       return true;
+               break;
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
+struct perf_msr {
+       u64     msr;
+       struct  perf_pmu_events_attr *attr;
+       bool    (*test)(int idx);
+};
+
+PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
+PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
+
+static struct perf_msr msr[] = {
+       [PERF_MSR_TSC]   = { 0,                 &evattr_tsc,    NULL,            },
+       [PERF_MSR_APERF] = { MSR_IA32_APERF,    &evattr_aperf,  test_aperfmperf, },
+       [PERF_MSR_MPERF] = { MSR_IA32_MPERF,    &evattr_mperf,  test_aperfmperf, },
+       [PERF_MSR_PPERF] = { MSR_PPERF,         &evattr_pperf,  test_intel,      },
+       [PERF_MSR_SMI]   = { MSR_SMI_COUNT,     &evattr_smi,    test_intel,      },
+};
+
+static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
+       NULL,
+};
+
+static struct attribute_group events_attr_group = {
+       .name = "events",
+       .attrs = events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+static struct attribute *format_attrs[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+static struct attribute_group format_attr_group = {
+       .name = "format",
+       .attrs = format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+       &events_attr_group,
+       &format_attr_group,
+       NULL,
+};
+
+static int msr_event_init(struct perf_event *event)
+{
+       u64 cfg = event->attr.config;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       if (cfg >= PERF_MSR_EVENT_MAX)
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       if (!msr[cfg].attr)
+               return -EINVAL;
+
+       event->hw.idx = -1;
+       event->hw.event_base = msr[cfg].msr;
+       event->hw.config = cfg;
+
+       return 0;
+}
+
+static inline u64 msr_read_counter(struct perf_event *event)
+{
+       u64 now;
+
+       if (event->hw.event_base)
+               rdmsrl(event->hw.event_base, now);
+       else
+               rdtscll(now);
+
+       return now;
+}
+static void msr_event_update(struct perf_event *event)
+{
+       u64 prev, now;
+       s64 delta;
+
+       /* Careful, an NMI might modify the previous event value. */
+again:
+       prev = local64_read(&event->hw.prev_count);
+       now = msr_read_counter(event);
+
+       if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
+               goto again;
+
+       delta = now - prev;
+       if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
+               delta = sign_extend64(delta, 31);
+
+       local64_add(delta, &event->count);
+}
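
The prev_count/local64_cmpxchg retry above is the usual lockless "publish the new baseline, then add the delta" pattern: if an NMI updates prev_count between the read and the cmpxchg, the loop simply retries, and only the sign-extended delta is accumulated. A minimal stand-alone analog of the same idea, using C11 atomics rather than the kernel's local64_t helpers (update() and the variable names here are invented for illustration):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t prev_count;     /* last published raw counter value */
static _Atomic uint64_t total;          /* accumulated delta */

static void update(uint64_t now)
{
        uint64_t prev;

        do {
                prev = atomic_load(&prev_count);
                /* retry if a concurrent updater raced us between load and CAS */
        } while (!atomic_compare_exchange_weak(&prev_count, &prev, now));

        atomic_fetch_add(&total, now - prev);
}

int main(void)
{
        update(100);
        update(250);
        printf("accumulated %llu\n", (unsigned long long)atomic_load(&total));
        return 0;
}
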
+
+static void msr_event_start(struct perf_event *event, int flags)
+{
+       u64 now;
+
+       now = msr_read_counter(event);
+       local64_set(&event->hw.prev_count, now);
+}
+
+static void msr_event_stop(struct perf_event *event, int flags)
+{
+       msr_event_update(event);
+}
+
+static void msr_event_del(struct perf_event *event, int flags)
+{
+       msr_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int msr_event_add(struct perf_event *event, int flags)
+{
+       if (flags & PERF_EF_START)
+               msr_event_start(event, flags);
+
+       return 0;
+}
+
+static struct pmu pmu_msr = {
+       .task_ctx_nr    = perf_sw_context,
+       .attr_groups    = attr_groups,
+       .event_init     = msr_event_init,
+       .add            = msr_event_add,
+       .del            = msr_event_del,
+       .start          = msr_event_start,
+       .stop           = msr_event_stop,
+       .read           = msr_event_update,
+       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static int __init msr_init(void)
+{
+       int i, j = 0;
+
+       if (!boot_cpu_has(X86_FEATURE_TSC)) {
+               pr_cont("no MSR PMU driver.\n");
+               return 0;
+       }
+
+       /* Probe the MSRs. */
+       for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
+               u64 val;
+
+               /*
+                * Virt sucks arse; you cannot tell if an R/O MSR is present :/
+                */
+               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+                       msr[i].attr = NULL;
+       }
+
+       /* List remaining MSRs in the sysfs attrs. */
+       for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
+               if (msr[i].attr)
+                       events_attrs[j++] = &msr[i].attr->attr.attr;
+       }
+       events_attrs[j] = NULL;
+
+       perf_pmu_register(&pmu_msr, "msr", -1);
+
+       return 0;
+}
+device_initcall(msr_init);
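
For completeness, a minimal user-space sketch of how the msr PMU registered above can be used once this patch is in place. It assumes the PMU's dynamic type is exported at /sys/bus/event_source/devices/msr/type and that config 0x04 selects the smi event, matching the event strings defined earlier; counting typically requires sufficient privileges (perf_event_paranoid).

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        FILE *f = fopen("/sys/bus/event_source/devices/msr/type", "r");
        int type;

        if (!f || fscanf(f, "%d", &type) != 1)
                return 1;
        fclose(f);

        struct perf_event_attr attr;
        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;
        attr.config = 4;                /* assumed: PERF_MSR_SMI ("event=0x04") */

        /* one counting event on CPU 0, any task */
        int fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
        if (fd < 0)
                return 1;

        sleep(1);

        unsigned long long count;
        if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
                printf("SMIs in the last second: %llu\n", count);
        close(fd);
        return 0;
}
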
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
new file mode 100644 (file)
index 0000000..68155ca
--- /dev/null
@@ -0,0 +1,960 @@
+/*
+ * Performance events x86 architecture header
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+
+/* To enable MSR tracing please use the generic trace points. */
+
+/*
+ *          |   NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+       EXTRA_REG_NONE  = -1,   /* not used */
+
+       EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
+       EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
+       EXTRA_REG_LBR   = 2,    /* lbr_select */
+       EXTRA_REG_LDLAT = 3,    /* ld_lat_threshold */
+       EXTRA_REG_FE    = 4,    /* fe_* */
+
+       EXTRA_REG_MAX           /* number of entries needed */
+};
+
+struct event_constraint {
+       union {
+               unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+               u64             idxmsk64;
+       };
+       u64     code;
+       u64     cmask;
+       int     weight;
+       int     overlap;
+       int     flags;
+};
+/*
+ * struct hw_perf_event.flags flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT      0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST         0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW     0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED       0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW     0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW     0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL            0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC         0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED   0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT       0x0200 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD     0x0400 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING     0x0800 /* use freerunning PEBS */
+
+
+struct amd_nb {
+       int nb_id;  /* NorthBridge id */
+       int refcnt; /* reference count */
+       struct perf_event *owners[X86_PMC_IDX_MAX];
+       struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS                8
+
+/*
+ * Flags PEBS can handle without a PMI.
+ *
+ * TID can only be handled by flushing at context switch.
+ *
+ */
+#define PEBS_FREERUNNING_FLAGS \
+       (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+       PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+       PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+       PERF_SAMPLE_TRANSACTION)
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+       u64     bts_buffer_base;
+       u64     bts_index;
+       u64     bts_absolute_maximum;
+       u64     bts_interrupt_threshold;
+       u64     pebs_buffer_base;
+       u64     pebs_index;
+       u64     pebs_absolute_maximum;
+       u64     pebs_interrupt_threshold;
+       u64     pebs_event_reset[MAX_PEBS_EVENTS];
+};
+
+/*
+ * Per register state.
+ */
+struct er_account {
+       raw_spinlock_t          lock;   /* per-core: protect structure */
+       u64                 config;     /* extra MSR config */
+       u64                 reg;        /* extra MSR number */
+       atomic_t            ref;        /* reference count */
+};
+
+/*
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
+ */
+struct intel_shared_regs {
+       struct er_account       regs[EXTRA_REG_MAX];
+       int                     refcnt;         /* per-core: #HT threads */
+       unsigned                core_id;        /* per-core: core id */
+};
+
+enum intel_excl_state_type {
+       INTEL_EXCL_UNUSED    = 0, /* counter is unused */
+       INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
+       INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
+struct intel_excl_states {
+       enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+       bool sched_started; /* true if scheduling has started */
+};
+
+struct intel_excl_cntrs {
+       raw_spinlock_t  lock;
+
+       struct intel_excl_states states[2];
+
+       union {
+               u16     has_exclusive[2];
+               u32     exclusive_present;
+       };
+
+       int             refcnt;         /* per-core: #HT threads */
+       unsigned        core_id;        /* per-core: core id */
+};
+
+#define MAX_LBR_ENTRIES                32
+
+enum {
+       X86_PERF_KFREE_SHARED = 0,
+       X86_PERF_KFREE_EXCL   = 1,
+       X86_PERF_KFREE_MAX
+};
+
+struct cpu_hw_events {
+       /*
+        * Generic x86 PMC bits
+        */
+       struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
+       unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       unsigned long           running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       int                     enabled;
+
+       int                     n_events; /* the # of events in the below arrays */
+       int                     n_added;  /* the # last events in the below arrays;
+                                            they've never been enabled yet */
+       int                     n_txn;    /* the # last events in the below arrays;
+                                            added in the current transaction */
+       int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+       u64                     tags[X86_PMC_IDX_MAX];
+
+       struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+       struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+
+       int                     n_excl; /* the number of exclusive events */
+
+       unsigned int            txn_flags;
+       int                     is_fake;
+
+       /*
+        * Intel DebugStore bits
+        */
+       struct debug_store      *ds;
+       u64                     pebs_enabled;
+
+       /*
+        * Intel LBR bits
+        */
+       int                             lbr_users;
+       void                            *lbr_context;
+       struct perf_branch_stack        lbr_stack;
+       struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
+       struct er_account               *lbr_sel;
+       u64                             br_sel;
+
+       /*
+        * Intel host/guest exclude bits
+        */
+       u64                             intel_ctrl_guest_mask;
+       u64                             intel_ctrl_host_mask;
+       struct perf_guest_switch_msr    guest_switch_msrs[X86_PMC_IDX_MAX];
+
+       /*
+        * Intel checkpoint mask
+        */
+       u64                             intel_cp_status;
+
+       /*
+        * manage shared (per-core, per-cpu) registers
+        * used on Intel NHM/WSM/SNB
+        */
+       struct intel_shared_regs        *shared_regs;
+       /*
+        * manage exclusive counter access between hyperthread
+        */
+       struct event_constraint *constraint_list; /* in enable order */
+       struct intel_excl_cntrs         *excl_cntrs;
+       int excl_thread_id; /* 0 or 1 */
+
+       /*
+        * AMD specific bits
+        */
+       struct amd_nb                   *amd_nb;
+       /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
+       u64                             perf_ctr_virt_mask;
+
+       void                            *kfree_on_online[X86_PERF_KFREE_MAX];
+};
+
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
+       { .idxmsk64 = (n) },            \
+       .code = (c),                    \
+       .cmask = (m),                   \
+       .weight = (w),                  \
+       .overlap = (o),                 \
+       .flags = f,                     \
+}
+
+#define EVENT_CONSTRAINT(c, n, m)      \
+       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
+
+#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
+       __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+                          0, PERF_X86_EVENT_EXCL)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-committed system
+ * dramatically.  The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and its counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m)      \
+       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
+
+/*
+ * Constraint on the Event code.
+ */
+#define INTEL_EVENT_CONSTRAINT(c, n)   \
+       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
+
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  - in_tx
+ *  - in_tx_checkpointed
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
+#define FIXED_EVENT_CONSTRAINT(c, n)   \
+       EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
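
Illustrative sketch, not from this patch (the event code mirrors the architectural INST_RETIRED.ANY encoding): a fixed-counter constraint places the event on fixed counter n by setting bit 32+n of the index mask.

/* Pin event 0xc0 (INST_RETIRED.ANY) to fixed counter 0, i.e. idxmsk64 bit 32. */
static struct event_constraint example_fixed0 =
	FIXED_EVENT_CONSTRAINT(0x00c0, 0);
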
+
+/*
+ * Constraint on the Event code + UMask
+ */
+#define INTEL_UEVENT_CONSTRAINT(c, n)  \
+       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+
+/* Constraint on specific umask bit only + event */
+#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)      \
+       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
+
+/* Like UEVENT_CONSTRAINT, but match flags too */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)    \
+       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+#define INTEL_EXCLUEVT_CONSTRAINT(c, n)        \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+                          HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+
+#define INTEL_PLD_CONSTRAINT(c, n)     \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                          HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
+#define INTEL_PST_CONSTRAINT(c, n)     \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(code, n)    \
+       EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+
+/* Check flags and event code, and set the HSW store flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
+/* Check flags and event code, and set the HSW load flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, \
+                         PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW store flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, \
+                         PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW load flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, \
+                         PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW N/A flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
+
+
+/*
+ * We define the end marker as having a weight of -1
+ * to enable blacklisting of events using a counter bitmask
+ * of zero and thus a weight of zero.
+ * The end marker has a weight that cannot possibly be
+ * obtained from counting the bits in the bitmask.
+ */
+#define EVENT_CONSTRAINT_END { .weight = -1 }
+
+/*
+ * Check for end marker with weight == -1
+ */
+#define for_each_event_constraint(e, c)        \
+       for ((e) = (c); (e)->weight != -1; (e)++)
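
A minimal sketch (table contents invented, assumes this header is included) of how a constraint table is terminated and walked; the loop stops at the -1 weight of EVENT_CONSTRAINT_END:

static struct event_constraint example_constraints[] = {
	INTEL_EVENT_CONSTRAINT(0x2e, 0x3),	/* example: restrict to counters 0-1 */
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* example: fixed counter 0 */
	EVENT_CONSTRAINT_END
};

static void example_dump_constraints(void)
{
	struct event_constraint *c;

	for_each_event_constraint(c, example_constraints)
		pr_debug("code=0x%llx idxmsk=0x%llx\n", c->code, c->idxmsk64);
}
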
+
+/*
+ * Extra registers for specific events.
+ *
+ * Some events need large masks and require external MSRs.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMUs of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_event.
+ */
+struct extra_reg {
+       unsigned int            event;
+       unsigned int            msr;
+       u64                     config_mask;
+       u64                     valid_mask;
+       int                     idx;  /* per_xxx->regs[] reg index */
+       bool                    extra_msr_access;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {     \
+       .event = (e),                   \
+       .msr = (ms),                    \
+       .config_mask = (m),             \
+       .valid_mask = (vm),             \
+       .idx = EXTRA_REG_##i,           \
+       .extra_msr_access = true,       \
+       }
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)     \
+       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+                       ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+       INTEL_UEVENT_EXTRA_REG(c, \
+                              MSR_PEBS_LD_LAT_THRESHOLD, \
+                              0xffff, \
+                              LDLAT)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
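
Illustrative sketch of an extra_regs table (the event code and valid mask are placeholders in the offcore-response style, not taken from this patch): one entry per event that needs a companion MSR, terminated by EVENT_EXTRA_END.

static struct extra_reg example_extra_regs[] __read_mostly = {
	/* OFFCORE_RESPONSE_0-style event gets its filter from MSR_OFFCORE_RSP_0 */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffull, RSP_0),
	EVENT_EXTRA_END
};
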
+
+union perf_capabilities {
+       struct {
+               u64     lbr_format:6;
+               u64     pebs_trap:1;
+               u64     pebs_arch_reg:1;
+               u64     pebs_format:4;
+               u64     smm_freeze:1;
+               /*
+                * PMU supports separate counter range for writing
+                * values > 32bit.
+                */
+               u64     full_width_write:1;
+       };
+       u64     capabilities;
+};
+
+struct x86_pmu_quirk {
+       struct x86_pmu_quirk *next;
+       void (*func)(void);
+};
+
+union x86_pmu_config {
+       struct {
+               u64 event:8,
+                   umask:8,
+                   usr:1,
+                   os:1,
+                   edge:1,
+                   pc:1,
+                   interrupt:1,
+                   __reserved1:1,
+                   en:1,
+                   inv:1,
+                   cmask:8,
+                   event2:4,
+                   __reserved2:4,
+                   go:1,
+                   ho:1;
+       } bits;
+       u64 value;
+};
+
+#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
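
For example (illustrative; the encoding shown is the common architectural unhalted-cycles event), X86_CONFIG() assembles a raw PERFEVTSEL value from named bitfields:

static u64 example_cycles_config(void)
{
	/* CPU_CLK_UNHALTED.THREAD_P: event 0x3c, umask 0x00, no cmask/inv */
	return X86_CONFIG(.event = 0x3c, .umask = 0x00, .inv = 0, .cmask = 0);
}
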
+
+enum {
+       x86_lbr_exclusive_lbr,
+       x86_lbr_exclusive_bts,
+       x86_lbr_exclusive_pt,
+       x86_lbr_exclusive_max,
+};
+
+/*
+ * struct x86_pmu - generic x86 pmu
+ */
+struct x86_pmu {
+       /*
+        * Generic x86 PMC bits
+        */
+       const char      *name;
+       int             version;
+       int             (*handle_irq)(struct pt_regs *);
+       void            (*disable_all)(void);
+       void            (*enable_all)(int added);
+       void            (*enable)(struct perf_event *);
+       void            (*disable)(struct perf_event *);
+       int             (*hw_config)(struct perf_event *event);
+       int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+       unsigned        eventsel;
+       unsigned        perfctr;
+       int             (*addr_offset)(int index, bool eventsel);
+       int             (*rdpmc_index)(int index);
+       u64             (*event_map)(int);
+       int             max_events;
+       int             num_counters;
+       int             num_counters_fixed;
+       int             cntval_bits;
+       u64             cntval_mask;
+       union {
+                       unsigned long events_maskl;
+                       unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
+       };
+       int             events_mask_len;
+       int             apic;
+       u64             max_period;
+       struct event_constraint *
+                       (*get_event_constraints)(struct cpu_hw_events *cpuc,
+                                                int idx,
+                                                struct perf_event *event);
+
+       void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
+                                                struct perf_event *event);
+
+       void            (*start_scheduling)(struct cpu_hw_events *cpuc);
+
+       void            (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+
+       void            (*stop_scheduling)(struct cpu_hw_events *cpuc);
+
+       struct event_constraint *event_constraints;
+       struct x86_pmu_quirk *quirks;
+       int             perfctr_second_write;
+       bool            late_ack;
+       unsigned        (*limit_period)(struct perf_event *event, unsigned l);
+
+       /*
+        * sysfs attrs
+        */
+       int             attr_rdpmc_broken;
+       int             attr_rdpmc;
+       struct attribute **format_attrs;
+       struct attribute **event_attrs;
+
+       ssize_t         (*events_sysfs_show)(char *page, u64 config);
+       struct attribute **cpu_events;
+
+       /*
+        * CPU Hotplug hooks
+        */
+       int             (*cpu_prepare)(int cpu);
+       void            (*cpu_starting)(int cpu);
+       void            (*cpu_dying)(int cpu);
+       void            (*cpu_dead)(int cpu);
+
+       void            (*check_microcode)(void);
+       void            (*sched_task)(struct perf_event_context *ctx,
+                                     bool sched_in);
+
+       /*
+        * Intel Arch Perfmon v2+
+        */
+       u64                     intel_ctrl;
+       union perf_capabilities intel_cap;
+
+       /*
+        * Intel DebugStore bits
+        */
+       unsigned int    bts             :1,
+                       bts_active      :1,
+                       pebs            :1,
+                       pebs_active     :1,
+                       pebs_broken     :1,
+                       pebs_prec_dist  :1;
+       int             pebs_record_size;
+       int             pebs_buffer_size;
+       void            (*drain_pebs)(struct pt_regs *regs);
+       struct event_constraint *pebs_constraints;
+       void            (*pebs_aliases)(struct perf_event *event);
+       int             max_pebs_events;
+       unsigned long   free_running_flags;
+
+       /*
+        * Intel LBR
+        */
+       unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+       int             lbr_nr;                    /* hardware stack size */
+       u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
+       const int       *lbr_sel_map;              /* lbr_select mappings */
+       bool            lbr_double_abort;          /* duplicated lbr aborts */
+
+       /*
+        * Intel PT/LBR/BTS are exclusive
+        */
+       atomic_t        lbr_exclusive[x86_lbr_exclusive_max];
+
+       /*
+        * Extra registers for events
+        */
+       struct extra_reg *extra_regs;
+       unsigned int flags;
+
+       /*
+        * Intel host/guest support (KVM)
+        */
+       struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+};
+
+struct x86_perf_task_context {
+       u64 lbr_from[MAX_LBR_ENTRIES];
+       u64 lbr_to[MAX_LBR_ENTRIES];
+       u64 lbr_info[MAX_LBR_ENTRIES];
+       int tos;
+       int lbr_callstack_users;
+       int lbr_stack_state;
+};
+
+#define x86_add_quirk(func_)                                           \
+do {                                                                   \
+       static struct x86_pmu_quirk __quirk __initdata = {              \
+               .func = func_,                                          \
+       };                                                              \
+       __quirk.next = x86_pmu.quirks;                                  \
+       x86_pmu.quirks = &__quirk;                                      \
+} while (0)
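
A hedged sketch of registering a quirk (the quirk body is invented): the macro chains a static __initdata node onto x86_pmu.quirks, so it must be invoked from __init code.

static void __init example_quirk(void)
{
	/* e.g. hide a fixed counter the hardware reports but cannot use */
	if (x86_pmu.num_counters_fixed > 2)
		x86_pmu.num_counters_fixed = 2;
}

static int __init example_pmu_setup(void)
{
	x86_add_quirk(example_quirk);	/* runs once during PMU init */
	return 0;
}
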
+
+/*
+ * x86_pmu flags
+ */
+#define PMU_FL_NO_HT_SHARING   0x1 /* no hyper-threading resource sharing */
+#define PMU_FL_HAS_RSP_1       0x2 /* has 2 equivalent offcore_rsp regs   */
+#define PMU_FL_EXCL_CNTRS      0x4 /* has exclusive counter requirements  */
+#define PMU_FL_EXCL_ENABLED    0x8 /* exclusive counter active */
+
+#define EVENT_VAR(_id)  event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id)                                         \
+static struct perf_pmu_events_attr EVENT_VAR(_id) = {                  \
+       .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+       .id             = PERF_COUNT_HW_##_id,                          \
+       .event_str      = NULL,                                         \
+};
+
+#define EVENT_ATTR_STR(_name, v, str)                                  \
+static struct perf_pmu_events_attr event_attr_##v = {                  \
+       .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+       .id             = 0,                                            \
+       .event_str      = str,                                          \
+};
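
For instance (the attribute name and encoding string are illustrative only), a sysfs event alias is declared with a fixed encoding string and shows up under the PMU's events directory:

/* Creates event_attr_mem_ld_example, exported as ".../events/mem-loads". */
EVENT_ATTR_STR(mem-loads, mem_ld_example, "event=0xcd,umask=0x1,ldlat=3");
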
+
+extern struct x86_pmu x86_pmu __read_mostly;
+
+static inline bool x86_pmu_has_lbr_callstack(void)
+{
+       return  x86_pmu.lbr_sel_map &&
+               x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+}
+
+DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+int x86_perf_event_set_period(struct perf_event *event);
+
+/*
+ * Generalized hw caching related hw_event table, filled
+ * in on a per model basis. A value of 0 means
+ * 'not supported', -1 means 'hw_event makes no sense on
+ * this CPU', any other value means the raw hw_event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+extern u64 __read_mostly hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+extern u64 __read_mostly hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+u64 x86_perf_event_update(struct perf_event *event);
+
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+       return x86_pmu.eventsel + (x86_pmu.addr_offset ?
+                                  x86_pmu.addr_offset(index, true) : index);
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+       return x86_pmu.perfctr + (x86_pmu.addr_offset ?
+                                 x86_pmu.addr_offset(index, false) : index);
+}
+
+static inline int x86_pmu_rdpmc_index(int index)
+{
+       return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
+}
+
+int x86_add_exclusive(unsigned int what);
+
+void x86_del_exclusive(unsigned int what);
+
+int x86_reserve_hardware(void);
+
+void x86_release_hardware(void);
+
+void hw_perf_lbr_event_destroy(struct perf_event *event);
+
+int x86_setup_perfctr(struct perf_event *event);
+
+int x86_pmu_hw_config(struct perf_event *event);
+
+void x86_pmu_disable_all(void);
+
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+                                         u64 enable_mask)
+{
+       u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
+
+       if (hwc->extra_reg.reg)
+               wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+       wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
+}
+
+void x86_pmu_enable_all(int added);
+
+int perf_assign_events(struct event_constraint **constraints, int n,
+                       int wmin, int wmax, int gpmax, int *assign);
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
+
+void x86_pmu_stop(struct perf_event *event, int flags);
+
+static inline void x86_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       wrmsrl(hwc->config_base, hwc->config);
+}
+
+void x86_pmu_enable_event(struct perf_event *event);
+
+int x86_pmu_handle_irq(struct pt_regs *regs);
+
+extern struct event_constraint emptyconstraint;
+
+extern struct event_constraint unconstrained;
+
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+       return ip > PAGE_OFFSET;
+#else
+       return (long)ip < 0;
+#endif
+}
+
+/*
+ * Not all PMUs provide the right context information to place the reported IP
+ * into full context. Specifically segment registers are typically not
+ * supplied.
+ *
+ * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ * to reflect this.
+ *
+ * Intel PEBS/LBR appear to typically provide the effective address, nothing
+ * much we can do about that but pray and treat it like a linear address.
+ */
+static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+{
+       regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
+       if (regs->flags & X86_VM_MASK)
+               regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
+       regs->ip = ip;
+}
+
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
+ssize_t intel_event_sysfs_show(char *page, u64 config);
+
+struct attribute **merge_attr(struct attribute **a, struct attribute **b);
+
+#ifdef CONFIG_CPU_SUP_AMD
+
+int amd_pmu_init(void);
+
+#else /* CONFIG_CPU_SUP_AMD */
+
+static inline int amd_pmu_init(void)
+{
+       return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_AMD */
+
+#ifdef CONFIG_CPU_SUP_INTEL
+
+static inline bool intel_pmu_has_bts(struct perf_event *event)
+{
+       if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+           !event->attr.freq && event->hw.sample_period == 1)
+               return true;
+
+       return false;
+}
+
+int intel_pmu_save_and_restart(struct perf_event *event);
+
+struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event);
+
+struct intel_shared_regs *allocate_shared_regs(int cpu);
+
+int intel_pmu_init(void);
+
+void init_debug_store_on_cpu(int cpu);
+
+void fini_debug_store_on_cpu(int cpu);
+
+void release_ds_buffers(void);
+
+void reserve_ds_buffers(void);
+
+extern struct event_constraint bts_constraint;
+
+void intel_pmu_enable_bts(u64 config);
+
+void intel_pmu_disable_bts(void);
+
+int intel_pmu_drain_bts_buffer(void);
+
+extern struct event_constraint intel_core2_pebs_event_constraints[];
+
+extern struct event_constraint intel_atom_pebs_event_constraints[];
+
+extern struct event_constraint intel_slm_pebs_event_constraints[];
+
+extern struct event_constraint intel_nehalem_pebs_event_constraints[];
+
+extern struct event_constraint intel_westmere_pebs_event_constraints[];
+
+extern struct event_constraint intel_snb_pebs_event_constraints[];
+
+extern struct event_constraint intel_ivb_pebs_event_constraints[];
+
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
+extern struct event_constraint intel_bdw_pebs_event_constraints[];
+
+extern struct event_constraint intel_skl_pebs_event_constraints[];
+
+struct event_constraint *intel_pebs_constraints(struct perf_event *event);
+
+void intel_pmu_pebs_enable(struct perf_event *event);
+
+void intel_pmu_pebs_disable(struct perf_event *event);
+
+void intel_pmu_pebs_enable_all(void);
+
+void intel_pmu_pebs_disable_all(void);
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
+void intel_ds_init(void);
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
+void intel_pmu_lbr_reset(void);
+
+void intel_pmu_lbr_enable(struct perf_event *event);
+
+void intel_pmu_lbr_disable(struct perf_event *event);
+
+void intel_pmu_lbr_enable_all(bool pmi);
+
+void intel_pmu_lbr_disable_all(void);
+
+void intel_pmu_lbr_read(void);
+
+void intel_pmu_lbr_init_core(void);
+
+void intel_pmu_lbr_init_nhm(void);
+
+void intel_pmu_lbr_init_atom(void);
+
+void intel_pmu_lbr_init_snb(void);
+
+void intel_pmu_lbr_init_hsw(void);
+
+void intel_pmu_lbr_init_skl(void);
+
+void intel_pmu_lbr_init_knl(void);
+
+void intel_pmu_pebs_data_source_nhm(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
+void intel_pt_interrupt(void);
+
+int intel_bts_interrupt(void);
+
+void intel_bts_enable_local(void);
+
+void intel_bts_disable_local(void);
+
+int p4_pmu_init(void);
+
+int p6_pmu_init(void);
+
+int knc_pmu_init(void);
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+                         char *page);
+
+static inline int is_ht_workaround_enabled(void)
+{
+       return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
+}
+
+#else /* CONFIG_CPU_SUP_INTEL */
+
+static inline void reserve_ds_buffers(void)
+{
+}
+
+static inline void release_ds_buffers(void)
+{
+}
+
+static inline int intel_pmu_init(void)
+{
+       return 0;
+}
+
+static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+       return NULL;
+}
+
+static inline int is_ht_workaround_enabled(void)
+{
+       return 0;
+}
+#endif /* CONFIG_CPU_SUP_INTEL */
index 1514753fd43553e079696712b48a8d08b6966e98..15340e36ddcb3364e16eb63cd61c61a42676d756 100644 (file)
@@ -256,7 +256,7 @@ extern int force_personality32;
    instruction set this CPU supports.  This could be done in user space,
    but it's not easy, and we've already done it here.  */
 
-#define ELF_HWCAP              (boot_cpu_data.x86_capability[0])
+#define ELF_HWCAP              (boot_cpu_data.x86_capability[CPUID_1_EDX])
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
index 7bcb861a04e5cf5b32c2c26919756fa29353cb07..5a2ed3ed2f261893d5de08022ea3259198c8c0fe 100644 (file)
@@ -165,6 +165,7 @@ struct x86_pmu_capability {
 #define GLOBAL_STATUS_ASIF                             BIT_ULL(60)
 #define GLOBAL_STATUS_COUNTERS_FROZEN                  BIT_ULL(59)
 #define GLOBAL_STATUS_LBRS_FROZEN                      BIT_ULL(58)
+#define GLOBAL_STATUS_TRACE_TOPAPMI                    BIT_ULL(55)
 
 /*
  * IBS cpuid feature detection
index 20c11d1aa4ccce11b0709c3fe56092a5505151cf..813384ef811a03273678f693ca60be23d8bfd216 100644 (file)
@@ -129,6 +129,8 @@ struct cpuinfo_x86 {
        u16                     booted_cores;
        /* Physical processor id: */
        u16                     phys_proc_id;
+       /* Logical processor id: */
+       u16                     logical_proc_id;
        /* Core id: */
        u16                     cpu_core_id;
        /* Compute unit id */
index 0fb46482dfde160b9dcfad6ef57841e07c3830e2..7f991bd5031b24947e0773265023ab70b934a7b8 100644 (file)
@@ -119,12 +119,23 @@ static inline void setup_node_to_cpumask_map(void) { }
 
 extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
+#define topology_logical_package_id(cpu)       (cpu_data(cpu).logical_proc_id)
 #define topology_physical_package_id(cpu)      (cpu_data(cpu).phys_proc_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu).cpu_core_id)
 
 #ifdef ENABLE_TOPO_DEFINES
 #define topology_core_cpumask(cpu)             (per_cpu(cpu_core_map, cpu))
 #define topology_sibling_cpumask(cpu)          (per_cpu(cpu_sibling_map, cpu))
+
+extern unsigned int __max_logical_packages;
+#define topology_max_packages()                        (__max_logical_packages)
+int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+extern int topology_phys_to_logical_pkg(unsigned int pkg);
+#else
+#define topology_max_packages()                        (1)
+static inline int
+topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
index 8a5cddac7d444084c223e95948944b5099d15003..531b9611c51d5d4b42f721f45609a5f4ecd3cce2 100644 (file)
@@ -2077,6 +2077,20 @@ int generic_processor_info(int apicid, int version)
        } else
                cpu = cpumask_next_zero(-1, cpu_present_mask);
 
+       /*
+        * This can happen on physical hotplug. The sanity check at boot time
+        * is done from native_smp_prepare_cpus() after num_possible_cpus() is
+        * established.
+        */
+       if (topology_update_package_map(apicid, cpu) < 0) {
+               int thiscpu = max + disabled_cpus;
+
+               pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
+                          thiscpu, apicid);
+               disabled_cpus++;
+               return -ENOSPC;
+       }
+
        /*
         * Validate version
         */
index 58031303e30488c540d609f95d0693918c09fa64..7a60424d63fa4853c37941725bfaa73372c81175 100644 (file)
@@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR)               += centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)     += transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)           += umc.o
 
-obj-$(CONFIG_PERF_EVENTS)              += perf_event.o
-
-ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD)              += perf_event_amd.o perf_event_amd_uncore.o
-ifdef CONFIG_AMD_IOMMU
-obj-$(CONFIG_CPU_SUP_AMD)              += perf_event_amd_iommu.o
-endif
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_p6.o perf_event_knc.o perf_event_p4.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_intel_rapl.o perf_event_intel_cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_intel_pt.o perf_event_intel_bts.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_intel_cstate.o
-
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
-                                          perf_event_intel_uncore_snb.o \
-                                          perf_event_intel_uncore_snbep.o \
-                                          perf_event_intel_uncore_nhmex.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_msr.o
-obj-$(CONFIG_CPU_SUP_AMD)              += perf_event_msr.o
-endif
-
-
 obj-$(CONFIG_X86_MCE)                  += mcheck/
 obj-$(CONFIG_MTRR)                     += mtrr/
 obj-$(CONFIG_MICROCODE)                        += microcode/
 
-obj-$(CONFIG_X86_LOCAL_APIC)           += perfctr-watchdog.o perf_event_amd_ibs.o
+obj-$(CONFIG_X86_LOCAL_APIC)           += perfctr-watchdog.o
 
 obj-$(CONFIG_HYPERVISOR_GUEST)         += vmware.o hypervisor.o mshyperv.o
 
index a07956a08936e8ea56c1a73075d700d281895577..97c59fd60702f4ceacad7c4d46a3c8d5de7dacbf 100644 (file)
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                void (*f_vide)(void);
                u64 d, d2;
 
-               printk(KERN_INFO "AMD K6 stepping B detected - ");
+               pr_info("AMD K6 stepping B detected - ");
 
                /*
                 * It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                d = d2-d;
 
                if (d > 20*K6_BUG_LOOP)
-                       printk(KERN_CONT
-                               "system stability may be impaired when more than 32 MB are used.\n");
+                       pr_cont("system stability may be impaired when more than 32 MB are used.\n");
                else
-                       printk(KERN_CONT "probably OK (after B9730xxxx).\n");
+                       pr_cont("probably OK (after B9730xxxx).\n");
        }
 
        /* K6 with old style WHCR */
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                        wbinvd();
                        wrmsr(MSR_K6_WHCR, l, h);
                        local_irq_restore(flags);
-                       printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+                       pr_info("Enabling old style K6 write allocation for %d Mb\n",
                                mbytes);
                }
                return;
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                        wbinvd();
                        wrmsr(MSR_K6_WHCR, l, h);
                        local_irq_restore(flags);
-                       printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+                       pr_info("Enabling new style K6 write allocation for %d Mb\n",
                                mbytes);
                }
 
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         */
        if (c->x86_model >= 6 && c->x86_model <= 10) {
                if (!cpu_has(c, X86_FEATURE_XMM)) {
-                       printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+                       pr_info("Enabling disabled K7/SSE Support.\n");
                        msr_clear_bit(MSR_K7_HWCR, 15);
                        set_cpu_cap(c, X86_FEATURE_XMM);
                }
@@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
        if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
                rdmsr(MSR_K7_CLK_CTL, l, h);
                if ((l & 0xfff00000) != 0x20000000) {
-                       printk(KERN_INFO
-                           "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
-                                       l, ((l & 0x000fffff)|0x20000000));
+                       pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+                               l, ((l & 0x000fffff)|0x20000000));
                        wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
                }
        }
@@ -485,7 +483,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
                if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
                        unsigned long pfn = tseg >> PAGE_SHIFT;
 
-                       printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+                       pr_debug("tseg: %010llx\n", tseg);
                        if (pfn_range_is_mapped(pfn, pfn + 1))
                                set_memory_4k((unsigned long)__va(tseg), 1);
                }
@@ -500,8 +498,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
                        rdmsrl(MSR_K7_HWCR, val);
                        if (!(val & BIT(24)))
-                               printk(KERN_WARNING FW_BUG "TSC doesn't count "
-                                       "with P0 frequency!\n");
+                               pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
                }
        }
 
index 04f0fe5af83ec34bb4fd6ec09fd3cc8db1c7ee07..a972ac4c7e7df05238e78b08e093cded47640fcd 100644 (file)
@@ -15,7 +15,7 @@ void __init check_bugs(void)
 {
        identify_boot_cpu();
 #if !defined(CONFIG_SMP)
-       printk(KERN_INFO "CPU: ");
+       pr_info("CPU: ");
        print_cpu_info(&boot_cpu_data);
 #endif
        alternative_instructions();
index ae20be6e483c77703413cf36e9db065134da01f3..ce197bb7c1294f0f1138c78d1514c78e0d92b910 100644 (file)
@@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c)
                        rdmsr(MSR_VIA_FCR, lo, hi);
                        lo |= ACE_FCR;          /* enable ACE unit */
                        wrmsr(MSR_VIA_FCR, lo, hi);
-                       printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+                       pr_info("CPU: Enabled ACE h/w crypto\n");
                }
 
                /* enable RNG unit, if present and disabled */
@@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c)
                        rdmsr(MSR_VIA_RNG, lo, hi);
                        lo |= RNG_ENABLE;       /* enable RNG unit */
                        wrmsr(MSR_VIA_RNG, lo, hi);
-                       printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+                       pr_info("CPU: Enabled h/w RNG\n");
                }
 
                /* store Centaur Extended Feature Flags as
@@ -130,7 +130,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
                        name = "C6";
                        fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
                        fcr_clr = DPDC;
-                       printk(KERN_NOTICE "Disabling bugged TSC.\n");
+                       pr_notice("Disabling bugged TSC.\n");
                        clear_cpu_cap(c, X86_FEATURE_TSC);
                        break;
                case 8:
@@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c)
                newlo = (lo|fcr_set) & (~fcr_clr);
 
                if (newlo != lo) {
-                       printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+                       pr_info("Centaur FCR was 0x%X now 0x%X\n",
                                lo, newlo);
                        wrmsr(MSR_IDT_FCR1, newlo, hi);
                } else {
-                       printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+                       pr_info("Centaur FCR is 0x%X\n", lo);
                }
                /* Emulate MTRRs using Centaur's MCR. */
                set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
index 37830de8f60a8f0d8f8da27d409da72ce1f13551..81cf716f6f97f2438b386c7e3eb3ca073e16d0c8 100644 (file)
@@ -228,7 +228,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
        lo |= 0x200000;
        wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
 
-       printk(KERN_NOTICE "CPU serial number disabled.\n");
+       pr_notice("CPU serial number disabled.\n");
        clear_cpu_cap(c, X86_FEATURE_PN);
 
        /* Disabling the serial number may affect the cpuid level */
@@ -329,9 +329,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
                if (!warn)
                        continue;
 
-               printk(KERN_WARNING
-                      "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
-                               x86_cap_flag(df->feature), df->level);
+               pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+                       x86_cap_flag(df->feature), df->level);
        }
 }
 
@@ -510,7 +509,7 @@ void detect_ht(struct cpuinfo_x86 *c)
        smp_num_siblings = (ebx & 0xff0000) >> 16;
 
        if (smp_num_siblings == 1) {
-               printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
+               pr_info_once("CPU0: Hyper-Threading is disabled\n");
                goto out;
        }
 
@@ -531,10 +530,10 @@ void detect_ht(struct cpuinfo_x86 *c)
 
 out:
        if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
-               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
-                      c->phys_proc_id);
-               printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
-                      c->cpu_core_id);
+               pr_info("CPU: Physical Processor ID: %d\n",
+                       c->phys_proc_id);
+               pr_info("CPU: Processor Core ID: %d\n",
+                       c->cpu_core_id);
                printed = 1;
        }
 #endif
@@ -559,9 +558,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c)
                }
        }
 
-       printk_once(KERN_ERR
-                       "CPU: vendor_id '%s' unknown, using generic init.\n" \
-                       "CPU: Your system may be unstable.\n", v);
+       pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
+                   "CPU: Your system may be unstable.\n", v);
 
        c->x86_vendor = X86_VENDOR_UNKNOWN;
        this_cpu = &default_cpu;
@@ -760,7 +758,7 @@ void __init early_cpu_init(void)
        int count = 0;
 
 #ifdef CONFIG_PROCESSOR_SELECT
-       printk(KERN_INFO "KERNEL supported cpus:\n");
+       pr_info("KERNEL supported cpus:\n");
 #endif
 
        for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
@@ -778,7 +776,7 @@ void __init early_cpu_init(void)
                        for (j = 0; j < 2; j++) {
                                if (!cpudev->c_ident[j])
                                        continue;
-                               printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
+                               pr_info("  %s %s\n", cpudev->c_vendor,
                                        cpudev->c_ident[j]);
                        }
                }
@@ -977,6 +975,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_NUMA
        numa_add_cpu(smp_processor_id());
 #endif
+       /* The boot/hotplug time assignment got cleared, restore it */
+       c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
 }
 
 /*
@@ -1061,7 +1061,7 @@ static void __print_cpu_msr(void)
                for (index = index_min; index < index_max; index++) {
                        if (rdmsrl_safe(index, &val))
                                continue;
-                       printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+                       pr_info(" MSR%08x: %016llx\n", index, val);
                }
        }
 }
@@ -1100,19 +1100,19 @@ void print_cpu_info(struct cpuinfo_x86 *c)
        }
 
        if (vendor && !strstr(c->x86_model_id, vendor))
-               printk(KERN_CONT "%s ", vendor);
+               pr_cont("%s ", vendor);
 
        if (c->x86_model_id[0])
-               printk(KERN_CONT "%s", c->x86_model_id);
+               pr_cont("%s", c->x86_model_id);
        else
-               printk(KERN_CONT "%d86", c->x86);
+               pr_cont("%d86", c->x86);
 
-       printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
+       pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
 
        if (c->x86_mask || c->cpuid_level >= 0)
-               printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
+               pr_cont(", stepping: 0x%x)\n", c->x86_mask);
        else
-               printk(KERN_CONT ")\n");
+               pr_cont(")\n");
 
        print_cpu_msr(c);
 }
@@ -1438,7 +1438,7 @@ void cpu_init(void)
 
        show_ucode_info_early();
 
-       printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+       pr_info("Initializing CPU#%d\n", cpu);
 
        if (cpu_feature_enabled(X86_FEATURE_VME) ||
            cpu_has_tsc ||
index aaf152e79637384781d264afbff76a58da9489cc..187bb583d0dfbccd6bf3e8c58a4973a0bff9ddd7 100644 (file)
@@ -103,7 +103,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c)
                local_irq_restore(flags);
 
                if (ccr5 & 2) { /* possible wrong calibration done */
-                       printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+                       pr_info("Recalibrating delay loop with SLOP bit reset\n");
                        calibrate_delay();
                        c->loops_per_jiffy = loops_per_jiffy;
                }
@@ -115,7 +115,7 @@ static void set_cx86_reorder(void)
 {
        u8 ccr3;
 
-       printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+       pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n");
        ccr3 = getCx86(CX86_CCR3);
        setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
 
@@ -128,7 +128,7 @@ static void set_cx86_reorder(void)
 
 static void set_cx86_memwb(void)
 {
-       printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+       pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
 
        /* CCR2 bit 2: unlock NW bit */
        setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
@@ -268,7 +268,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
                 *  VSA1 we work around however.
                 */
 
-               printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+               pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n");
                isa_dma_bridge_buggy = 2;
 
                /* We do this before the PCI layer is running. However we
@@ -426,7 +426,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c)
                if (dir0 == 5 || dir0 == 3) {
                        unsigned char ccr3;
                        unsigned long flags;
-                       printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+                       pr_info("Enabling CPUID on Cyrix processor.\n");
                        local_irq_save(flags);
                        ccr3 = getCx86(CX86_CCR3);
                        /* enable MAPEN  */
index d820d8eae96be0b3daa0ec01d9f4a22bf1ad930e..73d391ae452f82a7bfeca4be2276eed628dcd452 100644 (file)
@@ -56,7 +56,7 @@ detect_hypervisor_vendor(void)
        }
 
        if (max_pri)
-               printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
+               pr_info("Hypervisor detected: %s\n", x86_hyper->name);
 }
 
 void init_hypervisor(struct cpuinfo_x86 *c)
index 565648bc1a0aef6c3cf60da92ec9fb60a2408c90..38766c2b5b003984bcc63e459e7cfb820c907473 100644 (file)
@@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
         */
        if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
            c->microcode < 0x20e) {
-               printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+               pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
                clear_cpu_cap(c, X86_FEATURE_PSE);
        }
 
@@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
        if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
                rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
                if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
-                       printk(KERN_INFO "Disabled fast string operations\n");
+                       pr_info("Disabled fast string operations\n");
                        setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
                        setup_clear_cpu_cap(X86_FEATURE_ERMS);
                }
@@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
                pr_info("Disabling PGE capability bit\n");
                setup_clear_cpu_cap(X86_FEATURE_PGE);
        }
+
+       if (c->cpuid_level >= 0x00000001) {
+               u32 eax, ebx, ecx, edx;
+
+               cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+               /*
+                * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+                * apicids which are reserved per package. Store the resulting
+                * shift value for the package management code.
+                */
+               if (edx & (1U << 28))
+                       c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+       }
 }
 
 #ifdef CONFIG_X86_32
@@ -176,7 +189,7 @@ int ppro_with_ram_bug(void)
            boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model == 1 &&
            boot_cpu_data.x86_mask < 8) {
-               printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+               pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
                return 1;
        }
        return 0;
@@ -225,7 +238,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
 
                set_cpu_bug(c, X86_BUG_F00F);
                if (!f00f_workaround_enabled) {
-                       printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+                       pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
                        f00f_workaround_enabled = 1;
                }
        }
@@ -244,7 +257,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * Forcefully enable PAE if kernel parameter "forcepae" is present.
         */
        if (forcepae) {
-               printk(KERN_WARNING "PAE forced!\n");
+               pr_warn("PAE forced!\n");
                set_cpu_cap(c, X86_FEATURE_PAE);
                add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
        }
index 0b6c52388cf484f809a7f328f475424dd262417c..6ed779efff2662bbeaa5f063a7d1c8ff2fd16e2f 100644 (file)
@@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
        err = amd_set_l3_disable_slot(nb, cpu, slot, val);
        if (err) {
                if (err == -EEXIST)
-                       pr_warning("L3 slot %d in use/index already disabled!\n",
+                       pr_warn("L3 slot %d in use/index already disabled!\n",
                                   slot);
                return err;
        }
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
deleted file mode 100644 (file)
index 336878a..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Intel(R) Processor Trace PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * Intel PT is specified in the Intel Architecture Instruction Set Extensions
- * Programming Reference:
- * http://software.intel.com/en-us/intel-isa-extensions
- */
-
-#ifndef __INTEL_PT_H__
-#define __INTEL_PT_H__
-
-/*
- * Single-entry ToPA: when this close to region boundary, switch
- * buffers to avoid losing data.
- */
-#define TOPA_PMI_MARGIN 512
-
-#define TOPA_SHIFT 12
-
-static inline unsigned int sizes(unsigned int tsz)
-{
-       return 1 << (tsz + TOPA_SHIFT);
-};
-
-struct topa_entry {
-       u64     end     : 1;
-       u64     rsvd0   : 1;
-       u64     intr    : 1;
-       u64     rsvd1   : 1;
-       u64     stop    : 1;
-       u64     rsvd2   : 1;
-       u64     size    : 4;
-       u64     rsvd3   : 2;
-       u64     base    : 36;
-       u64     rsvd4   : 16;
-};
-
-#define PT_CPUID_LEAVES                2
-#define PT_CPUID_REGS_NUM      4 /* number of regsters (eax, ebx, ecx, edx) */
-
-enum pt_capabilities {
-       PT_CAP_max_subleaf = 0,
-       PT_CAP_cr3_filtering,
-       PT_CAP_psb_cyc,
-       PT_CAP_mtc,
-       PT_CAP_topa_output,
-       PT_CAP_topa_multiple_entries,
-       PT_CAP_single_range_output,
-       PT_CAP_payloads_lip,
-       PT_CAP_mtc_periods,
-       PT_CAP_cycle_thresholds,
-       PT_CAP_psb_periods,
-};
-
-struct pt_pmu {
-       struct pmu              pmu;
-       u32                     caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
-};
-
-/**
- * struct pt_buffer - buffer configuration; one buffer per task_struct or
- *             cpu, depending on perf event configuration
- * @cpu:       cpu for per-cpu allocation
- * @tables:    list of ToPA tables in this buffer
- * @first:     shorthand for first topa table
- * @last:      shorthand for last topa table
- * @cur:       current topa table
- * @nr_pages:  buffer size in pages
- * @cur_idx:   current output region's index within @cur table
- * @output_off:        offset within the current output region
- * @data_size: running total of the amount of data in this buffer
- * @lost:      if data was lost/truncated
- * @head:      logical write offset inside the buffer
- * @snapshot:  if this is for a snapshot/overwrite counter
- * @stop_pos:  STOP topa entry in the buffer
- * @intr_pos:  INT topa entry in the buffer
- * @data_pages:        array of pages from perf
- * @topa_index:        table of topa entries indexed by page offset
- */
-struct pt_buffer {
-       int                     cpu;
-       struct list_head        tables;
-       struct topa             *first, *last, *cur;
-       unsigned int            cur_idx;
-       size_t                  output_off;
-       unsigned long           nr_pages;
-       local_t                 data_size;
-       local_t                 lost;
-       local64_t               head;
-       bool                    snapshot;
-       unsigned long           stop_pos, intr_pos;
-       void                    **data_pages;
-       struct topa_entry       *topa_index[0];
-};
-
-/**
- * struct pt - per-cpu pt context
- * @handle:    perf output handle
- * @handle_nmi:        do handle PT PMI on this cpu, there's an active event
- */
-struct pt {
-       struct perf_output_handle handle;
-       int                     handle_nmi;
-};
-
-#endif /* __INTEL_PT_H__ */
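
This header is only being moved, but one detail is worth keeping in mind while reading it: the 4-bit size field in struct topa_entry encodes an output region as a power-of-two number of 4 KiB pages, which is exactly what the sizes() helper computes. A standalone userspace check of that encoding (illustrative only):

    #include <stdio.h>

    #define TOPA_SHIFT 12                  /* log2 of a 4 KiB page */

    static unsigned int sizes(unsigned int tsz)
    {
            return 1u << (tsz + TOPA_SHIFT);
    }

    int main(void)
    {
            printf("%u\n", sizes(0));      /* 4096    ->  4 KiB region */
            printf("%u\n", sizes(3));      /* 32768   -> 32 KiB region */
            printf("%u\n", sizes(9));      /* 2097152 ->  2 MiB region */
            return 0;
    }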
index 4cfba4371a71f28c4615610475e3eec9e8d9790c..517619ea6498b41839f87f28bce4a76b1e43c7ab 100644 (file)
@@ -115,7 +115,7 @@ static int raise_local(void)
        int cpu = m->extcpu;
 
        if (m->inject_flags & MCJ_EXCEPTION) {
-               printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+               pr_info("Triggering MCE exception on CPU %d\n", cpu);
                switch (context) {
                case MCJ_CTX_IRQ:
                        /*
@@ -128,15 +128,15 @@ static int raise_local(void)
                        raise_exception(m, NULL);
                        break;
                default:
-                       printk(KERN_INFO "Invalid MCE context\n");
+                       pr_info("Invalid MCE context\n");
                        ret = -EINVAL;
                }
-               printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+               pr_info("MCE exception done on CPU %d\n", cpu);
        } else if (m->status) {
-               printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+               pr_info("Starting machine check poll CPU %d\n", cpu);
                raise_poll(m);
                mce_notify_irq();
-               printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+               pr_info("Machine check poll done on CPU %d\n", cpu);
        } else
                m->finished = 0;
 
@@ -183,8 +183,7 @@ static void raise_mce(struct mce *m)
                start = jiffies;
                while (!cpumask_empty(mce_inject_cpumask)) {
                        if (!time_before(jiffies, start + 2*HZ)) {
-                               printk(KERN_ERR
-                               "Timeout waiting for mce inject %lx\n",
+                               pr_err("Timeout waiting for mce inject %lx\n",
                                        *cpumask_bits(mce_inject_cpumask));
                                break;
                        }
@@ -241,7 +240,7 @@ static int inject_init(void)
 {
        if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
                return -ENOMEM;
-       printk(KERN_INFO "Machine check injector initialized\n");
+       pr_info("Machine check injector initialized\n");
        register_mce_write_callback(mce_write);
        register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
                                "mce_notify");
index 12402e10aeffda428821dd6dcb88597203e48054..2a0717bf803372d3968dca9d2b2e6c81740d946d 100644 (file)
@@ -26,14 +26,12 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
        rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
        rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
 
-       printk(KERN_EMERG
-               "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
-               smp_processor_id(), loaddr, lotype);
+       pr_emerg("CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
+                smp_processor_id(), loaddr, lotype);
 
        if (lotype & (1<<5)) {
-               printk(KERN_EMERG
-                       "CPU#%d: Possible thermal failure (CPU on fire ?).\n",
-                       smp_processor_id());
+               pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+                        smp_processor_id());
        }
 
        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -61,12 +59,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
        /* Read registers before enabling: */
        rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
        rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
-       printk(KERN_INFO
-              "Intel old style machine check architecture supported.\n");
+       pr_info("Intel old style machine check architecture supported.\n");
 
        /* Enable MCE: */
        cr4_set_bits(X86_CR4_MCE);
-       printk(KERN_INFO
-              "Intel old style machine check reporting enabled on CPU#%d.\n",
-              smp_processor_id());
+       pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
+               smp_processor_id());
 }
index 2c5aaf8c2e2f3dcc94d348dfe91da6d7d5000ae9..0b445c2ff735d44fedc469fd8ce0c1b8b1f1633b 100644 (file)
@@ -190,7 +190,7 @@ static int therm_throt_process(bool new_event, int event, int level)
        /* if we just entered the thermal event */
        if (new_event) {
                if (event == THERMAL_THROTTLING_EVENT)
-                       printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
+                       pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
                                this_cpu,
                                level == CORE_LEVEL ? "Core" : "Package",
                                state->count);
@@ -198,8 +198,7 @@ static int therm_throt_process(bool new_event, int event, int level)
        }
        if (old_event) {
                if (event == THERMAL_THROTTLING_EVENT)
-                       printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
-                               this_cpu,
+                       pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
                                level == CORE_LEVEL ? "Core" : "Package");
                return 1;
        }
@@ -417,8 +416,8 @@ static void intel_thermal_interrupt(void)
 
 static void unexpected_thermal_interrupt(void)
 {
-       printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
-                       smp_processor_id());
+       pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
+               smp_processor_id());
 }
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -499,7 +498,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 
        if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
                if (system_state == SYSTEM_BOOTING)
-                       printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+                       pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
                return;
        }
 
@@ -557,8 +556,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
        l = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 
-       printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
-                      tm2 ? "TM2" : "TM1");
+       pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
+                     tm2 ? "TM2" : "TM1");
 
        /* enable thermal throttle processing */
        atomic_set(&therm_throt_en, 1);
index 7245980186eea047e643af5010e1509bfc991632..fcf9ae9384f4cb4693d67cc8ee6433562dfc9f34 100644 (file)
@@ -12,8 +12,8 @@
 
 static void default_threshold_interrupt(void)
 {
-       printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
-                        THRESHOLD_APIC_VECTOR);
+       pr_err("Unexpected threshold interrupt at vector %x\n",
+               THRESHOLD_APIC_VECTOR);
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
index 01dd8702880b7f2db9fcdc3874ba8809c2ce9b80..c6a722e1d011458fa30ec5ad3ad4e78c58d2fa82 100644 (file)
@@ -17,7 +17,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
        ist_enter(regs);
 
-       printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+       pr_emerg("CPU0: Machine Check Exception.\n");
        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
        ist_exit(regs);
@@ -39,6 +39,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
 
        cr4_set_bits(X86_CR4_MCE);
 
-       printk(KERN_INFO
-              "Winchip machine check reporting enabled on CPU#0.\n");
+       pr_info("Winchip machine check reporting enabled on CPU#0.\n");
 }
index 2233f8a766156891a52b9a7658f04efeaf4f86c8..75d3aab5f7b243623ca311c4c23bf07ef2e26ca4 100644 (file)
@@ -953,7 +953,7 @@ struct microcode_ops * __init init_amd_microcode(void)
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
        if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
-               pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+               pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
                return NULL;
        }
 
index 20e242ea1bc46b5f5828c7b95071d920853b7609..4e7c6933691cc8de42aa4a82473482127bd0faab 100644 (file)
@@ -161,8 +161,8 @@ static void __init ms_hyperv_init_platform(void)
        ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
        ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
-       printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
-              ms_hyperv.features, ms_hyperv.hints);
+       pr_info("HyperV: features 0x%x, hints 0x%x\n",
+               ms_hyperv.features, ms_hyperv.hints);
 
 #ifdef CONFIG_X86_LOCAL_APIC
        if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
@@ -174,8 +174,8 @@ static void __init ms_hyperv_init_platform(void)
                rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
                hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
                lapic_timer_frequency = hv_lapic_frequency;
-               printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
-                               lapic_timer_frequency);
+               pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+                       lapic_timer_frequency);
        }
 #endif
 
index 316fe3e60a9764e479d4e49d01346c7921763ee8..3d689937fc1b13974a0a8a5edd31ddda0f6401b2 100644 (file)
@@ -103,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
         */
        if (type != MTRR_TYPE_WRCOMB &&
            (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
-               pr_warning("mtrr: only write-combining%s supported\n",
+               pr_warn("mtrr: only write-combining%s supported\n",
                           centaur_mcr_type ? " and uncacheable are" : " is");
                return -EINVAL;
        }
index 0d98503c2245aab81283526c062f746650b99c32..31e951ce6dff33782c9610c7b43898181d59d637 100644 (file)
@@ -57,9 +57,9 @@ static int __initdata                         nr_range;
 static struct var_mtrr_range_state __initdata  range_state[RANGE_NUM];
 
 static int __initdata debug_print;
-#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
+#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)
 
-#define BIOS_BUG_MSG KERN_WARNING \
+#define BIOS_BUG_MSG \
        "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 
 static int __init
@@ -81,9 +81,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
                                                base, base + size);
        }
        if (debug_print) {
-               printk(KERN_DEBUG "After WB checking\n");
+               pr_debug("After WB checking\n");
                for (i = 0; i < nr_range; i++)
-                       printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+                       pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
                                 range[i].start, range[i].end);
        }
 
@@ -101,7 +101,7 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
                    (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
                    (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
                        /* Var MTRR contains UC entry below 1M? Skip it: */
-                       printk(BIOS_BUG_MSG, i);
+                       pr_warn(BIOS_BUG_MSG, i);
                        if (base + size <= (1<<(20-PAGE_SHIFT)))
                                continue;
                        size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -114,11 +114,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
                                 extra_remove_base + extra_remove_size);
 
        if  (debug_print) {
-               printk(KERN_DEBUG "After UC checking\n");
+               pr_debug("After UC checking\n");
                for (i = 0; i < RANGE_NUM; i++) {
                        if (!range[i].end)
                                continue;
-                       printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+                       pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
                                 range[i].start, range[i].end);
                }
        }
@@ -126,9 +126,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
        /* sort the ranges */
        nr_range = clean_sort_range(range, RANGE_NUM);
        if  (debug_print) {
-               printk(KERN_DEBUG "After sorting\n");
+               pr_debug("After sorting\n");
                for (i = 0; i < nr_range; i++)
-                       printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+                       pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
                                 range[i].start, range[i].end);
        }
 
@@ -544,7 +544,7 @@ static void __init print_out_mtrr_range_state(void)
                start_base = to_size_factor(start_base, &start_factor),
                type = range_state[i].type;
 
-               printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+               pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
                        i, start_base, start_factor,
                        size_base, size_factor,
                        (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
@@ -713,7 +713,7 @@ int __init mtrr_cleanup(unsigned address_bits)
                return 0;
 
        /* Print original var MTRRs at first, for debugging: */
-       printk(KERN_DEBUG "original variable MTRRs\n");
+       pr_debug("original variable MTRRs\n");
        print_out_mtrr_range_state();
 
        memset(range, 0, sizeof(range));
@@ -733,7 +733,7 @@ int __init mtrr_cleanup(unsigned address_bits)
                                          x_remove_base, x_remove_size);
 
        range_sums = sum_ranges(range, nr_range);
-       printk(KERN_INFO "total RAM covered: %ldM\n",
+       pr_info("total RAM covered: %ldM\n",
               range_sums >> (20 - PAGE_SHIFT));
 
        if (mtrr_chunk_size && mtrr_gran_size) {
@@ -745,12 +745,11 @@ int __init mtrr_cleanup(unsigned address_bits)
 
                if (!result[i].bad) {
                        set_var_mtrr_all(address_bits);
-                       printk(KERN_DEBUG "New variable MTRRs\n");
+                       pr_debug("New variable MTRRs\n");
                        print_out_mtrr_range_state();
                        return 1;
                }
-               printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
-                      "will find optimal one\n");
+               pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
        }
 
        i = 0;
@@ -768,7 +767,7 @@ int __init mtrr_cleanup(unsigned address_bits)
                                      x_remove_base, x_remove_size, i);
                        if (debug_print) {
                                mtrr_print_out_one_result(i);
-                               printk(KERN_INFO "\n");
+                               pr_info("\n");
                        }
 
                        i++;
@@ -779,7 +778,7 @@ int __init mtrr_cleanup(unsigned address_bits)
        index_good = mtrr_search_optimal_index();
 
        if (index_good != -1) {
-               printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+               pr_info("Found optimal setting for mtrr clean up\n");
                i = index_good;
                mtrr_print_out_one_result(i);
 
@@ -790,7 +789,7 @@ int __init mtrr_cleanup(unsigned address_bits)
                gran_size <<= 10;
                x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
                set_var_mtrr_all(address_bits);
-               printk(KERN_DEBUG "New variable MTRRs\n");
+               pr_debug("New variable MTRRs\n");
                print_out_mtrr_range_state();
                return 1;
        } else {
@@ -799,8 +798,8 @@ int __init mtrr_cleanup(unsigned address_bits)
                        mtrr_print_out_one_result(i);
        }
 
-       printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
-       printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+       pr_info("mtrr_cleanup: can not find optimal value\n");
+       pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n");
 
        return 0;
 }
@@ -918,7 +917,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 
        /* kvm/qemu doesn't have mtrr set right, don't trim them all: */
        if (!highest_pfn) {
-               printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
+               pr_info("CPU MTRRs all blank - virtualized system.\n");
                return 0;
        }
 
@@ -973,7 +972,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
                                                         end_pfn);
 
        if (total_trim_size) {
-               pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
+               pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
+                       total_trim_size >> 20);
 
                if (!changed_by_mtrr_cleanup)
                        WARN_ON(1);
index c870af1610083ec3dda7cb61b966860c9a224374..fcbcb2f678ca47360d23623887d216165969d6e8 100644 (file)
@@ -55,7 +55,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
 
        rdmsr(MSR_K8_SYSCFG, lo, hi);
        if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
-               printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
+               pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
                       " not cleared by BIOS, clearing this bit\n",
                       smp_processor_id());
                lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
@@ -501,14 +501,14 @@ void __init mtrr_state_warn(void)
        if (!mask)
                return;
        if (mask & MTRR_CHANGE_MASK_FIXED)
-               pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+               pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
        if (mask & MTRR_CHANGE_MASK_VARIABLE)
-               pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
+               pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n");
        if (mask & MTRR_CHANGE_MASK_DEFTYPE)
-               pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+               pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
 
-       printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
-       printk(KERN_INFO "mtrr: corrected configuration.\n");
+       pr_info("mtrr: probably your BIOS does not setup all CPUs.\n");
+       pr_info("mtrr: corrected configuration.\n");
 }
 
 /*
@@ -519,8 +519,7 @@ void __init mtrr_state_warn(void)
 void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
 {
        if (wrmsr_safe(msr, a, b) < 0) {
-               printk(KERN_ERR
-                       "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+               pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
                        smp_processor_id(), msr, a, b);
        }
 }
@@ -607,7 +606,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
                tmp |= ~((1ULL<<(hi - 1)) - 1);
 
                if (tmp != mask) {
-                       printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
+                       pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
                        add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
                        mask = tmp;
                }
@@ -858,13 +857,13 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
            boot_cpu_data.x86_model == 1 &&
            boot_cpu_data.x86_mask <= 7) {
                if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
-                       pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+                       pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
                        return -EINVAL;
                }
                if (!(base + size < 0x70000 || base > 0x7003F) &&
                    (type == MTRR_TYPE_WRCOMB
                     || type == MTRR_TYPE_WRBACK)) {
-                       pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+                       pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
                        return -EINVAL;
                }
        }
@@ -878,7 +877,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
             lbase = lbase >> 1, last = last >> 1)
                ;
        if (lbase != last) {
-               pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
+               pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
                return -EINVAL;
        }
        return 0;
index 5c3d149ee91cb1f6c87ff6ad1853a38adfce82da..ba80d68f683e72727d455cb36a3d0dc8493a5e25 100644 (file)
@@ -300,24 +300,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
                return error;
 
        if (type >= MTRR_NUM_TYPES) {
-               pr_warning("mtrr: type: %u invalid\n", type);
+               pr_warn("mtrr: type: %u invalid\n", type);
                return -EINVAL;
        }
 
        /* If the type is WC, check that this processor supports it */
        if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
-               pr_warning("mtrr: your processor doesn't support write-combining\n");
+               pr_warn("mtrr: your processor doesn't support write-combining\n");
                return -ENOSYS;
        }
 
        if (!size) {
-               pr_warning("mtrr: zero sized request\n");
+               pr_warn("mtrr: zero sized request\n");
                return -EINVAL;
        }
 
        if ((base | (base + size - 1)) >>
            (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
-               pr_warning("mtrr: base or size exceeds the MTRR width\n");
+               pr_warn("mtrr: base or size exceeds the MTRR width\n");
                return -EINVAL;
        }
 
@@ -348,7 +348,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
                                } else if (types_compatible(type, ltype))
                                        continue;
                        }
-                       pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
+                       pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
                                " 0x%lx000,0x%lx000\n", base, size, lbase,
                                lsize);
                        goto out;
@@ -357,7 +357,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
                if (ltype != type) {
                        if (types_compatible(type, ltype))
                                continue;
-                       pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+                       pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
                                base, size, mtrr_attrib_to_str(ltype),
                                mtrr_attrib_to_str(type));
                        goto out;
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 static int mtrr_check(unsigned long base, unsigned long size)
 {
        if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
-               pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+               pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
                pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
                dump_stack();
                return -1;
@@ -493,16 +493,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
                }
        }
        if (reg >= max) {
-               pr_warning("mtrr: register: %d too big\n", reg);
+               pr_warn("mtrr: register: %d too big\n", reg);
                goto out;
        }
        mtrr_if->get(reg, &lbase, &lsize, &ltype);
        if (lsize < 1) {
-               pr_warning("mtrr: MTRR %d not used\n", reg);
+               pr_warn("mtrr: MTRR %d not used\n", reg);
                goto out;
        }
        if (mtrr_usage_table[reg] < 1) {
-               pr_warning("mtrr: reg: %d has count=0\n", reg);
+               pr_warn("mtrr: reg: %d has count=0\n", reg);
                goto out;
        }
        if (--mtrr_usage_table[reg] < 1)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
deleted file mode 100644 (file)
index 1b443db..0000000
+++ /dev/null
@@ -1,2428 +0,0 @@
-/*
- * Performance events x86 architecture code
- *
- *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2009 Jaswinder Singh Rajput
- *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- *  Copyright (C) 2009 Google, Inc., Stephane Eranian
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-#include <linux/capability.h>
-#include <linux/notifier.h>
-#include <linux/hardirq.h>
-#include <linux/kprobes.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/sched.h>
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/bitops.h>
-#include <linux/device.h>
-
-#include <asm/apic.h>
-#include <asm/stacktrace.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/alternative.h>
-#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
-#include <asm/timer.h>
-#include <asm/desc.h>
-#include <asm/ldt.h>
-
-#include "perf_event.h"
-
-struct x86_pmu x86_pmu __read_mostly;
-
-DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
-       .enabled = 1,
-};
-
-struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
-
-u64 __read_mostly hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-u64 __read_mostly hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
-/*
- * Propagate event elapsed time into the generic event.
- * Can only be executed on the CPU where the event is active.
- * Returns the delta events processed.
- */
-u64 x86_perf_event_update(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       int shift = 64 - x86_pmu.cntval_bits;
-       u64 prev_raw_count, new_raw_count;
-       int idx = hwc->idx;
-       s64 delta;
-
-       if (idx == INTEL_PMC_IDX_FIXED_BTS)
-               return 0;
-
-       /*
-        * Careful: an NMI might modify the previous event value.
-        *
-        * Our tactic to handle this is to first atomically read and
-        * exchange a new raw count - then add that new-prev delta
-        * count to the generic event atomically:
-        */
-again:
-       prev_raw_count = local64_read(&hwc->prev_count);
-       rdpmcl(hwc->event_base_rdpmc, new_raw_count);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                                       new_raw_count) != prev_raw_count)
-               goto again;
-
-       /*
-        * Now we have the new raw value and have updated the prev
-        * timestamp already. We can now calculate the elapsed delta
-        * (event-)time and add that to the generic event.
-        *
-        * Careful, not all hw sign-extends above the physical width
-        * of the count.
-        */
-       delta = (new_raw_count << shift) - (prev_raw_count << shift);
-       delta >>= shift;
-
-       local64_add(delta, &event->count);
-       local64_sub(delta, &hwc->period_left);
-
-       return new_raw_count;
-}
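
The retry loop above is the usual lock-free pattern for folding a free-running hardware counter into a 64-bit software total: atomically exchange the remembered raw value for the new one, then add the sign-extended difference. A stripped-down userspace sketch of the same idea, using C11 atomics in place of the kernel's local64_* helpers (the names here are illustrative, not kernel API):

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t prev_raw;      /* last raw counter value seen    */
    static _Atomic int64_t  total;         /* accumulated 64-bit event count */

    /* new_raw: current raw counter value; shift = 64 - counter width in bits */
    static void accumulate(uint64_t new_raw, int shift)
    {
            uint64_t prev = atomic_load(&prev_raw);
            int64_t delta;

            /* retry if something (e.g. an interrupt) updated prev_raw under us */
            while (!atomic_compare_exchange_weak(&prev_raw, &prev, new_raw))
                    ;

            /* shift up and back down to sign-extend counters narrower than 64 bits */
            delta = ((int64_t)(new_raw << shift) - (int64_t)(prev << shift)) >> shift;
            atomic_fetch_add(&total, delta);
    }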
-
-/*
- * Find and validate any extra registers to set up.
- */
-static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg;
-       struct extra_reg *er;
-
-       reg = &event->hw.extra_reg;
-
-       if (!x86_pmu.extra_regs)
-               return 0;
-
-       for (er = x86_pmu.extra_regs; er->msr; er++) {
-               if (er->event != (config & er->config_mask))
-                       continue;
-               if (event->attr.config1 & ~er->valid_mask)
-                       return -EINVAL;
-               /* Check if the extra msrs can be safely accessed */
-               if (!er->extra_msr_access)
-                       return -ENXIO;
-
-               reg->idx = er->idx;
-               reg->config = event->attr.config1;
-               reg->reg = er->msr;
-               break;
-       }
-       return 0;
-}
-
-static atomic_t active_events;
-static atomic_t pmc_refcount;
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-static bool reserve_pmc_hardware(void)
-{
-       int i;
-
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
-                       goto perfctr_fail;
-       }
-
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
-                       goto eventsel_fail;
-       }
-
-       return true;
-
-eventsel_fail:
-       for (i--; i >= 0; i--)
-               release_evntsel_nmi(x86_pmu_config_addr(i));
-
-       i = x86_pmu.num_counters;
-
-perfctr_fail:
-       for (i--; i >= 0; i--)
-               release_perfctr_nmi(x86_pmu_event_addr(i));
-
-       return false;
-}
-
-static void release_pmc_hardware(void)
-{
-       int i;
-
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               release_perfctr_nmi(x86_pmu_event_addr(i));
-               release_evntsel_nmi(x86_pmu_config_addr(i));
-       }
-}
-
-#else
-
-static bool reserve_pmc_hardware(void) { return true; }
-static void release_pmc_hardware(void) {}
-
-#endif
-
-static bool check_hw_exists(void)
-{
-       u64 val, val_fail, val_new = ~0;
-       int i, reg, reg_fail, ret = 0;
-       int bios_fail = 0;
-       int reg_safe = -1;
-
-       /*
-        * Check to see if the BIOS enabled any of the counters, if so
-        * complain and bail.
-        */
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               reg = x86_pmu_config_addr(i);
-               ret = rdmsrl_safe(reg, &val);
-               if (ret)
-                       goto msr_fail;
-               if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
-                       bios_fail = 1;
-                       val_fail = val;
-                       reg_fail = reg;
-               } else {
-                       reg_safe = i;
-               }
-       }
-
-       if (x86_pmu.num_counters_fixed) {
-               reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-               ret = rdmsrl_safe(reg, &val);
-               if (ret)
-                       goto msr_fail;
-               for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
-                       if (val & (0x03 << i*4)) {
-                               bios_fail = 1;
-                               val_fail = val;
-                               reg_fail = reg;
-                       }
-               }
-       }
-
-       /*
-        * If all the counters are enabled, the below test will always
-        * fail.  The tools will also become useless in this scenario.
-        * Just fail and disable the hardware counters.
-        */
-
-       if (reg_safe == -1) {
-               reg = reg_safe;
-               goto msr_fail;
-       }
-
-       /*
-        * Read the current value, change it and read it back to see if it
-        * matches, this is needed to detect certain hardware emulators
-        * (qemu/kvm) that don't trap on the MSR access and always return 0s.
-        */
-       reg = x86_pmu_event_addr(reg_safe);
-       if (rdmsrl_safe(reg, &val))
-               goto msr_fail;
-       val ^= 0xffffUL;
-       ret = wrmsrl_safe(reg, val);
-       ret |= rdmsrl_safe(reg, &val_new);
-       if (ret || val != val_new)
-               goto msr_fail;
-
-       /*
-        * We still allow the PMU driver to operate:
-        */
-       if (bios_fail) {
-               printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
-               printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
-       }
-
-       return true;
-
-msr_fail:
-       printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
-       printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
-               boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
-               reg, val_new);
-
-       return false;
-}
-
-static void hw_perf_event_destroy(struct perf_event *event)
-{
-       x86_release_hardware();
-       atomic_dec(&active_events);
-}
-
-void hw_perf_lbr_event_destroy(struct perf_event *event)
-{
-       hw_perf_event_destroy(event);
-
-       /* undo the lbr/bts event accounting */
-       x86_del_exclusive(x86_lbr_exclusive_lbr);
-}
-
-static inline int x86_pmu_initialized(void)
-{
-       return x86_pmu.handle_irq != NULL;
-}
-
-static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
-{
-       struct perf_event_attr *attr = &event->attr;
-       unsigned int cache_type, cache_op, cache_result;
-       u64 config, val;
-
-       config = attr->config;
-
-       cache_type = (config >>  0) & 0xff;
-       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-               return -EINVAL;
-
-       cache_op = (config >>  8) & 0xff;
-       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-               return -EINVAL;
-
-       cache_result = (config >> 16) & 0xff;
-       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-               return -EINVAL;
-
-       val = hw_cache_event_ids[cache_type][cache_op][cache_result];
-
-       if (val == 0)
-               return -ENOENT;
-
-       if (val == -1)
-               return -EINVAL;
-
-       hwc->config |= val;
-       attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
-       return x86_pmu_extra_regs(val, event);
-}
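
set_ext_hw_attr() above does nothing more than unpack a PERF_TYPE_HW_CACHE config into three bytes, cache id, operation and result, and use them to index the per-model event tables. A tiny standalone demo of that layout; the example values (L1D = 0, write op = 1, miss result = 1) come from the perf_event_open() ABI enums:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* (cache) | (op << 8) | (result << 16): here "L1D write misses" */
            uint64_t config = 0 | (1u << 8) | (1u << 16);

            printf("cache=%llu op=%llu result=%llu\n",
                   (unsigned long long)(config & 0xff),
                   (unsigned long long)((config >> 8) & 0xff),
                   (unsigned long long)((config >> 16) & 0xff));
            return 0;
    }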
-
-int x86_reserve_hardware(void)
-{
-       int err = 0;
-
-       if (!atomic_inc_not_zero(&pmc_refcount)) {
-               mutex_lock(&pmc_reserve_mutex);
-               if (atomic_read(&pmc_refcount) == 0) {
-                       if (!reserve_pmc_hardware())
-                               err = -EBUSY;
-                       else
-                               reserve_ds_buffers();
-               }
-               if (!err)
-                       atomic_inc(&pmc_refcount);
-               mutex_unlock(&pmc_reserve_mutex);
-       }
-
-       return err;
-}
-
-void x86_release_hardware(void)
-{
-       if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
-               release_pmc_hardware();
-               release_ds_buffers();
-               mutex_unlock(&pmc_reserve_mutex);
-       }
-}
-
-/*
- * Check if we can create event of a certain type (that no conflicting events
- * are present).
- */
-int x86_add_exclusive(unsigned int what)
-{
-       int i;
-
-       if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
-               mutex_lock(&pmc_reserve_mutex);
-               for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
-                       if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
-                               goto fail_unlock;
-               }
-               atomic_inc(&x86_pmu.lbr_exclusive[what]);
-               mutex_unlock(&pmc_reserve_mutex);
-       }
-
-       atomic_inc(&active_events);
-       return 0;
-
-fail_unlock:
-       mutex_unlock(&pmc_reserve_mutex);
-       return -EBUSY;
-}
-
-void x86_del_exclusive(unsigned int what)
-{
-       atomic_dec(&x86_pmu.lbr_exclusive[what]);
-       atomic_dec(&active_events);
-}
-
-int x86_setup_perfctr(struct perf_event *event)
-{
-       struct perf_event_attr *attr = &event->attr;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 config;
-
-       if (!is_sampling_event(event)) {
-               hwc->sample_period = x86_pmu.max_period;
-               hwc->last_period = hwc->sample_period;
-               local64_set(&hwc->period_left, hwc->sample_period);
-       }
-
-       if (attr->type == PERF_TYPE_RAW)
-               return x86_pmu_extra_regs(event->attr.config, event);
-
-       if (attr->type == PERF_TYPE_HW_CACHE)
-               return set_ext_hw_attr(hwc, event);
-
-       if (attr->config >= x86_pmu.max_events)
-               return -EINVAL;
-
-       /*
-        * The generic map:
-        */
-       config = x86_pmu.event_map(attr->config);
-
-       if (config == 0)
-               return -ENOENT;
-
-       if (config == -1LL)
-               return -EINVAL;
-
-       /*
-        * Branch tracing:
-        */
-       if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-           !attr->freq && hwc->sample_period == 1) {
-               /* BTS is not supported by this architecture. */
-               if (!x86_pmu.bts_active)
-                       return -EOPNOTSUPP;
-
-               /* BTS is currently only allowed for user-mode. */
-               if (!attr->exclude_kernel)
-                       return -EOPNOTSUPP;
-
-               /* disallow bts if conflicting events are present */
-               if (x86_add_exclusive(x86_lbr_exclusive_lbr))
-                       return -EBUSY;
-
-               event->destroy = hw_perf_lbr_event_destroy;
-       }
-
-       hwc->config |= config;
-
-       return 0;
-}
-
-/*
- * check that branch_sample_type is compatible with
- * settings needed for precise_ip > 1 which implies
- * using the LBR to capture ALL taken branches at the
- * priv levels of the measurement
- */
-static inline int precise_br_compat(struct perf_event *event)
-{
-       u64 m = event->attr.branch_sample_type;
-       u64 b = 0;
-
-       /* must capture all branches */
-       if (!(m & PERF_SAMPLE_BRANCH_ANY))
-               return 0;
-
-       m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
-
-       if (!event->attr.exclude_user)
-               b |= PERF_SAMPLE_BRANCH_USER;
-
-       if (!event->attr.exclude_kernel)
-               b |= PERF_SAMPLE_BRANCH_KERNEL;
-
-       /*
-        * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
-        */
-
-       return m == b;
-}
-
-int x86_pmu_hw_config(struct perf_event *event)
-{
-       if (event->attr.precise_ip) {
-               int precise = 0;
-
-               /* Support for constant skid */
-               if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
-                       precise++;
-
-                       /* Support for IP fixup */
-                       if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
-                               precise++;
-
-                       if (x86_pmu.pebs_prec_dist)
-                               precise++;
-               }
-
-               if (event->attr.precise_ip > precise)
-                       return -EOPNOTSUPP;
-       }
-       /*
-        * check that PEBS LBR correction does not conflict with
-        * whatever the user is asking with attr->branch_sample_type
-        */
-       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
-               u64 *br_type = &event->attr.branch_sample_type;
-
-               if (has_branch_stack(event)) {
-                       if (!precise_br_compat(event))
-                               return -EOPNOTSUPP;
-
-                       /* branch_sample_type is compatible */
-
-               } else {
-                       /*
-                        * user did not specify  branch_sample_type
-                        *
-                        * For PEBS fixups, we capture all
-                        * the branches at the priv level of the
-                        * event.
-                        */
-                       *br_type = PERF_SAMPLE_BRANCH_ANY;
-
-                       if (!event->attr.exclude_user)
-                               *br_type |= PERF_SAMPLE_BRANCH_USER;
-
-                       if (!event->attr.exclude_kernel)
-                               *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
-               }
-       }
-
-       if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
-               event->attach_state |= PERF_ATTACH_TASK_DATA;
-
-       /*
-        * Generate PMC IRQs:
-        * (keep 'enabled' bit clear for now)
-        */
-       event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
-
-       /*
-        * Count user and OS events unless requested not to
-        */
-       if (!event->attr.exclude_user)
-               event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
-       if (!event->attr.exclude_kernel)
-               event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
-
-       if (event->attr.type == PERF_TYPE_RAW)
-               event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
-
-       if (event->attr.sample_period && x86_pmu.limit_period) {
-               if (x86_pmu.limit_period(event, event->attr.sample_period) >
-                               event->attr.sample_period)
-                       return -EINVAL;
-       }
-
-       return x86_setup_perfctr(event);
-}
-
-/*
- * Setup the hardware configuration for a given attr_type
- */
-static int __x86_pmu_event_init(struct perf_event *event)
-{
-       int err;
-
-       if (!x86_pmu_initialized())
-               return -ENODEV;
-
-       err = x86_reserve_hardware();
-       if (err)
-               return err;
-
-       atomic_inc(&active_events);
-       event->destroy = hw_perf_event_destroy;
-
-       event->hw.idx = -1;
-       event->hw.last_cpu = -1;
-       event->hw.last_tag = ~0ULL;
-
-       /* mark unused */
-       event->hw.extra_reg.idx = EXTRA_REG_NONE;
-       event->hw.branch_reg.idx = EXTRA_REG_NONE;
-
-       return x86_pmu.hw_config(event);
-}
-
-void x86_pmu_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               u64 val;
-
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-               rdmsrl(x86_pmu_config_addr(idx), val);
-               if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
-                       continue;
-               val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-               wrmsrl(x86_pmu_config_addr(idx), val);
-       }
-}
-
-static void x86_pmu_disable(struct pmu *pmu)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (!x86_pmu_initialized())
-               return;
-
-       if (!cpuc->enabled)
-               return;
-
-       cpuc->n_added = 0;
-       cpuc->enabled = 0;
-       barrier();
-
-       x86_pmu.disable_all();
-}
-
-void x86_pmu_enable_all(int added)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
-
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-
-               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
-       }
-}
-
-static struct pmu pmu;
-
-static inline int is_x86_event(struct perf_event *event)
-{
-       return event->pmu == &pmu;
-}
-
-/*
- * Event scheduler state:
- *
- * Assign events iterating over all events and counters, beginning
- * with events with least weights first. Keep the current iterator
- * state in struct sched_state.
- */
-struct sched_state {
-       int     weight;
-       int     event;          /* event index */
-       int     counter;        /* counter index */
-       int     unassigned;     /* number of events to be assigned left */
-       int     nr_gp;          /* number of GP counters used */
-       unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-};
-
-/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
-#define        SCHED_STATES_MAX        2
-
-struct perf_sched {
-       int                     max_weight;
-       int                     max_events;
-       int                     max_gp;
-       int                     saved_states;
-       struct event_constraint **constraints;
-       struct sched_state      state;
-       struct sched_state      saved[SCHED_STATES_MAX];
-};
-
-/*
- * Initialize the iterator that runs through all events and counters.
- */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
-                           int num, int wmin, int wmax, int gpmax)
-{
-       int idx;
-
-       memset(sched, 0, sizeof(*sched));
-       sched->max_events       = num;
-       sched->max_weight       = wmax;
-       sched->max_gp           = gpmax;
-       sched->constraints      = constraints;
-
-       for (idx = 0; idx < num; idx++) {
-               if (constraints[idx]->weight == wmin)
-                       break;
-       }
-
-       sched->state.event      = idx;          /* start with min weight */
-       sched->state.weight     = wmin;
-       sched->state.unassigned = num;
-}
-
-static void perf_sched_save_state(struct perf_sched *sched)
-{
-       if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
-               return;
-
-       sched->saved[sched->saved_states] = sched->state;
-       sched->saved_states++;
-}
-
-static bool perf_sched_restore_state(struct perf_sched *sched)
-{
-       if (!sched->saved_states)
-               return false;
-
-       sched->saved_states--;
-       sched->state = sched->saved[sched->saved_states];
-
-       /* continue with next counter: */
-       clear_bit(sched->state.counter++, sched->state.used);
-
-       return true;
-}
-
-/*
- * Select a counter for the current event to schedule. Return true on
- * success.
- */
-static bool __perf_sched_find_counter(struct perf_sched *sched)
-{
-       struct event_constraint *c;
-       int idx;
-
-       if (!sched->state.unassigned)
-               return false;
-
-       if (sched->state.event >= sched->max_events)
-               return false;
-
-       c = sched->constraints[sched->state.event];
-       /* Prefer fixed purpose counters */
-       if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
-               idx = INTEL_PMC_IDX_FIXED;
-               for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
-                       if (!__test_and_set_bit(idx, sched->state.used))
-                               goto done;
-               }
-       }
-
-       /* Grab the first unused counter starting with idx */
-       idx = sched->state.counter;
-       for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-               if (!__test_and_set_bit(idx, sched->state.used)) {
-                       if (sched->state.nr_gp++ >= sched->max_gp)
-                               return false;
-
-                       goto done;
-               }
-       }
-
-       return false;
-
-done:
-       sched->state.counter = idx;
-
-       if (c->overlap)
-               perf_sched_save_state(sched);
-
-       return true;
-}
-
-static bool perf_sched_find_counter(struct perf_sched *sched)
-{
-       while (!__perf_sched_find_counter(sched)) {
-               if (!perf_sched_restore_state(sched))
-                       return false;
-       }
-
-       return true;
-}
-
-/*
- * Go through all unassigned events and find the next one to schedule.
- * Take events with the least weight first. Return true on success.
- */
-static bool perf_sched_next_event(struct perf_sched *sched)
-{
-       struct event_constraint *c;
-
-       if (!sched->state.unassigned || !--sched->state.unassigned)
-               return false;
-
-       do {
-               /* next event */
-               sched->state.event++;
-               if (sched->state.event >= sched->max_events) {
-                       /* next weight */
-                       sched->state.event = 0;
-                       sched->state.weight++;
-                       if (sched->state.weight > sched->max_weight)
-                               return false;
-               }
-               c = sched->constraints[sched->state.event];
-       } while (c->weight != sched->state.weight);
-
-       sched->state.counter = 0;       /* start with first counter */
-
-       return true;
-}
-
-/*
- * Assign a counter for each event.
- */
-int perf_assign_events(struct event_constraint **constraints, int n,
-                       int wmin, int wmax, int gpmax, int *assign)
-{
-       struct perf_sched sched;
-
-       perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
-
-       do {
-               if (!perf_sched_find_counter(&sched))
-                       break;  /* failed */
-               if (assign)
-                       assign[sched.state.event] = sched.state.counter;
-       } while (perf_sched_next_event(&sched));
-
-       return sched.state.unassigned;
-}
-EXPORT_SYMBOL_GPL(perf_assign_events);
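
As the scheduler comments above describe, events are assigned to counters in order of increasing constraint weight (the number of counters an event is allowed to use), with saved states providing backtracking when overlapping constraints collide. The core placement idea, minus the backtracking, fits in a toy standalone example with made-up masks, nothing kernel-specific:

    #include <stdio.h>

    int main(void)
    {
            /* allowed-counter bitmask per event; fewer set bits = tighter constraint */
            unsigned long cmask[3] = { 0x1, 0xf, 0x3 };
            int order[3] = { 0, 2, 1 };     /* most constrained first: weights 1, 2, 4 */
            unsigned long used = 0;
            int assign[3] = { -1, -1, -1 };

            for (int k = 0; k < 3; k++) {
                    int i = order[k];
                    for (int c = 0; c < 4; c++) {
                            if ((cmask[i] >> c & 1) && !(used >> c & 1)) {
                                    assign[i] = c;
                                    used |= 1ul << c;
                                    break;
                            }
                    }
            }

            for (int i = 0; i < 3; i++)
                    printf("event %d -> counter %d\n", i, assign[i]);
            return 0;
    }

Taking the loosely constrained event first could grab counter 0 and strand the event that can only ever use counter 0; placing tight constraints first (plus backtracking, in the real code) avoids that.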
-
-int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
-{
-       struct event_constraint *c;
-       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       struct perf_event *e;
-       int i, wmin, wmax, unsched = 0;
-       struct hw_perf_event *hwc;
-
-       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
-       if (x86_pmu.start_scheduling)
-               x86_pmu.start_scheduling(cpuc);
-
-       for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-               cpuc->event_constraint[i] = NULL;
-               c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
-               cpuc->event_constraint[i] = c;
-
-               wmin = min(wmin, c->weight);
-               wmax = max(wmax, c->weight);
-       }
-
-       /*
-        * fastpath, try to reuse previous register
-        */
-       for (i = 0; i < n; i++) {
-               hwc = &cpuc->event_list[i]->hw;
-               c = cpuc->event_constraint[i];
-
-               /* never assigned */
-               if (hwc->idx == -1)
-                       break;
-
-               /* constraint still honored */
-               if (!test_bit(hwc->idx, c->idxmsk))
-                       break;
-
-               /* not already used */
-               if (test_bit(hwc->idx, used_mask))
-                       break;
-
-               __set_bit(hwc->idx, used_mask);
-               if (assign)
-                       assign[i] = hwc->idx;
-       }
-
-       /* slow path */
-       if (i != n) {
-               int gpmax = x86_pmu.num_counters;
-
-               /*
-                * Do not allow scheduling of more than half the available
-                * generic counters.
-                *
-                * This helps avoid counter starvation of sibling thread by
-                * ensuring at most half the counters cannot be in exclusive
-                * mode. There are no designated counters for the limits. Any
-                * N/2 counters can be used. This helps with events with
-                * specific counter constraints.
-                */
-               if (is_ht_workaround_enabled() && !cpuc->is_fake &&
-                   READ_ONCE(cpuc->excl_cntrs->exclusive_present))
-                       gpmax /= 2;
-
-               unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
-                                            wmax, gpmax, assign);
-       }
-
-       /*
-        * In case of success (unsched = 0), mark events as committed,
-        * so we do not put_constraint() in case new events are added
-        * and fail to be scheduled
-        *
-        * We invoke the lower level commit callback to lock the resource
-        *
-        * We do not need to do all of this in case we are called to
-        * validate an event group (assign == NULL)
-        */
-       if (!unsched && assign) {
-               for (i = 0; i < n; i++) {
-                       e = cpuc->event_list[i];
-                       e->hw.flags |= PERF_X86_EVENT_COMMITTED;
-                       if (x86_pmu.commit_scheduling)
-                               x86_pmu.commit_scheduling(cpuc, i, assign[i]);
-               }
-       } else {
-               for (i = 0; i < n; i++) {
-                       e = cpuc->event_list[i];
-                       /*
-                        * do not put_constraint() on committed events,
-                        * because they are good to go
-                        */
-                       if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
-                               continue;
-
-                       /*
-                        * release events that failed scheduling
-                        */
-                       if (x86_pmu.put_event_constraints)
-                               x86_pmu.put_event_constraints(cpuc, e);
-               }
-       }
-
-       if (x86_pmu.stop_scheduling)
-               x86_pmu.stop_scheduling(cpuc);
-
-       return unsched ? -EINVAL : 0;
-}
-
-/*
- * dogrp: true if must collect siblings events (group)
- * returns total number of events and error code
- */
-static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
-{
-       struct perf_event *event;
-       int n, max_count;
-
-       max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
-
-       /* current number of events already accepted */
-       n = cpuc->n_events;
-
-       if (is_x86_event(leader)) {
-               if (n >= max_count)
-                       return -EINVAL;
-               cpuc->event_list[n] = leader;
-               n++;
-       }
-       if (!dogrp)
-               return n;
-
-       list_for_each_entry(event, &leader->sibling_list, group_entry) {
-               if (!is_x86_event(event) ||
-                   event->state <= PERF_EVENT_STATE_OFF)
-                       continue;
-
-               if (n >= max_count)
-                       return -EINVAL;
-
-               cpuc->event_list[n] = event;
-               n++;
-       }
-       return n;
-}
-
-static inline void x86_assign_hw_event(struct perf_event *event,
-                               struct cpu_hw_events *cpuc, int i)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       hwc->idx = cpuc->assign[i];
-       hwc->last_cpu = smp_processor_id();
-       hwc->last_tag = ++cpuc->tags[i];
-
-       if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
-               hwc->config_base = 0;
-               hwc->event_base = 0;
-       } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
-               hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-               hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
-               hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
-       } else {
-               hwc->config_base = x86_pmu_config_addr(hwc->idx);
-               hwc->event_base  = x86_pmu_event_addr(hwc->idx);
-               hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
-       }
-}
-
-static inline int match_prev_assignment(struct hw_perf_event *hwc,
-                                       struct cpu_hw_events *cpuc,
-                                       int i)
-{
-       return hwc->idx == cpuc->assign[i] &&
-               hwc->last_cpu == smp_processor_id() &&
-               hwc->last_tag == cpuc->tags[i];
-}
-
-static void x86_pmu_start(struct perf_event *event, int flags);
-
-static void x86_pmu_enable(struct pmu *pmu)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct perf_event *event;
-       struct hw_perf_event *hwc;
-       int i, added = cpuc->n_added;
-
-       if (!x86_pmu_initialized())
-               return;
-
-       if (cpuc->enabled)
-               return;
-
-       if (cpuc->n_added) {
-               int n_running = cpuc->n_events - cpuc->n_added;
-               /*
-                * apply assignment obtained either from
-                * hw_perf_group_sched_in() or x86_pmu_enable()
-                *
-                * step1: save events moving to new counters
-                */
-               for (i = 0; i < n_running; i++) {
-                       event = cpuc->event_list[i];
-                       hwc = &event->hw;
-
-                       /*
-                        * we can avoid reprogramming counter if:
-                        * - assigned same counter as last time
-                        * - running on same CPU as last time
-                        * - no other event has used the counter since
-                        */
-                       if (hwc->idx == -1 ||
-                           match_prev_assignment(hwc, cpuc, i))
-                               continue;
-
-                       /*
-                        * Ensure we don't accidentally enable a stopped
-                        * counter simply because we rescheduled.
-                        */
-                       if (hwc->state & PERF_HES_STOPPED)
-                               hwc->state |= PERF_HES_ARCH;
-
-                       x86_pmu_stop(event, PERF_EF_UPDATE);
-               }
-
-               /*
-                * step2: reprogram moved events into new counters
-                */
-               for (i = 0; i < cpuc->n_events; i++) {
-                       event = cpuc->event_list[i];
-                       hwc = &event->hw;
-
-                       if (!match_prev_assignment(hwc, cpuc, i))
-                               x86_assign_hw_event(event, cpuc, i);
-                       else if (i < n_running)
-                               continue;
-
-                       if (hwc->state & PERF_HES_ARCH)
-                               continue;
-
-                       x86_pmu_start(event, PERF_EF_RELOAD);
-               }
-               cpuc->n_added = 0;
-               perf_events_lapic_init();
-       }
-
-       cpuc->enabled = 1;
-       barrier();
-
-       x86_pmu.enable_all(added);
-}
-
-static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
-
-/*
- * Set the next IRQ period, based on the hwc->period_left value.
- * To be called with the event disabled in hw:
- */
-int x86_perf_event_set_period(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       s64 left = local64_read(&hwc->period_left);
-       s64 period = hwc->sample_period;
-       int ret = 0, idx = hwc->idx;
-
-       if (idx == INTEL_PMC_IDX_FIXED_BTS)
-               return 0;
-
-       /*
-        * If we are way outside a reasonable range then just skip forward:
-        */
-       if (unlikely(left <= -period)) {
-               left = period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               ret = 1;
-       }
-
-       if (unlikely(left <= 0)) {
-               left += period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               ret = 1;
-       }
-       /*
-        * Quirk: certain CPUs don't like it if just 1 hw_event is left:
-        */
-       if (unlikely(left < 2))
-               left = 2;
-
-       if (left > x86_pmu.max_period)
-               left = x86_pmu.max_period;
-
-       if (x86_pmu.limit_period)
-               left = x86_pmu.limit_period(event, left);
-
-       per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
-
-       if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
-           local64_read(&hwc->prev_count) != (u64)-left) {
-               /*
-                * The hw event starts counting from this event offset,
-                * mark it to be able to extract future deltas:
-                */
-               local64_set(&hwc->prev_count, (u64)-left);
-
-               wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
-       }
-
-       /*
-        * Due to an erratum on certain CPUs we need
-        * a second write to be sure the register
-        * is updated properly
-        */
-       if (x86_pmu.perfctr_second_write) {
-               wrmsrl(hwc->event_base,
-                       (u64)(-left) & x86_pmu.cntval_mask);
-       }
-
-       perf_event_update_userpage(event);
-
-       return ret;
-}
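A user-space sketch (not part of the kernel sources above) of why the counter is programmed with -left: writing the two's complement of the remaining period leaves exactly 'left' increments before the counter wraps and the overflow PMI fires. The 48-bit counter width is an assumption for illustration.

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t cntval_mask = (1ULL << 48) - 1;       /* assumed 48-bit counter width */
        int64_t left = 100000;                         /* remaining sample period */
        uint64_t programmed = (uint64_t)(-left) & cntval_mask;

        /* after 'left' increments the counter wraps to zero and overflows */
        assert(((programmed + (uint64_t)left) & cntval_mask) == 0);
        return 0;
}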
-
-void x86_pmu_enable_event(struct perf_event *event)
-{
-       if (__this_cpu_read(cpu_hw_events.enabled))
-               __x86_pmu_enable_event(&event->hw,
-                                      ARCH_PERFMON_EVENTSEL_ENABLE);
-}
-
-/*
- * Add a single event to the PMU.
- *
- * The event is added to the group of enabled events
- * but only if it can be scheduled with existing events.
- */
-static int x86_pmu_add(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc;
-       int assign[X86_PMC_IDX_MAX];
-       int n, n0, ret;
-
-       hwc = &event->hw;
-
-       n0 = cpuc->n_events;
-       ret = n = collect_events(cpuc, event, false);
-       if (ret < 0)
-               goto out;
-
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-       if (!(flags & PERF_EF_START))
-               hwc->state |= PERF_HES_ARCH;
-
-       /*
-        * If group events scheduling transaction was started,
-        * skip the schedulability test here, it will be performed
-        * at commit time (->commit_txn) as a whole.
-        */
-       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-               goto done_collect;
-
-       ret = x86_pmu.schedule_events(cpuc, n, assign);
-       if (ret)
-               goto out;
-       /*
-        * copy the new assignment; now we know it is possible
-        * and it will be used by hw_perf_enable()
-        */
-       memcpy(cpuc->assign, assign, n*sizeof(int));
-
-done_collect:
-       /*
-        * Commit the collect_events() state. See x86_pmu_del() and
-        * x86_pmu_*_txn().
-        */
-       cpuc->n_events = n;
-       cpuc->n_added += n - n0;
-       cpuc->n_txn += n - n0;
-
-       ret = 0;
-out:
-       return ret;
-}
-
-static void x86_pmu_start(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx = event->hw.idx;
-
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
-       if (WARN_ON_ONCE(idx == -1))
-               return;
-
-       if (flags & PERF_EF_RELOAD) {
-               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-               x86_perf_event_set_period(event);
-       }
-
-       event->hw.state = 0;
-
-       cpuc->events[idx] = event;
-       __set_bit(idx, cpuc->active_mask);
-       __set_bit(idx, cpuc->running);
-       x86_pmu.enable(event);
-       perf_event_update_userpage(event);
-}
-
-void perf_event_print_debug(void)
-{
-       u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
-       u64 pebs, debugctl;
-       struct cpu_hw_events *cpuc;
-       unsigned long flags;
-       int cpu, idx;
-
-       if (!x86_pmu.num_counters)
-               return;
-
-       local_irq_save(flags);
-
-       cpu = smp_processor_id();
-       cpuc = &per_cpu(cpu_hw_events, cpu);
-
-       if (x86_pmu.version >= 2) {
-               rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
-               rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-               rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
-               rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
-
-               pr_info("\n");
-               pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
-               pr_info("CPU#%d: status:     %016llx\n", cpu, status);
-               pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
-               pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
-               if (x86_pmu.pebs_constraints) {
-                       rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
-                       pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
-               }
-               if (x86_pmu.lbr_nr) {
-                       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-                       pr_info("CPU#%d: debugctl:   %016llx\n", cpu, debugctl);
-               }
-       }
-       pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
-               rdmsrl(x86_pmu_event_addr(idx), pmc_count);
-
-               prev_left = per_cpu(pmc_prev_left[idx], cpu);
-
-               pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
-                       cpu, idx, pmc_ctrl);
-               pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
-                       cpu, idx, pmc_count);
-               pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
-                       cpu, idx, prev_left);
-       }
-       for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
-               rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
-
-               pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
-                       cpu, idx, pmc_count);
-       }
-       local_irq_restore(flags);
-}
-
-void x86_pmu_stop(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
-               x86_pmu.disable(event);
-               cpuc->events[hwc->idx] = NULL;
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-       }
-
-       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               /*
-                * Drain the remaining delta count out of an event
-                * that we are disabling:
-                */
-               x86_perf_event_update(event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-}
-
-static void x86_pmu_del(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int i;
-
-       /*
-        * event is descheduled
-        */
-       event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
-
-       /*
-        * If we're called during a txn, we don't need to do anything.
-        * The events never got scheduled and ->cancel_txn will truncate
-        * the event_list.
-        *
-        * XXX assumes any ->del() called during a TXN will only be on
-        * an event added during that same TXN.
-        */
-       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-               return;
-
-       /*
-        * Not a TXN, therefore cleanup properly.
-        */
-       x86_pmu_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < cpuc->n_events; i++) {
-               if (event == cpuc->event_list[i])
-                       break;
-       }
-
-       if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
-               return;
-
-       /* If we have a newly added event, make sure to decrease n_added. */
-       if (i >= cpuc->n_events - cpuc->n_added)
-               --cpuc->n_added;
-
-       if (x86_pmu.put_event_constraints)
-               x86_pmu.put_event_constraints(cpuc, event);
-
-       /* Delete the array entry. */
-       while (++i < cpuc->n_events) {
-               cpuc->event_list[i-1] = cpuc->event_list[i];
-               cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
-       }
-       --cpuc->n_events;
-
-       perf_event_update_userpage(event);
-}
-
-int x86_pmu_handle_irq(struct pt_regs *regs)
-{
-       struct perf_sample_data data;
-       struct cpu_hw_events *cpuc;
-       struct perf_event *event;
-       int idx, handled = 0;
-       u64 val;
-
-       cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       /*
-        * Some chipsets need to unmask the LVTPC in a particular spot
-        * inside the nmi handler.  As a result, the unmasking was pushed
-        * into all the nmi handlers.
-        *
-        * This generic handler doesn't seem to have any issues where the
-        * unmasking occurs so it was left at the top.
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               if (!test_bit(idx, cpuc->active_mask)) {
-                       /*
-                        * Though we deactivated the counter, some CPUs
-                        * might still deliver spurious interrupts that were
-                        * already in flight. Catch them:
-                        */
-                       if (__test_and_clear_bit(idx, cpuc->running))
-                               handled++;
-                       continue;
-               }
-
-               event = cpuc->events[idx];
-
-               val = x86_perf_event_update(event);
-               if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
-                       continue;
-
-               /*
-                * event overflow
-                */
-               handled++;
-               perf_sample_data_init(&data, 0, event->hw.last_period);
-
-               if (!x86_perf_event_set_period(event))
-                       continue;
-
-               if (perf_event_overflow(event, &data, regs))
-                       x86_pmu_stop(event, 0);
-       }
-
-       if (handled)
-               inc_irq_stat(apic_perf_irqs);
-
-       return handled;
-}
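A small sketch (not from this diff) of the overflow test used in the loop above: a counter that was programmed with -left keeps its top (sign) bit set until it wraps, so a raw value with the sign bit clear means the counter overflowed since the last PMI.

#include <stdint.h>

/* mirrors the 'val & (1ULL << (cntval_bits - 1))' check above */
static int counter_overflowed(uint64_t raw, int cntval_bits)
{
        return !(raw & (1ULL << (cntval_bits - 1)));
}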
-
-void perf_events_lapic_init(void)
-{
-       if (!x86_pmu.apic || !x86_pmu_initialized())
-               return;
-
-       /*
-        * Always use NMI for PMU
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-}
-
-static int
-perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
-{
-       u64 start_clock;
-       u64 finish_clock;
-       int ret;
-
-       /*
-        * All PMUs/events that share this PMI handler should make sure to
-        * increment active_events for their events.
-        */
-       if (!atomic_read(&active_events))
-               return NMI_DONE;
-
-       start_clock = sched_clock();
-       ret = x86_pmu.handle_irq(regs);
-       finish_clock = sched_clock();
-
-       perf_sample_event_took(finish_clock - start_clock);
-
-       return ret;
-}
-NOKPROBE_SYMBOL(perf_event_nmi_handler);
-
-struct event_constraint emptyconstraint;
-struct event_constraint unconstrained;
-
-static int
-x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       int i, ret = NOTIFY_OK;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
-                       cpuc->kfree_on_online[i] = NULL;
-               if (x86_pmu.cpu_prepare)
-                       ret = x86_pmu.cpu_prepare(cpu);
-               break;
-
-       case CPU_STARTING:
-               if (x86_pmu.cpu_starting)
-                       x86_pmu.cpu_starting(cpu);
-               break;
-
-       case CPU_ONLINE:
-               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
-                       kfree(cpuc->kfree_on_online[i]);
-                       cpuc->kfree_on_online[i] = NULL;
-               }
-               break;
-
-       case CPU_DYING:
-               if (x86_pmu.cpu_dying)
-                       x86_pmu.cpu_dying(cpu);
-               break;
-
-       case CPU_UP_CANCELED:
-       case CPU_DEAD:
-               if (x86_pmu.cpu_dead)
-                       x86_pmu.cpu_dead(cpu);
-               break;
-
-       default:
-               break;
-       }
-
-       return ret;
-}
-
-static void __init pmu_check_apic(void)
-{
-       if (cpu_has_apic)
-               return;
-
-       x86_pmu.apic = 0;
-       pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
-       pr_info("no hardware sampling interrupt available.\n");
-
-       /*
-        * If we have a PMU initialized but no APIC
-        * interrupts, we cannot sample hardware
-        * events (user-space has to fall back and
-        * sample via a hrtimer based software event):
-        */
-       pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
-}
-
-static struct attribute_group x86_pmu_format_group = {
-       .name = "format",
-       .attrs = NULL,
-};
-
-/*
- * Remove all undefined events (x86_pmu.event_map(id) == 0)
- * out of events_attr attributes.
- */
-static void __init filter_events(struct attribute **attrs)
-{
-       struct device_attribute *d;
-       struct perf_pmu_events_attr *pmu_attr;
-       int offset = 0;
-       int i, j;
-
-       for (i = 0; attrs[i]; i++) {
-               d = (struct device_attribute *)attrs[i];
-               pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
-               /* str trumps id */
-               if (pmu_attr->event_str)
-                       continue;
-               if (x86_pmu.event_map(i + offset))
-                       continue;
-
-               for (j = i; attrs[j]; j++)
-                       attrs[j] = attrs[j + 1];
-
-               /* Check the shifted attr. */
-               i--;
-
-               /*
-                * event_map() is index based; the attrs array is organized
-                * by increasing event index. If we shift the events, then
-                * we need to compensate for the event_map(), otherwise
-                * we are looking up the wrong event in the map
-                */
-               offset++;
-       }
-}
-
-/* Merge two pointer arrays */
-__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
-{
-       struct attribute **new;
-       int j, i;
-
-       for (j = 0; a[j]; j++)
-               ;
-       for (i = 0; b[i]; i++)
-               j++;
-       j++;
-
-       new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
-       if (!new)
-               return NULL;
-
-       j = 0;
-       for (i = 0; a[i]; i++)
-               new[j++] = a[i];
-       for (i = 0; b[i]; i++)
-               new[j++] = b[i];
-       new[j] = NULL;
-
-       return new;
-}
-
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
-                         char *page)
-{
-       struct perf_pmu_events_attr *pmu_attr = \
-               container_of(attr, struct perf_pmu_events_attr, attr);
-       u64 config = x86_pmu.event_map(pmu_attr->id);
-
-       /* string trumps id */
-       if (pmu_attr->event_str)
-               return sprintf(page, "%s", pmu_attr->event_str);
-
-       return x86_pmu.events_sysfs_show(page, config);
-}
-
-EVENT_ATTR(cpu-cycles,                 CPU_CYCLES              );
-EVENT_ATTR(instructions,               INSTRUCTIONS            );
-EVENT_ATTR(cache-references,           CACHE_REFERENCES        );
-EVENT_ATTR(cache-misses,               CACHE_MISSES            );
-EVENT_ATTR(branch-instructions,                BRANCH_INSTRUCTIONS     );
-EVENT_ATTR(branch-misses,              BRANCH_MISSES           );
-EVENT_ATTR(bus-cycles,                 BUS_CYCLES              );
-EVENT_ATTR(stalled-cycles-frontend,    STALLED_CYCLES_FRONTEND );
-EVENT_ATTR(stalled-cycles-backend,     STALLED_CYCLES_BACKEND  );
-EVENT_ATTR(ref-cycles,                 REF_CPU_CYCLES          );
-
-static struct attribute *empty_attrs;
-
-static struct attribute *events_attr[] = {
-       EVENT_PTR(CPU_CYCLES),
-       EVENT_PTR(INSTRUCTIONS),
-       EVENT_PTR(CACHE_REFERENCES),
-       EVENT_PTR(CACHE_MISSES),
-       EVENT_PTR(BRANCH_INSTRUCTIONS),
-       EVENT_PTR(BRANCH_MISSES),
-       EVENT_PTR(BUS_CYCLES),
-       EVENT_PTR(STALLED_CYCLES_FRONTEND),
-       EVENT_PTR(STALLED_CYCLES_BACKEND),
-       EVENT_PTR(REF_CPU_CYCLES),
-       NULL,
-};
-
-static struct attribute_group x86_pmu_events_group = {
-       .name = "events",
-       .attrs = events_attr,
-};
-
-ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
-{
-       u64 umask  = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-       u64 cmask  = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
-       bool edge  = (config & ARCH_PERFMON_EVENTSEL_EDGE);
-       bool pc    = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
-       bool any   = (config & ARCH_PERFMON_EVENTSEL_ANY);
-       bool inv   = (config & ARCH_PERFMON_EVENTSEL_INV);
-       ssize_t ret;
-
-       /*
-        * We have a whole page to spend and just a little data
-        * to write, so we can safely use sprintf.
-        */
-       ret = sprintf(page, "event=0x%02llx", event);
-
-       if (umask)
-               ret += sprintf(page + ret, ",umask=0x%02llx", umask);
-
-       if (edge)
-               ret += sprintf(page + ret, ",edge");
-
-       if (pc)
-               ret += sprintf(page + ret, ",pc");
-
-       if (any)
-               ret += sprintf(page + ret, ",any");
-
-       if (inv)
-               ret += sprintf(page + ret, ",inv");
-
-       if (cmask)
-               ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
-
-       ret += sprintf(page + ret, "\n");
-
-       return ret;
-}
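A user-space sketch (not part of this diff) of the kind of string the function above emits, decoding the architectural EVENTSEL fields (event in bits 0-7, umask in bits 8-15, cmask in bits 24-31) from a hypothetical raw config value.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t config = 0x01d1;                      /* hypothetical raw config */
        uint64_t event = config & 0xff;
        uint64_t umask = (config >> 8) & 0xff;
        uint64_t cmask = (config >> 24) & 0xff;

        printf("event=0x%02llx", (unsigned long long)event);
        if (umask)
                printf(",umask=0x%02llx", (unsigned long long)umask);
        if (cmask)
                printf(",cmask=0x%02llx", (unsigned long long)cmask);
        printf("\n");                                  /* prints: event=0xd1,umask=0x01 */
        return 0;
}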
-
-static int __init init_hw_perf_events(void)
-{
-       struct x86_pmu_quirk *quirk;
-       int err;
-
-       pr_info("Performance Events: ");
-
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_INTEL:
-               err = intel_pmu_init();
-               break;
-       case X86_VENDOR_AMD:
-               err = amd_pmu_init();
-               break;
-       default:
-               err = -ENOTSUPP;
-       }
-       if (err != 0) {
-               pr_cont("no PMU driver, software events only.\n");
-               return 0;
-       }
-
-       pmu_check_apic();
-
-       /* sanity check that the hardware exists or is emulated */
-       if (!check_hw_exists())
-               return 0;
-
-       pr_cont("%s PMU driver.\n", x86_pmu.name);
-
-       x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
-
-       for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
-               quirk->func();
-
-       if (!x86_pmu.intel_ctrl)
-               x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-
-       perf_events_lapic_init();
-       register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
-
-       unconstrained = (struct event_constraint)
-               __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-                                  0, x86_pmu.num_counters, 0, 0);
-
-       x86_pmu_format_group.attrs = x86_pmu.format_attrs;
-
-       if (x86_pmu.event_attrs)
-               x86_pmu_events_group.attrs = x86_pmu.event_attrs;
-
-       if (!x86_pmu.events_sysfs_show)
-               x86_pmu_events_group.attrs = &empty_attrs;
-       else
-               filter_events(x86_pmu_events_group.attrs);
-
-       if (x86_pmu.cpu_events) {
-               struct attribute **tmp;
-
-               tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
-               if (!WARN_ON(!tmp))
-                       x86_pmu_events_group.attrs = tmp;
-       }
-
-       pr_info("... version:                %d\n",     x86_pmu.version);
-       pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
-       pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
-       pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
-       pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
-       pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
-       pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
-
-       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
-       perf_cpu_notifier(x86_pmu_notifier);
-
-       return 0;
-}
-early_initcall(init_hw_perf_events);
-
-static inline void x86_pmu_read(struct perf_event *event)
-{
-       x86_perf_event_update(event);
-}
-
-/*
- * Start group events scheduling transaction
- * Set the flag to make pmu::enable() not perform the
- * schedulability test, it will be performed at commit time
- *
- * We only support PERF_PMU_TXN_ADD transactions. Save the
- * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
- * transactions.
- */
-static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       WARN_ON_ONCE(cpuc->txn_flags);          /* txn already in flight */
-
-       cpuc->txn_flags = txn_flags;
-       if (txn_flags & ~PERF_PMU_TXN_ADD)
-               return;
-
-       perf_pmu_disable(pmu);
-       __this_cpu_write(cpu_hw_events.n_txn, 0);
-}
-
-/*
- * Stop group events scheduling transaction
- * Clear the flag and pmu::enable() will perform the
- * schedulability test.
- */
-static void x86_pmu_cancel_txn(struct pmu *pmu)
-{
-       unsigned int txn_flags;
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
-
-       txn_flags = cpuc->txn_flags;
-       cpuc->txn_flags = 0;
-       if (txn_flags & ~PERF_PMU_TXN_ADD)
-               return;
-
-       /*
-        * Truncate collected array by the number of events added in this
-        * transaction. See x86_pmu_add() and x86_pmu_*_txn().
-        */
-       __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
-       __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
-       perf_pmu_enable(pmu);
-}
-
-/*
- * Commit group events scheduling transaction
- * Perform the group schedulability test as a whole
- * Return 0 if success
- *
- * Does not cancel the transaction on failure; expects the caller to do this.
- */
-static int x86_pmu_commit_txn(struct pmu *pmu)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int assign[X86_PMC_IDX_MAX];
-       int n, ret;
-
-       WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
-
-       if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
-               cpuc->txn_flags = 0;
-               return 0;
-       }
-
-       n = cpuc->n_events;
-
-       if (!x86_pmu_initialized())
-               return -EAGAIN;
-
-       ret = x86_pmu.schedule_events(cpuc, n, assign);
-       if (ret)
-               return ret;
-
-       /*
-        * copy the new assignment; now we know it is possible
-        * and it will be used by hw_perf_enable()
-        */
-       memcpy(cpuc->assign, assign, n*sizeof(int));
-
-       cpuc->txn_flags = 0;
-       perf_pmu_enable(pmu);
-       return 0;
-}
-/*
- * a fake_cpuc is used to validate event groups. Due to
- * the extra reg logic, we need to also allocate a fake
- * per_core and per_cpu structure. Otherwise, group events
- * using extra reg may conflict without the kernel being
- * able to catch this when the last event gets added to
- * the group.
- */
-static void free_fake_cpuc(struct cpu_hw_events *cpuc)
-{
-       kfree(cpuc->shared_regs);
-       kfree(cpuc);
-}
-
-static struct cpu_hw_events *allocate_fake_cpuc(void)
-{
-       struct cpu_hw_events *cpuc;
-       int cpu = raw_smp_processor_id();
-
-       cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
-       if (!cpuc)
-               return ERR_PTR(-ENOMEM);
-
-       /* only needed if we have extra_regs */
-       if (x86_pmu.extra_regs) {
-               cpuc->shared_regs = allocate_shared_regs(cpu);
-               if (!cpuc->shared_regs)
-                       goto error;
-       }
-       cpuc->is_fake = 1;
-       return cpuc;
-error:
-       free_fake_cpuc(cpuc);
-       return ERR_PTR(-ENOMEM);
-}
-
-/*
- * validate that we can schedule this event
- */
-static int validate_event(struct perf_event *event)
-{
-       struct cpu_hw_events *fake_cpuc;
-       struct event_constraint *c;
-       int ret = 0;
-
-       fake_cpuc = allocate_fake_cpuc();
-       if (IS_ERR(fake_cpuc))
-               return PTR_ERR(fake_cpuc);
-
-       c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
-
-       if (!c || !c->weight)
-               ret = -EINVAL;
-
-       if (x86_pmu.put_event_constraints)
-               x86_pmu.put_event_constraints(fake_cpuc, event);
-
-       free_fake_cpuc(fake_cpuc);
-
-       return ret;
-}
-
-/*
- * validate a single event group
- *
- * validation include:
- *     - check events are compatible with each other
- *     - events do not compete for the same counter
- *     - number of events <= number of counters
- *
- * validation ensures the group can be loaded onto the
- * PMU if it was the only group available.
- */
-static int validate_group(struct perf_event *event)
-{
-       struct perf_event *leader = event->group_leader;
-       struct cpu_hw_events *fake_cpuc;
-       int ret = -EINVAL, n;
-
-       fake_cpuc = allocate_fake_cpuc();
-       if (IS_ERR(fake_cpuc))
-               return PTR_ERR(fake_cpuc);
-       /*
-        * the event is not yet connected with its
-        * siblings; therefore we must first collect
-        * existing siblings, then add the new event
-        * before we can simulate the scheduling
-        */
-       n = collect_events(fake_cpuc, leader, true);
-       if (n < 0)
-               goto out;
-
-       fake_cpuc->n_events = n;
-       n = collect_events(fake_cpuc, event, false);
-       if (n < 0)
-               goto out;
-
-       fake_cpuc->n_events = n;
-
-       ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
-
-out:
-       free_fake_cpuc(fake_cpuc);
-       return ret;
-}
-
-static int x86_pmu_event_init(struct perf_event *event)
-{
-       struct pmu *tmp;
-       int err;
-
-       switch (event->attr.type) {
-       case PERF_TYPE_RAW:
-       case PERF_TYPE_HARDWARE:
-       case PERF_TYPE_HW_CACHE:
-               break;
-
-       default:
-               return -ENOENT;
-       }
-
-       err = __x86_pmu_event_init(event);
-       if (!err) {
-               /*
-                * we temporarily connect the event to its pmu
-                * such that validate_group() can classify
-                * it as an x86 event using is_x86_event()
-                */
-               tmp = event->pmu;
-               event->pmu = &pmu;
-
-               if (event->group_leader != event)
-                       err = validate_group(event);
-               else
-                       err = validate_event(event);
-
-               event->pmu = tmp;
-       }
-       if (err) {
-               if (event->destroy)
-                       event->destroy(event);
-       }
-
-       if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
-               event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
-
-       return err;
-}
-
-static void refresh_pce(void *ignored)
-{
-       if (current->mm)
-               load_mm_cr4(current->mm);
-}
-
-static void x86_pmu_event_mapped(struct perf_event *event)
-{
-       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
-               return;
-
-       if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
-               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
-}
-
-static void x86_pmu_event_unmapped(struct perf_event *event)
-{
-       if (!current->mm)
-               return;
-
-       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
-               return;
-
-       if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
-               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
-}
-
-static int x86_pmu_event_idx(struct perf_event *event)
-{
-       int idx = event->hw.idx;
-
-       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
-               return 0;
-
-       if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
-               idx -= INTEL_PMC_IDX_FIXED;
-               idx |= 1 << 30;
-       }
-
-       return idx + 1;
-}
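A user-space sketch (not from this diff) of how the index returned above is consumed: the mmap'ed perf page exposes it as 'index', zero means RDPMC is not permitted, and the RDPMC instruction takes index - 1 in ECX, with bit 30 selecting the fixed-counter bank.

#include <stdint.h>

static inline uint64_t rdpmc(uint32_t ecx)
{
        uint32_t lo, hi;

        asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (ecx));
        return ((uint64_t)hi << 32) | lo;
}

/* 'index' as published in struct perf_event_mmap_page; 0 means not allowed */
static uint64_t read_counter(uint32_t index)
{
        if (!index)
                return 0;
        return rdpmc(index - 1);
}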
-
-static ssize_t get_attr_rdpmc(struct device *cdev,
-                             struct device_attribute *attr,
-                             char *buf)
-{
-       return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
-}
-
-static ssize_t set_attr_rdpmc(struct device *cdev,
-                             struct device_attribute *attr,
-                             const char *buf, size_t count)
-{
-       unsigned long val;
-       ssize_t ret;
-
-       ret = kstrtoul(buf, 0, &val);
-       if (ret)
-               return ret;
-
-       if (val > 2)
-               return -EINVAL;
-
-       if (x86_pmu.attr_rdpmc_broken)
-               return -ENOTSUPP;
-
-       if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
-               /*
-                * Changing into or out of always available, aka
-                * perf-event-bypassing mode.  This path is extremely slow,
-                * but only root can trigger it, so it's okay.
-                */
-               if (val == 2)
-                       static_key_slow_inc(&rdpmc_always_available);
-               else
-                       static_key_slow_dec(&rdpmc_always_available);
-               on_each_cpu(refresh_pce, NULL, 1);
-       }
-
-       x86_pmu.attr_rdpmc = val;
-
-       return count;
-}
-
-static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
-
-static struct attribute *x86_pmu_attrs[] = {
-       &dev_attr_rdpmc.attr,
-       NULL,
-};
-
-static struct attribute_group x86_pmu_attr_group = {
-       .attrs = x86_pmu_attrs,
-};
-
-static const struct attribute_group *x86_pmu_attr_groups[] = {
-       &x86_pmu_attr_group,
-       &x86_pmu_format_group,
-       &x86_pmu_events_group,
-       NULL,
-};
-
-static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-       if (x86_pmu.sched_task)
-               x86_pmu.sched_task(ctx, sched_in);
-}
-
-void perf_check_microcode(void)
-{
-       if (x86_pmu.check_microcode)
-               x86_pmu.check_microcode();
-}
-EXPORT_SYMBOL_GPL(perf_check_microcode);
-
-static struct pmu pmu = {
-       .pmu_enable             = x86_pmu_enable,
-       .pmu_disable            = x86_pmu_disable,
-
-       .attr_groups            = x86_pmu_attr_groups,
-
-       .event_init             = x86_pmu_event_init,
-
-       .event_mapped           = x86_pmu_event_mapped,
-       .event_unmapped         = x86_pmu_event_unmapped,
-
-       .add                    = x86_pmu_add,
-       .del                    = x86_pmu_del,
-       .start                  = x86_pmu_start,
-       .stop                   = x86_pmu_stop,
-       .read                   = x86_pmu_read,
-
-       .start_txn              = x86_pmu_start_txn,
-       .cancel_txn             = x86_pmu_cancel_txn,
-       .commit_txn             = x86_pmu_commit_txn,
-
-       .event_idx              = x86_pmu_event_idx,
-       .sched_task             = x86_pmu_sched_task,
-       .task_ctx_size          = sizeof(struct x86_perf_task_context),
-};
-
-void arch_perf_update_userpage(struct perf_event *event,
-                              struct perf_event_mmap_page *userpg, u64 now)
-{
-       struct cyc2ns_data *data;
-
-       userpg->cap_user_time = 0;
-       userpg->cap_user_time_zero = 0;
-       userpg->cap_user_rdpmc =
-               !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
-       userpg->pmc_width = x86_pmu.cntval_bits;
-
-       if (!sched_clock_stable())
-               return;
-
-       data = cyc2ns_read_begin();
-
-       /*
-        * Internal timekeeping for enabled/running/stopped times
-        * is always in the local_clock domain.
-        */
-       userpg->cap_user_time = 1;
-       userpg->time_mult = data->cyc2ns_mul;
-       userpg->time_shift = data->cyc2ns_shift;
-       userpg->time_offset = data->cyc2ns_offset - now;
-
-       /*
-        * cap_user_time_zero doesn't make sense when we're using a different
-        * time base for the records.
-        */
-       if (event->clock == &local_clock) {
-               userpg->cap_user_time_zero = 1;
-               userpg->time_zero = data->cyc2ns_offset;
-       }
-
-       cyc2ns_read_end(data);
-}
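A sketch (not part of this diff) of how user space is expected to use the time_mult/time_shift/time_offset values filled in above to turn a TSC delta into nanoseconds; it follows the recipe documented for the perf mmap page, split into quotient and remainder to avoid overflowing the multiplication.

#include <stdint.h>

static uint64_t cyc_to_ns(uint64_t cyc, uint32_t time_mult, uint16_t time_shift,
                          uint64_t time_offset)
{
        uint64_t quot = cyc >> time_shift;
        uint64_t rem  = cyc & (((uint64_t)1 << time_shift) - 1);

        return time_offset + quot * time_mult + ((rem * time_mult) >> time_shift);
}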
-
-/*
- * callchain support
- */
-
-static int backtrace_stack(void *data, char *name)
-{
-       return 0;
-}
-
-static void backtrace_address(void *data, unsigned long addr, int reliable)
-{
-       struct perf_callchain_entry *entry = data;
-
-       perf_callchain_store(entry, addr);
-}
-
-static const struct stacktrace_ops backtrace_ops = {
-       .stack                  = backtrace_stack,
-       .address                = backtrace_address,
-       .walk_stack             = print_context_stack_bp,
-};
-
-void
-perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
-{
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-               /* TODO: We don't support guest OS callchains yet */
-               return;
-       }
-
-       perf_callchain_store(entry, regs->ip);
-
-       dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
-}
-
-static inline int
-valid_user_frame(const void __user *fp, unsigned long size)
-{
-       return (__range_not_ok(fp, size, TASK_SIZE) == 0);
-}
-
-static unsigned long get_segment_base(unsigned int segment)
-{
-       struct desc_struct *desc;
-       int idx = segment >> 3;
-
-       if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-               struct ldt_struct *ldt;
-
-               if (idx > LDT_ENTRIES)
-                       return 0;
-
-               /* IRQs are off, so this synchronizes with smp_store_release */
-               ldt = lockless_dereference(current->active_mm->context.ldt);
-               if (!ldt || idx > ldt->size)
-                       return 0;
-
-               desc = &ldt->entries[idx];
-#else
-               return 0;
-#endif
-       } else {
-               if (idx > GDT_ENTRIES)
-                       return 0;
-
-               desc = raw_cpu_ptr(gdt_page.gdt) + idx;
-       }
-
-       return get_desc_base(desc);
-}
-
-#ifdef CONFIG_IA32_EMULATION
-
-#include <asm/compat.h>
-
-static inline int
-perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-       /* 32-bit process in 64-bit kernel. */
-       unsigned long ss_base, cs_base;
-       struct stack_frame_ia32 frame;
-       const void __user *fp;
-
-       if (!test_thread_flag(TIF_IA32))
-               return 0;
-
-       cs_base = get_segment_base(regs->cs);
-       ss_base = get_segment_base(regs->ss);
-
-       fp = compat_ptr(ss_base + regs->bp);
-       pagefault_disable();
-       while (entry->nr < PERF_MAX_STACK_DEPTH) {
-               unsigned long bytes;
-               frame.next_frame     = 0;
-               frame.return_address = 0;
-
-               if (!access_ok(VERIFY_READ, fp, 8))
-                       break;
-
-               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
-               if (bytes != 0)
-                       break;
-               bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
-               if (bytes != 0)
-                       break;
-
-               if (!valid_user_frame(fp, sizeof(frame)))
-                       break;
-
-               perf_callchain_store(entry, cs_base + frame.return_address);
-               fp = compat_ptr(ss_base + frame.next_frame);
-       }
-       pagefault_enable();
-       return 1;
-}
-#else
-static inline int
-perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-    return 0;
-}
-#endif
-
-void
-perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
-{
-       struct stack_frame frame;
-       const void __user *fp;
-
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-               /* TODO: We don't support guest OS callchains yet */
-               return;
-       }
-
-       /*
-        * We don't know what to do with VM86 stacks; ignore them for now.
-        */
-       if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
-               return;
-
-       fp = (void __user *)regs->bp;
-
-       perf_callchain_store(entry, regs->ip);
-
-       if (!current->mm)
-               return;
-
-       if (perf_callchain_user32(regs, entry))
-               return;
-
-       pagefault_disable();
-       while (entry->nr < PERF_MAX_STACK_DEPTH) {
-               unsigned long bytes;
-               frame.next_frame             = NULL;
-               frame.return_address = 0;
-
-               if (!access_ok(VERIFY_READ, fp, 16))
-                       break;
-
-               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
-               if (bytes != 0)
-                       break;
-               bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
-               if (bytes != 0)
-                       break;
-
-               if (!valid_user_frame(fp, sizeof(frame)))
-                       break;
-
-               perf_callchain_store(entry, frame.return_address);
-               fp = (void __user *)frame.next_frame;
-       }
-       pagefault_enable();
-}
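A sketch (not from this diff) of the user stack layout the walker above assumes when the program is built with frame pointers: each frame begins at the saved frame pointer, with the return address immediately above it, which is why 8 bytes are read at fp and another 8 at fp + 8.

/* hypothetical name, for illustration only */
struct user_stack_frame {
        struct user_stack_frame *next_frame;      /* saved caller %rbp */
        unsigned long            return_address;  /* address to return to in the caller */
};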
-
-/*
- * Deal with code segment offsets for the various execution modes:
- *
- *   VM86 - the good olde 16 bit days, where the linear address is
- *          20 bits and we use regs->ip + 0x10 * regs->cs.
- *
- *   IA32 - Where we need to look at GDT/LDT segment descriptor tables
- *          to figure out what the 32bit base address is.
- *
- *    X32 - has TIF_X32 set, but is running in x86_64
- *
- * X86_64 - CS,DS,SS,ES are all zero based.
- */
-static unsigned long code_segment_base(struct pt_regs *regs)
-{
-       /*
-        * For IA32 we look at the GDT/LDT segment base to convert the
-        * effective IP to a linear address.
-        */
-
-#ifdef CONFIG_X86_32
-       /*
-        * If we are in VM86 mode, add the segment offset to convert to a
-        * linear address.
-        */
-       if (regs->flags & X86_VM_MASK)
-               return 0x10 * regs->cs;
-
-       if (user_mode(regs) && regs->cs != __USER_CS)
-               return get_segment_base(regs->cs);
-#else
-       if (user_mode(regs) && !user_64bit_mode(regs) &&
-           regs->cs != __USER32_CS)
-               return get_segment_base(regs->cs);
-#endif
-       return 0;
-}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-               return perf_guest_cbs->get_guest_ip();
-
-       return regs->ip + code_segment_base(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
-       int misc = 0;
-
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-               if (perf_guest_cbs->is_user_mode())
-                       misc |= PERF_RECORD_MISC_GUEST_USER;
-               else
-                       misc |= PERF_RECORD_MISC_GUEST_KERNEL;
-       } else {
-               if (user_mode(regs))
-                       misc |= PERF_RECORD_MISC_USER;
-               else
-                       misc |= PERF_RECORD_MISC_KERNEL;
-       }
-
-       if (regs->flags & PERF_EFLAGS_EXACT)
-               misc |= PERF_RECORD_MISC_EXACT_IP;
-
-       return misc;
-}
-
-void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
-{
-       cap->version            = x86_pmu.version;
-       cap->num_counters_gp    = x86_pmu.num_counters;
-       cap->num_counters_fixed = x86_pmu.num_counters_fixed;
-       cap->bit_width_gp       = x86_pmu.cntval_bits;
-       cap->bit_width_fixed    = x86_pmu.cntval_bits;
-       cap->events_mask        = (unsigned int)x86_pmu.events_maskl;
-       cap->events_mask_len    = x86_pmu.events_mask_len;
-}
-EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
deleted file mode 100644 (file)
index 7bb61e3..0000000
+++ /dev/null
@@ -1,955 +0,0 @@
-/*
- * Performance events x86 architecture header
- *
- *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2009 Jaswinder Singh Rajput
- *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- *  Copyright (C) 2009 Google, Inc., Stephane Eranian
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-
-/* To enable MSR tracing please use the generic trace points. */
-
-/*
- *          |   NHM/WSM    |      SNB     |
- * register -------------------------------
- *          |  HT  | no HT |  HT  | no HT |
- *-----------------------------------------
- * offcore  | core | core  | cpu  | core  |
- * lbr_sel  | core | core  | cpu  | core  |
- * ld_lat   | cpu  | core  | cpu  | core  |
- *-----------------------------------------
- *
- * Given that there is a small number of shared regs,
- * we can pre-allocate their slot in the per-cpu
- * per-core reg tables.
- */
-enum extra_reg_type {
-       EXTRA_REG_NONE  = -1,   /* not used */
-
-       EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
-       EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
-       EXTRA_REG_LBR   = 2,    /* lbr_select */
-       EXTRA_REG_LDLAT = 3,    /* ld_lat_threshold */
-       EXTRA_REG_FE    = 4,    /* fe_* */
-
-       EXTRA_REG_MAX           /* number of entries needed */
-};
-
-struct event_constraint {
-       union {
-               unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-               u64             idxmsk64;
-       };
-       u64     code;
-       u64     cmask;
-       int     weight;
-       int     overlap;
-       int     flags;
-};
-/*
- * struct hw_perf_event.flags flags
- */
-#define PERF_X86_EVENT_PEBS_LDLAT      0x0001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST         0x0002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW     0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED       0x0008 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW     0x0010 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW     0x0020 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL            0x0040 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC         0x0080 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED   0x0100 /* grant rdpmc permission */
-#define PERF_X86_EVENT_EXCL_ACCT       0x0200 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD     0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_FREERUNNING     0x0800 /* use freerunning PEBS */
-
-
-struct amd_nb {
-       int nb_id;  /* NorthBridge id */
-       int refcnt; /* reference count */
-       struct perf_event *owners[X86_PMC_IDX_MAX];
-       struct event_constraint event_constraints[X86_PMC_IDX_MAX];
-};
-
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS                8
-
-/*
- * Flags PEBS can handle without a PMI.
- *
- * TID can only be handled by flushing at context switch.
- *
- */
-#define PEBS_FREERUNNING_FLAGS \
-       (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
-       PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
-       PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
-       PERF_SAMPLE_TRANSACTION)
-
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-       u64     bts_buffer_base;
-       u64     bts_index;
-       u64     bts_absolute_maximum;
-       u64     bts_interrupt_threshold;
-       u64     pebs_buffer_base;
-       u64     pebs_index;
-       u64     pebs_absolute_maximum;
-       u64     pebs_interrupt_threshold;
-       u64     pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
-/*
- * Per register state.
- */
-struct er_account {
-       raw_spinlock_t          lock;   /* per-core: protect structure */
-       u64                 config;     /* extra MSR config */
-       u64                 reg;        /* extra MSR number */
-       atomic_t            ref;        /* reference count */
-};
-
-/*
- * Per core/cpu state
- *
- * Used to coordinate shared registers between HT threads or
- * among events on a single PMU.
- */
-struct intel_shared_regs {
-       struct er_account       regs[EXTRA_REG_MAX];
-       int                     refcnt;         /* per-core: #HT threads */
-       unsigned                core_id;        /* per-core: core id */
-};
-
-enum intel_excl_state_type {
-       INTEL_EXCL_UNUSED    = 0, /* counter is unused */
-       INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
-       INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
-};
-
-struct intel_excl_states {
-       enum intel_excl_state_type state[X86_PMC_IDX_MAX];
-       bool sched_started; /* true if scheduling has started */
-};
-
-struct intel_excl_cntrs {
-       raw_spinlock_t  lock;
-
-       struct intel_excl_states states[2];
-
-       union {
-               u16     has_exclusive[2];
-               u32     exclusive_present;
-       };
-
-       int             refcnt;         /* per-core: #HT threads */
-       unsigned        core_id;        /* per-core: core id */
-};
-
-#define MAX_LBR_ENTRIES                32
-
-enum {
-       X86_PERF_KFREE_SHARED = 0,
-       X86_PERF_KFREE_EXCL   = 1,
-       X86_PERF_KFREE_MAX
-};
-
-struct cpu_hw_events {
-       /*
-        * Generic x86 PMC bits
-        */
-       struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
-       unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       unsigned long           running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       int                     enabled;
-
-       int                     n_events; /* the # of events in the below arrays */
-       int                     n_added;  /* the # of last events in the below arrays;
-                                            they've never been enabled yet */
-       int                     n_txn;    /* the # of last events in the below arrays;
-                                            added in the current transaction */
-       int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
-       u64                     tags[X86_PMC_IDX_MAX];
-
-       struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
-       struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
-
-       int                     n_excl; /* the number of exclusive events */
-
-       unsigned int            txn_flags;
-       int                     is_fake;
-
-       /*
-        * Intel DebugStore bits
-        */
-       struct debug_store      *ds;
-       u64                     pebs_enabled;
-
-       /*
-        * Intel LBR bits
-        */
-       int                             lbr_users;
-       void                            *lbr_context;
-       struct perf_branch_stack        lbr_stack;
-       struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
-       struct er_account               *lbr_sel;
-       u64                             br_sel;
-
-       /*
-        * Intel host/guest exclude bits
-        */
-       u64                             intel_ctrl_guest_mask;
-       u64                             intel_ctrl_host_mask;
-       struct perf_guest_switch_msr    guest_switch_msrs[X86_PMC_IDX_MAX];
-
-       /*
-        * Intel checkpoint mask
-        */
-       u64                             intel_cp_status;
-
-       /*
-        * manage shared (per-core, per-cpu) registers
-        * used on Intel NHM/WSM/SNB
-        */
-       struct intel_shared_regs        *shared_regs;
-       /*
-        * manage exclusive counter access between hyperthreads
-        */
-       struct event_constraint *constraint_list; /* in enable order */
-       struct intel_excl_cntrs         *excl_cntrs;
-       int excl_thread_id; /* 0 or 1 */
-
-       /*
-        * AMD specific bits
-        */
-       struct amd_nb                   *amd_nb;
-       /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
-       u64                             perf_ctr_virt_mask;
-
-       void                            *kfree_on_online[X86_PERF_KFREE_MAX];
-};
-
-#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
-       { .idxmsk64 = (n) },            \
-       .code = (c),                    \
-       .cmask = (m),                   \
-       .weight = (w),                  \
-       .overlap = (o),                 \
-       .flags = f,                     \
-}
-
-#define EVENT_CONSTRAINT(c, n, m)      \
-       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
-
-#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
-       __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
-                          0, PERF_X86_EVENT_EXCL)
-
-/*
- * The overlap flag marks event constraints with overlapping counter
- * masks. This is the case if the counter mask of such an event is not
- * a subset of any other counter mask of a constraint with an equal or
- * higher weight, e.g.:
- *
- *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
- *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
- *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
- *
- * The event scheduler may not select the correct counter in the first
- * cycle because it needs to know which subsequent events will be
- * scheduled. It may fail to schedule the events then. So we set the
- * overlap flag for such constraints to give the scheduler a hint which
- * events to select for counter rescheduling.
- *
- * Care must be taken as the rescheduling algorithm is O(n!) which
- * will increase scheduling cycles for an over-committed system
- * dramatically.  The number of such EVENT_CONSTRAINT_OVERLAP() macros
- * and its counter masks must be kept at a minimum.
- */
-#define EVENT_CONSTRAINT_OVERLAP(c, n, m)      \
-       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
-
-/*
- * Constraint on the Event code.
- */
-#define INTEL_EVENT_CONSTRAINT(c, n)   \
-       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
-
-/*
- * Constraint on the Event code + UMask + fixed-mask
- *
- * Filter mask to validate fixed counter events.
- * The following filters disqualify an event from the fixed counters:
- *  - inv
- *  - edge
- *  - cnt-mask
- *  - in_tx
- *  - in_tx_checkpointed
- *  The other filters are supported by fixed counters.
- *  The any-thread option is supported starting with v3.
- */
-#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
-#define FIXED_EVENT_CONSTRAINT(c, n)   \
-       EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
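A small sketch (not part of this diff) of the index mask FIXED_EVENT_CONSTRAINT builds: fixed counter n is addressed as architectural index 32 + n (INTEL_PMC_IDX_FIXED is assumed to be 32 here), so the constraint allows exactly one bit above the generic counters.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int n = 1;                              /* e.g. the second fixed counter */
        uint64_t idxmsk = 1ULL << (32 + n);     /* matches the (1ULL << (32+n)) above */

        printf("fixed counter %d -> idxmsk 0x%016llx\n", n, (unsigned long long)idxmsk);
        return 0;
}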
-
-/*
- * Constraint on the Event code + UMask
- */
-#define INTEL_UEVENT_CONSTRAINT(c, n)  \
-       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
-
-/* Constraint on specific umask bit only + event */
-#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)      \
-       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
-
-/* Like UEVENT_CONSTRAINT, but match flags too */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)    \
-       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
-
-#define INTEL_EXCLUEVT_CONSTRAINT(c, n)        \
-       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
-                          HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
-
-#define INTEL_PLD_CONSTRAINT(c, n)     \
-       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                          HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
-
-#define INTEL_PST_CONSTRAINT(c, n)     \
-       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
-
-/* Event constraint, but match on all event flags too. */
-#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
-       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
-
-/* Check only flags, but allow all event/umask */
-#define INTEL_ALL_EVENT_CONSTRAINT(code, n)    \
-       EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
-
-/* Check flags and event code, and set the HSW store flag */
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
-
-/* Check flags and event code, and set the HSW load flag */
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
-
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, \
-                         PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW store flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
-
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, \
-                         PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW load flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
-
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, \
-                         PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW N/A flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
-
-
-/*
- * We define the end marker as having a weight of -1
- * to enable blacklisting of events using a counter bitmask
- * of zero and thus a weight of zero.
- * The end marker has a weight that cannot possibly be
- * obtained from counting the bits in the bitmask.
- */
-#define EVENT_CONSTRAINT_END { .weight = -1 }
-
-/*
- * Check for end marker with weight == -1
- */
-#define for_each_event_constraint(e, c)        \
-       for ((e) = (c); (e)->weight != -1; (e)++)
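A minimal usage sketch of the end marker and iterator above; the table entries are hypothetical and only meant to show the pattern:

static struct event_constraint example_constraints[] = {
	INTEL_EVENT_CONSTRAINT(0xc0, 0x3),	/* hypothetical: event 0xc0, counters 0-1 */
	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf),	/* hypothetical: event 0xc2 umask 0x01, counters 0-3 */
	EVENT_CONSTRAINT_END
};

static void example_dump_constraints(void)
{
	struct event_constraint *c;

	for_each_event_constraint(c, example_constraints)
		pr_debug("code=0x%llx weight=%d\n", c->code, c->weight);
}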
-
-/*
- * Extra registers for specific events.
- *
- * Some events need large masks and require external MSRs.
- * Those extra MSRs end up being shared for all events on
- * a PMU and sometimes between PMUs of sibling HT threads.
- * In either case, the kernel needs to handle conflicting
- * accesses to those extra, shared, regs. The data structure
- * to manage those registers is stored in cpu_hw_event.
- */
-struct extra_reg {
-       unsigned int            event;
-       unsigned int            msr;
-       u64                     config_mask;
-       u64                     valid_mask;
-       int                     idx;  /* per_xxx->regs[] reg index */
-       bool                    extra_msr_access;
-};
-
-#define EVENT_EXTRA_REG(e, ms, m, vm, i) {     \
-       .event = (e),                   \
-       .msr = (ms),                    \
-       .config_mask = (m),             \
-       .valid_mask = (vm),             \
-       .idx = EXTRA_REG_##i,           \
-       .extra_msr_access = true,       \
-       }
-
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)     \
-       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
-
-#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
-       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
-                       ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
-
-#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
-       INTEL_UEVENT_EXTRA_REG(c, \
-                              MSR_PEBS_LD_LAT_THRESHOLD, \
-                              0xffff, \
-                              LDLAT)
-
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
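For illustration, a table of extra registers typically looks like the following, terminated with EVENT_EXTRA_END (a hedged sketch modeled on the offcore-response setup in the Intel code; the event code, MSR and valid mask are examples, not definitions made by this header):

static struct extra_reg example_extra_regs[] __read_mostly = {
	/* OFFCORE_RESPONSE-style event: extra config lives in MSR_OFFCORE_RSP_0 */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
	EVENT_EXTRA_END
};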
-
-union perf_capabilities {
-       struct {
-               u64     lbr_format:6;
-               u64     pebs_trap:1;
-               u64     pebs_arch_reg:1;
-               u64     pebs_format:4;
-               u64     smm_freeze:1;
-               /*
-                * PMU supports separate counter range for writing
-                * values > 32bit.
-                */
-               u64     full_width_write:1;
-       };
-       u64     capabilities;
-};
-
-struct x86_pmu_quirk {
-       struct x86_pmu_quirk *next;
-       void (*func)(void);
-};
-
-union x86_pmu_config {
-       struct {
-               u64 event:8,
-                   umask:8,
-                   usr:1,
-                   os:1,
-                   edge:1,
-                   pc:1,
-                   interrupt:1,
-                   __reserved1:1,
-                   en:1,
-                   inv:1,
-                   cmask:8,
-                   event2:4,
-                   __reserved2:4,
-                   go:1,
-                   ho:1;
-       } bits;
-       u64 value;
-};
-
-#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
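A quick sketch of how X86_CONFIG() is meant to be used: it assembles a raw config value from the named bit-fields of the union above (the event/cmask values are just an example of the "inverted cmask" idiom used by the Intel code, not something defined here):

static void example_build_config(struct perf_event *event)
{
	/* event 0xc0 with an inverted cmask of 16 */
	event->hw.config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
}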
-
-enum {
-       x86_lbr_exclusive_lbr,
-       x86_lbr_exclusive_bts,
-       x86_lbr_exclusive_pt,
-       x86_lbr_exclusive_max,
-};
-
-/*
- * struct x86_pmu - generic x86 pmu
- */
-struct x86_pmu {
-       /*
-        * Generic x86 PMC bits
-        */
-       const char      *name;
-       int             version;
-       int             (*handle_irq)(struct pt_regs *);
-       void            (*disable_all)(void);
-       void            (*enable_all)(int added);
-       void            (*enable)(struct perf_event *);
-       void            (*disable)(struct perf_event *);
-       int             (*hw_config)(struct perf_event *event);
-       int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
-       unsigned        eventsel;
-       unsigned        perfctr;
-       int             (*addr_offset)(int index, bool eventsel);
-       int             (*rdpmc_index)(int index);
-       u64             (*event_map)(int);
-       int             max_events;
-       int             num_counters;
-       int             num_counters_fixed;
-       int             cntval_bits;
-       u64             cntval_mask;
-       union {
-                       unsigned long events_maskl;
-                       unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
-       };
-       int             events_mask_len;
-       int             apic;
-       u64             max_period;
-       struct event_constraint *
-                       (*get_event_constraints)(struct cpu_hw_events *cpuc,
-                                                int idx,
-                                                struct perf_event *event);
-
-       void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
-                                                struct perf_event *event);
-
-       void            (*start_scheduling)(struct cpu_hw_events *cpuc);
-
-       void            (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
-
-       void            (*stop_scheduling)(struct cpu_hw_events *cpuc);
-
-       struct event_constraint *event_constraints;
-       struct x86_pmu_quirk *quirks;
-       int             perfctr_second_write;
-       bool            late_ack;
-       unsigned        (*limit_period)(struct perf_event *event, unsigned l);
-
-       /*
-        * sysfs attrs
-        */
-       int             attr_rdpmc_broken;
-       int             attr_rdpmc;
-       struct attribute **format_attrs;
-       struct attribute **event_attrs;
-
-       ssize_t         (*events_sysfs_show)(char *page, u64 config);
-       struct attribute **cpu_events;
-
-       /*
-        * CPU Hotplug hooks
-        */
-       int             (*cpu_prepare)(int cpu);
-       void            (*cpu_starting)(int cpu);
-       void            (*cpu_dying)(int cpu);
-       void            (*cpu_dead)(int cpu);
-
-       void            (*check_microcode)(void);
-       void            (*sched_task)(struct perf_event_context *ctx,
-                                     bool sched_in);
-
-       /*
-        * Intel Arch Perfmon v2+
-        */
-       u64                     intel_ctrl;
-       union perf_capabilities intel_cap;
-
-       /*
-        * Intel DebugStore bits
-        */
-       unsigned int    bts             :1,
-                       bts_active      :1,
-                       pebs            :1,
-                       pebs_active     :1,
-                       pebs_broken     :1,
-                       pebs_prec_dist  :1;
-       int             pebs_record_size;
-       void            (*drain_pebs)(struct pt_regs *regs);
-       struct event_constraint *pebs_constraints;
-       void            (*pebs_aliases)(struct perf_event *event);
-       int             max_pebs_events;
-       unsigned long   free_running_flags;
-
-       /*
-        * Intel LBR
-        */
-       unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
-       int             lbr_nr;                    /* hardware stack size */
-       u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
-       const int       *lbr_sel_map;              /* lbr_select mappings */
-       bool            lbr_double_abort;          /* duplicated lbr aborts */
-
-       /*
-        * Intel PT/LBR/BTS are exclusive
-        */
-       atomic_t        lbr_exclusive[x86_lbr_exclusive_max];
-
-       /*
-        * Extra registers for events
-        */
-       struct extra_reg *extra_regs;
-       unsigned int flags;
-
-       /*
-        * Intel host/guest support (KVM)
-        */
-       struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
-};
-
-struct x86_perf_task_context {
-       u64 lbr_from[MAX_LBR_ENTRIES];
-       u64 lbr_to[MAX_LBR_ENTRIES];
-       u64 lbr_info[MAX_LBR_ENTRIES];
-       int tos;
-       int lbr_callstack_users;
-       int lbr_stack_state;
-};
-
-#define x86_add_quirk(func_)                                           \
-do {                                                                   \
-       static struct x86_pmu_quirk __quirk __initdata = {              \
-               .func = func_,                                          \
-       };                                                              \
-       __quirk.next = x86_pmu.quirks;                                  \
-       x86_pmu.quirks = &__quirk;                                      \
-} while (0)
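Usage sketch for x86_add_quirk() (hypothetical quirk; the real callers sit in the model-specific __init paths, which is also why the quirk node can be __initdata):

static void __init example_quirk(void)
{
	pr_info("example PMU quirk applied\n");
}

static int __init example_pmu_setup(void)
{
	x86_add_quirk(example_quirk);
	return 0;
}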
-
-/*
- * x86_pmu flags
- */
-#define PMU_FL_NO_HT_SHARING   0x1 /* no hyper-threading resource sharing */
-#define PMU_FL_HAS_RSP_1       0x2 /* has 2 equivalent offcore_rsp regs   */
-#define PMU_FL_EXCL_CNTRS      0x4 /* has exclusive counter requirements  */
-#define PMU_FL_EXCL_ENABLED    0x8 /* exclusive counter active */
-
-#define EVENT_VAR(_id)  event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
-
-#define EVENT_ATTR(_name, _id)                                         \
-static struct perf_pmu_events_attr EVENT_VAR(_id) = {                  \
-       .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
-       .id             = PERF_COUNT_HW_##_id,                          \
-       .event_str      = NULL,                                         \
-};
-
-#define EVENT_ATTR_STR(_name, v, str)                                  \
-static struct perf_pmu_events_attr event_attr_##v = {                  \
-       .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
-       .id             = 0,                                            \
-       .event_str      = str,                                          \
-};
-
-extern struct x86_pmu x86_pmu __read_mostly;
-
-static inline bool x86_pmu_has_lbr_callstack(void)
-{
-       return  x86_pmu.lbr_sel_map &&
-               x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
-}
-
-DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-int x86_perf_event_set_period(struct perf_event *event);
-
-/*
- * Generalized hw caching related hw_event table, filled
- * in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'hw_event makes no sense on
- * this CPU', any other value means the raw hw_event
- * ID.
- */
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-extern u64 __read_mostly hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-extern u64 __read_mostly hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
-u64 x86_perf_event_update(struct perf_event *event);
-
-static inline unsigned int x86_pmu_config_addr(int index)
-{
-       return x86_pmu.eventsel + (x86_pmu.addr_offset ?
-                                  x86_pmu.addr_offset(index, true) : index);
-}
-
-static inline unsigned int x86_pmu_event_addr(int index)
-{
-       return x86_pmu.perfctr + (x86_pmu.addr_offset ?
-                                 x86_pmu.addr_offset(index, false) : index);
-}
-
-static inline int x86_pmu_rdpmc_index(int index)
-{
-       return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
-}
-
-int x86_add_exclusive(unsigned int what);
-
-void x86_del_exclusive(unsigned int what);
-
-int x86_reserve_hardware(void);
-
-void x86_release_hardware(void);
-
-void hw_perf_lbr_event_destroy(struct perf_event *event);
-
-int x86_setup_perfctr(struct perf_event *event);
-
-int x86_pmu_hw_config(struct perf_event *event);
-
-void x86_pmu_disable_all(void);
-
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
-                                         u64 enable_mask)
-{
-       u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
-
-       if (hwc->extra_reg.reg)
-               wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
-       wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
-}
-
-void x86_pmu_enable_all(int added);
-
-int perf_assign_events(struct event_constraint **constraints, int n,
-                       int wmin, int wmax, int gpmax, int *assign);
-int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
-
-void x86_pmu_stop(struct perf_event *event, int flags);
-
-static inline void x86_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       wrmsrl(hwc->config_base, hwc->config);
-}
-
-void x86_pmu_enable_event(struct perf_event *event);
-
-int x86_pmu_handle_irq(struct pt_regs *regs);
-
-extern struct event_constraint emptyconstraint;
-
-extern struct event_constraint unconstrained;
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
-       return ip > PAGE_OFFSET;
-#else
-       return (long)ip < 0;
-#endif
-}
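The 64-bit branch of kernel_ip() relies on kernel addresses occupying the upper, sign-bit-set half of the canonical address space, so the cast-to-signed comparison is a cheap kernel-address test; the 32-bit branch splits at PAGE_OFFSET for the same reason.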
-
-/*
- * Not all PMUs provide the right context information to place the reported IP
- * into full context. Specifically segment registers are typically not
- * supplied.
- *
- * Assuming the address is a linear address (it is for IBS), we fake the CS and
- * vm86 mode using the known zero-based code segment and 'fix up' the registers
- * to reflect this.
- *
- * Intel PEBS/LBR appear to typically provide the effective address, nothing
- * much we can do about that but pray and treat it like a linear address.
- */
-static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
-{
-       regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
-       if (regs->flags & X86_VM_MASK)
-               regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
-       regs->ip = ip;
-}
-
-ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
-ssize_t intel_event_sysfs_show(char *page, u64 config);
-
-struct attribute **merge_attr(struct attribute **a, struct attribute **b);
-
-#ifdef CONFIG_CPU_SUP_AMD
-
-int amd_pmu_init(void);
-
-#else /* CONFIG_CPU_SUP_AMD */
-
-static inline int amd_pmu_init(void)
-{
-       return 0;
-}
-
-#endif /* CONFIG_CPU_SUP_AMD */
-
-#ifdef CONFIG_CPU_SUP_INTEL
-
-static inline bool intel_pmu_has_bts(struct perf_event *event)
-{
-       if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-           !event->attr.freq && event->hw.sample_period == 1)
-               return true;
-
-       return false;
-}
-
-int intel_pmu_save_and_restart(struct perf_event *event);
-
-struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                         struct perf_event *event);
-
-struct intel_shared_regs *allocate_shared_regs(int cpu);
-
-int intel_pmu_init(void);
-
-void init_debug_store_on_cpu(int cpu);
-
-void fini_debug_store_on_cpu(int cpu);
-
-void release_ds_buffers(void);
-
-void reserve_ds_buffers(void);
-
-extern struct event_constraint bts_constraint;
-
-void intel_pmu_enable_bts(u64 config);
-
-void intel_pmu_disable_bts(void);
-
-int intel_pmu_drain_bts_buffer(void);
-
-extern struct event_constraint intel_core2_pebs_event_constraints[];
-
-extern struct event_constraint intel_atom_pebs_event_constraints[];
-
-extern struct event_constraint intel_slm_pebs_event_constraints[];
-
-extern struct event_constraint intel_nehalem_pebs_event_constraints[];
-
-extern struct event_constraint intel_westmere_pebs_event_constraints[];
-
-extern struct event_constraint intel_snb_pebs_event_constraints[];
-
-extern struct event_constraint intel_ivb_pebs_event_constraints[];
-
-extern struct event_constraint intel_hsw_pebs_event_constraints[];
-
-extern struct event_constraint intel_skl_pebs_event_constraints[];
-
-struct event_constraint *intel_pebs_constraints(struct perf_event *event);
-
-void intel_pmu_pebs_enable(struct perf_event *event);
-
-void intel_pmu_pebs_disable(struct perf_event *event);
-
-void intel_pmu_pebs_enable_all(void);
-
-void intel_pmu_pebs_disable_all(void);
-
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
-
-void intel_ds_init(void);
-
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
-
-void intel_pmu_lbr_reset(void);
-
-void intel_pmu_lbr_enable(struct perf_event *event);
-
-void intel_pmu_lbr_disable(struct perf_event *event);
-
-void intel_pmu_lbr_enable_all(bool pmi);
-
-void intel_pmu_lbr_disable_all(void);
-
-void intel_pmu_lbr_read(void);
-
-void intel_pmu_lbr_init_core(void);
-
-void intel_pmu_lbr_init_nhm(void);
-
-void intel_pmu_lbr_init_atom(void);
-
-void intel_pmu_lbr_init_snb(void);
-
-void intel_pmu_lbr_init_hsw(void);
-
-void intel_pmu_lbr_init_skl(void);
-
-void intel_pmu_lbr_init_knl(void);
-
-int intel_pmu_setup_lbr_filter(struct perf_event *event);
-
-void intel_pt_interrupt(void);
-
-int intel_bts_interrupt(void);
-
-void intel_bts_enable_local(void);
-
-void intel_bts_disable_local(void);
-
-int p4_pmu_init(void);
-
-int p6_pmu_init(void);
-
-int knc_pmu_init(void);
-
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
-                         char *page);
-
-static inline int is_ht_workaround_enabled(void)
-{
-       return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
-}
-
-#else /* CONFIG_CPU_SUP_INTEL */
-
-static inline void reserve_ds_buffers(void)
-{
-}
-
-static inline void release_ds_buffers(void)
-{
-}
-
-static inline int intel_pmu_init(void)
-{
-       return 0;
-}
-
-static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
-{
-       return NULL;
-}
-
-static inline int is_ht_workaround_enabled(void)
-{
-       return 0;
-}
-#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
deleted file mode 100644 (file)
index 5861053..0000000
+++ /dev/null
@@ -1,731 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <asm/apicdef.h>
-
-#include "perf_event.h"
-
-static __initconst const u64 amd_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-               [ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
-               [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
-               [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
-               [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-               [ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
-               [ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
-               [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
-               [ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-/*
- * AMD Performance Monitor K7 and later.
- */
-static const u64 amd_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]                   = 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]                 = 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]             = 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]                 = 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]          = 0x00c2,
-  [PERF_COUNT_HW_BRANCH_MISSES]                        = 0x00c3,
-  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]      = 0x00d0, /* "Decoder empty" event */
-  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]       = 0x00d1, /* "Dispatch stalls" event */
-};
-
-static u64 amd_pmu_event_map(int hw_event)
-{
-       return amd_perfmon_event_map[hw_event];
-}
-
-/*
- * Previously calculated offsets
- */
-static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
-
-/*
- * Legacy CPUs:
- *   4 counters starting at 0xc0010000 each offset by 1
- *
- * CPUs with core performance counter extensions:
- *   6 counters starting at 0xc0010200 each offset by 2
- */
-static inline int amd_pmu_addr_offset(int index, bool eventsel)
-{
-       int offset;
-
-       if (!index)
-               return index;
-
-       if (eventsel)
-               offset = event_offsets[index];
-       else
-               offset = count_offsets[index];
-
-       if (offset)
-               return offset;
-
-       if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-               offset = index;
-       else
-               offset = index << 1;
-
-       if (eventsel)
-               event_offsets[index] = offset;
-       else
-               count_offsets[index] = offset;
-
-       return offset;
-}
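A worked example of the resulting register addresses (the values assume the MSR bases assigned in amd_pmu and amd_core_pmu_init() further down; shown for illustration only):

/*
 * legacy (4 counters):       eventsel i at MSR_K7_EVNTSEL0 + i,
 *                            counter  i at MSR_K7_PERFCTR0 + i
 * PERFCTR_CORE (6 counters): eventsel i at MSR_F15H_PERF_CTL + 2*i,
 *                            counter  i at MSR_F15H_PERF_CTR + 2*i
 *
 * e.g. x86_pmu_config_addr(2) resolves to MSR_F15H_PERF_CTL + 4 on a
 * core-extension CPU and to MSR_K7_EVNTSEL0 + 2 on a legacy CPU.
 */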
-
-static int amd_core_hw_config(struct perf_event *event)
-{
-       if (event->attr.exclude_host && event->attr.exclude_guest)
-               /*
-                * When HO == GO == 1 the hardware treats that as GO == HO == 0
-                * and will count in both modes. We don't want to count in that
-                * case so we emulate no-counting by setting US = OS = 0.
-                */
-               event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
-                                     ARCH_PERFMON_EVENTSEL_OS);
-       else if (event->attr.exclude_host)
-               event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
-       else if (event->attr.exclude_guest)
-               event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
-
-       return 0;
-}
-
-/*
- * AMD64 events are detected based on their event codes.
- */
-static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
-{
-       return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
-}
-
-static inline int amd_is_nb_event(struct hw_perf_event *hwc)
-{
-       return (hwc->config & 0xe0) == 0xe0;
-}
-
-static inline int amd_has_nb(struct cpu_hw_events *cpuc)
-{
-       struct amd_nb *nb = cpuc->amd_nb;
-
-       return nb && nb->nb_id != -1;
-}
-
-static int amd_pmu_hw_config(struct perf_event *event)
-{
-       int ret;
-
-       /* pass precise event sampling to ibs: */
-       if (event->attr.precise_ip && get_ibs_caps())
-               return -ENOENT;
-
-       if (has_branch_stack(event))
-               return -EOPNOTSUPP;
-
-       ret = x86_pmu_hw_config(event);
-       if (ret)
-               return ret;
-
-       if (event->attr.type == PERF_TYPE_RAW)
-               event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
-
-       return amd_core_hw_config(event);
-}
-
-static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
-                                          struct perf_event *event)
-{
-       struct amd_nb *nb = cpuc->amd_nb;
-       int i;
-
-       /*
-        * need to scan whole list because event may not have
-        * been assigned during scheduling
-        *
-        * no race condition possible because event can only
-        * be removed on one CPU at a time AND PMU is disabled
-        * when we come here
-        */
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               if (cmpxchg(nb->owners + i, event, NULL) == event)
-                       break;
-       }
-}
-
- /*
-  * AMD64 NorthBridge events need special treatment because
-  * counter access needs to be synchronized across all cores
-  * of a package. Refer to BKDG section 3.12
-  *
-  * NB events are events measuring L3 cache and HyperTransport
-  * traffic. They are identified by an event code >= 0xe00.
-  * They measure events on the NorthBridge, which is shared
-  * by all cores on a package. NB events are counted on a
-  * shared set of counters. When a NB event is programmed
-  * in a counter, the data actually comes from a shared
-  * counter. Thus, access to those counters needs to be
-  * synchronized.
-  *
-  * We implement the synchronization such that no two cores
-  * can be measuring NB events using the same counters. Thus,
-  * we maintain a per-NB allocation table. The available slot
-  * is propagated using the event_constraint structure.
-  *
-  * We provide only one choice for each NB event based on
-  * the fact that only NB events have restrictions. Consequently,
-  * if a counter is available, there is a guarantee the NB event
-  * will be assigned to it. If no slot is available, an empty
-  * constraint is returned and scheduling will eventually fail
-  * for this event.
-  *
-  * Note that all cores attached to the same NB compete for the same
-  * counters to host NB events; this is why we use atomic ops. Some
-  * multi-chip CPUs may have more than one NB.
-  *
-  * Given that resources are allocated (cmpxchg), they must be
-  * eventually freed for others to use. This is accomplished by
-  * calling __amd_put_nb_event_constraints()
-  *
-  * Non NB events are not impacted by this restriction.
-  */
-static struct event_constraint *
-__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
-                              struct event_constraint *c)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct amd_nb *nb = cpuc->amd_nb;
-       struct perf_event *old;
-       int idx, new = -1;
-
-       if (!c)
-               c = &unconstrained;
-
-       if (cpuc->is_fake)
-               return c;
-
-       /*
-        * detect if already present, if so reuse
-        *
-        * cannot merge with actual allocation
-        * because of possible holes
-        *
-        * event can already be present yet not assigned (in hwc->idx)
-        * because of successive calls to x86_schedule_events() from
-        * hw_perf_group_sched_in() without hw_perf_enable()
-        */
-       for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
-               if (new == -1 || hwc->idx == idx)
-                       /* assign free slot, prefer hwc->idx */
-                       old = cmpxchg(nb->owners + idx, NULL, event);
-               else if (nb->owners[idx] == event)
-                       /* event already present */
-                       old = event;
-               else
-                       continue;
-
-               if (old && old != event)
-                       continue;
-
-               /* reassign to this slot */
-               if (new != -1)
-                       cmpxchg(nb->owners + new, event, NULL);
-               new = idx;
-
-               /* already present, reuse */
-               if (old == event)
-                       break;
-       }
-
-       if (new == -1)
-               return &emptyconstraint;
-
-       return &nb->event_constraints[new];
-}
-
-static struct amd_nb *amd_alloc_nb(int cpu)
-{
-       struct amd_nb *nb;
-       int i;
-
-       nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
-       if (!nb)
-               return NULL;
-
-       nb->nb_id = -1;
-
-       /*
-        * initialize all possible NB constraints
-        */
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               __set_bit(i, nb->event_constraints[i].idxmsk);
-               nb->event_constraints[i].weight = 1;
-       }
-       return nb;
-}
-
-static int amd_pmu_cpu_prepare(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-
-       WARN_ON_ONCE(cpuc->amd_nb);
-
-       if (boot_cpu_data.x86_max_cores < 2)
-               return NOTIFY_OK;
-
-       cpuc->amd_nb = amd_alloc_nb(cpu);
-       if (!cpuc->amd_nb)
-               return NOTIFY_BAD;
-
-       return NOTIFY_OK;
-}
-
-static void amd_pmu_cpu_starting(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-       struct amd_nb *nb;
-       int i, nb_id;
-
-       cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
-
-       if (boot_cpu_data.x86_max_cores < 2)
-               return;
-
-       nb_id = amd_get_nb_id(cpu);
-       WARN_ON_ONCE(nb_id == BAD_APICID);
-
-       for_each_online_cpu(i) {
-               nb = per_cpu(cpu_hw_events, i).amd_nb;
-               if (WARN_ON_ONCE(!nb))
-                       continue;
-
-               if (nb->nb_id == nb_id) {
-                       *onln = cpuc->amd_nb;
-                       cpuc->amd_nb = nb;
-                       break;
-               }
-       }
-
-       cpuc->amd_nb->nb_id = nb_id;
-       cpuc->amd_nb->refcnt++;
-}
-
-static void amd_pmu_cpu_dead(int cpu)
-{
-       struct cpu_hw_events *cpuhw;
-
-       if (boot_cpu_data.x86_max_cores < 2)
-               return;
-
-       cpuhw = &per_cpu(cpu_hw_events, cpu);
-
-       if (cpuhw->amd_nb) {
-               struct amd_nb *nb = cpuhw->amd_nb;
-
-               if (nb->nb_id == -1 || --nb->refcnt == 0)
-                       kfree(nb);
-
-               cpuhw->amd_nb = NULL;
-       }
-}
-
-static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                         struct perf_event *event)
-{
-       /*
-        * if not NB event or no NB, then no constraints
-        */
-       if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
-               return &unconstrained;
-
-       return __amd_get_nb_event_constraints(cpuc, event, NULL);
-}
-
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
-                                     struct perf_event *event)
-{
-       if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
-               __amd_put_nb_event_constraints(cpuc, event);
-}
-
-PMU_FORMAT_ATTR(event, "config:0-7,32-35");
-PMU_FORMAT_ATTR(umask, "config:8-15"   );
-PMU_FORMAT_ATTR(edge,  "config:18"     );
-PMU_FORMAT_ATTR(inv,   "config:23"     );
-PMU_FORMAT_ATTR(cmask, "config:24-31"  );
-
-static struct attribute *amd_format_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       NULL,
-};
-
-/* AMD Family 15h */
-
-#define AMD_EVENT_TYPE_MASK    0x000000F0ULL
-
-#define AMD_EVENT_FP           0x00000000ULL ... 0x00000010ULL
-#define AMD_EVENT_LS           0x00000020ULL ... 0x00000030ULL
-#define AMD_EVENT_DC           0x00000040ULL ... 0x00000050ULL
-#define AMD_EVENT_CU           0x00000060ULL ... 0x00000070ULL
-#define AMD_EVENT_IC_DE                0x00000080ULL ... 0x00000090ULL
-#define AMD_EVENT_EX_LS                0x000000C0ULL
-#define AMD_EVENT_DE           0x000000D0ULL
-#define AMD_EVENT_NB           0x000000E0ULL ... 0x000000F0ULL
-
-/*
- * AMD family 15h event code/PMC mappings:
- *
- * type = event_code & 0x0F0:
- *
- * 0x000       FP      PERF_CTL[5:3]
- * 0x010       FP      PERF_CTL[5:3]
- * 0x020       LS      PERF_CTL[5:0]
- * 0x030       LS      PERF_CTL[5:0]
- * 0x040       DC      PERF_CTL[5:0]
- * 0x050       DC      PERF_CTL[5:0]
- * 0x060       CU      PERF_CTL[2:0]
- * 0x070       CU      PERF_CTL[2:0]
- * 0x080       IC/DE   PERF_CTL[2:0]
- * 0x090       IC/DE   PERF_CTL[2:0]
- * 0x0A0       ---
- * 0x0B0       ---
- * 0x0C0       EX/LS   PERF_CTL[5:0]
- * 0x0D0       DE      PERF_CTL[2:0]
- * 0x0E0       NB      NB_PERF_CTL[3:0]
- * 0x0F0       NB      NB_PERF_CTL[3:0]
- *
- * Exceptions:
- *
- * 0x000       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
- * 0x003       FP      PERF_CTL[3]
- * 0x004       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
- * 0x00B       FP      PERF_CTL[3]
- * 0x00D       FP      PERF_CTL[3]
- * 0x023       DE      PERF_CTL[2:0]
- * 0x02D       LS      PERF_CTL[3]
- * 0x02E       LS      PERF_CTL[3,0]
- * 0x031       LS      PERF_CTL[2:0] (**)
- * 0x043       CU      PERF_CTL[2:0]
- * 0x045       CU      PERF_CTL[2:0]
- * 0x046       CU      PERF_CTL[2:0]
- * 0x054       CU      PERF_CTL[2:0]
- * 0x055       CU      PERF_CTL[2:0]
- * 0x08F       IC      PERF_CTL[0]
- * 0x187       DE      PERF_CTL[0]
- * 0x188       DE      PERF_CTL[0]
- * 0x0DB       EX      PERF_CTL[5:0]
- * 0x0DC       LS      PERF_CTL[5:0]
- * 0x0DD       LS      PERF_CTL[5:0]
- * 0x0DE       LS      PERF_CTL[5:0]
- * 0x0DF       LS      PERF_CTL[5:0]
- * 0x1C0       EX      PERF_CTL[5:3]
- * 0x1D6       EX      PERF_CTL[5:0]
- * 0x1D8       EX      PERF_CTL[5:0]
- *
- * (*)  depending on the umask all FPU counters may be used
- * (**) only one unitmask enabled at a time
- */
-
-static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
-static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
-static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
-static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
-static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
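One concrete tie-in with the mapping table above (an illustrative note): event 0x02E maps to PERF_CTL[3,0], so it gets amd_f15_PMC30, whose counter mask 0x09 overlaps both the 0x07 (PERF_CTL[2:0]) and 0x38 (PERF_CTL[5:3]) masks; this is exactly the situation EVENT_CONSTRAINT_OVERLAP() in perf_event.h exists for.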
-
-static struct event_constraint *
-amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
-                              struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned int event_code = amd_get_event_code(hwc);
-
-       switch (event_code & AMD_EVENT_TYPE_MASK) {
-       case AMD_EVENT_FP:
-               switch (event_code) {
-               case 0x000:
-                       if (!(hwc->config & 0x0000F000ULL))
-                               break;
-                       if (!(hwc->config & 0x00000F00ULL))
-                               break;
-                       return &amd_f15_PMC3;
-               case 0x004:
-                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
-                               break;
-                       return &amd_f15_PMC3;
-               case 0x003:
-               case 0x00B:
-               case 0x00D:
-                       return &amd_f15_PMC3;
-               }
-               return &amd_f15_PMC53;
-       case AMD_EVENT_LS:
-       case AMD_EVENT_DC:
-       case AMD_EVENT_EX_LS:
-               switch (event_code) {
-               case 0x023:
-               case 0x043:
-               case 0x045:
-               case 0x046:
-               case 0x054:
-               case 0x055:
-                       return &amd_f15_PMC20;
-               case 0x02D:
-                       return &amd_f15_PMC3;
-               case 0x02E:
-                       return &amd_f15_PMC30;
-               case 0x031:
-                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
-                               return &amd_f15_PMC20;
-                       return &emptyconstraint;
-               case 0x1C0:
-                       return &amd_f15_PMC53;
-               default:
-                       return &amd_f15_PMC50;
-               }
-       case AMD_EVENT_CU:
-       case AMD_EVENT_IC_DE:
-       case AMD_EVENT_DE:
-               switch (event_code) {
-               case 0x08F:
-               case 0x187:
-               case 0x188:
-                       return &amd_f15_PMC0;
-               case 0x0DB ... 0x0DF:
-               case 0x1D6:
-               case 0x1D8:
-                       return &amd_f15_PMC50;
-               default:
-                       return &amd_f15_PMC20;
-               }
-       case AMD_EVENT_NB:
-               /* moved to perf_event_amd_uncore.c */
-               return &emptyconstraint;
-       default:
-               return &emptyconstraint;
-       }
-}
-
-static ssize_t amd_event_sysfs_show(char *page, u64 config)
-{
-       u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
-                   (config & AMD64_EVENTSEL_EVENT) >> 24;
-
-       return x86_event_sysfs_show(page, config, event);
-}
-
-static __initconst const struct x86_pmu amd_pmu = {
-       .name                   = "AMD",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = x86_pmu_disable_all,
-       .enable_all             = x86_pmu_enable_all,
-       .enable                 = x86_pmu_enable_event,
-       .disable                = x86_pmu_disable_event,
-       .hw_config              = amd_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_K7_EVNTSEL0,
-       .perfctr                = MSR_K7_PERFCTR0,
-       .addr_offset            = amd_pmu_addr_offset,
-       .event_map              = amd_pmu_event_map,
-       .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
-       .num_counters           = AMD64_NUM_COUNTERS,
-       .cntval_bits            = 48,
-       .cntval_mask            = (1ULL << 48) - 1,
-       .apic                   = 1,
-       /* use highest bit to detect overflow */
-       .max_period             = (1ULL << 47) - 1,
-       .get_event_constraints  = amd_get_event_constraints,
-       .put_event_constraints  = amd_put_event_constraints,
-
-       .format_attrs           = amd_format_attr,
-       .events_sysfs_show      = amd_event_sysfs_show,
-
-       .cpu_prepare            = amd_pmu_cpu_prepare,
-       .cpu_starting           = amd_pmu_cpu_starting,
-       .cpu_dead               = amd_pmu_cpu_dead,
-};
-
-static int __init amd_core_pmu_init(void)
-{
-       if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-               return 0;
-
-       switch (boot_cpu_data.x86) {
-       case 0x15:
-               pr_cont("Fam15h ");
-               x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
-               break;
-
-       default:
-               pr_err("core perfctr but no constraints; unknown hardware!\n");
-               return -ENODEV;
-       }
-
-       /*
-        * If core performance counter extensions exist, we must use
-        * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
-        * amd_pmu_addr_offset().
-        */
-       x86_pmu.eventsel        = MSR_F15H_PERF_CTL;
-       x86_pmu.perfctr         = MSR_F15H_PERF_CTR;
-       x86_pmu.num_counters    = AMD64_NUM_COUNTERS_CORE;
-
-       pr_cont("core perfctr, ");
-       return 0;
-}
-
-__init int amd_pmu_init(void)
-{
-       int ret;
-
-       /* Performance-monitoring supported from K7 and later: */
-       if (boot_cpu_data.x86 < 6)
-               return -ENODEV;
-
-       x86_pmu = amd_pmu;
-
-       ret = amd_core_pmu_init();
-       if (ret)
-               return ret;
-
-       /* Events are common for all AMDs */
-       memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
-              sizeof(hw_cache_event_ids));
-
-       return 0;
-}
-
-void amd_pmu_enable_virt(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       cpuc->perf_ctr_virt_mask = 0;
-
-       /* Reload all events */
-       x86_pmu_disable_all();
-       x86_pmu_enable_all(0);
-}
-EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
-
-void amd_pmu_disable_virt(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       /*
-        * We only mask out the Host-only bit so that host-only counting works
-        * when SVM is disabled. If someone sets up a guest-only counter when
-        * SVM is disabled, the Guest-only bit still gets set and the counter
-        * will not count anything.
-        */
-       cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
-
-       /* Reload all events */
-       x86_pmu_disable_all();
-       x86_pmu_enable_all(0);
-}
-EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
deleted file mode 100644 (file)
index 989d3c2..0000000
+++ /dev/null
@@ -1,959 +0,0 @@
-/*
- * Performance events - AMD IBS
- *
- *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/ptrace.h>
-#include <linux/syscore_ops.h>
-
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-static u32 ibs_caps;
-
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-
-#include <linux/kprobes.h>
-#include <linux/hardirq.h>
-
-#include <asm/nmi.h>
-
-#define IBS_FETCH_CONFIG_MASK  (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
-#define IBS_OP_CONFIG_MASK     IBS_OP_MAX_CNT
-
-enum ibs_states {
-       IBS_ENABLED     = 0,
-       IBS_STARTED     = 1,
-       IBS_STOPPING    = 2,
-
-       IBS_MAX_STATES,
-};
-
-struct cpu_perf_ibs {
-       struct perf_event       *event;
-       unsigned long           state[BITS_TO_LONGS(IBS_MAX_STATES)];
-};
-
-struct perf_ibs {
-       struct pmu                      pmu;
-       unsigned int                    msr;
-       u64                             config_mask;
-       u64                             cnt_mask;
-       u64                             enable_mask;
-       u64                             valid_mask;
-       u64                             max_period;
-       unsigned long                   offset_mask[1];
-       int                             offset_max;
-       struct cpu_perf_ibs __percpu    *pcpu;
-
-       struct attribute                **format_attrs;
-       struct attribute_group          format_group;
-       const struct attribute_group    *attr_groups[2];
-
-       u64                             (*get_count)(u64 config);
-};
-
-struct perf_ibs_data {
-       u32             size;
-       union {
-               u32     data[0];        /* data buffer starts here */
-               u32     caps;
-       };
-       u64             regs[MSR_AMD64_IBS_REG_COUNT_MAX];
-};
-
-static int
-perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
-{
-       s64 left = local64_read(&hwc->period_left);
-       s64 period = hwc->sample_period;
-       int overflow = 0;
-
-       /*
-        * If we are way outside a reasonable range then just skip forward:
-        */
-       if (unlikely(left <= -period)) {
-               left = period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               overflow = 1;
-       }
-
-       if (unlikely(left < (s64)min)) {
-               left += period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               overflow = 1;
-       }
-
-       /*
-        * If the hw period that triggers the sw overflow is too short
-        * we might hit the irq handler. This biases the results.
-        * Thus we shorten the next-to-last period and set the last
-        * period to the max period.
-        */
-       if (left > max) {
-               left -= max;
-               if (left > max)
-                       left = max;
-               else if (left < min)
-                       left = min;
-       }
-
-       *hw_period = (u64)left;
-
-       return overflow;
-}
-
-static  int
-perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       int shift = 64 - width;
-       u64 prev_raw_count;
-       u64 delta;
-
-       /*
-        * Careful: an NMI might modify the previous event value.
-        *
-        * Our tactic to handle this is to first atomically read and
-        * exchange a new raw count - then add that new-prev delta
-        * count to the generic event atomically:
-        */
-       prev_raw_count = local64_read(&hwc->prev_count);
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                                       new_raw_count) != prev_raw_count)
-               return 0;
-
-       /*
-        * Now we have the new raw value and have updated the prev
-        * timestamp already. We can now calculate the elapsed delta
-        * (event-)time and add that to the generic event.
-        *
-        * Careful, not all hw sign-extends above the physical width
-        * of the count.
-        */
-       delta = (new_raw_count << shift) - (prev_raw_count << shift);
-       delta >>= shift;
-
-       local64_add(delta, &event->count);
-       local64_sub(delta, &hwc->period_left);
-
-       return 1;
-}
-
-static struct perf_ibs perf_ibs_fetch;
-static struct perf_ibs perf_ibs_op;
-
-static struct perf_ibs *get_ibs_pmu(int type)
-{
-       if (perf_ibs_fetch.pmu.type == type)
-               return &perf_ibs_fetch;
-       if (perf_ibs_op.pmu.type == type)
-               return &perf_ibs_op;
-       return NULL;
-}
-
-/*
- * Use IBS for precise event sampling:
- *
- *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
- *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
- *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
- *
- * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
- * MSRC001_1033) is used to select either cycle or micro-ops counting
- * mode.
- *
- * The rip of IBS samples has skid 0. Thus, IBS supports precise
- * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
- * rip is invalid when IBS was not able to record the rip correctly.
- * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
- *
- */
-static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
-{
-       switch (event->attr.precise_ip) {
-       case 0:
-               return -ENOENT;
-       case 1:
-       case 2:
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       switch (event->attr.type) {
-       case PERF_TYPE_HARDWARE:
-               switch (event->attr.config) {
-               case PERF_COUNT_HW_CPU_CYCLES:
-                       *config = 0;
-                       return 0;
-               }
-               break;
-       case PERF_TYPE_RAW:
-               switch (event->attr.config) {
-               case 0x0076:
-                       *config = 0;
-                       return 0;
-               case 0x00C1:
-                       *config = IBS_OP_CNT_CTL;
-                       return 0;
-               }
-               break;
-       default:
-               return -ENOENT;
-       }
-
-       return -EOPNOTSUPP;
-}
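For reference, a user-space request that ends up in this path might look as follows (a hedged sketch using the standard perf_event_attr fields; "perf record -e cpu-cycles:p" builds the equivalent):

	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.sample_period	= 100000,
		.precise_ip	= 2,	/* precise samples, serviced by IBS op */
	};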
-
-static const struct perf_event_attr ibs_notsupp = {
-       .exclude_user   = 1,
-       .exclude_kernel = 1,
-       .exclude_hv     = 1,
-       .exclude_idle   = 1,
-       .exclude_host   = 1,
-       .exclude_guest  = 1,
-};
-
-static int perf_ibs_init(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_ibs *perf_ibs;
-       u64 max_cnt, config;
-       int ret;
-
-       perf_ibs = get_ibs_pmu(event->attr.type);
-       if (perf_ibs) {
-               config = event->attr.config;
-       } else {
-               perf_ibs = &perf_ibs_op;
-               ret = perf_ibs_precise_event(event, &config);
-               if (ret)
-                       return ret;
-       }
-
-       if (event->pmu != &perf_ibs->pmu)
-               return -ENOENT;
-
-       if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
-               return -EINVAL;
-
-       if (config & ~perf_ibs->config_mask)
-               return -EINVAL;
-
-       if (hwc->sample_period) {
-               if (config & perf_ibs->cnt_mask)
-                       /* raw max_cnt may not be set */
-                       return -EINVAL;
-               if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
-                       /*
-                        * lower 4 bits cannot be set in ibs max cnt,
-                        * but we allow it in case we adjust the
-                        * sample period to set a frequency.
-                        */
-                       return -EINVAL;
-               hwc->sample_period &= ~0x0FULL;
-               if (!hwc->sample_period)
-                       hwc->sample_period = 0x10;
-       } else {
-               max_cnt = config & perf_ibs->cnt_mask;
-               config &= ~perf_ibs->cnt_mask;
-               event->attr.sample_period = max_cnt << 4;
-               hwc->sample_period = event->attr.sample_period;
-       }
-
-       if (!hwc->sample_period)
-               return -EINVAL;
-
-       /*
-        * If we modify hwc->sample_period, we also need to update
-        * hwc->last_period and hwc->period_left.
-        */
-       hwc->last_period = hwc->sample_period;
-       local64_set(&hwc->period_left, hwc->sample_period);
-
-       hwc->config_base = perf_ibs->msr;
-       hwc->config = config;
-
-       return 0;
-}
-
-static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
-                              struct hw_perf_event *hwc, u64 *period)
-{
-       int overflow;
-
-       /* ignore lower 4 bits in min count: */
-       overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
-       local64_set(&hwc->prev_count, 0);
-
-       return overflow;
-}
-
-static u64 get_ibs_fetch_count(u64 config)
-{
-       return (config & IBS_FETCH_CNT) >> 12;
-}
-
-static u64 get_ibs_op_count(u64 config)
-{
-       u64 count = 0;
-
-       if (config & IBS_OP_VAL)
-               count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
-
-       if (ibs_caps & IBS_CAPS_RDWROPCNT)
-               count += (config & IBS_OP_CUR_CNT) >> 32;
-
-       return count;
-}
-
-static void
-perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
-                     u64 *config)
-{
-       u64 count = perf_ibs->get_count(*config);
-
-       /*
-        * Set width to 64 since we do not overflow on max width but
-        * instead on max count. In perf_ibs_set_period() we clear
-        * prev count manually on overflow.
-        */
-       while (!perf_event_try_update(event, count, 64)) {
-               rdmsrl(event->hw.config_base, *config);
-               count = perf_ibs->get_count(*config);
-       }
-}
-
-static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
-                                        struct hw_perf_event *hwc, u64 config)
-{
-       wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
-}
-
-/*
- * Erratum #420 Instruction-Based Sampling Engine May Generate
- * Interrupt that Cannot Be Cleared:
- *
- * Must clear counter mask first, then clear the enable bit. See
- * Revision Guide for AMD Family 10h Processors, Publication #41322.
- */
-static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
-                                         struct hw_perf_event *hwc, u64 config)
-{
-       config &= ~perf_ibs->cnt_mask;
-       wrmsrl(hwc->config_base, config);
-       config &= ~perf_ibs->enable_mask;
-       wrmsrl(hwc->config_base, config);
-}
-
-/*
- * We cannot restore the ibs pmu state, so we always need to update
- * the event while stopping it and then reset the state when starting
- * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
- * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
- */
-static void perf_ibs_start(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-       u64 period;
-
-       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
-               return;
-
-       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-       hwc->state = 0;
-
-       perf_ibs_set_period(perf_ibs, hwc, &period);
-       set_bit(IBS_STARTED, pcpu->state);
-       perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
-
-       perf_event_update_userpage(event);
-}
-
-static void perf_ibs_stop(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-       u64 config;
-       int stopping;
-
-       stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
-
-       if (!stopping && (hwc->state & PERF_HES_UPTODATE))
-               return;
-
-       rdmsrl(hwc->config_base, config);
-
-       if (stopping) {
-               set_bit(IBS_STOPPING, pcpu->state);
-               perf_ibs_disable_event(perf_ibs, hwc, config);
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-       }
-
-       if (hwc->state & PERF_HES_UPTODATE)
-               return;
-
-       /*
-        * Clear the valid bit so that rollovers are not counted on
-        * update; rollovers are only accounted for in the irq handler.
-        */
-       config &= ~perf_ibs->valid_mask;
-
-       perf_ibs_event_update(perf_ibs, event, &config);
-       hwc->state |= PERF_HES_UPTODATE;
-}
-
-static int perf_ibs_add(struct perf_event *event, int flags)
-{
-       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-
-       if (test_and_set_bit(IBS_ENABLED, pcpu->state))
-               return -ENOSPC;
-
-       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-       pcpu->event = event;
-
-       if (flags & PERF_EF_START)
-               perf_ibs_start(event, PERF_EF_RELOAD);
-
-       return 0;
-}
-
-static void perf_ibs_del(struct perf_event *event, int flags)
-{
-       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-
-       if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
-               return;
-
-       perf_ibs_stop(event, PERF_EF_UPDATE);
-
-       pcpu->event = NULL;
-
-       perf_event_update_userpage(event);
-}
-
-static void perf_ibs_read(struct perf_event *event) { }
-
-PMU_FORMAT_ATTR(rand_en,       "config:57");
-PMU_FORMAT_ATTR(cnt_ctl,       "config:19");
-
-static struct attribute *ibs_fetch_format_attrs[] = {
-       &format_attr_rand_en.attr,
-       NULL,
-};
-
-static struct attribute *ibs_op_format_attrs[] = {
-       NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
-       NULL,
-};
-
-static struct perf_ibs perf_ibs_fetch = {
-       .pmu = {
-               .task_ctx_nr    = perf_invalid_context,
-
-               .event_init     = perf_ibs_init,
-               .add            = perf_ibs_add,
-               .del            = perf_ibs_del,
-               .start          = perf_ibs_start,
-               .stop           = perf_ibs_stop,
-               .read           = perf_ibs_read,
-       },
-       .msr                    = MSR_AMD64_IBSFETCHCTL,
-       .config_mask            = IBS_FETCH_CONFIG_MASK,
-       .cnt_mask               = IBS_FETCH_MAX_CNT,
-       .enable_mask            = IBS_FETCH_ENABLE,
-       .valid_mask             = IBS_FETCH_VAL,
-       .max_period             = IBS_FETCH_MAX_CNT << 4,
-       .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
-       .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
-       .format_attrs           = ibs_fetch_format_attrs,
-
-       .get_count              = get_ibs_fetch_count,
-};
-
-static struct perf_ibs perf_ibs_op = {
-       .pmu = {
-               .task_ctx_nr    = perf_invalid_context,
-
-               .event_init     = perf_ibs_init,
-               .add            = perf_ibs_add,
-               .del            = perf_ibs_del,
-               .start          = perf_ibs_start,
-               .stop           = perf_ibs_stop,
-               .read           = perf_ibs_read,
-       },
-       .msr                    = MSR_AMD64_IBSOPCTL,
-       .config_mask            = IBS_OP_CONFIG_MASK,
-       .cnt_mask               = IBS_OP_MAX_CNT,
-       .enable_mask            = IBS_OP_ENABLE,
-       .valid_mask             = IBS_OP_VAL,
-       .max_period             = IBS_OP_MAX_CNT << 4,
-       .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
-       .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
-       .format_attrs           = ibs_op_format_attrs,
-
-       .get_count              = get_ibs_op_count,
-};
-
-static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
-{
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-       struct perf_event *event = pcpu->event;
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_sample_data data;
-       struct perf_raw_record raw;
-       struct pt_regs regs;
-       struct perf_ibs_data ibs_data;
-       int offset, size, check_rip, offset_max, throttle = 0;
-       unsigned int msr;
-       u64 *buf, *config, period;
-
-       if (!test_bit(IBS_STARTED, pcpu->state)) {
-               /*
-                * Catch spurious interrupts after stopping IBS: after
-                * disabling IBS there could still be incoming NMIs
-                * with samples that even have the valid bit cleared.
-                * Mark all these NMIs as handled.
-                */
-               return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
-       }
-
-       msr = hwc->config_base;
-       buf = ibs_data.regs;
-       rdmsrl(msr, *buf);
-       if (!(*buf++ & perf_ibs->valid_mask))
-               return 0;
-
-       config = &ibs_data.regs[0];
-       perf_ibs_event_update(perf_ibs, event, config);
-       perf_sample_data_init(&data, 0, hwc->last_period);
-       if (!perf_ibs_set_period(perf_ibs, hwc, &period))
-               goto out;       /* no sw counter overflow */
-
-       ibs_data.caps = ibs_caps;
-       size = 1;
-       offset = 1;
-       check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
-       if (event->attr.sample_type & PERF_SAMPLE_RAW)
-               offset_max = perf_ibs->offset_max;
-       else if (check_rip)
-               offset_max = 2;
-       else
-               offset_max = 1;
-       do {
-               rdmsrl(msr + offset, *buf++);
-               size++;
-               offset = find_next_bit(perf_ibs->offset_mask,
-                                      perf_ibs->offset_max,
-                                      offset + 1);
-       } while (offset < offset_max);
-       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
-               /*
-                * Read IbsBrTarget and IbsOpData4 separately
-                * depending on their availability.
-                * They can't be added to offset_max as they are staggered.
-                */
-               if (ibs_caps & IBS_CAPS_BRNTRGT) {
-                       rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
-                       size++;
-               }
-               if (ibs_caps & IBS_CAPS_OPDATA4) {
-                       rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
-                       size++;
-               }
-       }
-       ibs_data.size = sizeof(u64) * size;
-
-       regs = *iregs;
-       if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
-               regs.flags &= ~PERF_EFLAGS_EXACT;
-       } else {
-               set_linear_ip(&regs, ibs_data.regs[1]);
-               regs.flags |= PERF_EFLAGS_EXACT;
-       }
-
-       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
-               raw.size = sizeof(u32) + ibs_data.size;
-               raw.data = ibs_data.data;
-               data.raw = &raw;
-       }
-
-       throttle = perf_event_overflow(event, &data, &regs);
-out:
-       if (throttle)
-               perf_ibs_disable_event(perf_ibs, hwc, *config);
-       else
-               perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
-
-       perf_event_update_userpage(event);
-
-       return 1;
-}
-
-static int
-perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
-{
-       int handled = 0;
-
-       handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
-       handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
-
-       if (handled)
-               inc_irq_stat(apic_perf_irqs);
-
-       return handled;
-}
-NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
-
-static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
-{
-       struct cpu_perf_ibs __percpu *pcpu;
-       int ret;
-
-       pcpu = alloc_percpu(struct cpu_perf_ibs);
-       if (!pcpu)
-               return -ENOMEM;
-
-       perf_ibs->pcpu = pcpu;
-
-       /* register attributes */
-       if (perf_ibs->format_attrs[0]) {
-               memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
-               perf_ibs->format_group.name     = "format";
-               perf_ibs->format_group.attrs    = perf_ibs->format_attrs;
-
-               memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
-               perf_ibs->attr_groups[0]        = &perf_ibs->format_group;
-               perf_ibs->pmu.attr_groups       = perf_ibs->attr_groups;
-       }
-
-       ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
-       if (ret) {
-               perf_ibs->pcpu = NULL;
-               free_percpu(pcpu);
-       }
-
-       return ret;
-}
-
-static __init int perf_event_ibs_init(void)
-{
-       struct attribute **attr = ibs_op_format_attrs;
-
-       if (!ibs_caps)
-               return -ENODEV; /* ibs not supported by the cpu */
-
-       perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
-
-       if (ibs_caps & IBS_CAPS_OPCNT) {
-               perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
-               *attr++ = &format_attr_cnt_ctl.attr;
-       }
-       perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
-
-       register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
-       printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
-
-       return 0;
-}
-
-#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
-
-static __init int perf_event_ibs_init(void) { return 0; }
-
-#endif
-
-/* IBS - apic initialization, for perf and oprofile */
-
-static __init u32 __get_ibs_caps(void)
-{
-       u32 caps;
-       unsigned int max_level;
-
-       if (!boot_cpu_has(X86_FEATURE_IBS))
-               return 0;
-
-       /* check IBS cpuid feature flags */
-       max_level = cpuid_eax(0x80000000);
-       if (max_level < IBS_CPUID_FEATURES)
-               return IBS_CAPS_DEFAULT;
-
-       caps = cpuid_eax(IBS_CPUID_FEATURES);
-       if (!(caps & IBS_CAPS_AVAIL))
-               /* cpuid flags not valid */
-               return IBS_CAPS_DEFAULT;
-
-       return caps;
-}
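
The same capability probe can be reproduced from user space with the CPUID instruction. The sketch below is illustrative only: it assumes the IBS feature leaf is 0x8000001b and that the availability flag is bit 0 of EAX, matching the IBS_CPUID_FEATURES/IBS_CAPS_AVAIL constants the kernel code above relies on.

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* Max extended leaf must cover the assumed IBS leaf 0x8000001b. */
            if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) ||
                eax < 0x8000001b) {
                    puts("IBS CPUID leaf not available");
                    return 1;
            }

            __get_cpuid(0x8000001b, &eax, &ebx, &ecx, &edx);
            if (eax & 1)                    /* assumed IBS_CAPS_AVAIL bit */
                    printf("IBS caps: 0x%08x\n", eax);
            else
                    puts("IBS not advertised (default caps apply)");

            return 0;
    }
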
-
-u32 get_ibs_caps(void)
-{
-       return ibs_caps;
-}
-
-EXPORT_SYMBOL(get_ibs_caps);
-
-static inline int get_eilvt(int offset)
-{
-       return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
-}
-
-static inline int put_eilvt(int offset)
-{
-       return !setup_APIC_eilvt(offset, 0, 0, 1);
-}
-
-/*
- * Check and reserve APIC extended interrupt LVT offset for IBS if available.
- */
-static inline int ibs_eilvt_valid(void)
-{
-       int offset;
-       u64 val;
-       int valid = 0;
-
-       preempt_disable();
-
-       rdmsrl(MSR_AMD64_IBSCTL, val);
-       offset = val & IBSCTL_LVT_OFFSET_MASK;
-
-       if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
-               pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
-                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-               goto out;
-       }
-
-       if (!get_eilvt(offset)) {
-               pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
-                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-               goto out;
-       }
-
-       valid = 1;
-out:
-       preempt_enable();
-
-       return valid;
-}
-
-static int setup_ibs_ctl(int ibs_eilvt_off)
-{
-       struct pci_dev *cpu_cfg;
-       int nodes;
-       u32 value = 0;
-
-       nodes = 0;
-       cpu_cfg = NULL;
-       do {
-               cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
-                                        PCI_DEVICE_ID_AMD_10H_NB_MISC,
-                                        cpu_cfg);
-               if (!cpu_cfg)
-                       break;
-               ++nodes;
-               pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
-                                      | IBSCTL_LVT_OFFSET_VALID);
-               pci_read_config_dword(cpu_cfg, IBSCTL, &value);
-               if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
-                       pci_dev_put(cpu_cfg);
-                       printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
-                              "IBSCTL = 0x%08x\n", value);
-                       return -EINVAL;
-               }
-       } while (1);
-
-       if (!nodes) {
-               printk(KERN_DEBUG "No CPU node configured for IBS\n");
-               return -ENODEV;
-       }
-
-       return 0;
-}
-
-/*
- * This runs only on the current cpu. We try to find an LVT offset and
- * set up the local APIC. For this we must disable preemption. On
- * success we initialize all nodes with this offset. This then updates
- * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
- * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
- * uses the new offset.
- */
-static void force_ibs_eilvt_setup(void)
-{
-       int offset;
-       int ret;
-
-       preempt_disable();
-       /* find the next available EILVT entry, skip offset 0 */
-       for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
-               if (get_eilvt(offset))
-                       break;
-       }
-       preempt_enable();
-
-       if (offset == APIC_EILVT_NR_MAX) {
-               printk(KERN_DEBUG "No EILVT entry available\n");
-               return;
-       }
-
-       ret = setup_ibs_ctl(offset);
-       if (ret)
-               goto out;
-
-       if (!ibs_eilvt_valid())
-               goto out;
-
-       pr_info("IBS: LVT offset %d assigned\n", offset);
-
-       return;
-out:
-       preempt_disable();
-       put_eilvt(offset);
-       preempt_enable();
-       return;
-}
-
-static void ibs_eilvt_setup(void)
-{
-       /*
-        * Force LVT offset assignment for family 10h: The offsets are
-        * not assigned by the BIOS for this family, so the OS is
-        * responsible for doing it. If the OS assignment fails, fall
-        * back to the BIOS settings and try to set it up from there.
-        */
-       if (boot_cpu_data.x86 == 0x10)
-               force_ibs_eilvt_setup();
-}
-
-static inline int get_ibs_lvt_offset(void)
-{
-       u64 val;
-
-       rdmsrl(MSR_AMD64_IBSCTL, val);
-       if (!(val & IBSCTL_LVT_OFFSET_VALID))
-               return -EINVAL;
-
-       return val & IBSCTL_LVT_OFFSET_MASK;
-}
-
-static void setup_APIC_ibs(void *dummy)
-{
-       int offset;
-
-       offset = get_ibs_lvt_offset();
-       if (offset < 0)
-               goto failed;
-
-       if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
-               return;
-failed:
-       pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
-               smp_processor_id());
-}
-
-static void clear_APIC_ibs(void *dummy)
-{
-       int offset;
-
-       offset = get_ibs_lvt_offset();
-       if (offset >= 0)
-               setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
-}
-
-#ifdef CONFIG_PM
-
-static int perf_ibs_suspend(void)
-{
-       clear_APIC_ibs(NULL);
-       return 0;
-}
-
-static void perf_ibs_resume(void)
-{
-       ibs_eilvt_setup();
-       setup_APIC_ibs(NULL);
-}
-
-static struct syscore_ops perf_ibs_syscore_ops = {
-       .resume         = perf_ibs_resume,
-       .suspend        = perf_ibs_suspend,
-};
-
-static void perf_ibs_pm_init(void)
-{
-       register_syscore_ops(&perf_ibs_syscore_ops);
-}
-
-#else
-
-static inline void perf_ibs_pm_init(void) { }
-
-#endif
-
-static int
-perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_STARTING:
-               setup_APIC_ibs(NULL);
-               break;
-       case CPU_DYING:
-               clear_APIC_ibs(NULL);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static __init int amd_ibs_init(void)
-{
-       u32 caps;
-       int ret = -EINVAL;
-
-       caps = __get_ibs_caps();
-       if (!caps)
-               return -ENODEV; /* ibs not supported by the cpu */
-
-       ibs_eilvt_setup();
-
-       if (!ibs_eilvt_valid())
-               goto out;
-
-       perf_ibs_pm_init();
-       cpu_notifier_register_begin();
-       ibs_caps = caps;
-       /* make ibs_caps visible to other cpus: */
-       smp_mb();
-       smp_call_function(setup_APIC_ibs, NULL, 1);
-       __perf_cpu_notifier(perf_ibs_cpu_notifier);
-       cpu_notifier_register_done();
-
-       ret = perf_event_ibs_init();
-out:
-       if (ret)
-               pr_err("Failed to setup IBS, %d\n", ret);
-       return ret;
-}
-
-/* Since we need the pci subsystem to init ibs, we can't do this earlier: */
-device_initcall(amd_ibs_init);
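
Once amd_ibs_init() has run, the two PMUs registered above are consumed like any other dynamic PMU: user space reads the type number from sysfs and passes it to perf_event_open(). A minimal sketch, assuming the standard /sys/bus/event_source/devices/ibs_op/type path that perf_pmu_register() exposes (the sample period and sample types below are illustrative, not part of this driver):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                int cpu, int group_fd, unsigned long flags)
    {
            return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    }

    int main(void)
    {
            struct perf_event_attr attr;
            unsigned int type;
            FILE *f;
            long fd;

            /* Dynamic PMU type as registered by perf_ibs_pmu_init(..., "ibs_op"). */
            f = fopen("/sys/bus/event_source/devices/ibs_op/type", "r");
            if (!f || fscanf(f, "%u", &type) != 1)
                    return 1;
            fclose(f);

            memset(&attr, 0, sizeof(attr));
            attr.size          = sizeof(attr);
            attr.type          = type;
            attr.config        = 0;        /* plain op sampling */
            attr.sample_period = 0x10000;  /* low 4 bits would be masked off anyway */
            attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_RAW;

            /* IBS uses perf_invalid_context: per-cpu, not per-task (pid = -1). */
            fd = perf_event_open(&attr, -1, 0, -1, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }
            /* ... mmap a ring buffer and consume samples here ... */
            close(fd);
            return 0;
    }

The perf tool resolves the same sysfs entries, so an invocation along the lines of "perf record -a -e ibs_op// ..." ends up in the same event_init path shown above.
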
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
deleted file mode 100644 (file)
index 97242a9..0000000
+++ /dev/null
@@ -1,499 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Steven Kinney <Steven.Kinney@amd.com>
- * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
- *
- * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/perf_event.h>
-#include <linux/module.h>
-#include <linux/cpumask.h>
-#include <linux/slab.h>
-
-#include "perf_event.h"
-#include "perf_event_amd_iommu.h"
-
-#define COUNTER_SHIFT          16
-
-#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
-#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
-
-/* iommu pmu config masks */
-#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
-#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
-#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
-#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
-#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
-#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
-#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
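
These _GET_* macros unpack fields that user space packs into perf_event_attr::config and ::config1; the same layout is exported through the format attributes further down (csource config:0-7, devid config:8-23, pasid config:24-39, domid config:40-55, with the masks in config1). A hedged sketch of the inverse, user-space packing; the helper names are illustrative only:

    #include <stdint.h>

    /* Pack the match fields the way the _GET_* macros above unpack them. */
    static inline uint64_t iommu_pack_config(uint8_t csource, uint16_t devid,
                                             uint16_t pasid, uint16_t domid)
    {
            return (uint64_t)csource |
                   ((uint64_t)devid << 8) |
                   ((uint64_t)pasid << 24) |
                   ((uint64_t)domid << 40);
    }

    /* Pack the corresponding masks into config1 (extra_reg.config). */
    static inline uint64_t iommu_pack_config1(uint16_t devid_mask,
                                              uint16_t pasid_mask,
                                              uint16_t domid_mask)
    {
            return (uint64_t)devid_mask |
                   ((uint64_t)pasid_mask << 16) |
                   ((uint64_t)domid_mask << 32);
    }
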
-
-static struct perf_amd_iommu __perf_iommu;
-
-struct perf_amd_iommu {
-       struct pmu pmu;
-       u8 max_banks;
-       u8 max_counters;
-       u64 cntr_assign_mask;
-       raw_spinlock_t lock;
-       const struct attribute_group *attr_groups[4];
-};
-
-#define format_group   attr_groups[0]
-#define cpumask_group  attr_groups[1]
-#define events_group   attr_groups[2]
-#define null_group     attr_groups[3]
-
-/*---------------------------------------------
- * sysfs format attributes
- *---------------------------------------------*/
-PMU_FORMAT_ATTR(csource,    "config:0-7");
-PMU_FORMAT_ATTR(devid,      "config:8-23");
-PMU_FORMAT_ATTR(pasid,      "config:24-39");
-PMU_FORMAT_ATTR(domid,      "config:40-55");
-PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
-PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
-PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
-
-static struct attribute *iommu_format_attrs[] = {
-       &format_attr_csource.attr,
-       &format_attr_devid.attr,
-       &format_attr_pasid.attr,
-       &format_attr_domid.attr,
-       &format_attr_devid_mask.attr,
-       &format_attr_pasid_mask.attr,
-       &format_attr_domid_mask.attr,
-       NULL,
-};
-
-static struct attribute_group amd_iommu_format_group = {
-       .name = "format",
-       .attrs = iommu_format_attrs,
-};
-
-/*---------------------------------------------
- * sysfs events attributes
- *---------------------------------------------*/
-struct amd_iommu_event_desc {
-       struct kobj_attribute attr;
-       const char *event;
-};
-
-static ssize_t _iommu_event_show(struct kobject *kobj,
-                               struct kobj_attribute *attr, char *buf)
-{
-       struct amd_iommu_event_desc *event =
-               container_of(attr, struct amd_iommu_event_desc, attr);
-       return sprintf(buf, "%s\n", event->event);
-}
-
-#define AMD_IOMMU_EVENT_DESC(_name, _event)                    \
-{                                                              \
-       .attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),  \
-       .event = _event,                                        \
-}
-
-static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
-       AMD_IOMMU_EVENT_DESC(mem_pass_untrans,        "csource=0x01"),
-       AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,       "csource=0x02"),
-       AMD_IOMMU_EVENT_DESC(mem_pass_excl,           "csource=0x03"),
-       AMD_IOMMU_EVENT_DESC(mem_target_abort,        "csource=0x04"),
-       AMD_IOMMU_EVENT_DESC(mem_trans_total,         "csource=0x05"),
-       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,   "csource=0x06"),
-       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,   "csource=0x07"),
-       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,   "csource=0x08"),
-       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,   "csource=0x09"),
-       AMD_IOMMU_EVENT_DESC(mem_dte_hit,             "csource=0x0a"),
-       AMD_IOMMU_EVENT_DESC(mem_dte_mis,             "csource=0x0b"),
-       AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,       "csource=0x0c"),
-       AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,       "csource=0x0d"),
-       AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,       "csource=0x0e"),
-       AMD_IOMMU_EVENT_DESC(int_dte_hit,             "csource=0x0f"),
-       AMD_IOMMU_EVENT_DESC(int_dte_mis,             "csource=0x10"),
-       AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
-       AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
-       AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
-       { /* end: all zeroes */ },
-};
-
-/*---------------------------------------------
- * sysfs cpumask attributes
- *---------------------------------------------*/
-static cpumask_t iommu_cpumask;
-
-static ssize_t _iommu_cpumask_show(struct device *dev,
-                                  struct device_attribute *attr,
-                                  char *buf)
-{
-       return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
-}
-static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
-
-static struct attribute *iommu_cpumask_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group amd_iommu_cpumask_group = {
-       .attrs = iommu_cpumask_attrs,
-};
-
-/*---------------------------------------------*/
-
-static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
-{
-       unsigned long flags;
-       int shift, bank, cntr, retval;
-       int max_banks = perf_iommu->max_banks;
-       int max_cntrs = perf_iommu->max_counters;
-
-       raw_spin_lock_irqsave(&perf_iommu->lock, flags);
-
-       for (bank = 0, shift = 0; bank < max_banks; bank++) {
-               for (cntr = 0; cntr < max_cntrs; cntr++) {
-                       shift = bank + (bank*3) + cntr;
-                       if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
-                               continue;
-                       } else {
-                               perf_iommu->cntr_assign_mask |= (1ULL<<shift);
-                               retval = ((u16)((u16)bank<<8) | (u8)(cntr));
-                               goto out;
-                       }
-               }
-       }
-       retval = -ENOSPC;
-out:
-       raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
-       return retval;
-}
-
-static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
-                                       u8 bank, u8 cntr)
-{
-       unsigned long flags;
-       int max_banks, max_cntrs;
-       int shift = 0;
-
-       max_banks = perf_iommu->max_banks;
-       max_cntrs = perf_iommu->max_counters;
-
-       if ((bank > max_banks) || (cntr > max_cntrs))
-               return -EINVAL;
-
-       shift = bank + cntr + (bank*3);
-
-       raw_spin_lock_irqsave(&perf_iommu->lock, flags);
-       perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
-       raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
-
-       return 0;
-}
-
-static int perf_iommu_event_init(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_amd_iommu *perf_iommu;
-       u64 config, config1;
-
-       /* check the event attr type for PMU enumeration */
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       /*
-        * IOMMU counters are shared across all cores.
-        * Therefore, the PMU does not support per-process mode.
-        * Also, it does not support event sampling mode.
-        */
-       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
-               return -EINVAL;
-
-       /* IOMMU counters do not have usr/os/guest/host bits */
-       if (event->attr.exclude_user || event->attr.exclude_kernel ||
-           event->attr.exclude_host || event->attr.exclude_guest)
-               return -EINVAL;
-
-       if (event->cpu < 0)
-               return -EINVAL;
-
-       perf_iommu = &__perf_iommu;
-
-       if (event->pmu != &perf_iommu->pmu)
-               return -ENOENT;
-
-       if (perf_iommu) {
-               config = event->attr.config;
-               config1 = event->attr.config1;
-       } else {
-               return -EINVAL;
-       }
-
-       /* integrate with iommu base devid (0000), assume one iommu */
-       perf_iommu->max_banks =
-               amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
-       perf_iommu->max_counters =
-               amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
-       if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
-               return -EINVAL;
-
-       /* update the hw_perf_event struct with the iommu config data */
-       hwc->config = config;
-       hwc->extra_reg.config = config1;
-
-       return 0;
-}
-
-static void perf_iommu_enable_event(struct perf_event *ev)
-{
-       u8 csource = _GET_CSOURCE(ev);
-       u16 devid = _GET_DEVID(ev);
-       u64 reg = 0ULL;
-
-       reg = csource;
-       amd_iommu_pc_get_set_reg_val(devid,
-                       _GET_BANK(ev), _GET_CNTR(ev),
-                        IOMMU_PC_COUNTER_SRC_REG, &reg, true);
-
-       reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
-       if (reg)
-               reg |= (1UL << 31);
-       amd_iommu_pc_get_set_reg_val(devid,
-                       _GET_BANK(ev), _GET_CNTR(ev),
-                        IOMMU_PC_DEVID_MATCH_REG, &reg, true);
-
-       reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
-       if (reg)
-               reg |= (1UL << 31);
-       amd_iommu_pc_get_set_reg_val(devid,
-                       _GET_BANK(ev), _GET_CNTR(ev),
-                        IOMMU_PC_PASID_MATCH_REG, &reg, true);
-
-       reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
-       if (reg)
-               reg |= (1UL << 31);
-       amd_iommu_pc_get_set_reg_val(devid,
-                       _GET_BANK(ev), _GET_CNTR(ev),
-                        IOMMU_PC_DOMID_MATCH_REG, &reg, true);
-}
-
-static void perf_iommu_disable_event(struct perf_event *event)
-{
-       u64 reg = 0ULL;
-
-       amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-                       _GET_BANK(event), _GET_CNTR(event),
-                       IOMMU_PC_COUNTER_SRC_REG, &reg, true);
-}
-
-static void perf_iommu_start(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       pr_debug("perf: amd_iommu:perf_iommu_start\n");
-       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
-               return;
-
-       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-       hwc->state = 0;
-
-       if (flags & PERF_EF_RELOAD) {
-               u64 prev_raw_count =  local64_read(&hwc->prev_count);
-               amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-                               _GET_BANK(event), _GET_CNTR(event),
-                               IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
-       }
-
-       perf_iommu_enable_event(event);
-       perf_event_update_userpage(event);
-
-}
-
-static void perf_iommu_read(struct perf_event *event)
-{
-       u64 count = 0ULL;
-       u64 prev_raw_count = 0ULL;
-       u64 delta = 0ULL;
-       struct hw_perf_event *hwc = &event->hw;
-       pr_debug("perf: amd_iommu:perf_iommu_read\n");
-
-       amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-                               _GET_BANK(event), _GET_CNTR(event),
-                               IOMMU_PC_COUNTER_REG, &count, false);
-
-       /* IOMMU pc counter register is only 48 bits */
-       count &= 0xFFFFFFFFFFFFULL;
-
-       prev_raw_count =  local64_read(&hwc->prev_count);
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                                       count) != prev_raw_count)
-               return;
-
-       /* Handling 48-bit counter overflowing */
-       delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
-       delta >>= COUNTER_SHIFT;
-       local64_add(delta, &event->count);
-
-}
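
The COUNTER_SHIFT arithmetic above is how the 48-bit IOMMU counter is widened for wrap-safe delta computation: both snapshots are shifted into the top 48 bits of a 64-bit value, so a hardware wrap becomes an ordinary 64-bit wrap before the result is shifted back down. A small self-contained sketch of the same arithmetic (the amd_uncore code later in this patch uses the identical trick):

    #include <assert.h>
    #include <stdint.h>

    #define COUNTER_SHIFT 16        /* 64 - 48 bits of counter width */

    /* Wrap-safe delta between two 48-bit counter snapshots. */
    static uint64_t delta48(uint64_t prev, uint64_t now)
    {
            int64_t delta = (now << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);

            return (uint64_t)(delta >> COUNTER_SHIFT);
    }

    int main(void)
    {
            /* Counter wrapped from near the 48-bit limit back to 0x10. */
            assert(delta48(0xFFFFFFFFFFF0ULL, 0x10ULL) == 0x20);
            /* Ordinary, non-wrapping case. */
            assert(delta48(100, 250) == 150);
            return 0;
    }
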
-
-static void perf_iommu_stop(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 config;
-
-       pr_debug("perf: amd_iommu:perf_iommu_stop\n");
-
-       if (hwc->state & PERF_HES_UPTODATE)
-               return;
-
-       perf_iommu_disable_event(event);
-       WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-       hwc->state |= PERF_HES_STOPPED;
-
-       if (hwc->state & PERF_HES_UPTODATE)
-               return;
-
-       config = hwc->config;
-       perf_iommu_read(event);
-       hwc->state |= PERF_HES_UPTODATE;
-}
-
-static int perf_iommu_add(struct perf_event *event, int flags)
-{
-       int retval;
-       struct perf_amd_iommu *perf_iommu =
-                       container_of(event->pmu, struct perf_amd_iommu, pmu);
-
-       pr_debug("perf: amd_iommu:perf_iommu_add\n");
-       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-       /* request an iommu bank/counter */
-       retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
-       if (retval != -ENOSPC)
-               event->hw.extra_reg.reg = (u16)retval;
-       else
-               return retval;
-
-       if (flags & PERF_EF_START)
-               perf_iommu_start(event, PERF_EF_RELOAD);
-
-       return 0;
-}
-
-static void perf_iommu_del(struct perf_event *event, int flags)
-{
-       struct perf_amd_iommu *perf_iommu =
-                       container_of(event->pmu, struct perf_amd_iommu, pmu);
-
-       pr_debug("perf: amd_iommu:perf_iommu_del\n");
-       perf_iommu_stop(event, PERF_EF_UPDATE);
-
-       /* clear the assigned iommu bank/counter */
-       clear_avail_iommu_bnk_cntr(perf_iommu,
-                                    _GET_BANK(event),
-                                    _GET_CNTR(event));
-
-       perf_event_update_userpage(event);
-}
-
-static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
-{
-       struct attribute **attrs;
-       struct attribute_group *attr_group;
-       int i = 0, j;
-
-       while (amd_iommu_v2_event_descs[i].attr.attr.name)
-               i++;
-
-       attr_group = kzalloc(sizeof(struct attribute *)
-               * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
-       if (!attr_group)
-               return -ENOMEM;
-
-       attrs = (struct attribute **)(attr_group + 1);
-       for (j = 0; j < i; j++)
-               attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
-
-       attr_group->name = "events";
-       attr_group->attrs = attrs;
-       perf_iommu->events_group = attr_group;
-
-       return 0;
-}
-
-static __init void amd_iommu_pc_exit(void)
-{
-       if (__perf_iommu.events_group != NULL) {
-               kfree(__perf_iommu.events_group);
-               __perf_iommu.events_group = NULL;
-       }
-}
-
-static __init int _init_perf_amd_iommu(
-       struct perf_amd_iommu *perf_iommu, char *name)
-{
-       int ret;
-
-       raw_spin_lock_init(&perf_iommu->lock);
-
-       /* Init format attributes */
-       perf_iommu->format_group = &amd_iommu_format_group;
-
-       /* Init cpumask attributes to only core 0 */
-       cpumask_set_cpu(0, &iommu_cpumask);
-       perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
-
-       /* Init events attributes */
-       if (_init_events_attrs(perf_iommu) != 0)
-               pr_err("perf: amd_iommu: Only raw events are supported.\n");
-
-       /* Init null attributes */
-       perf_iommu->null_group = NULL;
-       perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
-
-       ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
-       if (ret) {
-               pr_err("perf: amd_iommu: Failed to initialize.\n");
-               amd_iommu_pc_exit();
-       } else {
-               pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
-                       amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
-                       amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
-       }
-
-       return ret;
-}
-
-static struct perf_amd_iommu __perf_iommu = {
-       .pmu = {
-               .event_init     = perf_iommu_event_init,
-               .add            = perf_iommu_add,
-               .del            = perf_iommu_del,
-               .start          = perf_iommu_start,
-               .stop           = perf_iommu_stop,
-               .read           = perf_iommu_read,
-       },
-       .max_banks              = 0x00,
-       .max_counters           = 0x00,
-       .cntr_assign_mask       = 0ULL,
-       .format_group           = NULL,
-       .cpumask_group          = NULL,
-       .events_group           = NULL,
-       .null_group             = NULL,
-};
-
-static __init int amd_iommu_pc_init(void)
-{
-       /* Make sure the IOMMU PC resource is available */
-       if (!amd_iommu_pc_supported())
-               return -ENODEV;
-
-       _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
-
-       return 0;
-}
-
-device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
deleted file mode 100644 (file)
index 845d173..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Steven Kinney <Steven.Kinney@amd.com>
- * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _PERF_EVENT_AMD_IOMMU_H_
-#define _PERF_EVENT_AMD_IOMMU_H_
-
-/* iommu pc mmio region register indexes */
-#define IOMMU_PC_COUNTER_REG                   0x00
-#define IOMMU_PC_COUNTER_SRC_REG               0x08
-#define IOMMU_PC_PASID_MATCH_REG               0x10
-#define IOMMU_PC_DOMID_MATCH_REG               0x18
-#define IOMMU_PC_DEVID_MATCH_REG               0x20
-#define IOMMU_PC_COUNTER_REPORT_REG            0x28
-
-/* maximum specified banks/counters */
-#define PC_MAX_SPEC_BNKS                       64
-#define PC_MAX_SPEC_CNTRS                      16
-
-/* iommu pc reg masks */
-#define IOMMU_BASE_DEVID                       0x0000
-
-/* amd_iommu_init.c external support functions */
-extern bool amd_iommu_pc_supported(void);
-
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
-
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
-
-extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
-                       u8 fxn, u64 *value, bool is_write);
-
-#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
deleted file mode 100644 (file)
index 8836fc9..0000000
+++ /dev/null
@@ -1,603 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Jacob Shin <jacob.shin@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/perf_event.h>
-#include <linux/percpu.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-
-#include <asm/cpufeature.h>
-#include <asm/perf_event.h>
-#include <asm/msr.h>
-
-#define NUM_COUNTERS_NB                4
-#define NUM_COUNTERS_L2                4
-#define MAX_COUNTERS           NUM_COUNTERS_NB
-
-#define RDPMC_BASE_NB          6
-#define RDPMC_BASE_L2          10
-
-#define COUNTER_SHIFT          16
-
-struct amd_uncore {
-       int id;
-       int refcnt;
-       int cpu;
-       int num_counters;
-       int rdpmc_base;
-       u32 msr_base;
-       cpumask_t *active_mask;
-       struct pmu *pmu;
-       struct perf_event *events[MAX_COUNTERS];
-       struct amd_uncore *free_when_cpu_online;
-};
-
-static struct amd_uncore * __percpu *amd_uncore_nb;
-static struct amd_uncore * __percpu *amd_uncore_l2;
-
-static struct pmu amd_nb_pmu;
-static struct pmu amd_l2_pmu;
-
-static cpumask_t amd_nb_active_mask;
-static cpumask_t amd_l2_active_mask;
-
-static bool is_nb_event(struct perf_event *event)
-{
-       return event->pmu->type == amd_nb_pmu.type;
-}
-
-static bool is_l2_event(struct perf_event *event)
-{
-       return event->pmu->type == amd_l2_pmu.type;
-}
-
-static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
-{
-       if (is_nb_event(event) && amd_uncore_nb)
-               return *per_cpu_ptr(amd_uncore_nb, event->cpu);
-       else if (is_l2_event(event) && amd_uncore_l2)
-               return *per_cpu_ptr(amd_uncore_l2, event->cpu);
-
-       return NULL;
-}
-
-static void amd_uncore_read(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 prev, new;
-       s64 delta;
-
-       /*
-        * since we do not enable counter overflow interrupts,
-        * we do not have to worry about prev_count changing on us
-        */
-
-       prev = local64_read(&hwc->prev_count);
-       rdpmcl(hwc->event_base_rdpmc, new);
-       local64_set(&hwc->prev_count, new);
-       delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
-       delta >>= COUNTER_SHIFT;
-       local64_add(delta, &event->count);
-}
-
-static void amd_uncore_start(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (flags & PERF_EF_RELOAD)
-               wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
-
-       hwc->state = 0;
-       wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
-       perf_event_update_userpage(event);
-}
-
-static void amd_uncore_stop(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       wrmsrl(hwc->config_base, hwc->config);
-       hwc->state |= PERF_HES_STOPPED;
-
-       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               amd_uncore_read(event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-}
-
-static int amd_uncore_add(struct perf_event *event, int flags)
-{
-       int i;
-       struct amd_uncore *uncore = event_to_amd_uncore(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       /* are we already assigned? */
-       if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
-               goto out;
-
-       for (i = 0; i < uncore->num_counters; i++) {
-               if (uncore->events[i] == event) {
-                       hwc->idx = i;
-                       goto out;
-               }
-       }
-
-       /* if not, take the first available counter */
-       hwc->idx = -1;
-       for (i = 0; i < uncore->num_counters; i++) {
-               if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
-                       hwc->idx = i;
-                       break;
-               }
-       }
-
-out:
-       if (hwc->idx == -1)
-               return -EBUSY;
-
-       hwc->config_base = uncore->msr_base + (2 * hwc->idx);
-       hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
-       hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-       if (flags & PERF_EF_START)
-               amd_uncore_start(event, PERF_EF_RELOAD);
-
-       return 0;
-}
-
-static void amd_uncore_del(struct perf_event *event, int flags)
-{
-       int i;
-       struct amd_uncore *uncore = event_to_amd_uncore(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       amd_uncore_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < uncore->num_counters; i++) {
-               if (cmpxchg(&uncore->events[i], event, NULL) == event)
-                       break;
-       }
-
-       hwc->idx = -1;
-}
-
-static int amd_uncore_event_init(struct perf_event *event)
-{
-       struct amd_uncore *uncore;
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       /*
-        * NB and L2 counters (MSRs) are shared across all cores that share the
-        * same NB / L2 cache. Interrupts can be directed to a single target
-        * core; however, event counts generated by processes running on other
-        * cores cannot be masked out. So we do not support sampling or
-        * per-thread events.
-        */
-       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
-               return -EINVAL;
-
-       /* NB and L2 counters do not have usr/os/guest/host bits */
-       if (event->attr.exclude_user || event->attr.exclude_kernel ||
-           event->attr.exclude_host || event->attr.exclude_guest)
-               return -EINVAL;
-
-       /* and we do not enable counter overflow interrupts */
-       hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
-       hwc->idx = -1;
-
-       if (event->cpu < 0)
-               return -EINVAL;
-
-       uncore = event_to_amd_uncore(event);
-       if (!uncore)
-               return -ENODEV;
-
-       /*
-        * since requests can come in on any of the shared cores, we remap
-        * the event to a single common cpu.
-        */
-       event->cpu = uncore->cpu;
-
-       return 0;
-}
-
-static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
-                                           struct device_attribute *attr,
-                                           char *buf)
-{
-       cpumask_t *active_mask;
-       struct pmu *pmu = dev_get_drvdata(dev);
-
-       if (pmu->type == amd_nb_pmu.type)
-               active_mask = &amd_nb_active_mask;
-       else if (pmu->type == amd_l2_pmu.type)
-               active_mask = &amd_l2_active_mask;
-       else
-               return 0;
-
-       return cpumap_print_to_pagebuf(true, buf, active_mask);
-}
-static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
-
-static struct attribute *amd_uncore_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group amd_uncore_attr_group = {
-       .attrs = amd_uncore_attrs,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7,32-35");
-PMU_FORMAT_ATTR(umask, "config:8-15");
-
-static struct attribute *amd_uncore_format_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       NULL,
-};
-
-static struct attribute_group amd_uncore_format_group = {
-       .name = "format",
-       .attrs = amd_uncore_format_attr,
-};
-
-static const struct attribute_group *amd_uncore_attr_groups[] = {
-       &amd_uncore_attr_group,
-       &amd_uncore_format_group,
-       NULL,
-};
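
PMU_FORMAT_ATTR(event, "config:0-7,32-35") above splits the AMD event-select field across two ranges of the config word, with the unit mask in bits 8-15. A hedged sketch of how user space would pack a raw northbridge/L2 event accordingly; the helper name is illustrative:

    #include <stdint.h>

    /* Pack a 12-bit event select and 8-bit unit mask per the format above. */
    static inline uint64_t amd_uncore_pack(uint16_t event, uint8_t umask)
    {
            uint64_t config;

            config  = (uint64_t)(event & 0xff);               /* event[7:0]  -> config[7:0]   */
            config |= (uint64_t)umask << 8;                   /* umask[7:0]  -> config[15:8]  */
            config |= (uint64_t)((event >> 8) & 0xf) << 32;   /* event[11:8] -> config[35:32] */

            return config;
    }
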
-
-static struct pmu amd_nb_pmu = {
-       .attr_groups    = amd_uncore_attr_groups,
-       .name           = "amd_nb",
-       .event_init     = amd_uncore_event_init,
-       .add            = amd_uncore_add,
-       .del            = amd_uncore_del,
-       .start          = amd_uncore_start,
-       .stop           = amd_uncore_stop,
-       .read           = amd_uncore_read,
-};
-
-static struct pmu amd_l2_pmu = {
-       .attr_groups    = amd_uncore_attr_groups,
-       .name           = "amd_l2",
-       .event_init     = amd_uncore_event_init,
-       .add            = amd_uncore_add,
-       .del            = amd_uncore_del,
-       .start          = amd_uncore_start,
-       .stop           = amd_uncore_stop,
-       .read           = amd_uncore_read,
-};
-
-static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
-{
-       return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
-                       cpu_to_node(cpu));
-}
-
-static int amd_uncore_cpu_up_prepare(unsigned int cpu)
-{
-       struct amd_uncore *uncore_nb = NULL, *uncore_l2;
-
-       if (amd_uncore_nb) {
-               uncore_nb = amd_uncore_alloc(cpu);
-               if (!uncore_nb)
-                       goto fail;
-               uncore_nb->cpu = cpu;
-               uncore_nb->num_counters = NUM_COUNTERS_NB;
-               uncore_nb->rdpmc_base = RDPMC_BASE_NB;
-               uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
-               uncore_nb->active_mask = &amd_nb_active_mask;
-               uncore_nb->pmu = &amd_nb_pmu;
-               *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
-       }
-
-       if (amd_uncore_l2) {
-               uncore_l2 = amd_uncore_alloc(cpu);
-               if (!uncore_l2)
-                       goto fail;
-               uncore_l2->cpu = cpu;
-               uncore_l2->num_counters = NUM_COUNTERS_L2;
-               uncore_l2->rdpmc_base = RDPMC_BASE_L2;
-               uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
-               uncore_l2->active_mask = &amd_l2_active_mask;
-               uncore_l2->pmu = &amd_l2_pmu;
-               *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
-       }
-
-       return 0;
-
-fail:
-       if (amd_uncore_nb)
-               *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
-       kfree(uncore_nb);
-       return -ENOMEM;
-}
-
-static struct amd_uncore *
-amd_uncore_find_online_sibling(struct amd_uncore *this,
-                              struct amd_uncore * __percpu *uncores)
-{
-       unsigned int cpu;
-       struct amd_uncore *that;
-
-       for_each_online_cpu(cpu) {
-               that = *per_cpu_ptr(uncores, cpu);
-
-               if (!that)
-                       continue;
-
-               if (this == that)
-                       continue;
-
-               if (this->id == that->id) {
-                       that->free_when_cpu_online = this;
-                       this = that;
-                       break;
-               }
-       }
-
-       this->refcnt++;
-       return this;
-}
-
-static void amd_uncore_cpu_starting(unsigned int cpu)
-{
-       unsigned int eax, ebx, ecx, edx;
-       struct amd_uncore *uncore;
-
-       if (amd_uncore_nb) {
-               uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
-               cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-               uncore->id = ecx & 0xff;
-
-               uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
-               *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
-       }
-
-       if (amd_uncore_l2) {
-               unsigned int apicid = cpu_data(cpu).apicid;
-               unsigned int nshared;
-
-               uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
-               cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
-               nshared = ((eax >> 14) & 0xfff) + 1;
-               uncore->id = apicid - (apicid % nshared);
-
-               uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
-               *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
-       }
-}
-
-static void uncore_online(unsigned int cpu,
-                         struct amd_uncore * __percpu *uncores)
-{
-       struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
-
-       kfree(uncore->free_when_cpu_online);
-       uncore->free_when_cpu_online = NULL;
-
-       if (cpu == uncore->cpu)
-               cpumask_set_cpu(cpu, uncore->active_mask);
-}
-
-static void amd_uncore_cpu_online(unsigned int cpu)
-{
-       if (amd_uncore_nb)
-               uncore_online(cpu, amd_uncore_nb);
-
-       if (amd_uncore_l2)
-               uncore_online(cpu, amd_uncore_l2);
-}
-
-static void uncore_down_prepare(unsigned int cpu,
-                               struct amd_uncore * __percpu *uncores)
-{
-       unsigned int i;
-       struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
-
-       if (this->cpu != cpu)
-               return;
-
-       /* this cpu is going down, migrate to a shared sibling if possible */
-       for_each_online_cpu(i) {
-               struct amd_uncore *that = *per_cpu_ptr(uncores, i);
-
-               if (cpu == i)
-                       continue;
-
-               if (this == that) {
-                       perf_pmu_migrate_context(this->pmu, cpu, i);
-                       cpumask_clear_cpu(cpu, that->active_mask);
-                       cpumask_set_cpu(i, that->active_mask);
-                       that->cpu = i;
-                       break;
-               }
-       }
-}
-
-static void amd_uncore_cpu_down_prepare(unsigned int cpu)
-{
-       if (amd_uncore_nb)
-               uncore_down_prepare(cpu, amd_uncore_nb);
-
-       if (amd_uncore_l2)
-               uncore_down_prepare(cpu, amd_uncore_l2);
-}
-
-static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
-{
-       struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
-
-       if (cpu == uncore->cpu)
-               cpumask_clear_cpu(cpu, uncore->active_mask);
-
-       if (!--uncore->refcnt)
-               kfree(uncore);
-       *per_cpu_ptr(uncores, cpu) = NULL;
-}
-
-static void amd_uncore_cpu_dead(unsigned int cpu)
-{
-       if (amd_uncore_nb)
-               uncore_dead(cpu, amd_uncore_nb);
-
-       if (amd_uncore_l2)
-               uncore_dead(cpu, amd_uncore_l2);
-}
-
-static int
-amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
-                       void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               if (amd_uncore_cpu_up_prepare(cpu))
-                       return notifier_from_errno(-ENOMEM);
-               break;
-
-       case CPU_STARTING:
-               amd_uncore_cpu_starting(cpu);
-               break;
-
-       case CPU_ONLINE:
-               amd_uncore_cpu_online(cpu);
-               break;
-
-       case CPU_DOWN_PREPARE:
-               amd_uncore_cpu_down_prepare(cpu);
-               break;
-
-       case CPU_UP_CANCELED:
-       case CPU_DEAD:
-               amd_uncore_cpu_dead(cpu);
-               break;
-
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block amd_uncore_cpu_notifier_block = {
-       .notifier_call  = amd_uncore_cpu_notifier,
-       .priority       = CPU_PRI_PERF + 1,
-};
-
-static void __init init_cpu_already_online(void *dummy)
-{
-       unsigned int cpu = smp_processor_id();
-
-       amd_uncore_cpu_starting(cpu);
-       amd_uncore_cpu_online(cpu);
-}
-
-static void cleanup_cpu_online(void *dummy)
-{
-       unsigned int cpu = smp_processor_id();
-
-       amd_uncore_cpu_dead(cpu);
-}
-
-static int __init amd_uncore_init(void)
-{
-       unsigned int cpu, cpu2;
-       int ret = -ENODEV;
-
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
-               goto fail_nodev;
-
-       if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
-               goto fail_nodev;
-
-       if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
-               amd_uncore_nb = alloc_percpu(struct amd_uncore *);
-               if (!amd_uncore_nb) {
-                       ret = -ENOMEM;
-                       goto fail_nb;
-               }
-               ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
-               if (ret)
-                       goto fail_nb;
-
-               printk(KERN_INFO "perf: AMD NB counters detected\n");
-               ret = 0;
-       }
-
-       if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
-               amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
-               if (!amd_uncore_l2) {
-                       ret = -ENOMEM;
-                       goto fail_l2;
-               }
-               ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
-               if (ret)
-                       goto fail_l2;
-
-               printk(KERN_INFO "perf: AMD L2I counters detected\n");
-               ret = 0;
-       }
-
-       if (ret)
-               goto fail_nodev;
-
-       cpu_notifier_register_begin();
-
-       /* init cpus already online before registering for hotplug notifier */
-       for_each_online_cpu(cpu) {
-               ret = amd_uncore_cpu_up_prepare(cpu);
-               if (ret)
-                       goto fail_online;
-               smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
-       }
-
-       __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
-       cpu_notifier_register_done();
-
-       return 0;
-
-
-fail_online:
-       for_each_online_cpu(cpu2) {
-               if (cpu2 == cpu)
-                       break;
-               smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
-       }
-       cpu_notifier_register_done();
-
-       /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
-       amd_uncore_nb = amd_uncore_l2 = NULL;
-
-       if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
-               perf_pmu_unregister(&amd_l2_pmu);
-fail_l2:
-       if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
-               perf_pmu_unregister(&amd_nb_pmu);
-       if (amd_uncore_l2)
-               free_percpu(amd_uncore_l2);
-fail_nb:
-       if (amd_uncore_nb)
-               free_percpu(amd_uncore_nb);
-
-fail_nodev:
-       return ret;
-}
-device_initcall(amd_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
deleted file mode 100644 (file)
index fed2ab1..0000000
+++ /dev/null
@@ -1,3773 +0,0 @@
-/*
- * Per core/cpu state
- *
- * Used to coordinate shared registers between HT threads or
- * among events on a single PMU.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/nmi.h>
-
-#include <asm/cpufeature.h>
-#include <asm/hardirq.h>
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-/*
- * Intel PerfMon, used on Core and later.
- */
-static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
-{
-       [PERF_COUNT_HW_CPU_CYCLES]              = 0x003c,
-       [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
-       [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x4f2e,
-       [PERF_COUNT_HW_CACHE_MISSES]            = 0x412e,
-       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
-       [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
-       [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
-       [PERF_COUNT_HW_REF_CPU_CYCLES]          = 0x0300, /* pseudo-encoding */
-};
-
-static struct event_constraint intel_core_event_constraints[] __read_mostly =
-{
-       INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
-       INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
-       INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
-       INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
-       INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
-       INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_core2_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
-       INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
-       INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
-       INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
-       INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
-       INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
-       INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
-       INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
-       INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
-       INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
-       EVENT_CONSTRAINT_END
-};
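A hedged reading of the constraint macros, assuming the standard definitions in perf_event.h: the second argument of INTEL_EVENT_CONSTRAINT() is a bitmask of the general-purpose counters the event may be scheduled on, while FIXED_EVENT_CONSTRAINT() pins an event to the numbered fixed counter.

    /* Illustrative readings of entries from the tables above:              */
    /*   INTEL_EVENT_CONSTRAINT(0x12, 0x2)  -> MUL only on GP counter 1     */
    /*   INTEL_EVENT_CONSTRAINT(0x14, 0x1)  -> CYCLES_DIV_BUSY on counter 0 */
    /*   FIXED_EVENT_CONSTRAINT(0x00c0, 0)  -> INST_RETIRED.ANY on fixed 0  */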
-
-static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
-       INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
-       INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
-       INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
-       INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
-       INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
-       INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-       INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
-{
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
-       EVENT_EXTRA_END
-};
-
-static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-       INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
-       INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
-       INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_snb_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
-       INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
-       INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
-       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
-       INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
-       INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
-       INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
-       INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
-{
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
-       EVENT_EXTRA_END
-};
-
-static struct event_constraint intel_v1_event_constraints[] __read_mostly =
-{
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_gen_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_slm_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_skl_event_constraints[] = {
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),    /* INST_RETIRED.PREC_DIST */
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
-       INTEL_UEVENT_EXTRA_REG(0x01b7,
-                              MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x02b7,
-                              MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
-       EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-       EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-       EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-       /*
-        * Note: the low 8 bits of the eventsel code do not form a contiguous
-        * field; they contain some #GPing bits, which are masked out.
-        */
-       INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
-       EVENT_EXTRA_END
-};
-
-EVENT_ATTR_STR(mem-loads,      mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
-
-struct attribute *nhm_events_attrs[] = {
-       EVENT_PTR(mem_ld_nhm),
-       NULL,
-};
-
-struct attribute *snb_events_attrs[] = {
-       EVENT_PTR(mem_ld_snb),
-       EVENT_PTR(mem_st_snb),
-       NULL,
-};
-
-static struct event_constraint intel_hsw_event_constraints[] = {
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
-       /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
-       /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
-       /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
-       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
-
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_bdw_event_constraints[] = {
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
-       INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4),        /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
-       EVENT_CONSTRAINT_END
-};
-
-static u64 intel_pmu_event_map(int hw_event)
-{
-       return intel_perfmon_event_map[hw_event];
-}
-
-/*
- * Notes on the events:
- * - data reads do not include code reads (comparable to earlier tables)
- * - data counts include speculative execution (except L1 write, dtlb, bpu)
- * - remote node access includes remote memory, remote cache, remote mmio.
- * - prefetches are not included in the counts.
- * - icache miss does not include decoded icache
- */
-
-#define SKL_DEMAND_DATA_RD             BIT_ULL(0)
-#define SKL_DEMAND_RFO                 BIT_ULL(1)
-#define SKL_ANY_RESPONSE               BIT_ULL(16)
-#define SKL_SUPPLIER_NONE              BIT_ULL(17)
-#define SKL_L3_MISS_LOCAL_DRAM         BIT_ULL(26)
-#define SKL_L3_MISS_REMOTE_HOP0_DRAM   BIT_ULL(27)
-#define SKL_L3_MISS_REMOTE_HOP1_DRAM   BIT_ULL(28)
-#define SKL_L3_MISS_REMOTE_HOP2P_DRAM  BIT_ULL(29)
-#define SKL_L3_MISS                    (SKL_L3_MISS_LOCAL_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP0_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP1_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP2P_DRAM)
-#define SKL_SPL_HIT                    BIT_ULL(30)
-#define SKL_SNOOP_NONE                 BIT_ULL(31)
-#define SKL_SNOOP_NOT_NEEDED           BIT_ULL(32)
-#define SKL_SNOOP_MISS                 BIT_ULL(33)
-#define SKL_SNOOP_HIT_NO_FWD           BIT_ULL(34)
-#define SKL_SNOOP_HIT_WITH_FWD         BIT_ULL(35)
-#define SKL_SNOOP_HITM                 BIT_ULL(36)
-#define SKL_SNOOP_NON_DRAM             BIT_ULL(37)
-#define SKL_ANY_SNOOP                  (SKL_SPL_HIT|SKL_SNOOP_NONE| \
-                                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
-                                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
-                                        SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
-#define SKL_DEMAND_READ                        SKL_DEMAND_DATA_RD
-#define SKL_SNOOP_DRAM                 (SKL_SNOOP_NONE| \
-                                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
-                                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
-                                        SKL_SNOOP_HITM|SKL_SPL_HIT)
-#define SKL_DEMAND_WRITE               SKL_DEMAND_RFO
-#define SKL_LLC_ACCESS                 SKL_ANY_RESPONSE
-#define SKL_L3_MISS_REMOTE             (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP1_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP2P_DRAM)
-
-static __initconst const u64 skl_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x283,   /* ICACHE_64B.MISS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x608,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x649,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2085,  /* ITLB_MISSES.STLB_HIT */
-               [ C(RESULT_MISS)   ] = 0xe85,   /* ITLB_MISSES.WALK_COMPLETED */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
-};
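For context, these per-model tables are indexed by the three fields that the PERF_TYPE_HW_CACHE ABI packs into attr.config (cache id, op << 8, result << 16); a minimal userspace sketch selecting the L1D read-miss entry (L1D.REPLACEMENT above):

    #include <linux/perf_event.h>

    struct perf_event_attr attr = {
            .type   = PERF_TYPE_HW_CACHE,
            .size   = sizeof(attr),
            .config = PERF_COUNT_HW_CACHE_L1D |
                      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
    };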
-
-static __initconst const u64 skl_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
-                                      SKL_LLC_ACCESS|SKL_ANY_SNOOP,
-               [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
-                                      SKL_L3_MISS|SKL_ANY_SNOOP|
-                                      SKL_SUPPLIER_NONE,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
-                                      SKL_LLC_ACCESS|SKL_ANY_SNOOP,
-               [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
-                                      SKL_L3_MISS|SKL_ANY_SNOOP|
-                                      SKL_SUPPLIER_NONE,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
-                                      SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
-               [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
-                                      SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
-                                      SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
-               [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
-                                      SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
-};
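The composed masks above are not event codes themselves: a hw-cache entry that maps to event 0x01b7 has its mask loaded into MSR_OFFCORE_RSP_0 as an extra register. A hedged raw-event equivalent from userspace would pass the same mask in config1:

    struct perf_event_attr attr = {
            .type    = PERF_TYPE_RAW,
            .size    = sizeof(attr),
            .config  = 0x01b7,              /* OFFCORE_RESPONSE_0 */
            .config1 = SKL_DEMAND_READ | SKL_L3_MISS |
                       SKL_ANY_SNOOP | SKL_SUPPLIER_NONE,
    };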
-
-#define SNB_DMND_DATA_RD       (1ULL << 0)
-#define SNB_DMND_RFO           (1ULL << 1)
-#define SNB_DMND_IFETCH                (1ULL << 2)
-#define SNB_DMND_WB            (1ULL << 3)
-#define SNB_PF_DATA_RD         (1ULL << 4)
-#define SNB_PF_RFO             (1ULL << 5)
-#define SNB_PF_IFETCH          (1ULL << 6)
-#define SNB_LLC_DATA_RD                (1ULL << 7)
-#define SNB_LLC_RFO            (1ULL << 8)
-#define SNB_LLC_IFETCH         (1ULL << 9)
-#define SNB_BUS_LOCKS          (1ULL << 10)
-#define SNB_STRM_ST            (1ULL << 11)
-#define SNB_OTHER              (1ULL << 15)
-#define SNB_RESP_ANY           (1ULL << 16)
-#define SNB_NO_SUPP            (1ULL << 17)
-#define SNB_LLC_HITM           (1ULL << 18)
-#define SNB_LLC_HITE           (1ULL << 19)
-#define SNB_LLC_HITS           (1ULL << 20)
-#define SNB_LLC_HITF           (1ULL << 21)
-#define SNB_LOCAL              (1ULL << 22)
-#define SNB_REMOTE             (0xffULL << 23)
-#define SNB_SNP_NONE           (1ULL << 31)
-#define SNB_SNP_NOT_NEEDED     (1ULL << 32)
-#define SNB_SNP_MISS           (1ULL << 33)
-#define SNB_NO_FWD             (1ULL << 34)
-#define SNB_SNP_FWD            (1ULL << 35)
-#define SNB_HITM               (1ULL << 36)
-#define SNB_NON_DRAM           (1ULL << 37)
-
-#define SNB_DMND_READ          (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
-#define SNB_DMND_WRITE         (SNB_DMND_RFO|SNB_LLC_RFO)
-#define SNB_DMND_PREFETCH      (SNB_PF_DATA_RD|SNB_PF_RFO)
-
-#define SNB_SNP_ANY            (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
-                                SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
-                                SNB_HITM)
-
-#define SNB_DRAM_ANY           (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
-#define SNB_DRAM_REMOTE                (SNB_REMOTE|SNB_SNP_ANY)
-
-#define SNB_L3_ACCESS          SNB_RESP_ANY
-#define SNB_L3_MISS            (SNB_DRAM_ANY|SNB_NON_DRAM)
-
-static __initconst const u64 snb_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
-               [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
-               [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
-               [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
-       },
- },
-};
-
-static __initconst const u64 snb_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
-               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
-               [ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
-               [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
-
-};
-
-/*
- * Notes on the events:
- * - data reads do not include code reads (comparable to earlier tables)
- * - data counts include speculative execution (except L1 write, dtlb, bpu)
- * - remote node access includes remote memory, remote cache, remote mmio.
- * - prefetches are not included in the counts because they are not
- *   reliably counted.
- */
-
-#define HSW_DEMAND_DATA_RD             BIT_ULL(0)
-#define HSW_DEMAND_RFO                 BIT_ULL(1)
-#define HSW_ANY_RESPONSE               BIT_ULL(16)
-#define HSW_SUPPLIER_NONE              BIT_ULL(17)
-#define HSW_L3_MISS_LOCAL_DRAM         BIT_ULL(22)
-#define HSW_L3_MISS_REMOTE_HOP0                BIT_ULL(27)
-#define HSW_L3_MISS_REMOTE_HOP1                BIT_ULL(28)
-#define HSW_L3_MISS_REMOTE_HOP2P       BIT_ULL(29)
-#define HSW_L3_MISS                    (HSW_L3_MISS_LOCAL_DRAM| \
-                                        HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
-                                        HSW_L3_MISS_REMOTE_HOP2P)
-#define HSW_SNOOP_NONE                 BIT_ULL(31)
-#define HSW_SNOOP_NOT_NEEDED           BIT_ULL(32)
-#define HSW_SNOOP_MISS                 BIT_ULL(33)
-#define HSW_SNOOP_HIT_NO_FWD           BIT_ULL(34)
-#define HSW_SNOOP_HIT_WITH_FWD         BIT_ULL(35)
-#define HSW_SNOOP_HITM                 BIT_ULL(36)
-#define HSW_SNOOP_NON_DRAM             BIT_ULL(37)
-#define HSW_ANY_SNOOP                  (HSW_SNOOP_NONE| \
-                                        HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
-                                        HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
-                                        HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
-#define HSW_SNOOP_DRAM                 (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
-#define HSW_DEMAND_READ                        HSW_DEMAND_DATA_RD
-#define HSW_DEMAND_WRITE               HSW_DEMAND_RFO
-#define HSW_L3_MISS_REMOTE             (HSW_L3_MISS_REMOTE_HOP0|\
-                                        HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
-#define HSW_LLC_ACCESS                 HSW_ANY_RESPONSE
-
-#define BDW_L3_MISS_LOCAL              BIT(26)
-#define BDW_L3_MISS                    (BDW_L3_MISS_LOCAL| \
-                                        HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
-                                        HSW_L3_MISS_REMOTE_HOP2P)
-
-
-static __initconst const u64 hsw_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x280,   /* ICACHE.MISSES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x108,   /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x149,   /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x6085,  /* ITLB_MISSES.STLB_HIT */
-               [ C(RESULT_MISS)   ] = 0x185,   /* ITLB_MISSES.MISS_CAUSES_A_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
-};
-
-static __initconst const u64 hsw_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
-                                      HSW_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
-                                      HSW_L3_MISS|HSW_ANY_SNOOP,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
-                                      HSW_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
-                                      HSW_L3_MISS|HSW_ANY_SNOOP,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
-                                      HSW_L3_MISS_LOCAL_DRAM|
-                                      HSW_SNOOP_DRAM,
-               [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
-                                      HSW_L3_MISS_REMOTE|
-                                      HSW_SNOOP_DRAM,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
-                                      HSW_L3_MISS_LOCAL_DRAM|
-                                      HSW_SNOOP_DRAM,
-               [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
-                                      HSW_L3_MISS_REMOTE|
-                                      HSW_SNOOP_DRAM,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
-};
-
-static __initconst const u64 westmere_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
-               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
-               [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-               [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
-               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       /*
-        * Use RFO, not WRITEBACK, because a write miss would typically occur
-        * on RFO.
-        */
-       [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
-               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
-               [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-               [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
-};
-
-/*
- * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
- * See IA32 SDM Vol 3B 30.6.1.3
- */
-
-#define NHM_DMND_DATA_RD       (1 << 0)
-#define NHM_DMND_RFO           (1 << 1)
-#define NHM_DMND_IFETCH                (1 << 2)
-#define NHM_DMND_WB            (1 << 3)
-#define NHM_PF_DATA_RD         (1 << 4)
-#define NHM_PF_DATA_RFO                (1 << 5)
-#define NHM_PF_IFETCH          (1 << 6)
-#define NHM_OFFCORE_OTHER      (1 << 7)
-#define NHM_UNCORE_HIT         (1 << 8)
-#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
-#define NHM_OTHER_CORE_HITM    (1 << 10)
-                               /* reserved */
-#define NHM_REMOTE_CACHE_FWD   (1 << 12)
-#define NHM_REMOTE_DRAM                (1 << 13)
-#define NHM_LOCAL_DRAM         (1 << 14)
-#define NHM_NON_DRAM           (1 << 15)
-
-#define NHM_LOCAL              (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
-#define NHM_REMOTE             (NHM_REMOTE_DRAM)
-
-#define NHM_DMND_READ          (NHM_DMND_DATA_RD)
-#define NHM_DMND_WRITE         (NHM_DMND_RFO|NHM_DMND_WB)
-#define NHM_DMND_PREFETCH      (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
-
-#define NHM_L3_HIT     (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
-#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
-#define NHM_L3_ACCESS  (NHM_L3_HIT|NHM_L3_MISS)
-
-static __initconst const u64 nehalem_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
-               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
-               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
-               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
-       },
- },
-};
-
-static __initconst const u64 nehalem_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
-               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
-               [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-               [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
-               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       /*
-        * Use RFO, not WRITEBACK, because a write miss would typically occur
-        * on RFO.
-        */
-       [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
-               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
-               [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-               [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
-};
-
-static __initconst const u64 core2_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
-               [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
-               [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
-               [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-               [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-               [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
-               [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
-               [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-               [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-static __initconst const u64 atom_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
-               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-               [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-               [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
-               [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
-               [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-               [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-static struct extra_reg intel_slm_extra_regs[] __read_mostly =
-{
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
-       EVENT_EXTRA_END
-};
-
-#define SLM_DMND_READ          SNB_DMND_DATA_RD
-#define SLM_DMND_WRITE         SNB_DMND_RFO
-#define SLM_DMND_PREFETCH      (SNB_PF_DATA_RD|SNB_PF_RFO)
-
-#define SLM_SNP_ANY            (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
-#define SLM_LLC_ACCESS         SNB_RESP_ANY
-#define SLM_LLC_MISS           (SLM_SNP_ANY|SNB_NON_DRAM)
-
-static __initconst const u64 slm_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
-       },
- },
-};
-
-static __initconst const u64 slm_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
-               [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
-               [ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
-               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-#define KNL_OT_L2_HITE         BIT_ULL(19) /* Other Tile L2 Hit */
-#define KNL_OT_L2_HITF         BIT_ULL(20) /* Other Tile L2 Hit */
-#define KNL_MCDRAM_LOCAL       BIT_ULL(21)
-#define KNL_MCDRAM_FAR         BIT_ULL(22)
-#define KNL_DDR_LOCAL          BIT_ULL(23)
-#define KNL_DDR_FAR            BIT_ULL(24)
-#define KNL_DRAM_ANY           (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
-                                   KNL_DDR_LOCAL | KNL_DDR_FAR)
-#define KNL_L2_READ            SLM_DMND_READ
-#define KNL_L2_WRITE           SLM_DMND_WRITE
-#define KNL_L2_PREFETCH                SLM_DMND_PREFETCH
-#define KNL_L2_ACCESS          SLM_LLC_ACCESS
-#define KNL_L2_MISS            (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
-                                  KNL_DRAM_ANY | SNB_SNP_ANY | \
-                                                 SNB_NON_DRAM)
-
-static __initconst const u64 knl_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-       [C(LL)] = {
-               [C(OP_READ)] = {
-                       [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
-                       [C(RESULT_MISS)]   = 0,
-               },
-               [C(OP_WRITE)] = {
-                       [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
-                       [C(RESULT_MISS)]   = KNL_L2_WRITE | KNL_L2_MISS,
-               },
-               [C(OP_PREFETCH)] = {
-                       [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
-                       [C(RESULT_MISS)]   = KNL_L2_PREFETCH | KNL_L2_MISS,
-               },
-       },
-};
-
-/*
- * Use from PMIs where the LBRs are already disabled.
- */
-static void __intel_pmu_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
-               intel_pmu_disable_bts();
-       else
-               intel_bts_disable_local();
-
-       intel_pmu_pebs_disable_all();
-}
-
-static void intel_pmu_disable_all(void)
-{
-       __intel_pmu_disable_all();
-       intel_pmu_lbr_disable_all();
-}
-
-static void __intel_pmu_enable_all(int added, bool pmi)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       intel_pmu_pebs_enable_all();
-       intel_pmu_lbr_enable_all(pmi);
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
-                       x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
-
-       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
-               struct perf_event *event =
-                       cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
-
-               if (WARN_ON_ONCE(!event))
-                       return;
-
-               intel_pmu_enable_bts(event->hw.config);
-       } else
-               intel_bts_enable_local();
-}
-
-static void intel_pmu_enable_all(int added)
-{
-       __intel_pmu_enable_all(added, false);
-}
-
-/*
- * Workaround for:
- *   Intel Errata AAK100 (model 26)
- *   Intel Errata AAP53  (model 30)
- *   Intel Errata BD53   (model 44)
- *
- * The official story:
- *   These chips need to be 'reset' when adding counters by programming the
- *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
- *   in sequence on the same PMC or on different PMCs.
- *
- * In practice it appears some of these events do in fact count, and
- * we need to program all 4 events.
- */
-static void intel_pmu_nhm_workaround(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       static const unsigned long nhm_magic[4] = {
-               0x4300B5,
-               0x4300D2,
-               0x4300B1,
-               0x4300B1
-       };
-       struct perf_event *event;
-       int i;
-
-       /*
-        * The erratum requires the following steps:
-        * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
-        * 2) Configure 4 PERFEVTSELx with the magic events and clear
-        *    the corresponding PMCx;
-        * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
-        * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
-        * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
-        */
-
-       /*
-        * The real steps we choose are a little different from above.
-        * A) To reduce MSR operations, we don't run step 1) as they
-        *    are already cleared before this function is called;
-        * B) Call x86_perf_event_update to save PMCx before configuring
-        *    PERFEVTSELx with magic number;
-        * C) With step 5), we do clear only when the PERFEVTSELx is
-        *    not used currently.
-        * D) Call x86_perf_event_set_period to restore PMCx;
-        */
-
-       /* We always operate 4 pairs of PERF Counters */
-       for (i = 0; i < 4; i++) {
-               event = cpuc->events[i];
-               if (event)
-                       x86_perf_event_update(event);
-       }
-
-       for (i = 0; i < 4; i++) {
-               wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
-               wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
-       }
-
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
-
-       for (i = 0; i < 4; i++) {
-               event = cpuc->events[i];
-
-               if (event) {
-                       x86_perf_event_set_period(event);
-                       __x86_pmu_enable_event(&event->hw,
-                                       ARCH_PERFMON_EVENTSEL_ENABLE);
-               } else
-                       wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
-       }
-}
-
-static void intel_pmu_nhm_enable_all(int added)
-{
-       if (added)
-               intel_pmu_nhm_workaround();
-       intel_pmu_enable_all(added);
-}
-
-static inline u64 intel_pmu_get_status(void)
-{
-       u64 status;
-
-       rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-
-       return status;
-}
-
-static inline void intel_pmu_ack_status(u64 ack)
-{
-       wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
-}
-
-static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
-{
-       int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
-       u64 ctrl_val, mask;
-
-       mask = 0xfULL << (idx * 4);
-
-       rdmsrl(hwc->config_base, ctrl_val);
-       ctrl_val &= ~mask;
-       wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static inline bool event_is_checkpointed(struct perf_event *event)
-{
-       return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
-}
-
-static void intel_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
-               intel_pmu_disable_bts();
-               intel_pmu_drain_bts_buffer();
-               return;
-       }
-
-       cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
-       cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
-       cpuc->intel_cp_status &= ~(1ull << hwc->idx);
-
-       /*
-        * the LBR must be disabled before the actual event
-        * because any event may be combined with LBR
-        */
-       if (needs_branch_stack(event))
-               intel_pmu_lbr_disable(event);
-
-       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-               intel_pmu_disable_fixed(hwc);
-               return;
-       }
-
-       x86_pmu_disable_event(event);
-
-       if (unlikely(event->attr.precise_ip))
-               intel_pmu_pebs_disable(event);
-}
-
-static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
-{
-       int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
-       u64 ctrl_val, bits, mask;
-
-       /*
-        * Enable IRQ generation (0x8),
-        * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
-        * if requested:
-        */
-       bits = 0x8ULL;
-       if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
-               bits |= 0x2;
-       if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
-               bits |= 0x1;
-
-       /*
-        * ANY bit is supported in v3 and up
-        */
-       if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
-               bits |= 0x4;
-
-       bits <<= (idx * 4);
-       mask = 0xfULL << (idx * 4);
-
-       rdmsrl(hwc->config_base, ctrl_val);
-       ctrl_val &= ~mask;
-       ctrl_val |= bits;
-       wrmsrl(hwc->config_base, ctrl_val);
-}
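
The following user-space sketch is not part of this patch; it only illustrates the arithmetic that intel_pmu_enable_fixed() performs. Each fixed counter owns a 4-bit field in the fixed-counter control MSR at bit offset idx * 4, and the PMI (0x8), user (0x2) and OS (0x1) bits are the ones named in the comment above.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t ctrl_val = 0;                  /* stands in for the MSR contents */
        int idx = 1;                            /* second fixed counter */
        uint64_t bits = 0x8ULL | 0x2 | 0x1;     /* PMI + user + kernel counting */
        uint64_t mask = 0xfULL << (idx * 4);

        ctrl_val &= ~mask;                      /* clear this counter's nibble */
        ctrl_val |= bits << (idx * 4);          /* install the new control bits */
        printf("fixed ctrl = 0x%llx\n", (unsigned long long)ctrl_val);
        return 0;                               /* prints 0xb0 for idx = 1 */
}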
-
-static void intel_pmu_enable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
-               if (!__this_cpu_read(cpu_hw_events.enabled))
-                       return;
-
-               intel_pmu_enable_bts(hwc->config);
-               return;
-       }
-       /*
-        * the LBR must be enabled before the actual event
-        * because any event may be combined with LBR
-        */
-       if (needs_branch_stack(event))
-               intel_pmu_lbr_enable(event);
-
-       if (event->attr.exclude_host)
-               cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
-       if (event->attr.exclude_guest)
-               cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
-
-       if (unlikely(event_is_checkpointed(event)))
-               cpuc->intel_cp_status |= (1ull << hwc->idx);
-
-       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-               intel_pmu_enable_fixed(hwc);
-               return;
-       }
-
-       if (unlikely(event->attr.precise_ip))
-               intel_pmu_pebs_enable(event);
-
-       __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
-}
-
-/*
- * Save and restart an expired event. Called by NMI contexts,
- * so it has to be careful about preempting normal event ops:
- */
-int intel_pmu_save_and_restart(struct perf_event *event)
-{
-       x86_perf_event_update(event);
-       /*
-        * For a checkpointed counter always reset back to 0.  This
-        * avoids a situation where the counter overflows, aborts the
-        * transaction and is then set back to shortly before the
-        * overflow, and overflows and aborts again.
-        */
-       if (unlikely(event_is_checkpointed(event))) {
-               /* No race with NMIs because the counter should not be armed */
-               wrmsrl(event->hw.event_base, 0);
-               local64_set(&event->hw.prev_count, 0);
-       }
-       return x86_perf_event_set_period(event);
-}
-
-static void intel_pmu_reset(void)
-{
-       struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
-       unsigned long flags;
-       int idx;
-
-       if (!x86_pmu.num_counters)
-               return;
-
-       local_irq_save(flags);
-
-       pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
-               wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
-       }
-       for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
-               wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
-
-       if (ds)
-               ds->bts_index = ds->bts_buffer_base;
-
-       /* Ack all overflows and disable fixed counters */
-       if (x86_pmu.version >= 2) {
-               intel_pmu_ack_status(intel_pmu_get_status());
-               wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-       }
-
-       /* Reset LBRs and LBR freezing */
-       if (x86_pmu.lbr_nr) {
-               update_debugctlmsr(get_debugctlmsr() &
-                       ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
-       }
-
-       local_irq_restore(flags);
-}
-
-/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
- */
-static int intel_pmu_handle_irq(struct pt_regs *regs)
-{
-       struct perf_sample_data data;
-       struct cpu_hw_events *cpuc;
-       int bit, loops;
-       u64 status;
-       int handled;
-
-       cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       /*
-        * No known reason to not always do late ACK,
-        * but just in case do it opt-in.
-        */
-       if (!x86_pmu.late_ack)
-               apic_write(APIC_LVTPC, APIC_DM_NMI);
-       __intel_pmu_disable_all();
-       handled = intel_pmu_drain_bts_buffer();
-       handled += intel_bts_interrupt();
-       status = intel_pmu_get_status();
-       if (!status)
-               goto done;
-
-       loops = 0;
-again:
-       intel_pmu_lbr_read();
-       intel_pmu_ack_status(status);
-       if (++loops > 100) {
-               static bool warned = false;
-               if (!warned) {
-                       WARN(1, "perfevents: irq loop stuck!\n");
-                       perf_event_print_debug();
-                       warned = true;
-               }
-               intel_pmu_reset();
-               goto done;
-       }
-
-       inc_irq_stat(apic_perf_irqs);
-
-
-       /*
-        * Ignore a range of extra bits in status that do not indicate
-        * overflow by themselves.
-        */
-       status &= ~(GLOBAL_STATUS_COND_CHG |
-                   GLOBAL_STATUS_ASIF |
-                   GLOBAL_STATUS_LBRS_FROZEN);
-       if (!status)
-               goto done;
-
-       /*
-        * PEBS overflow sets bit 62 in the global status register
-        */
-       if (__test_and_clear_bit(62, (unsigned long *)&status)) {
-               handled++;
-               x86_pmu.drain_pebs(regs);
-       }
-
-       /*
-        * Intel PT
-        */
-       if (__test_and_clear_bit(55, (unsigned long *)&status)) {
-               handled++;
-               intel_pt_interrupt();
-       }
-
-       /*
-        * Checkpointed counters can lead to 'spurious' PMIs because the
-        * rollback caused by the PMI will have cleared the overflow status
-        * bit. Therefore always force probe these counters.
-        */
-       status |= cpuc->intel_cp_status;
-
-       for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
-               struct perf_event *event = cpuc->events[bit];
-
-               handled++;
-
-               if (!test_bit(bit, cpuc->active_mask))
-                       continue;
-
-               if (!intel_pmu_save_and_restart(event))
-                       continue;
-
-               perf_sample_data_init(&data, 0, event->hw.last_period);
-
-               if (has_branch_stack(event))
-                       data.br_stack = &cpuc->lbr_stack;
-
-               if (perf_event_overflow(event, &data, regs))
-                       x86_pmu_stop(event, 0);
-       }
-
-       /*
-        * Repeat if there is more work to be done:
-        */
-       status = intel_pmu_get_status();
-       if (status)
-               goto again;
-
-done:
-       __intel_pmu_enable_all(0, true);
-       /*
-        * Only unmask the NMI after the overflow counters
-        * have been reset. This avoids spurious NMIs on
-        * Haswell CPUs.
-        */
-       if (x86_pmu.late_ack)
-               apic_write(APIC_LVTPC, APIC_DM_NMI);
-       return handled;
-}
-
-static struct event_constraint *
-intel_bts_constraints(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned int hw_event, bts_event;
-
-       if (event->attr.freq)
-               return NULL;
-
-       hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
-       bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-
-       if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
-               return &bts_constraint;
-
-       return NULL;
-}
-
-static int intel_alt_er(int idx, u64 config)
-{
-       int alt_idx = idx;
-
-       if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
-               return idx;
-
-       if (idx == EXTRA_REG_RSP_0)
-               alt_idx = EXTRA_REG_RSP_1;
-
-       if (idx == EXTRA_REG_RSP_1)
-               alt_idx = EXTRA_REG_RSP_0;
-
-       if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
-               return idx;
-
-       return alt_idx;
-}
-
-static void intel_fixup_er(struct perf_event *event, int idx)
-{
-       event->hw.extra_reg.idx = idx;
-
-       if (idx == EXTRA_REG_RSP_0) {
-               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-               event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
-               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
-       } else if (idx == EXTRA_REG_RSP_1) {
-               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-               event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
-               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-       }
-}
-
-/*
- * manage allocation of shared extra msr for certain events
- *
- * sharing can be:
- * per-cpu: to be shared between the various events on a single PMU
- * per-core: per-cpu + shared by HT threads
- */
-static struct event_constraint *
-__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-                                  struct perf_event *event,
-                                  struct hw_perf_event_extra *reg)
-{
-       struct event_constraint *c = &emptyconstraint;
-       struct er_account *era;
-       unsigned long flags;
-       int idx = reg->idx;
-
-       /*
-        * reg->alloc can be set due to existing state, so for fake cpuc we
-        * need to ignore this, otherwise we might fail to allocate proper fake
-        * state for this extra reg constraint. Also see the comment below.
-        */
-       if (reg->alloc && !cpuc->is_fake)
-               return NULL; /* call x86_get_event_constraint() */
-
-again:
-       era = &cpuc->shared_regs->regs[idx];
-       /*
-        * we use spin_lock_irqsave() to avoid lockdep issues when
-        * passing a fake cpuc
-        */
-       raw_spin_lock_irqsave(&era->lock, flags);
-
-       if (!atomic_read(&era->ref) || era->config == reg->config) {
-
-               /*
-                * If it's a fake cpuc -- as per validate_{group,event}() we
-                * shouldn't touch event state and we can avoid doing so
-                * since both will only call get_event_constraints() once
-                * on each event, this avoids the need for reg->alloc.
-                *
-                * Not doing the ER fixup will only result in era->reg being
-                * wrong, but since we won't actually try and program hardware
-                * this isn't a problem either.
-                */
-               if (!cpuc->is_fake) {
-                       if (idx != reg->idx)
-                               intel_fixup_er(event, idx);
-
-                       /*
-                        * x86_schedule_events() can call get_event_constraints()
-                        * multiple times on events in the case of incremental
-                * scheduling. reg->alloc ensures we only do the ER
-                        * allocation once.
-                        */
-                       reg->alloc = 1;
-               }
-
-               /* lock in msr value */
-               era->config = reg->config;
-               era->reg = reg->reg;
-
-               /* one more user */
-               atomic_inc(&era->ref);
-
-               /*
-                * need to call x86_get_event_constraint()
-                * to check if associated event has constraints
-                */
-               c = NULL;
-       } else {
-               idx = intel_alt_er(idx, reg->config);
-               if (idx != reg->idx) {
-                       raw_spin_unlock_irqrestore(&era->lock, flags);
-                       goto again;
-               }
-       }
-       raw_spin_unlock_irqrestore(&era->lock, flags);
-
-       return c;
-}
-
-static void
-__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
-                                  struct hw_perf_event_extra *reg)
-{
-       struct er_account *era;
-
-       /*
-        * Only put constraint if extra reg was actually allocated. Also takes
-        * care of events which do not use an extra shared reg.
-        *
-        * Also, if this is a fake cpuc we shouldn't touch any event state
-        * (reg->alloc) and we don't care about leaving inconsistent cpuc state
-        * either since it'll be thrown out.
-        */
-       if (!reg->alloc || cpuc->is_fake)
-               return;
-
-       era = &cpuc->shared_regs->regs[reg->idx];
-
-       /* one fewer user */
-       atomic_dec(&era->ref);
-
-       /* allocate again next time */
-       reg->alloc = 0;
-}
-
-static struct event_constraint *
-intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
-                             struct perf_event *event)
-{
-       struct event_constraint *c = NULL, *d;
-       struct hw_perf_event_extra *xreg, *breg;
-
-       xreg = &event->hw.extra_reg;
-       if (xreg->idx != EXTRA_REG_NONE) {
-               c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
-               if (c == &emptyconstraint)
-                       return c;
-       }
-       breg = &event->hw.branch_reg;
-       if (breg->idx != EXTRA_REG_NONE) {
-               d = __intel_shared_reg_get_constraints(cpuc, event, breg);
-               if (d == &emptyconstraint) {
-                       __intel_shared_reg_put_constraints(cpuc, xreg);
-                       c = d;
-               }
-       }
-       return c;
-}
-
-struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                         struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       if (x86_pmu.event_constraints) {
-               for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if ((event->hw.config & c->cmask) == c->code) {
-                               event->hw.flags |= c->flags;
-                               return c;
-                       }
-               }
-       }
-
-       return &unconstrained;
-}
-
-static struct event_constraint *
-__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                           struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       c = intel_bts_constraints(event);
-       if (c)
-               return c;
-
-       c = intel_shared_regs_constraints(cpuc, event);
-       if (c)
-               return c;
-
-       c = intel_pebs_constraints(event);
-       if (c)
-               return c;
-
-       return x86_get_event_constraints(cpuc, idx, event);
-}
-
-static void
-intel_start_scheduling(struct cpu_hw_events *cpuc)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xl;
-       int tid = cpuc->excl_thread_id;
-
-       /*
-        * nothing needed if in group validation mode
-        */
-       if (cpuc->is_fake || !is_ht_workaround_enabled())
-               return;
-
-       /*
-        * no exclusion needed
-        */
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return;
-
-       xl = &excl_cntrs->states[tid];
-
-       xl->sched_started = true;
-       /*
-        * lock shared state until we are done scheduling,
-        * i.e. until intel_stop_scheduling() is called;
-        * this makes scheduling appear as a transaction
-        */
-       raw_spin_lock(&excl_cntrs->lock);
-}
-
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct event_constraint *c = cpuc->event_constraint[idx];
-       struct intel_excl_states *xl;
-       int tid = cpuc->excl_thread_id;
-
-       if (cpuc->is_fake || !is_ht_workaround_enabled())
-               return;
-
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return;
-
-       if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
-               return;
-
-       xl = &excl_cntrs->states[tid];
-
-       lockdep_assert_held(&excl_cntrs->lock);
-
-       if (c->flags & PERF_X86_EVENT_EXCL)
-               xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
-       else
-               xl->state[cntr] = INTEL_EXCL_SHARED;
-}
-
-static void
-intel_stop_scheduling(struct cpu_hw_events *cpuc)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xl;
-       int tid = cpuc->excl_thread_id;
-
-       /*
-        * nothing needed if in group validation mode
-        */
-       if (cpuc->is_fake || !is_ht_workaround_enabled())
-               return;
-       /*
-        * no exclusion needed
-        */
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return;
-
-       xl = &excl_cntrs->states[tid];
-
-       xl->sched_started = false;
-       /*
-        * release shared state lock (acquired in intel_start_scheduling())
-        */
-       raw_spin_unlock(&excl_cntrs->lock);
-}
-
-static struct event_constraint *
-intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
-                          int idx, struct event_constraint *c)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xlo;
-       int tid = cpuc->excl_thread_id;
-       int is_excl, i;
-
-       /*
-        * validating a group does not require
-        * enforcing cross-thread  exclusion
-        */
-       if (cpuc->is_fake || !is_ht_workaround_enabled())
-               return c;
-
-       /*
-        * no exclusion needed
-        */
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return c;
-
-       /*
-        * because we modify the constraint, we need
-        * to make a copy. Static constraints come
-        * from static const tables.
-        *
-        * only needed when constraint has not yet
-        * been cloned (marked dynamic)
-        */
-       if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
-               struct event_constraint *cx;
-
-               /*
-                * grab pre-allocated constraint entry
-                */
-               cx = &cpuc->constraint_list[idx];
-
-               /*
-                * initialize dynamic constraint
-                * with static constraint
-                */
-               *cx = *c;
-
-               /*
-                * mark constraint as dynamic, so we
-                * can free it later on
-                */
-               cx->flags |= PERF_X86_EVENT_DYNAMIC;
-               c = cx;
-       }
-
-       /*
-        * From here on, the constraint is dynamic.
-        * Either it was just allocated above, or it
-        * was allocated during an earlier invocation
-        * of this function
-        */
-
-       /*
-        * state of sibling HT
-        */
-       xlo = &excl_cntrs->states[tid ^ 1];
-
-       /*
-        * event requires exclusive counter access
-        * across HT threads
-        */
-       is_excl = c->flags & PERF_X86_EVENT_EXCL;
-       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
-               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
-               if (!cpuc->n_excl++)
-                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
-       }
-
-       /*
-        * Modify static constraint with current dynamic
-        * state of thread
-        *
-        * EXCLUSIVE: sibling counter measuring exclusive event
-        * SHARED   : sibling counter measuring non-exclusive event
-        * UNUSED   : sibling counter unused
-        */
-       for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
-               /*
-                * exclusive event in sibling counter
-                * our corresponding counter cannot be used
-                * regardless of our event
-                */
-               if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
-                       __clear_bit(i, c->idxmsk);
-               /*
-                * if measuring an exclusive event, sibling
-                * measuring non-exclusive, then counter cannot
-                * be used
-                */
-               if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
-                       __clear_bit(i, c->idxmsk);
-       }
-
-       /*
-        * recompute actual bit weight for scheduling algorithm
-        */
-       c->weight = hweight64(c->idxmsk64);
-
-       /*
-        * if we return an empty mask, then switch
-        * back to static empty constraint to avoid
-        * the cost of freeing later on
-        */
-       if (c->weight == 0)
-               c = &emptyconstraint;
-
-       return c;
-}
-
-static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                           struct perf_event *event)
-{
-       struct event_constraint *c1 = NULL;
-       struct event_constraint *c2;
-
-       if (idx >= 0) /* fake does < 0 */
-               c1 = cpuc->event_constraint[idx];
-
-       /*
-        * first time only
-        * - static constraint: no change across incremental scheduling calls
-        * - dynamic constraint: handled by intel_get_excl_constraints()
-        */
-       c2 = __intel_get_event_constraints(cpuc, idx, event);
-       if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
-               bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
-               c1->weight = c2->weight;
-               c2 = c1;
-       }
-
-       if (cpuc->excl_cntrs)
-               return intel_get_excl_constraints(cpuc, event, idx, c2);
-
-       return c2;
-}
-
-static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
-               struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       int tid = cpuc->excl_thread_id;
-       struct intel_excl_states *xl;
-
-       /*
-        * nothing needed if in group validation mode
-        */
-       if (cpuc->is_fake)
-               return;
-
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return;
-
-       if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
-               hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
-               if (!--cpuc->n_excl)
-                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
-       }
-
-       /*
-        * If event was actually assigned, then mark the counter state as
-        * unused now.
-        */
-       if (hwc->idx >= 0) {
-               xl = &excl_cntrs->states[tid];
-
-               /*
-                * put_constraint may be called from x86_schedule_events()
-                * which already has the lock held, so make locking
-                * conditional here.
-                */
-               if (!xl->sched_started)
-                       raw_spin_lock(&excl_cntrs->lock);
-
-               xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
-
-               if (!xl->sched_started)
-                       raw_spin_unlock(&excl_cntrs->lock);
-       }
-}
-
-static void
-intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
-                                       struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg;
-
-       reg = &event->hw.extra_reg;
-       if (reg->idx != EXTRA_REG_NONE)
-               __intel_shared_reg_put_constraints(cpuc, reg);
-
-       reg = &event->hw.branch_reg;
-       if (reg->idx != EXTRA_REG_NONE)
-               __intel_shared_reg_put_constraints(cpuc, reg);
-}
-
-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
-                                       struct perf_event *event)
-{
-       intel_put_shared_regs_event_constraints(cpuc, event);
-
-       /*
-        * if the PMU has exclusive counter restrictions, then
-        * all events are subject to them and must call the
-        * put_excl_constraints() routine
-        */
-       if (cpuc->excl_cntrs)
-               intel_put_excl_constraints(cpuc, event);
-}
-
-static void intel_pebs_aliases_core2(struct perf_event *event)
-{
-       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
-               /*
-                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
-                * (0x003c) so that we can use it with PEBS.
-                *
-                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
-                * PEBS capable. However we can use INST_RETIRED.ANY_P
-                * (0x00c0), which is a PEBS capable event, to get the same
-                * count.
-                *
-                * INST_RETIRED.ANY_P counts the number of cycles that retire
-                * CNTMASK instructions. By setting CNTMASK to a value (16)
-                * larger than the maximum number of instructions that can be
-                * retired per cycle (4) and then inverting the condition, we
-                * count all cycles that retire 16 or fewer instructions, which
-                * is every cycle.
-                *
-                * Thereby we gain a PEBS capable cycle counter.
-                */
-               u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
-
-               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
-               event->hw.config = alt_config;
-       }
-}
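
As a rough, standalone illustration (not part of this patch), the sketch below recomputes the alternative PEBS-capable "cycles" encoding built above with X86_CONFIG(). It assumes the config bit layout documented by the format attributes exported later in this file: event in bits 0-7, inv in bit 23 and cmask in bits 24-31.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t event = 0xc0;                  /* INST_RETIRED.ANY_P */
        uint64_t inv   = 1ULL << 23;            /* invert the cmask comparison */
        uint64_t cmask = 16ULL << 24;           /* 16 > max 4 insns retired/cycle */
        uint64_t alt_config = event | inv | cmask;

        printf("alt_config = 0x%llx\n", (unsigned long long)alt_config);
        return 0;                               /* prints 0x108000c0 */
}

With those format attributes, roughly the same raw event could be requested from user space along the lines of cpu/event=0xc0,inv=1,cmask=16/.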
-
-static void intel_pebs_aliases_snb(struct perf_event *event)
-{
-       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
-               /*
-                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
-                * (0x003c) so that we can use it with PEBS.
-                *
-                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
-                * PEBS capable. However we can use UOPS_RETIRED.ALL
-                * (0x01c2), which is a PEBS capable event, to get the same
-                * count.
-                *
-                * UOPS_RETIRED.ALL counts the number of cycles that retire
-                * CNTMASK micro-ops. By setting CNTMASK to a value (16)
-                * larger than the maximum number of micro-ops that can be
-                * retired per cycle (4) and then inverting the condition, we
-                * count all cycles that retire 16 or fewer micro-ops, which
-                * is every cycle.
-                *
-                * Thereby we gain a PEBS capable cycle counter.
-                */
-               u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
-
-               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
-               event->hw.config = alt_config;
-       }
-}
-
-static void intel_pebs_aliases_precdist(struct perf_event *event)
-{
-       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
-               /*
-                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
-                * (0x003c) so that we can use it with PEBS.
-                *
-                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
-                * PEBS capable. However we can use INST_RETIRED.PREC_DIST
-                * (0x01c0), which is a PEBS capable event, to get the same
-                * count.
-                *
-                * The PREC_DIST event has special support to minimize sample
-                * shadowing effects. One drawback is that it can only be
-                * programmed on counter 1, but that seems like an
-                * acceptable trade off.
-                */
-               u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
-
-               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
-               event->hw.config = alt_config;
-       }
-}
-
-static void intel_pebs_aliases_ivb(struct perf_event *event)
-{
-       if (event->attr.precise_ip < 3)
-               return intel_pebs_aliases_snb(event);
-       return intel_pebs_aliases_precdist(event);
-}
-
-static void intel_pebs_aliases_skl(struct perf_event *event)
-{
-       if (event->attr.precise_ip < 3)
-               return intel_pebs_aliases_core2(event);
-       return intel_pebs_aliases_precdist(event);
-}
-
-static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
-{
-       unsigned long flags = x86_pmu.free_running_flags;
-
-       if (event->attr.use_clockid)
-               flags &= ~PERF_SAMPLE_TIME;
-       return flags;
-}
-
-static int intel_pmu_hw_config(struct perf_event *event)
-{
-       int ret = x86_pmu_hw_config(event);
-
-       if (ret)
-               return ret;
-
-       if (event->attr.precise_ip) {
-               if (!event->attr.freq) {
-                       event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
-                       if (!(event->attr.sample_type &
-                             ~intel_pmu_free_running_flags(event)))
-                               event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
-               }
-               if (x86_pmu.pebs_aliases)
-                       x86_pmu.pebs_aliases(event);
-       }
-
-       if (needs_branch_stack(event)) {
-               ret = intel_pmu_setup_lbr_filter(event);
-               if (ret)
-                       return ret;
-
-               /*
-                * BTS is set up earlier in this path, so don't account twice
-                */
-               if (!intel_pmu_has_bts(event)) {
-                       /* disallow lbr if conflicting events are present */
-                       if (x86_add_exclusive(x86_lbr_exclusive_lbr))
-                               return -EBUSY;
-
-                       event->destroy = hw_perf_lbr_event_destroy;
-               }
-       }
-
-       if (event->attr.type != PERF_TYPE_RAW)
-               return 0;
-
-       if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
-               return 0;
-
-       if (x86_pmu.version < 3)
-               return -EINVAL;
-
-       if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
-
-       return 0;
-}
-
-struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
-       if (x86_pmu.guest_get_msrs)
-               return x86_pmu.guest_get_msrs(nr);
-       *nr = 0;
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
-
-static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
-
-       arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
-       arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
-       arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-       /*
-        * If PMU counter has PEBS enabled it is not enough to disable counter
-        * on a guest entry since PEBS memory write can overshoot guest entry
-        * and corrupt guest memory. Disabling PEBS solves the problem.
-        */
-       arr[1].msr = MSR_IA32_PEBS_ENABLE;
-       arr[1].host = cpuc->pebs_enabled;
-       arr[1].guest = 0;
-
-       *nr = 2;
-       return arr;
-}
-
-static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
-               struct perf_event *event = cpuc->events[idx];
-
-               arr[idx].msr = x86_pmu_config_addr(idx);
-               arr[idx].host = arr[idx].guest = 0;
-
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-
-               arr[idx].host = arr[idx].guest =
-                       event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
-
-               if (event->attr.exclude_host)
-                       arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-               else if (event->attr.exclude_guest)
-                       arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-       }
-
-       *nr = x86_pmu.num_counters;
-       return arr;
-}
-
-static void core_pmu_enable_event(struct perf_event *event)
-{
-       if (!event->attr.exclude_host)
-               x86_pmu_enable_event(event);
-}
-
-static void core_pmu_enable_all(int added)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
-
-               if (!test_bit(idx, cpuc->active_mask) ||
-                               cpuc->events[idx]->attr.exclude_host)
-                       continue;
-
-               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
-       }
-}
-
-static int hsw_hw_config(struct perf_event *event)
-{
-       int ret = intel_pmu_hw_config(event);
-
-       if (ret)
-               return ret;
-       if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
-               return 0;
-       event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
-
-       /*
-        * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
-        * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
-        * this combination.
-        */
-       if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
-            ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
-             event->attr.precise_ip > 0))
-               return -EOPNOTSUPP;
-
-       if (event_is_checkpointed(event)) {
-               /*
-                * Sampling of checkpointed events can cause situations where
-                * the CPU constantly aborts because of an overflow, which is
-                * then checkpointed back and ignored. Forbid checkpointing
-                * for sampling.
-                *
-                * But still allow a long sampling period, so that perf stat
-                * from KVM works.
-                */
-               if (event->attr.sample_period > 0 &&
-                   event->attr.sample_period < 0x7fffffff)
-                       return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-static struct event_constraint counter2_constraint =
-                       EVENT_CONSTRAINT(0, 0x4, 0);
-
-static struct event_constraint *
-hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                         struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       c = intel_get_event_constraints(cpuc, idx, event);
-
-       /* Handle special quirk on in_tx_checkpointed only in counter 2 */
-       if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
-               if (c->idxmsk64 & (1U << 2))
-                       return &counter2_constraint;
-               return &emptyconstraint;
-       }
-
-       return c;
-}
-
-/*
- * Broadwell:
- *
- * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
- * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
- * the two to enforce a minimum period of 128 (the smallest value that has bits
- * 0-5 cleared and >= 100).
- *
- * Because of how the code in x86_perf_event_set_period() works, the truncation
- * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
- * to make up for the 'lost' events due to carrying the 'error' in period_left.
- *
- * Therefore the effective (average) period matches the requested period,
- * despite coarser hardware granularity.
- */
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
-{
-       if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
-                       X86_CONFIG(.event=0xc0, .umask=0x01)) {
-               if (left < 128)
-                       left = 128;
-               left &= ~0x3fu;
-       }
-       return left;
-}
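
A small user-space sketch (not part of this patch) of the clamping arithmetic above, with the INST_RETIRED.ALL event check dropped for brevity; any period below 128 becomes 128, and the low 6 bits are always cleared:

#include <stdio.h>

static unsigned int limit_period(unsigned int left)
{
        if (left < 128)
                left = 128;
        return left & ~0x3fu;                   /* clear bits 0-5 (BDM55) */
}

int main(void)
{
        unsigned int samples[] = { 100, 130, 250, 4096 };
        int i;

        for (i = 0; i < 4; i++)                 /* 100->128, 130->128, 250->192, 4096->4096 */
                printf("%u -> %u\n", samples[i], limit_period(samples[i]));
        return 0;
}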
-
-PMU_FORMAT_ATTR(event, "config:0-7"    );
-PMU_FORMAT_ATTR(umask, "config:8-15"   );
-PMU_FORMAT_ATTR(edge,  "config:18"     );
-PMU_FORMAT_ATTR(pc,    "config:19"     );
-PMU_FORMAT_ATTR(any,   "config:21"     ); /* v3 + */
-PMU_FORMAT_ATTR(inv,   "config:23"     );
-PMU_FORMAT_ATTR(cmask, "config:24-31"  );
-PMU_FORMAT_ATTR(in_tx,  "config:32");
-PMU_FORMAT_ATTR(in_tx_cp, "config:33");
-
-static struct attribute *intel_arch_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_pc.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       NULL,
-};
-
-ssize_t intel_event_sysfs_show(char *page, u64 config)
-{
-       u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
-
-       return x86_event_sysfs_show(page, config, event);
-}
-
-struct intel_shared_regs *allocate_shared_regs(int cpu)
-{
-       struct intel_shared_regs *regs;
-       int i;
-
-       regs = kzalloc_node(sizeof(struct intel_shared_regs),
-                           GFP_KERNEL, cpu_to_node(cpu));
-       if (regs) {
-               /*
-                * initialize the locks to keep lockdep happy
-                */
-               for (i = 0; i < EXTRA_REG_MAX; i++)
-                       raw_spin_lock_init(&regs->regs[i].lock);
-
-               regs->core_id = -1;
-       }
-       return regs;
-}
-
-static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
-{
-       struct intel_excl_cntrs *c;
-
-       c = kzalloc_node(sizeof(struct intel_excl_cntrs),
-                        GFP_KERNEL, cpu_to_node(cpu));
-       if (c) {
-               raw_spin_lock_init(&c->lock);
-               c->core_id = -1;
-       }
-       return c;
-}
-
-static int intel_pmu_cpu_prepare(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-
-       if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
-               cpuc->shared_regs = allocate_shared_regs(cpu);
-               if (!cpuc->shared_regs)
-                       goto err;
-       }
-
-       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-               size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
-
-               cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
-               if (!cpuc->constraint_list)
-                       goto err_shared_regs;
-
-               cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
-               if (!cpuc->excl_cntrs)
-                       goto err_constraint_list;
-
-               cpuc->excl_thread_id = 0;
-       }
-
-       return NOTIFY_OK;
-
-err_constraint_list:
-       kfree(cpuc->constraint_list);
-       cpuc->constraint_list = NULL;
-
-err_shared_regs:
-       kfree(cpuc->shared_regs);
-       cpuc->shared_regs = NULL;
-
-err:
-       return NOTIFY_BAD;
-}
-
-static void intel_pmu_cpu_starting(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       int core_id = topology_core_id(cpu);
-       int i;
-
-       init_debug_store_on_cpu(cpu);
-       /*
-        * Deal with CPUs that don't clear their LBRs on power-up.
-        */
-       intel_pmu_lbr_reset();
-
-       cpuc->lbr_sel = NULL;
-
-       if (!cpuc->shared_regs)
-               return;
-
-       if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
-                       struct intel_shared_regs *pc;
-
-                       pc = per_cpu(cpu_hw_events, i).shared_regs;
-                       if (pc && pc->core_id == core_id) {
-                               cpuc->kfree_on_online[0] = cpuc->shared_regs;
-                               cpuc->shared_regs = pc;
-                               break;
-                       }
-               }
-               cpuc->shared_regs->core_id = core_id;
-               cpuc->shared_regs->refcnt++;
-       }
-
-       if (x86_pmu.lbr_sel_map)
-               cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
-
-       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
-                       struct intel_excl_cntrs *c;
-
-                       c = per_cpu(cpu_hw_events, i).excl_cntrs;
-                       if (c && c->core_id == core_id) {
-                               cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
-                               cpuc->excl_cntrs = c;
-                               cpuc->excl_thread_id = 1;
-                               break;
-                       }
-               }
-               cpuc->excl_cntrs->core_id = core_id;
-               cpuc->excl_cntrs->refcnt++;
-       }
-}
-
-static void free_excl_cntrs(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       struct intel_excl_cntrs *c;
-
-       c = cpuc->excl_cntrs;
-       if (c) {
-               if (c->core_id == -1 || --c->refcnt == 0)
-                       kfree(c);
-               cpuc->excl_cntrs = NULL;
-               kfree(cpuc->constraint_list);
-               cpuc->constraint_list = NULL;
-       }
-}
-
-static void intel_pmu_cpu_dying(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       struct intel_shared_regs *pc;
-
-       pc = cpuc->shared_regs;
-       if (pc) {
-               if (pc->core_id == -1 || --pc->refcnt == 0)
-                       kfree(pc);
-               cpuc->shared_regs = NULL;
-       }
-
-       free_excl_cntrs(cpu);
-
-       fini_debug_store_on_cpu(cpu);
-}
-
-static void intel_pmu_sched_task(struct perf_event_context *ctx,
-                                bool sched_in)
-{
-       if (x86_pmu.pebs_active)
-               intel_pmu_pebs_sched_task(ctx, sched_in);
-       if (x86_pmu.lbr_nr)
-               intel_pmu_lbr_sched_task(ctx, sched_in);
-}
-
-PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
-
-PMU_FORMAT_ATTR(ldlat, "config1:0-15");
-
-PMU_FORMAT_ATTR(frontend, "config1:0-23");
-
-static struct attribute *intel_arch3_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_pc.attr,
-       &format_attr_any.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       &format_attr_in_tx.attr,
-       &format_attr_in_tx_cp.attr,
-
-       &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
-       &format_attr_ldlat.attr, /* PEBS load latency */
-       NULL,
-};
-
-static struct attribute *skl_format_attr[] = {
-       &format_attr_frontend.attr,
-       NULL,
-};
-
-static __initconst const struct x86_pmu core_pmu = {
-       .name                   = "core",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = x86_pmu_disable_all,
-       .enable_all             = core_pmu_enable_all,
-       .enable                 = core_pmu_enable_event,
-       .disable                = x86_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
-       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
-       .event_map              = intel_pmu_event_map,
-       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
-       .apic                   = 1,
-       .free_running_flags     = PEBS_FREERUNNING_FLAGS,
-
-       /*
-        * Intel PMCs cannot be accessed sanely above 32-bit width,
-        * so we install an artificial 1<<31 period regardless of
-        * the generic event period:
-        */
-       .max_period             = (1ULL<<31) - 1,
-       .get_event_constraints  = intel_get_event_constraints,
-       .put_event_constraints  = intel_put_event_constraints,
-       .event_constraints      = intel_core_event_constraints,
-       .guest_get_msrs         = core_guest_get_msrs,
-       .format_attrs           = intel_arch_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-
-       /*
-        * A virtual (or funny metal) CPU can define x86_pmu.extra_regs
-        * together with PMU version 1 and thus be using core_pmu with
-        * shared_regs. We need the following callbacks here to allocate
-        * it properly.
-        */
-       .cpu_prepare            = intel_pmu_cpu_prepare,
-       .cpu_starting           = intel_pmu_cpu_starting,
-       .cpu_dying              = intel_pmu_cpu_dying,
-};
-
-static __initconst const struct x86_pmu intel_pmu = {
-       .name                   = "Intel",
-       .handle_irq             = intel_pmu_handle_irq,
-       .disable_all            = intel_pmu_disable_all,
-       .enable_all             = intel_pmu_enable_all,
-       .enable                 = intel_pmu_enable_event,
-       .disable                = intel_pmu_disable_event,
-       .hw_config              = intel_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
-       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
-       .event_map              = intel_pmu_event_map,
-       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
-       .apic                   = 1,
-       .free_running_flags     = PEBS_FREERUNNING_FLAGS,
-       /*
-        * Intel PMCs cannot be accessed sanely above 32 bit width,
-        * so we install an artificial 1<<31 period regardless of
-        * the generic event period:
-        */
-       .max_period             = (1ULL << 31) - 1,
-       .get_event_constraints  = intel_get_event_constraints,
-       .put_event_constraints  = intel_put_event_constraints,
-       .pebs_aliases           = intel_pebs_aliases_core2,
-
-       .format_attrs           = intel_arch3_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-
-       .cpu_prepare            = intel_pmu_cpu_prepare,
-       .cpu_starting           = intel_pmu_cpu_starting,
-       .cpu_dying              = intel_pmu_cpu_dying,
-       .guest_get_msrs         = intel_guest_get_msrs,
-       .sched_task             = intel_pmu_sched_task,
-};
-
-static __init void intel_clovertown_quirk(void)
-{
-       /*
-        * PEBS is unreliable due to:
-        *
-        *   AJ67  - PEBS may experience CPL leaks
-        *   AJ68  - PEBS PMI may be delayed by one event
-        *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
-        *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
-        *
-        * AJ67 could be worked around by restricting the OS/USR flags.
-        * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
-        *
-        * AJ106 could possibly be worked around by not allowing LBR
-        *       usage from PEBS, including the fixup.
-        * AJ68  could possibly be worked around by always programming
-        *       a pebs_event_reset[0] value and coping with the lost events.
-        *
-        * But taken together it might just make sense to not enable PEBS on
-        * these chips.
-        */
-       pr_warn("PEBS disabled due to CPU errata\n");
-       x86_pmu.pebs = 0;
-       x86_pmu.pebs_constraints = NULL;
-}
-
-static int intel_snb_pebs_broken(int cpu)
-{
-       u32 rev = UINT_MAX; /* default to broken for unknown models */
-
-       switch (cpu_data(cpu).x86_model) {
-       case 42: /* SNB */
-               rev = 0x28;
-               break;
-
-       case 45: /* SNB-EP */
-               switch (cpu_data(cpu).x86_mask) {
-               case 6: rev = 0x618; break;
-               case 7: rev = 0x70c; break;
-               }
-       }
-
-       return (cpu_data(cpu).microcode < rev);
-}
-
-static void intel_snb_check_microcode(void)
-{
-       int pebs_broken = 0;
-       int cpu;
-
-       get_online_cpus();
-       for_each_online_cpu(cpu) {
-               if ((pebs_broken = intel_snb_pebs_broken(cpu)))
-                       break;
-       }
-       put_online_cpus();
-
-       if (pebs_broken == x86_pmu.pebs_broken)
-               return;
-
-       /*
-        * Serialized by the microcode lock.
-        */
-       if (x86_pmu.pebs_broken) {
-               pr_info("PEBS enabled due to microcode update\n");
-               x86_pmu.pebs_broken = 0;
-       } else {
-               pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
-               x86_pmu.pebs_broken = 1;
-       }
-}
-
-/*
- * Under certain circumstances, accessing certain MSRs may cause a #GP.
- * This function tests whether the input MSR can be safely accessed.
- */
-static bool check_msr(unsigned long msr, u64 mask)
-{
-       u64 val_old, val_new, val_tmp;
-
-       /*
-        * Read the current value, change it and read it back to see if it
-        * matches, this is needed to detect certain hardware emulators
-        * (qemu/kvm) that don't trap on the MSR access and always return 0s.
-        */
-       if (rdmsrl_safe(msr, &val_old))
-               return false;
-
-       /*
-        * Only change the bits which can be updated by wrmsrl.
-        */
-       val_tmp = val_old ^ mask;
-       if (wrmsrl_safe(msr, val_tmp) ||
-           rdmsrl_safe(msr, &val_new))
-               return false;
-
-       if (val_new != val_tmp)
-               return false;
-
-       /* At this point the MSR is known to be safe to access.
-        * Restore the old value and return.
-        */
-       wrmsrl(msr, val_old);
-
-       return true;
-}
-
-static __init void intel_sandybridge_quirk(void)
-{
-       x86_pmu.check_microcode = intel_snb_check_microcode;
-       intel_snb_check_microcode();
-}
-
-static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
-       { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
-       { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
-       { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
-       { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
-       { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
-       { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
-       { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
-};
-
-static __init void intel_arch_events_quirk(void)
-{
-       int bit;
-
-       /* disable events that are reported as not present by CPUID */
-       for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
-               intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
-               pr_warn("CPUID marked event: \'%s\' unavailable\n",
-                       intel_arch_events_map[bit].name);
-       }
-}
-
-static __init void intel_nehalem_quirk(void)
-{
-       union cpuid10_ebx ebx;
-
-       ebx.full = x86_pmu.events_maskl;
-       if (ebx.split.no_branch_misses_retired) {
-               /*
-                * Erratum AAJ80 detected; work around it by using the
-                * BR_MISP_EXEC.ANY event instead. This will over-count
-                * branch misses, but it's still much better than the
-                * architectural event, which is often completely bogus:
-                */
-               intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
-               ebx.split.no_branch_misses_retired = 0;
-               x86_pmu.events_maskl = ebx.full;
-               pr_info("CPU erratum AAJ80 worked around\n");
-       }
-}
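For reference, the raw value 0x7f89 above encodes BR_MISP_EXEC.ANY: umask 0x7f in bits 15:8 and event code 0x89 in bits 7:0. A minimal illustrative equivalent, using the X86_CONFIG() helper this file already uses for the stalled-cycles events, would be (a sketch, not part of the original source):

        /* BR_MISP_EXEC.ANY, equivalent to the raw 0x7f89 encoding above */
        intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] =
                X86_CONFIG(.event=0x89, .umask=0x7f);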
-
-/*
- * enable software workaround for errata:
- * SNB: BJ122
- * IVB: BV98
- * HSW: HSD29
- *
- * Only needed when HT is enabled. However, detecting whether HT is
- * enabled is difficult (model specific), so instead we enable the
- * workaround at early boot and verify in a later initcall phase,
- * once valid topology information is available, whether HT is
- * actually enabled.
- */
-static __init void intel_ht_bug(void)
-{
-       x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
-
-       x86_pmu.start_scheduling = intel_start_scheduling;
-       x86_pmu.commit_scheduling = intel_commit_scheduling;
-       x86_pmu.stop_scheduling = intel_stop_scheduling;
-}
-
-EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82");
-
-/* Haswell special events */
-EVENT_ATTR_STR(tx-start,       tx_start,       "event=0xc9,umask=0x1");
-EVENT_ATTR_STR(tx-commit,      tx_commit,      "event=0xc9,umask=0x2");
-EVENT_ATTR_STR(tx-abort,       tx_abort,       "event=0xc9,umask=0x4");
-EVENT_ATTR_STR(tx-capacity,    tx_capacity,    "event=0x54,umask=0x2");
-EVENT_ATTR_STR(tx-conflict,    tx_conflict,    "event=0x54,umask=0x1");
-EVENT_ATTR_STR(el-start,       el_start,       "event=0xc8,umask=0x1");
-EVENT_ATTR_STR(el-commit,      el_commit,      "event=0xc8,umask=0x2");
-EVENT_ATTR_STR(el-abort,       el_abort,       "event=0xc8,umask=0x4");
-EVENT_ATTR_STR(el-capacity,    el_capacity,    "event=0x54,umask=0x2");
-EVENT_ATTR_STR(el-conflict,    el_conflict,    "event=0x54,umask=0x1");
-EVENT_ATTR_STR(cycles-t,       cycles_t,       "event=0x3c,in_tx=1");
-EVENT_ATTR_STR(cycles-ct,      cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
-
-static struct attribute *hsw_events_attrs[] = {
-       EVENT_PTR(tx_start),
-       EVENT_PTR(tx_commit),
-       EVENT_PTR(tx_abort),
-       EVENT_PTR(tx_capacity),
-       EVENT_PTR(tx_conflict),
-       EVENT_PTR(el_start),
-       EVENT_PTR(el_commit),
-       EVENT_PTR(el_abort),
-       EVENT_PTR(el_capacity),
-       EVENT_PTR(el_conflict),
-       EVENT_PTR(cycles_t),
-       EVENT_PTR(cycles_ct),
-       EVENT_PTR(mem_ld_hsw),
-       EVENT_PTR(mem_st_hsw),
-       NULL
-};
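Once the PMU is registered, these attributes are exported under /sys/bus/event_source/devices/cpu/events/, so the named events can typically be requested directly by name from the perf tool, e.g. 'perf stat -e tx-start,tx-abort' on a TSX-capable Haswell part.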
-
-__init int intel_pmu_init(void)
-{
-       union cpuid10_edx edx;
-       union cpuid10_eax eax;
-       union cpuid10_ebx ebx;
-       struct event_constraint *c;
-       unsigned int unused;
-       struct extra_reg *er;
-       int version, i;
-
-       if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-               switch (boot_cpu_data.x86) {
-               case 0x6:
-                       return p6_pmu_init();
-               case 0xb:
-                       return knc_pmu_init();
-               case 0xf:
-                       return p4_pmu_init();
-               }
-               return -ENODEV;
-       }
-
-       /*
-        * Check whether the Architectural PerfMon supports
-        * Branch Misses Retired hw_event or not.
-        */
-       cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
-       if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
-               return -ENODEV;
-
-       version = eax.split.version_id;
-       if (version < 2)
-               x86_pmu = core_pmu;
-       else
-               x86_pmu = intel_pmu;
-
-       x86_pmu.version                 = version;
-       x86_pmu.num_counters            = eax.split.num_counters;
-       x86_pmu.cntval_bits             = eax.split.bit_width;
-       x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
-
-       x86_pmu.events_maskl            = ebx.full;
-       x86_pmu.events_mask_len         = eax.split.mask_length;
-
-       x86_pmu.max_pebs_events         = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
-
-       /*
-        * Quirk: v2 perfmon does not report fixed-purpose events, so
-        * assume at least 3 events:
-        */
-       if (version > 1)
-               x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
-
-       if (boot_cpu_has(X86_FEATURE_PDCM)) {
-               u64 capabilities;
-
-               rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
-               x86_pmu.intel_cap.capabilities = capabilities;
-       }
-
-       intel_ds_init();
-
-       x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
-
-       /*
-        * Install the hw-cache-events table:
-        */
-       switch (boot_cpu_data.x86_model) {
-       case 14: /* 65nm Core "Yonah" */
-               pr_cont("Core events, ");
-               break;
-
-       case 15: /* 65nm Core2 "Merom"          */
-               x86_add_quirk(intel_clovertown_quirk);
-       case 22: /* 65nm Core2 "Merom-L"        */
-       case 23: /* 45nm Core2 "Penryn"         */
-       case 29: /* 45nm Core2 "Dunnington" (MP)  */
-               memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-
-               intel_pmu_lbr_init_core();
-
-               x86_pmu.event_constraints = intel_core2_event_constraints;
-               x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
-               pr_cont("Core2 events, ");
-               break;
-
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-               memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_nhm();
-
-               x86_pmu.event_constraints = intel_nehalem_event_constraints;
-               x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
-               x86_pmu.enable_all = intel_pmu_nhm_enable_all;
-               x86_pmu.extra_regs = intel_nehalem_extra_regs;
-
-               x86_pmu.cpu_events = nhm_events_attrs;
-
-               /* UOPS_ISSUED.STALLED_CYCLES */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
-                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-               /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
-                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
-
-               x86_add_quirk(intel_nehalem_quirk);
-
-               pr_cont("Nehalem events, ");
-               break;
-
-       case 28: /* 45nm Atom "Pineview"   */
-       case 38: /* 45nm Atom "Lincroft"   */
-       case 39: /* 32nm Atom "Penwell"    */
-       case 53: /* 32nm Atom "Cloverview" */
-       case 54: /* 32nm Atom "Cedarview"  */
-               memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-
-               intel_pmu_lbr_init_atom();
-
-               x86_pmu.event_constraints = intel_gen_event_constraints;
-               x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
-               pr_cont("Atom events, ");
-               break;
-
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 76: /* 14nm Atom "Airmont"                   */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-               memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
-                       sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_atom();
-
-               x86_pmu.event_constraints = intel_slm_event_constraints;
-               x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_slm_extra_regs;
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               pr_cont("Silvermont events, ");
-               break;
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_nhm();
-
-               x86_pmu.event_constraints = intel_westmere_event_constraints;
-               x86_pmu.enable_all = intel_pmu_nhm_enable_all;
-               x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_westmere_extra_regs;
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-
-               x86_pmu.cpu_events = nhm_events_attrs;
-
-               /* UOPS_ISSUED.STALLED_CYCLES */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
-                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-               /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
-                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
-
-               pr_cont("Westmere events, ");
-               break;
-
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-               x86_add_quirk(intel_sandybridge_quirk);
-               x86_add_quirk(intel_ht_bug);
-               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_snb();
-
-               x86_pmu.event_constraints = intel_snb_event_constraints;
-               x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
-               if (boot_cpu_data.x86_model == 45)
-                       x86_pmu.extra_regs = intel_snbep_extra_regs;
-               else
-                       x86_pmu.extra_regs = intel_snb_extra_regs;
-
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.cpu_events = snb_events_attrs;
-
-               /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
-                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-               /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
-                       X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
-
-               pr_cont("SandyBridge events, ");
-               break;
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-               x86_add_quirk(intel_ht_bug);
-               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-               /* dTLB-load-misses on IVB is different than SNB */
-               hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
-
-               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_snb();
-
-               x86_pmu.event_constraints = intel_ivb_event_constraints;
-               x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
-               x86_pmu.pebs_prec_dist = true;
-               if (boot_cpu_data.x86_model == 62)
-                       x86_pmu.extra_regs = intel_snbep_extra_regs;
-               else
-                       x86_pmu.extra_regs = intel_snb_extra_regs;
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.cpu_events = snb_events_attrs;
-
-               /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
-                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-
-               pr_cont("IvyBridge events, ");
-               break;
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-               x86_add_quirk(intel_ht_bug);
-               x86_pmu.late_ack = true;
-               memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_hsw();
-
-               x86_pmu.event_constraints = intel_hsw_event_constraints;
-               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_snbep_extra_regs;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
-               x86_pmu.pebs_prec_dist = true;
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.hw_config = hsw_hw_config;
-               x86_pmu.get_event_constraints = hsw_get_event_constraints;
-               x86_pmu.cpu_events = hsw_events_attrs;
-               x86_pmu.lbr_double_abort = true;
-               pr_cont("Haswell events, ");
-               break;
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-               x86_pmu.late_ack = true;
-               memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-
-               /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
-               hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
-                                                                        BDW_L3_MISS|HSW_SNOOP_DRAM;
-               hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
-                                                                         HSW_SNOOP_DRAM;
-               hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
-                                                                            BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
-               hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
-                                                                             BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
-
-               intel_pmu_lbr_init_hsw();
-
-               x86_pmu.event_constraints = intel_bdw_event_constraints;
-               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_snbep_extra_regs;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
-               x86_pmu.pebs_prec_dist = true;
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.hw_config = hsw_hw_config;
-               x86_pmu.get_event_constraints = hsw_get_event_constraints;
-               x86_pmu.cpu_events = hsw_events_attrs;
-               x86_pmu.limit_period = bdw_limit_period;
-               pr_cont("Broadwell events, ");
-               break;
-
-       case 87: /* Knights Landing Xeon Phi */
-               memcpy(hw_cache_event_ids,
-                      slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs,
-                      knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-               intel_pmu_lbr_init_knl();
-
-               x86_pmu.event_constraints = intel_slm_event_constraints;
-               x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_knl_extra_regs;
-
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               pr_cont("Knights Landing events, ");
-               break;
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               x86_pmu.late_ack = true;
-               memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-               intel_pmu_lbr_init_skl();
-
-               x86_pmu.event_constraints = intel_skl_event_constraints;
-               x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_skl_extra_regs;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
-               x86_pmu.pebs_prec_dist = true;
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.hw_config = hsw_hw_config;
-               x86_pmu.get_event_constraints = hsw_get_event_constraints;
-               x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
-                                                 skl_format_attr);
-               WARN_ON(!x86_pmu.format_attrs);
-               x86_pmu.cpu_events = hsw_events_attrs;
-               pr_cont("Skylake events, ");
-               break;
-
-       default:
-               switch (x86_pmu.version) {
-               case 1:
-                       x86_pmu.event_constraints = intel_v1_event_constraints;
-                       pr_cont("generic architected perfmon v1, ");
-                       break;
-               default:
-                       /*
-                        * default constraints for v2 and up
-                        */
-                       x86_pmu.event_constraints = intel_gen_event_constraints;
-                       pr_cont("generic architected perfmon, ");
-                       break;
-               }
-       }
-
-       if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
-               WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-                    x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
-               x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
-       }
-       x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-
-       if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
-               WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-                    x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
-               x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
-       }
-
-       x86_pmu.intel_ctrl |=
-               ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
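For illustration: with 4 generic and 3 fixed counters this evaluates to intel_ctrl = 0x70000000f, since the fixed-counter bits start at INTEL_PMC_IDX_FIXED (bit 32).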
-
-       if (x86_pmu.event_constraints) {
-               /*
-                * event on fixed counter2 (REF_CYCLES) only works on this
-                * counter, so do not extend mask to generic counters
-                */
-               for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if (c->cmask == FIXED_EVENT_FLAGS
-                           && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
-                               c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
-                       }
-                       c->idxmsk64 &=
-                               ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
-                       c->weight = hweight64(c->idxmsk64);
-               }
-       }
-
-       /*
-        * Accessing the LBR MSRs may cause a #GP under certain circumstances,
-        * e.g. KVM doesn't support the LBR MSRs. Check all LBR MSRs here and
-        * disable LBR access if any of them cannot be accessed.
-        */
-       if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
-               x86_pmu.lbr_nr = 0;
-       for (i = 0; i < x86_pmu.lbr_nr; i++) {
-               if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
-                     check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
-                       x86_pmu.lbr_nr = 0;
-       }
-
-       /*
-        * Accessing the extra MSRs may cause a #GP under certain circumstances,
-        * e.g. KVM doesn't support the offcore event MSRs. Check all
-        * extra_regs here.
-        */
-       if (x86_pmu.extra_regs) {
-               for (er = x86_pmu.extra_regs; er->msr; er++) {
-                       er->extra_msr_access = check_msr(er->msr, 0x11UL);
-                       /* Disable LBR select mapping */
-                       if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
-                               x86_pmu.lbr_sel_map = NULL;
-               }
-       }
-
-       /* Support full width counters using alternative MSR range */
-       if (x86_pmu.intel_cap.full_width_write) {
-               x86_pmu.max_period = x86_pmu.cntval_mask;
-               x86_pmu.perfctr = MSR_IA32_PMC0;
-               pr_cont("full-width counters, ");
-       }
-
-       return 0;
-}
-
-/*
- * HT bug: phase 2 init
- * Called once we have valid topology information to check
- * whether or not HT is enabled.
- * If HT is off, then we disable the workaround.
- */
-static __init int fixup_ht_bug(void)
-{
-       int cpu = smp_processor_id();
-       int w, c;
-       /*
-        * problem not present on this CPU model, nothing to do
-        */
-       if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
-               return 0;
-
-       w = cpumask_weight(topology_sibling_cpumask(cpu));
-       if (w > 1) {
-               pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
-               return 0;
-       }
-
-       if (lockup_detector_suspend() != 0) {
-               pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
-               return 0;
-       }
-
-       x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
-
-       x86_pmu.start_scheduling = NULL;
-       x86_pmu.commit_scheduling = NULL;
-       x86_pmu.stop_scheduling = NULL;
-
-       lockup_detector_resume();
-
-       get_online_cpus();
-
-       for_each_online_cpu(c) {
-               free_excl_cntrs(c);
-       }
-
-       put_online_cpus();
-       pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
-       return 0;
-}
-subsys_initcall(fixup_ht_bug)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
deleted file mode 100644 (file)
index 2cad71d..0000000
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
- * BTS PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#undef DEBUG
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/debugfs.h>
-#include <linux/device.h>
-#include <linux/coredump.h>
-
-#include <asm-generic/sizes.h>
-#include <asm/perf_event.h>
-
-#include "perf_event.h"
-
-struct bts_ctx {
-       struct perf_output_handle       handle;
-       struct debug_store              ds_back;
-       int                             started;
-};
-
-static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
-
-#define BTS_RECORD_SIZE                24
-#define BTS_SAFETY_MARGIN      4080
-
-struct bts_phys {
-       struct page     *page;
-       unsigned long   size;
-       unsigned long   offset;
-       unsigned long   displacement;
-};
-
-struct bts_buffer {
-       size_t          real_size;      /* multiple of BTS_RECORD_SIZE */
-       unsigned int    nr_pages;
-       unsigned int    nr_bufs;
-       unsigned int    cur_buf;
-       bool            snapshot;
-       local_t         data_size;
-       local_t         lost;
-       local_t         head;
-       unsigned long   end;
-       void            **data_pages;
-       struct bts_phys buf[0];
-};
-
-struct pmu bts_pmu;
-
-static size_t buf_size(struct page *page)
-{
-       return 1 << (PAGE_SHIFT + page_private(page));
-}
-
-static void *
-bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
-{
-       struct bts_buffer *buf;
-       struct page *page;
-       int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
-       unsigned long offset;
-       size_t size = nr_pages << PAGE_SHIFT;
-       int pg, nbuf, pad;
-
-       /* count all the high order buffers */
-       for (pg = 0, nbuf = 0; pg < nr_pages;) {
-               page = virt_to_page(pages[pg]);
-               if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
-                       return NULL;
-               pg += 1 << page_private(page);
-               nbuf++;
-       }
-
-       /*
-        * To avoid interrupts in overwrite mode, only allow one physical buffer.
-        */
-       if (overwrite && nbuf > 1)
-               return NULL;
-
-       buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
-       if (!buf)
-               return NULL;
-
-       buf->nr_pages = nr_pages;
-       buf->nr_bufs = nbuf;
-       buf->snapshot = overwrite;
-       buf->data_pages = pages;
-       buf->real_size = size - size % BTS_RECORD_SIZE;
-
-       for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
-               unsigned int __nr_pages;
-
-               page = virt_to_page(pages[pg]);
-               __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
-               buf->buf[nbuf].page = page;
-               buf->buf[nbuf].offset = offset;
-               buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
-               buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
-               pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
-               buf->buf[nbuf].size -= pad;
-
-               pg += __nr_pages;
-               offset += __nr_pages << PAGE_SHIFT;
-       }
-
-       return buf;
-}
-
-static void bts_buffer_free_aux(void *data)
-{
-       kfree(data);
-}
-
-static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
-{
-       return buf->buf[idx].offset + buf->buf[idx].displacement;
-}
-
-static void
-bts_config_buffer(struct bts_buffer *buf)
-{
-       int cpu = raw_smp_processor_id();
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       struct bts_phys *phys = &buf->buf[buf->cur_buf];
-       unsigned long index, thresh = 0, end = phys->size;
-       struct page *page = phys->page;
-
-       index = local_read(&buf->head);
-
-       if (!buf->snapshot) {
-               if (buf->end < phys->offset + buf_size(page))
-                       end = buf->end - phys->offset - phys->displacement;
-
-               index -= phys->offset + phys->displacement;
-
-               if (end - index > BTS_SAFETY_MARGIN)
-                       thresh = end - BTS_SAFETY_MARGIN;
-               else if (end - index > BTS_RECORD_SIZE)
-                       thresh = end - BTS_RECORD_SIZE;
-               else
-                       thresh = end;
-       }
-
-       ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
-       ds->bts_index = ds->bts_buffer_base + index;
-       ds->bts_absolute_maximum = ds->bts_buffer_base + end;
-       ds->bts_interrupt_threshold = !buf->snapshot
-               ? ds->bts_buffer_base + thresh
-               : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
-}
-
-static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
-{
-       unsigned long index = head - phys->offset;
-
-       memset(page_address(phys->page) + index, 0, phys->size - index);
-}
-
-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
-       if (buf->snapshot)
-               return false;
-
-       if (local_read(&buf->data_size) >= bts->handle.size ||
-           bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
-               return true;
-
-       return false;
-}
-
-static void bts_update(struct bts_ctx *bts)
-{
-       int cpu = raw_smp_processor_id();
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       struct bts_buffer *buf = perf_get_aux(&bts->handle);
-       unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
-
-       if (!buf)
-               return;
-
-       head = index + bts_buffer_offset(buf, buf->cur_buf);
-       old = local_xchg(&buf->head, head);
-
-       if (!buf->snapshot) {
-               if (old == head)
-                       return;
-
-               if (ds->bts_index >= ds->bts_absolute_maximum)
-                       local_inc(&buf->lost);
-
-               /*
-                * old and head are always in the same physical buffer, so we
-                * can subtract them to get the data size.
-                */
-               local_add(head - old, &buf->data_size);
-       } else {
-               local_set(&buf->data_size, head);
-       }
-}
-
-static void __bts_event_start(struct perf_event *event)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct bts_buffer *buf = perf_get_aux(&bts->handle);
-       u64 config = 0;
-
-       if (!buf || bts_buffer_is_full(buf, bts))
-               return;
-
-       event->hw.itrace_started = 1;
-       event->hw.state = 0;
-
-       if (!buf->snapshot)
-               config |= ARCH_PERFMON_EVENTSEL_INT;
-       if (!event->attr.exclude_kernel)
-               config |= ARCH_PERFMON_EVENTSEL_OS;
-       if (!event->attr.exclude_user)
-               config |= ARCH_PERFMON_EVENTSEL_USR;
-
-       bts_config_buffer(buf);
-
-       /*
-        * local barrier to make sure that ds configuration made it
-        * before we enable BTS
-        */
-       wmb();
-
-       intel_pmu_enable_bts(config);
-}
-
-static void bts_event_start(struct perf_event *event, int flags)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
-       __bts_event_start(event);
-
-       /* PMI handler: this counter is running and likely generating PMIs */
-       ACCESS_ONCE(bts->started) = 1;
-}
-
-static void __bts_event_stop(struct perf_event *event)
-{
-       /*
-        * No extra synchronization is mandated by the documentation to have
-        * BTS data stores globally visible.
-        */
-       intel_pmu_disable_bts();
-
-       if (event->hw.state & PERF_HES_STOPPED)
-               return;
-
-       ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
-}
-
-static void bts_event_stop(struct perf_event *event, int flags)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
-       /* PMI handler: don't restart this counter */
-       ACCESS_ONCE(bts->started) = 0;
-
-       __bts_event_stop(event);
-
-       if (flags & PERF_EF_UPDATE)
-               bts_update(bts);
-}
-
-void intel_bts_enable_local(void)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
-       if (bts->handle.event && bts->started)
-               __bts_event_start(bts->handle.event);
-}
-
-void intel_bts_disable_local(void)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
-       if (bts->handle.event)
-               __bts_event_stop(bts->handle.event);
-}
-
-static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
-{
-       unsigned long head, space, next_space, pad, gap, skip, wakeup;
-       unsigned int next_buf;
-       struct bts_phys *phys, *next_phys;
-       int ret;
-
-       if (buf->snapshot)
-               return 0;
-
-       head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
-       if (WARN_ON_ONCE(head != local_read(&buf->head)))
-               return -EINVAL;
-
-       phys = &buf->buf[buf->cur_buf];
-       space = phys->offset + phys->displacement + phys->size - head;
-       pad = space;
-       if (space > handle->size) {
-               space = handle->size;
-               space -= space % BTS_RECORD_SIZE;
-       }
-       if (space <= BTS_SAFETY_MARGIN) {
-               /* See if next phys buffer has more space */
-               next_buf = buf->cur_buf + 1;
-               if (next_buf >= buf->nr_bufs)
-                       next_buf = 0;
-               next_phys = &buf->buf[next_buf];
-               gap = buf_size(phys->page) - phys->displacement - phys->size +
-                     next_phys->displacement;
-               skip = pad + gap;
-               if (handle->size >= skip) {
-                       next_space = next_phys->size;
-                       if (next_space + skip > handle->size) {
-                               next_space = handle->size - skip;
-                               next_space -= next_space % BTS_RECORD_SIZE;
-                       }
-                       if (next_space > space || !space) {
-                               if (pad)
-                                       bts_buffer_pad_out(phys, head);
-                               ret = perf_aux_output_skip(handle, skip);
-                               if (ret)
-                                       return ret;
-                               /* Advance to next phys buffer */
-                               phys = next_phys;
-                               space = next_space;
-                               head = phys->offset + phys->displacement;
-                               /*
-                                * After this, cur_buf and head won't match ds
-                                * anymore, so we must not be racing with
-                                * bts_update().
-                                */
-                               buf->cur_buf = next_buf;
-                               local_set(&buf->head, head);
-                       }
-               }
-       }
-
-       /* Don't go far beyond wakeup watermark */
-       wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
-                handle->head;
-       if (space > wakeup) {
-               space = wakeup;
-               space -= space % BTS_RECORD_SIZE;
-       }
-
-       buf->end = head + space;
-
-       /*
-        * If we have no space, the lost notification would have been sent when
-        * we hit absolute_maximum - see bts_update()
-        */
-       if (!space)
-               return -ENOSPC;
-
-       return 0;
-}
-
-int intel_bts_interrupt(void)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct perf_event *event = bts->handle.event;
-       struct bts_buffer *buf;
-       s64 old_head;
-       int err;
-
-       if (!event || !bts->started)
-               return 0;
-
-       buf = perf_get_aux(&bts->handle);
-       /*
-        * Skip snapshot counters: they don't use the interrupt, but
-        * there's no other way of telling, because the pointer will
-        * keep moving
-        */
-       if (!buf || buf->snapshot)
-               return 0;
-
-       old_head = local_read(&buf->head);
-       bts_update(bts);
-
-       /* no new data */
-       if (old_head == local_read(&buf->head))
-               return 0;
-
-       perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-                           !!local_xchg(&buf->lost, 0));
-
-       buf = perf_aux_output_begin(&bts->handle, event);
-       if (!buf)
-               return 1;
-
-       err = bts_buffer_reset(buf, &bts->handle);
-       if (err)
-               perf_aux_output_end(&bts->handle, 0, false);
-
-       return 1;
-}
-
-static void bts_event_del(struct perf_event *event, int mode)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
-       bts_event_stop(event, PERF_EF_UPDATE);
-
-       if (buf) {
-               if (buf->snapshot)
-                       bts->handle.head =
-                               local_xchg(&buf->data_size,
-                                          buf->nr_pages << PAGE_SHIFT);
-               perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-                                   !!local_xchg(&buf->lost, 0));
-       }
-
-       cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
-       cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
-       cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
-       cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
-}
-
-static int bts_event_add(struct perf_event *event, int mode)
-{
-       struct bts_buffer *buf;
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-       int ret = -EBUSY;
-
-       event->hw.state = PERF_HES_STOPPED;
-
-       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
-               return -EBUSY;
-
-       if (bts->handle.event)
-               return -EBUSY;
-
-       buf = perf_aux_output_begin(&bts->handle, event);
-       if (!buf)
-               return -EINVAL;
-
-       ret = bts_buffer_reset(buf, &bts->handle);
-       if (ret) {
-               perf_aux_output_end(&bts->handle, 0, false);
-               return ret;
-       }
-
-       bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
-       bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
-       bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
-       if (mode & PERF_EF_START) {
-               bts_event_start(event, 0);
-               if (hwc->state & PERF_HES_STOPPED) {
-                       bts_event_del(event, 0);
-                       return -EBUSY;
-               }
-       }
-
-       return 0;
-}
-
-static void bts_event_destroy(struct perf_event *event)
-{
-       x86_release_hardware();
-       x86_del_exclusive(x86_lbr_exclusive_bts);
-}
-
-static int bts_event_init(struct perf_event *event)
-{
-       int ret;
-
-       if (event->attr.type != bts_pmu.type)
-               return -ENOENT;
-
-       if (x86_add_exclusive(x86_lbr_exclusive_bts))
-               return -EBUSY;
-
-       /*
-        * BTS leaks kernel addresses even when CPL0 tracing is
-        * disabled, so disallow intel_bts driver for unprivileged
-        * users on paranoid systems since it provides trace data
-        * to the user in a zero-copy fashion.
-        *
-        * Note that the default paranoia setting permits unprivileged
-        * users to profile the kernel.
-        */
-       if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
-           !capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       ret = x86_reserve_hardware();
-       if (ret) {
-               x86_del_exclusive(x86_lbr_exclusive_bts);
-               return ret;
-       }
-
-       event->destroy = bts_event_destroy;
-
-       return 0;
-}
-
-static void bts_event_read(struct perf_event *event)
-{
-}
-
-static __init int bts_init(void)
-{
-       if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
-               return -ENODEV;
-
-       bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
-       bts_pmu.task_ctx_nr     = perf_sw_context;
-       bts_pmu.event_init      = bts_event_init;
-       bts_pmu.add             = bts_event_add;
-       bts_pmu.del             = bts_event_del;
-       bts_pmu.start           = bts_event_start;
-       bts_pmu.stop            = bts_event_stop;
-       bts_pmu.read            = bts_event_read;
-       bts_pmu.setup_aux       = bts_buffer_setup_aux;
-       bts_pmu.free_aux        = bts_buffer_free_aux;
-
-       return perf_pmu_register(&bts_pmu, "intel_bts", -1);
-}
-arch_initcall(bts_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
deleted file mode 100644 (file)
index a316ca9..0000000
+++ /dev/null
@@ -1,1391 +0,0 @@
-/*
- * Intel Cache Quality-of-Service Monitoring (CQM) support.
- *
- * Based very, very heavily on work by Peter Zijlstra.
- */
-
-#include <linux/perf_event.h>
-#include <linux/slab.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-#define MSR_IA32_PQR_ASSOC     0x0c8f
-#define MSR_IA32_QM_CTR                0x0c8e
-#define MSR_IA32_QM_EVTSEL     0x0c8d
-
-static u32 cqm_max_rmid = -1;
-static unsigned int cqm_l3_scale; /* supposedly cacheline size */
-
-/**
- * struct intel_pqr_state - State cache for the PQR MSR
- * @rmid:              The cached Resource Monitoring ID
- * @closid:            The cached Class Of Service ID
- * @rmid_usecnt:       The usage counter for rmid
- *
- * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
- * contains both parts, so we need to cache them.
- *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
- */
-struct intel_pqr_state {
-       u32                     rmid;
-       u32                     closid;
-       int                     rmid_usecnt;
-};
-
-/*
- * The cached intel_pqr_state is strictly per CPU and can never be
- * updated from a remote CPU. Both functions which modify the state
- * (intel_cqm_event_start and intel_cqm_event_stop) are called with
- * interrupts disabled, which is sufficient for the protection.
- */
-static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
-
-/*
- * Protects cache_groups, cqm_rmid_free_lru and cqm_rmid_limbo_lru.
- * Also protects event->hw.cqm_rmid.
- *
- * Hold either for stability, both for modification of ->hw.cqm_rmid.
- */
-static DEFINE_MUTEX(cache_mutex);
-static DEFINE_RAW_SPINLOCK(cache_lock);
-
-/*
- * Groups of events that have the same target(s), one RMID per group.
- */
-static LIST_HEAD(cache_groups);
-
-/*
- * Mask of CPUs for reading CQM values. We only need one per-socket.
- */
-static cpumask_t cqm_cpumask;
-
-#define RMID_VAL_ERROR         (1ULL << 63)
-#define RMID_VAL_UNAVAIL       (1ULL << 62)
-
-#define QOS_L3_OCCUP_EVENT_ID  (1 << 0)
-
-#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
-
-/*
- * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
- *
- * This rmid is always free and is guaranteed to have an associated
- * near-zero occupancy value, i.e. no cachelines are tagged with this
- * RMID, once __intel_cqm_rmid_rotate() returns.
- */
-static u32 intel_cqm_rotation_rmid;
-
-#define INVALID_RMID           (-1)
-
-/*
- * Is @rmid valid for programming the hardware?
- *
- * rmid 0 is reserved by the hardware for all non-monitored tasks, which
- * means that we should never come across an rmid with that value.
- * Likewise, an rmid value of -1 is used to indicate "no rmid currently
- * assigned" and is used as part of the rotation code.
- */
-static inline bool __rmid_valid(u32 rmid)
-{
-       if (!rmid || rmid == INVALID_RMID)
-               return false;
-
-       return true;
-}
-
-static u64 __rmid_read(u32 rmid)
-{
-       u64 val;
-
-       /*
-        * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
-        * it just says that to increase confusion.
-        */
-       wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
-       rdmsrl(MSR_IA32_QM_CTR, val);
-
-       /*
-        * Aside from the ERROR and UNAVAIL bits, assume this thing returns
-        * the number of cachelines tagged with @rmid.
-        */
-       return val;
-}
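As a small sketch of how this raw reading relates to occupancy in bytes (the helper below is hypothetical and not part of the original driver; in practice the driver reports cqm_l3_scale to user space as the event's scale rather than applying it in the kernel):

        /*
         * Hypothetical helper: occupancy in bytes for @rmid, provided the
         * error/unavailable bits are clear.
         */
        static inline u64 __rmid_read_bytes(u32 rmid)
        {
                u64 val = __rmid_read(rmid);

                if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
                        return 0;

                return val * cqm_l3_scale;
        }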
-
-enum rmid_recycle_state {
-       RMID_YOUNG = 0,
-       RMID_AVAILABLE,
-       RMID_DIRTY,
-};
-
-struct cqm_rmid_entry {
-       u32 rmid;
-       enum rmid_recycle_state state;
-       struct list_head list;
-       unsigned long queue_time;
-};
-
-/*
- * cqm_rmid_free_lru - A least recently used list of RMIDs.
- *
- * Oldest entry at the head, newest (most recently used) entry at the
- * tail. This list is never traversed, it's only used to keep track of
- * the lru order. That is, we only pick entries off the head or insert
- * them on the tail.
- *
- * All entries on the list are 'free', and their RMIDs are not currently
- * in use. To mark an RMID as in use, remove its entry from the lru
- * list.
- *
- *
- * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
- *
- * This list contains RMIDs that no one is currently using but that
- * may have a non-zero occupancy value associated with them. The
- * rotation worker moves RMIDs from the limbo list to the free list once
- * the occupancy value drops below __intel_cqm_threshold.
- *
- * Both lists are protected by cache_mutex.
- */
-static LIST_HEAD(cqm_rmid_free_lru);
-static LIST_HEAD(cqm_rmid_limbo_lru);
-
-/*
- * We use a simple array of pointers so that we can lookup a struct
- * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
- * and __put_rmid() from having to worry about dealing with struct
- * cqm_rmid_entry - they just deal with rmids, i.e. integers.
- *
- * Once this array is initialized it is read-only. No locks are required
- * to access it.
- *
- * All entries for all RMIDs can be looked up in this array at all
- * times.
- */
-static struct cqm_rmid_entry **cqm_rmid_ptrs;
-
-static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
-{
-       struct cqm_rmid_entry *entry;
-
-       entry = cqm_rmid_ptrs[rmid];
-       WARN_ON(entry->rmid != rmid);
-
-       return entry;
-}
-
-/*
- * Returns INVALID_RMID if no free RMID is available.
- *
- * We expect to be called with cache_mutex held.
- */
-static u32 __get_rmid(void)
-{
-       struct cqm_rmid_entry *entry;
-
-       lockdep_assert_held(&cache_mutex);
-
-       if (list_empty(&cqm_rmid_free_lru))
-               return INVALID_RMID;
-
-       entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
-       list_del(&entry->list);
-
-       return entry->rmid;
-}
-
-static void __put_rmid(u32 rmid)
-{
-       struct cqm_rmid_entry *entry;
-
-       lockdep_assert_held(&cache_mutex);
-
-       WARN_ON(!__rmid_valid(rmid));
-       entry = __rmid_entry(rmid);
-
-       entry->queue_time = jiffies;
-       entry->state = RMID_YOUNG;
-
-       list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
-}
-
-static int intel_cqm_setup_rmid_cache(void)
-{
-       struct cqm_rmid_entry *entry;
-       unsigned int nr_rmids;
-       int r = 0;
-
-       nr_rmids = cqm_max_rmid + 1;
-       cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
-                               nr_rmids, GFP_KERNEL);
-       if (!cqm_rmid_ptrs)
-               return -ENOMEM;
-
-       for (; r <= cqm_max_rmid; r++) {
-               struct cqm_rmid_entry *entry;
-
-               entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-               if (!entry)
-                       goto fail;
-
-               INIT_LIST_HEAD(&entry->list);
-               entry->rmid = r;
-               cqm_rmid_ptrs[r] = entry;
-
-               list_add_tail(&entry->list, &cqm_rmid_free_lru);
-       }
-
-       /*
-        * RMID 0 is special and is always allocated. It's used for all
-        * tasks that are not monitored.
-        */
-       entry = __rmid_entry(0);
-       list_del(&entry->list);
-
-       mutex_lock(&cache_mutex);
-       intel_cqm_rotation_rmid = __get_rmid();
-       mutex_unlock(&cache_mutex);
-
-       return 0;
-fail:
-       while (r--)
-               kfree(cqm_rmid_ptrs[r]);
-
-       kfree(cqm_rmid_ptrs);
-       return -ENOMEM;
-}
-
-/*
- * Determine if @a and @b measure the same set of tasks.
- *
- * If @a and @b measure the same set of tasks then we want to share a
- * single RMID.
- */
-static bool __match_event(struct perf_event *a, struct perf_event *b)
-{
-       /* Per-cpu and task events don't mix */
-       if ((a->attach_state & PERF_ATTACH_TASK) !=
-           (b->attach_state & PERF_ATTACH_TASK))
-               return false;
-
-#ifdef CONFIG_CGROUP_PERF
-       if (a->cgrp != b->cgrp)
-               return false;
-#endif
-
-       /* If not task event, we're machine wide */
-       if (!(b->attach_state & PERF_ATTACH_TASK))
-               return true;
-
-       /*
-        * Events that target the same task are placed into the same cache group.
-        */
-       if (a->hw.target == b->hw.target)
-               return true;
-
-       /*
-        * Are we an inherited event?
-        */
-       if (b->parent == a)
-               return true;
-
-       return false;
-}
-
-#ifdef CONFIG_CGROUP_PERF
-static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
-{
-       if (event->attach_state & PERF_ATTACH_TASK)
-               return perf_cgroup_from_task(event->hw.target, event->ctx);
-
-       return event->cgrp;
-}
-#endif
-
-/*
- * Determine if @a's tasks intersect with @b's tasks
- *
- * There are combinations of events that we explicitly prohibit,
- *
- *                PROHIBITS
- *     system-wide    ->       cgroup and task
- *     cgroup        ->        system-wide
- *                           ->        task in cgroup
- *     task          ->        system-wide
- *                           ->        task in cgroup
- *
- * Call this function before allocating an RMID.
- */
-static bool __conflict_event(struct perf_event *a, struct perf_event *b)
-{
-#ifdef CONFIG_CGROUP_PERF
-       /*
-        * We can have any number of cgroups but only one system-wide
-        * event at a time.
-        */
-       if (a->cgrp && b->cgrp) {
-               struct perf_cgroup *ac = a->cgrp;
-               struct perf_cgroup *bc = b->cgrp;
-
-               /*
-                * This condition should have been caught in
-                * __match_event() and we should be sharing an RMID.
-                */
-               WARN_ON_ONCE(ac == bc);
-
-               if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
-                   cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
-                       return true;
-
-               return false;
-       }
-
-       if (a->cgrp || b->cgrp) {
-               struct perf_cgroup *ac, *bc;
-
-               /*
-                * cgroup and system-wide events are mutually exclusive
-                */
-               if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
-                   (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
-                       return true;
-
-               /*
-                * Ensure neither event is part of the other's cgroup
-                */
-               ac = event_to_cgroup(a);
-               bc = event_to_cgroup(b);
-               if (ac == bc)
-                       return true;
-
-               /*
-                * Must have cgroup and non-intersecting task events.
-                */
-               if (!ac || !bc)
-                       return false;
-
-               /*
-                * We have cgroup and task events, and the task belongs
-                * to a cgroup. Check for overlap.
-                */
-               if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
-                   cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
-                       return true;
-
-               return false;
-       }
-#endif
-       /*
-        * If one of them is not a task, same story as above with cgroups.
-        */
-       if (!(a->attach_state & PERF_ATTACH_TASK) ||
-           !(b->attach_state & PERF_ATTACH_TASK))
-               return true;
-
-       /*
-        * Must be non-overlapping.
-        */
-       return false;
-}
-
-struct rmid_read {
-       u32 rmid;
-       atomic64_t value;
-};
-
-static void __intel_cqm_event_count(void *info);
-
-/*
- * Exchange the RMID of a group of events.
- */
-static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
-{
-       struct perf_event *event;
-       struct list_head *head = &group->hw.cqm_group_entry;
-       u32 old_rmid = group->hw.cqm_rmid;
-
-       lockdep_assert_held(&cache_mutex);
-
-       /*
-        * If our RMID is being deallocated, perform a read now.
-        */
-       if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
-               struct rmid_read rr = {
-                       .value = ATOMIC64_INIT(0),
-                       .rmid = old_rmid,
-               };
-
-               on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
-                                &rr, 1);
-               local64_set(&group->count, atomic64_read(&rr.value));
-       }
-
-       raw_spin_lock_irq(&cache_lock);
-
-       group->hw.cqm_rmid = rmid;
-       list_for_each_entry(event, head, hw.cqm_group_entry)
-               event->hw.cqm_rmid = rmid;
-
-       raw_spin_unlock_irq(&cache_lock);
-
-       return old_rmid;
-}
-
-/*
- * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
- * cachelines are still tagged with RMIDs in limbo, we progressively
- * increment the threshold until we find an RMID in limbo with <=
- * __intel_cqm_threshold lines tagged. This is designed to mitigate the
- * problem where cachelines tagged with an RMID are not steadily being
- * evicted.
- *
- * On successful rotations we decrease the threshold back towards zero.
- *
- * __intel_cqm_max_threshold provides an upper bound on the threshold,
- * and is measured in bytes because it's exposed to userland.
- */
-static unsigned int __intel_cqm_threshold;
-static unsigned int __intel_cqm_max_threshold;
-
-/*
- * Test whether an RMID's occupancy on this cpu is below __intel_cqm_threshold.
- */
-static void intel_cqm_stable(void *arg)
-{
-       struct cqm_rmid_entry *entry;
-
-       list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
-               if (entry->state != RMID_AVAILABLE)
-                       break;
-
-               if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
-                       entry->state = RMID_DIRTY;
-       }
-}
-
-/*
- * If we have group events waiting for an RMID that don't conflict with
- * events already running, assign @rmid.
- */
-static bool intel_cqm_sched_in_event(u32 rmid)
-{
-       struct perf_event *leader, *event;
-
-       lockdep_assert_held(&cache_mutex);
-
-       leader = list_first_entry(&cache_groups, struct perf_event,
-                                 hw.cqm_groups_entry);
-       event = leader;
-
-       list_for_each_entry_continue(event, &cache_groups,
-                                    hw.cqm_groups_entry) {
-               if (__rmid_valid(event->hw.cqm_rmid))
-                       continue;
-
-               if (__conflict_event(event, leader))
-                       continue;
-
-               intel_cqm_xchg_rmid(event, rmid);
-               return true;
-       }
-
-       return false;
-}
-
-/*
- * Initially use this constant for both the limbo queue time and the
- * rotation timer interval, pmu::hrtimer_interval_ms.
- *
- * They don't need to be the same, but the two are related since if you
- * rotate faster than you recycle RMIDs, you may run out of available
- * RMIDs.
- */
-#define RMID_DEFAULT_QUEUE_TIME 250    /* ms */
-
-static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
-
-/*
- * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
- * @available: number of freeable RMIDs on the limbo list
- *
- * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
- * cachelines are tagged with those RMIDs. After this we can reuse them
- * and know that the current set of active RMIDs is stable.
- *
- * Return %true or %false depending on whether stabilization needs to be
- * reattempted.
- *
- * If we return %true then @available is updated to indicate the
- * number of RMIDs on the limbo list that have been queued for the
- * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
- * are above __intel_cqm_threshold.
- */
-static bool intel_cqm_rmid_stabilize(unsigned int *available)
-{
-       struct cqm_rmid_entry *entry, *tmp;
-
-       lockdep_assert_held(&cache_mutex);
-
-       *available = 0;
-       list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
-               unsigned long min_queue_time;
-               unsigned long now = jiffies;
-
-               /*
-                * We hold RMIDs placed into limbo for a minimum queue
-                * time. Before the minimum queue time has elapsed we do
-                * not recycle RMIDs.
-                *
-                * The reasoning is that until a sufficient time has
-                * passed since we stopped using an RMID, any RMID
-                * placed onto the limbo list will likely still have
-                * data tagged in the cache, which means we'll probably
-                * fail to recycle it anyway.
-                *
-                * We can save ourselves an expensive IPI by skipping
-                * any RMIDs that have not been queued for the minimum
-                * time.
-                */
-               min_queue_time = entry->queue_time +
-                       msecs_to_jiffies(__rmid_queue_time_ms);
-
-               if (time_after(min_queue_time, now))
-                       break;
-
-               entry->state = RMID_AVAILABLE;
-               (*available)++;
-       }
-
-       /*
-        * Fast return if none of the RMIDs on the limbo list have been
-        * sitting on the queue for the minimum queue time.
-        */
-       if (!*available)
-               return false;
-
-       /*
-        * Test whether an RMID is free for each package.
-        */
-       on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
-
-       list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
-               /*
-                * Exhausted all RMIDs that have waited min queue time.
-                */
-               if (entry->state == RMID_YOUNG)
-                       break;
-
-               if (entry->state == RMID_DIRTY)
-                       continue;
-
-               list_del(&entry->list); /* remove from limbo */
-
-               /*
-                * The rotation RMID gets priority if it's
-                * currently invalid, in which case we skip adding
-                * this RMID to the free lru.
-                */
-               if (!__rmid_valid(intel_cqm_rotation_rmid)) {
-                       intel_cqm_rotation_rmid = entry->rmid;
-                       continue;
-               }
-
-               /*
-                * If we have groups waiting for RMIDs, hand
-                * them one now provided they don't conflict.
-                */
-               if (intel_cqm_sched_in_event(entry->rmid))
-                       continue;
-
-               /*
-                * Otherwise place it onto the free list.
-                */
-               list_add_tail(&entry->list, &cqm_rmid_free_lru);
-       }
-
-
-       return __rmid_valid(intel_cqm_rotation_rmid);
-}
-
-/*
- * Pick a victim group and move it to the tail of the group list.
- * @next: The first group without an RMID
- */
-static void __intel_cqm_pick_and_rotate(struct perf_event *next)
-{
-       struct perf_event *rotor;
-       u32 rmid;
-
-       lockdep_assert_held(&cache_mutex);
-
-       rotor = list_first_entry(&cache_groups, struct perf_event,
-                                hw.cqm_groups_entry);
-
-       /*
-        * The group at the front of the list should always have a valid
-        * RMID. If it doesn't then no groups have RMIDs assigned and we
-        * don't need to rotate the list.
-        */
-       if (next == rotor)
-               return;
-
-       rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
-       __put_rmid(rmid);
-
-       list_rotate_left(&cache_groups);
-}
-
-/*
- * Deallocate the RMIDs from any events that conflict with @event, and
- * place them on the back of the group list.
- */
-static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
-{
-       struct perf_event *group, *g;
-       u32 rmid;
-
-       lockdep_assert_held(&cache_mutex);
-
-       list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
-               if (group == event)
-                       continue;
-
-               rmid = group->hw.cqm_rmid;
-
-               /*
-                * Skip events that don't have a valid RMID.
-                */
-               if (!__rmid_valid(rmid))
-                       continue;
-
-               /*
-                * No conflict? No problem! Leave the event alone.
-                */
-               if (!__conflict_event(group, event))
-                       continue;
-
-               intel_cqm_xchg_rmid(group, INVALID_RMID);
-               __put_rmid(rmid);
-       }
-}
-
-/*
- * Attempt to rotate the groups and assign new RMIDs.
- *
- * We rotate for two reasons,
- *   1. To handle the scheduling of conflicting events
- *   2. To recycle RMIDs
- *
- * Rotating RMIDs is complicated because the hardware doesn't give us
- * any clues.
- *
- * There are problems with the hardware interface; when you change the
- * task:RMID map, cachelines retain their 'old' tags, giving a skewed
- * picture. In order to work around this, we must always keep one free
- * RMID - intel_cqm_rotation_rmid.
- *
- * Rotation works by taking away an RMID from a group (the old RMID),
- * and assigning the free RMID to another group (the new RMID). We must
- * then wait for the old RMID to not be used (no cachelines tagged).
- * This ensures that all cachelines are tagged with 'active' RMIDs. At
- * this point we can start reading values for the new RMID and treat the
- * old RMID as the free RMID for the next rotation.
- *
- * Return %true or %false depending on whether we did any rotating.
- */
-static bool __intel_cqm_rmid_rotate(void)
-{
-       struct perf_event *group, *start = NULL;
-       unsigned int threshold_limit;
-       unsigned int nr_needed = 0;
-       unsigned int nr_available;
-       bool rotated = false;
-
-       mutex_lock(&cache_mutex);
-
-again:
-       /*
-        * Fast path through this function if there are no groups and no
-        * RMIDs that need cleaning.
-        */
-       if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
-               goto out;
-
-       list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
-               if (!__rmid_valid(group->hw.cqm_rmid)) {
-                       if (!start)
-                               start = group;
-                       nr_needed++;
-               }
-       }
-
-       /*
-        * We have some event groups, but they all have RMIDs assigned
-        * and no RMIDs need cleaning.
-        */
-       if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
-               goto out;
-
-       if (!nr_needed)
-               goto stabilize;
-
-       /*
-        * We have more event groups without RMIDs than available RMIDs,
-        * or we have event groups that conflict with the ones currently
-        * scheduled.
-        *
-        * We force deallocate the rmid of the group at the head of
-        * cache_groups. The first event group without an RMID then gets
-        * assigned intel_cqm_rotation_rmid. This ensures we always make
-        * forward progress.
-        *
-        * Rotate the cache_groups list so the previous head is now the
-        * tail.
-        */
-       __intel_cqm_pick_and_rotate(start);
-
-       /*
-        * If the rotation is going to succeed, reduce the threshold so
-        * that we don't needlessly reuse dirty RMIDs.
-        */
-       if (__rmid_valid(intel_cqm_rotation_rmid)) {
-               intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
-               intel_cqm_rotation_rmid = __get_rmid();
-
-               intel_cqm_sched_out_conflicting_events(start);
-
-               if (__intel_cqm_threshold)
-                       __intel_cqm_threshold--;
-       }
-
-       rotated = true;
-
-stabilize:
-       /*
-        * We now need to stabilize the RMID we freed above (if any) to
-        * ensure that the next time we rotate we have an RMID with zero
-        * occupancy value.
-        *
-        * Alternatively, if we didn't need to perform any rotation,
-        * we'll have a bunch of RMIDs in limbo that need stabilizing.
-        */
-       threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
-
-       while (intel_cqm_rmid_stabilize(&nr_available) &&
-              __intel_cqm_threshold < threshold_limit) {
-               unsigned int steal_limit;
-
-               /*
-                * Don't spin if nobody is actively waiting for an RMID;
-                * the rotation worker will be kicked as soon as an
-                * event needs an RMID anyway.
-                */
-               if (!nr_needed)
-                       break;
-
-               /* Allow max 25% of RMIDs to be in limbo. */
-               steal_limit = (cqm_max_rmid + 1) / 4;
-
-               /*
-                * We failed to stabilize any RMIDs so our rotation
-                * logic is now stuck. In order to make forward progress
-                * we have a few options:
-                *
-                *   1. rotate ("steal") another RMID
-                *   2. increase the threshold
-                *   3. do nothing
-                *
-                * We do both of 1. and 2. until we hit the steal limit.
-                *
-                * The steal limit prevents all RMIDs ending up on the
-                * limbo list. This can happen if every RMID has a
-                * non-zero occupancy above threshold_limit, and the
-                * occupancy values aren't dropping fast enough.
-                *
-                * Note that there is prioritisation at work here - we'd
-                * rather increase the number of RMIDs on the limbo list
-                * than increase the threshold, because increasing the
-                * threshold skews the event data (because we reuse
-                * dirty RMIDs) - threshold bumps are a last resort.
-                */
-               if (nr_available < steal_limit)
-                       goto again;
-
-               __intel_cqm_threshold++;
-       }
-
-out:
-       mutex_unlock(&cache_mutex);
-       return rotated;
-}
-
-static void intel_cqm_rmid_rotate(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
-
-static struct pmu intel_cqm_pmu;
-
-static void intel_cqm_rmid_rotate(struct work_struct *work)
-{
-       unsigned long delay;
-
-       __intel_cqm_rmid_rotate();
-
-       delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
-       schedule_delayed_work(&intel_cqm_rmid_work, delay);
-}
-
-/*
- * Find a group and setup RMID.
- *
- * If we're part of a group, we use the group's RMID.
- */
-static void intel_cqm_setup_event(struct perf_event *event,
-                                 struct perf_event **group)
-{
-       struct perf_event *iter;
-       bool conflict = false;
-       u32 rmid;
-
-       list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
-               rmid = iter->hw.cqm_rmid;
-
-               if (__match_event(iter, event)) {
-                       /* All tasks in a group share an RMID */
-                       event->hw.cqm_rmid = rmid;
-                       *group = iter;
-                       return;
-               }
-
-               /*
-                * We only care about conflicts for events that are
-                * actually scheduled in (and hence have a valid RMID).
-                */
-               if (__conflict_event(iter, event) && __rmid_valid(rmid))
-                       conflict = true;
-       }
-
-       if (conflict)
-               rmid = INVALID_RMID;
-       else
-               rmid = __get_rmid();
-
-       event->hw.cqm_rmid = rmid;
-}
-
-static void intel_cqm_event_read(struct perf_event *event)
-{
-       unsigned long flags;
-       u32 rmid;
-       u64 val;
-
-       /*
-        * Task events are handled by intel_cqm_event_count().
-        */
-       if (event->cpu == -1)
-               return;
-
-       raw_spin_lock_irqsave(&cache_lock, flags);
-       rmid = event->hw.cqm_rmid;
-
-       if (!__rmid_valid(rmid))
-               goto out;
-
-       val = __rmid_read(rmid);
-
-       /*
-        * Ignore this reading on error states and do not update the value.
-        */
-       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-               goto out;
-
-       local64_set(&event->count, val);
-out:
-       raw_spin_unlock_irqrestore(&cache_lock, flags);
-}
-
-static void __intel_cqm_event_count(void *info)
-{
-       struct rmid_read *rr = info;
-       u64 val;
-
-       val = __rmid_read(rr->rmid);
-
-       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-               return;
-
-       atomic64_add(val, &rr->value);
-}
-
-static inline bool cqm_group_leader(struct perf_event *event)
-{
-       return !list_empty(&event->hw.cqm_groups_entry);
-}
-
-static u64 intel_cqm_event_count(struct perf_event *event)
-{
-       unsigned long flags;
-       struct rmid_read rr = {
-               .value = ATOMIC64_INIT(0),
-       };
-
-       /*
-        * We only need to worry about task events. System-wide events
-        * are handled as usual, i.e. entirely with
-        * intel_cqm_event_read().
-        */
-       if (event->cpu != -1)
-               return __perf_event_count(event);
-
-       /*
-        * Only the group leader gets to report values. This stops us
-        * reporting duplicate values to userspace, and gives us a clear
-        * rule for which task gets to report the values.
-        *
-        * Note that it is impossible to attribute these values to
-        * specific packages - we forfeit that ability when we create
-        * task events.
-        */
-       if (!cqm_group_leader(event))
-               return 0;
-
-       /*
-        * Getting up-to-date values requires an SMP IPI which is not
-        * possible if we're being called in interrupt context. Return
-        * the cached values instead.
-        */
-       if (unlikely(in_interrupt()))
-               goto out;
-
-       /*
-        * Notice that we don't perform the reading of an RMID
-        * atomically, because we can't hold a spin lock across the
-        * IPIs.
-        *
-        * Speculatively perform the read, since @event might be
-        * assigned a different (possibly invalid) RMID while we're
-        * busy performing the IPI calls. It's therefore necessary to
-        * check @event's RMID afterwards, and if it has changed,
-        * discard the result of the read.
-        */
-       rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
-
-       if (!__rmid_valid(rr.rmid))
-               goto out;
-
-       on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
-
-       raw_spin_lock_irqsave(&cache_lock, flags);
-       if (event->hw.cqm_rmid == rr.rmid)
-               local64_set(&event->count, atomic64_read(&rr.value));
-       raw_spin_unlock_irqrestore(&cache_lock, flags);
-out:
-       return __perf_event_count(event);
-}
-
-static void intel_cqm_event_start(struct perf_event *event, int mode)
-{
-       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-       u32 rmid = event->hw.cqm_rmid;
-
-       if (!(event->hw.cqm_state & PERF_HES_STOPPED))
-               return;
-
-       event->hw.cqm_state &= ~PERF_HES_STOPPED;
-
-       if (state->rmid_usecnt++) {
-               if (!WARN_ON_ONCE(state->rmid != rmid))
-                       return;
-       } else {
-               WARN_ON_ONCE(state->rmid);
-       }
-
-       state->rmid = rmid;
-       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
-}
-
-static void intel_cqm_event_stop(struct perf_event *event, int mode)
-{
-       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
-       if (event->hw.cqm_state & PERF_HES_STOPPED)
-               return;
-
-       event->hw.cqm_state |= PERF_HES_STOPPED;
-
-       intel_cqm_event_read(event);
-
-       if (!--state->rmid_usecnt) {
-               state->rmid = 0;
-               wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
-       } else {
-               WARN_ON_ONCE(!state->rmid);
-       }
-}
-
-static int intel_cqm_event_add(struct perf_event *event, int mode)
-{
-       unsigned long flags;
-       u32 rmid;
-
-       raw_spin_lock_irqsave(&cache_lock, flags);
-
-       event->hw.cqm_state = PERF_HES_STOPPED;
-       rmid = event->hw.cqm_rmid;
-
-       if (__rmid_valid(rmid) && (mode & PERF_EF_START))
-               intel_cqm_event_start(event, mode);
-
-       raw_spin_unlock_irqrestore(&cache_lock, flags);
-
-       return 0;
-}
-
-static void intel_cqm_event_destroy(struct perf_event *event)
-{
-       struct perf_event *group_other = NULL;
-
-       mutex_lock(&cache_mutex);
-
-       /*
-        * If there's another event in this group...
-        */
-       if (!list_empty(&event->hw.cqm_group_entry)) {
-               group_other = list_first_entry(&event->hw.cqm_group_entry,
-                                              struct perf_event,
-                                              hw.cqm_group_entry);
-               list_del(&event->hw.cqm_group_entry);
-       }
-
-       /*
-        * ...and we're the group leader...
-        */
-       if (cqm_group_leader(event)) {
-               /*
-                * If there was a group_other, make that leader, otherwise
-                * destroy the group and return the RMID.
-                */
-               if (group_other) {
-                       list_replace(&event->hw.cqm_groups_entry,
-                                    &group_other->hw.cqm_groups_entry);
-               } else {
-                       u32 rmid = event->hw.cqm_rmid;
-
-                       if (__rmid_valid(rmid))
-                               __put_rmid(rmid);
-                       list_del(&event->hw.cqm_groups_entry);
-               }
-       }
-
-       mutex_unlock(&cache_mutex);
-}
-
-static int intel_cqm_event_init(struct perf_event *event)
-{
-       struct perf_event *group = NULL;
-       bool rotate = false;
-
-       if (event->attr.type != intel_cqm_pmu.type)
-               return -ENOENT;
-
-       if (event->attr.config & ~QOS_EVENT_MASK)
-               return -EINVAL;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       INIT_LIST_HEAD(&event->hw.cqm_group_entry);
-       INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
-
-       event->destroy = intel_cqm_event_destroy;
-
-       mutex_lock(&cache_mutex);
-
-       /* Will also set rmid */
-       intel_cqm_setup_event(event, &group);
-
-       if (group) {
-               list_add_tail(&event->hw.cqm_group_entry,
-                             &group->hw.cqm_group_entry);
-       } else {
-               list_add_tail(&event->hw.cqm_groups_entry,
-                             &cache_groups);
-
-               /*
-                * All RMIDs are either in use or have recently been
-                * used. Kick the rotation worker to clean/free some.
-                *
-                * We only do this for the group leader, rather than for
-                * every event in a group to save on needless work.
-                */
-               if (!__rmid_valid(event->hw.cqm_rmid))
-                       rotate = true;
-       }
-
-       mutex_unlock(&cache_mutex);
-
-       if (rotate)
-               schedule_delayed_work(&intel_cqm_rmid_work, 0);
-
-       return 0;
-}
-
-EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
-EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
-EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
-EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
-EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
-
-static struct attribute *intel_cqm_events_attr[] = {
-       EVENT_PTR(intel_cqm_llc),
-       EVENT_PTR(intel_cqm_llc_pkg),
-       EVENT_PTR(intel_cqm_llc_unit),
-       EVENT_PTR(intel_cqm_llc_scale),
-       EVENT_PTR(intel_cqm_llc_snapshot),
-       NULL,
-};
-
-static struct attribute_group intel_cqm_events_group = {
-       .name = "events",
-       .attrs = intel_cqm_events_attr,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7");
-static struct attribute *intel_cqm_formats_attr[] = {
-       &format_attr_event.attr,
-       NULL,
-};
-
-static struct attribute_group intel_cqm_format_group = {
-       .name = "format",
-       .attrs = intel_cqm_formats_attr,
-};
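These events/format attributes are what userspace sees under the PMU's sysfs directory (normally /sys/bus/event_source/devices/intel_cqm/). As a hedged usage sketch only -- the type value 27 below is a stand-in for whatever the sysfs 'type' file reports at runtime, and the raw count has to be multiplied by the 'scale' attribute (cqm_l3_scale) to get bytes -- opening llc_occupancy for the calling task looks roughly like this:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long long raw;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = 27;	/* assumption: read .../intel_cqm/type instead */
	attr.config = 0x01;	/* event=0x01, llc_occupancy (see attrs above) */

	/* pid = 0, cpu = -1: a task event for the calling process */
	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	if (read(fd, &raw, sizeof(raw)) == sizeof(raw))
		printf("raw llc_occupancy: %lld (multiply by 'scale' for bytes)\n", raw);
	return 0;
}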
-
-static ssize_t
-max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
-                          char *page)
-{
-       ssize_t rv;
-
-       mutex_lock(&cache_mutex);
-       rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
-       mutex_unlock(&cache_mutex);
-
-       return rv;
-}
-
-static ssize_t
-max_recycle_threshold_store(struct device *dev,
-                           struct device_attribute *attr,
-                           const char *buf, size_t count)
-{
-       unsigned int bytes, cachelines;
-       int ret;
-
-       ret = kstrtouint(buf, 0, &bytes);
-       if (ret)
-               return ret;
-
-       mutex_lock(&cache_mutex);
-
-       __intel_cqm_max_threshold = bytes;
-       cachelines = bytes / cqm_l3_scale;
-
-       /*
-        * The new maximum takes effect immediately.
-        */
-       if (__intel_cqm_threshold > cachelines)
-               __intel_cqm_threshold = cachelines;
-
-       mutex_unlock(&cache_mutex);
-
-       return count;
-}
-
-static DEVICE_ATTR_RW(max_recycle_threshold);
-
-static struct attribute *intel_cqm_attrs[] = {
-       &dev_attr_max_recycle_threshold.attr,
-       NULL,
-};
-
-static const struct attribute_group intel_cqm_group = {
-       .attrs = intel_cqm_attrs,
-};
-
-static const struct attribute_group *intel_cqm_attr_groups[] = {
-       &intel_cqm_events_group,
-       &intel_cqm_format_group,
-       &intel_cqm_group,
-       NULL,
-};
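The max_recycle_threshold attribute defined above has no group name, so it appears directly in the PMU's sysfs directory. A minimal sketch of raising it from userspace, assuming the usual /sys/bus/event_source/devices/intel_cqm/ path; the value is a byte count (see the earlier comment) and is parsed with kstrtouint(), so plain decimal is enough:

#include <stdio.h>

int main(void)
{
	/* Assumed path; verify it on the running kernel before relying on it. */
	FILE *f = fopen("/sys/bus/event_source/devices/intel_cqm/max_recycle_threshold", "w");

	if (!f)
		return 1;
	fprintf(f, "%u\n", 64u * 1024u);	/* allow up to 64 KiB of residual occupancy */
	fclose(f);
	return 0;
}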
-
-static struct pmu intel_cqm_pmu = {
-       .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
-       .attr_groups         = intel_cqm_attr_groups,
-       .task_ctx_nr         = perf_sw_context,
-       .event_init          = intel_cqm_event_init,
-       .add                 = intel_cqm_event_add,
-       .del                 = intel_cqm_event_stop,
-       .start               = intel_cqm_event_start,
-       .stop                = intel_cqm_event_stop,
-       .read                = intel_cqm_event_read,
-       .count               = intel_cqm_event_count,
-};
-
-static inline void cqm_pick_event_reader(int cpu)
-{
-       int phys_id = topology_physical_package_id(cpu);
-       int i;
-
-       for_each_cpu(i, &cqm_cpumask) {
-               if (phys_id == topology_physical_package_id(i))
-                       return; /* already got reader for this socket */
-       }
-
-       cpumask_set_cpu(cpu, &cqm_cpumask);
-}
-
-static void intel_cqm_cpu_starting(unsigned int cpu)
-{
-       struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
-       struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-       state->rmid = 0;
-       state->closid = 0;
-       state->rmid_usecnt = 0;
-
-       WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
-       WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
-}
-
-static void intel_cqm_cpu_exit(unsigned int cpu)
-{
-       int phys_id = topology_physical_package_id(cpu);
-       int i;
-
-       /*
-        * Is @cpu a designated cqm reader?
-        */
-       if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
-               return;
-
-       for_each_online_cpu(i) {
-               if (i == cpu)
-                       continue;
-
-               if (phys_id == topology_physical_package_id(i)) {
-                       cpumask_set_cpu(i, &cqm_cpumask);
-                       break;
-               }
-       }
-}
-
-static int intel_cqm_cpu_notifier(struct notifier_block *nb,
-                                 unsigned long action, void *hcpu)
-{
-       unsigned int cpu  = (unsigned long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_DOWN_PREPARE:
-               intel_cqm_cpu_exit(cpu);
-               break;
-       case CPU_STARTING:
-               intel_cqm_cpu_starting(cpu);
-               cqm_pick_event_reader(cpu);
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static const struct x86_cpu_id intel_cqm_match[] = {
-       { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
-       {}
-};
-
-static int __init intel_cqm_init(void)
-{
-       char *str, scale[20];
-       int i, cpu, ret;
-
-       if (!x86_match_cpu(intel_cqm_match))
-               return -ENODEV;
-
-       cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
-
-       /*
-        * It's possible that not all resources support the same number
-        * of RMIDs. Instead of making scheduling much more complicated
-        * (where we have to match a task's RMID to a cpu that supports
-        * that many RMIDs), just find the minimum number of RMIDs supported
-        * across all cpus.
-        *
-        *
-        * Also, check that the scales match on all cpus.
-        */
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu) {
-               struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-               if (c->x86_cache_max_rmid < cqm_max_rmid)
-                       cqm_max_rmid = c->x86_cache_max_rmid;
-
-               if (c->x86_cache_occ_scale != cqm_l3_scale) {
-                       pr_err("Multiple LLC scale values, disabling\n");
-                       ret = -EINVAL;
-                       goto out;
-               }
-       }
-
-       /*
-        * A reasonable upper limit on the max threshold is the number
-        * of lines tagged per RMID if all RMIDs have the same number of
-        * lines tagged in the LLC.
-        *
-        * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
-        */
-       __intel_cqm_max_threshold =
-               boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
-
-       snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
-       str = kstrdup(scale, GFP_KERNEL);
-       if (!str) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       event_attr_intel_cqm_llc_scale.event_str = str;
-
-       ret = intel_cqm_setup_rmid_cache();
-       if (ret)
-               goto out;
-
-       for_each_online_cpu(i) {
-               intel_cqm_cpu_starting(i);
-               cqm_pick_event_reader(i);
-       }
-
-       __perf_cpu_notifier(intel_cqm_cpu_notifier);
-
-       ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
-       if (ret)
-               pr_err("Intel CQM perf registration failed: %d\n", ret);
-       else
-               pr_info("Intel CQM monitoring enabled\n");
-
-out:
-       cpu_notifier_register_done();
-
-       return ret;
-}
-device_initcall(intel_cqm_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
deleted file mode 100644 (file)
index 75a38b5..0000000
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
- * perf_event_intel_cstate.c: support cstate residency counters
- *
- * Copyright (C) 2015, Intel Corp.
- * Author: Kan Liang (kan.liang@intel.com)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- */
-
-/*
- * This file exports cstate-related free running (read-only) counters
- * for perf. These counters may be used simultaneously by other tools,
- * such as turbostat. However, it still makes sense to implement them
- * in perf, because we can conveniently collect them together with
- * other events, and tools can use them without special MSR access
- * code.
- *
- * The events only support system-wide mode counting. There is no
- * sampling support because it is not supported by the hardware.
- *
- * According to counters' scope and category, two PMUs are registered
- * with the perf_event core subsystem.
- *  - 'cstate_core': The counter is available for each physical core.
- *    The counters include CORE_C*_RESIDENCY.
- *  - 'cstate_pkg': The counter is available for each physical package.
- *    The counters include PKG_C*_RESIDENCY.
- *
- * All of these counters are specified in the Intel® 64 and IA-32
- * Architectures Software Developer's Manual, Vol. 3B.
- *
- * Model specific counters:
- *     MSR_CORE_C1_RES: CORE C1 Residency Counter
- *                      perf code: 0x00
- *                      Available model: SLM,AMT
- *                      Scope: Core (each processor core has a MSR)
- *     MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
- *                            perf code: 0x01
- *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Core
- *     MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
- *                            perf code: 0x02
- *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Core
- *     MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
- *                            perf code: 0x03
- *                            Available model: SNB,IVB,HSW,BDW,SKL
- *                            Scope: Core
- *     MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
- *                            perf code: 0x00
- *                            Available model: SNB,IVB,HSW,BDW,SKL
- *                            Scope: Package (physical package)
- *     MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
- *                            perf code: 0x01
- *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Package (physical package)
- *     MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
- *                            perf code: 0x02
- *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Package (physical package)
- *     MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
- *                            perf code: 0x03
- *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Package (physical package)
- *     MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
- *                            perf code: 0x04
- *                            Available model: HSW ULT only
- *                            Scope: Package (physical package)
- *     MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
- *                            perf code: 0x05
- *                            Available model: HSW ULT only
- *                            Scope: Package (physical package)
- *     MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
- *                            perf code: 0x06
- *                            Available model: HSW ULT only
- *                            Scope: Package (physical package)
- *
- */
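As a hedged usage sketch (reusing the perf_event_open() wrapper and headers from the intel_cqm example earlier on this page), opening a system-wide core C6 residency counter on CPU 0 looks roughly like the snippet below; the type value 29 is an assumption standing in for the runtime contents of /sys/bus/event_source/devices/cstate_core/type:

	struct perf_event_attr attr = {
		.size   = sizeof(attr),
		.type   = 29,		/* assumption: read .../cstate_core/type instead */
		.config = 0x02,		/* perf code 0x02 = c6-residency, per the table above */
	};

	/* pid = -1, cpu = 0: these counters only support system-wide counting */
	int fd = perf_event_open(&attr, -1, 0, -1, 0);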
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/perf_event.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)                \
-static ssize_t __cstate_##_var##_show(struct kobject *kobj,    \
-                               struct kobj_attribute *attr,    \
-                               char *page)                     \
-{                                                              \
-       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
-       return sprintf(page, _format "\n");                     \
-}                                                              \
-static struct kobj_attribute format_attr_##_var =              \
-       __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
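To make the token pasting easier to follow, the later invocation DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63") expands roughly to the following (hand-expanded sketch, whitespace aside):

static ssize_t __cstate_core_event_show(struct kobject *kobj,
				struct kobj_attribute *attr,
				char *page)
{
	BUILD_BUG_ON(sizeof("config:0-63") >= PAGE_SIZE);
	return sprintf(page, "config:0-63" "\n");
}
static struct kobj_attribute format_attr_core_event =
	__ATTR(event, 0444, __cstate_core_event_show, NULL);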
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
-                                      struct device_attribute *attr,
-                                      char *buf);
-
-struct perf_cstate_msr {
-       u64     msr;
-       struct  perf_pmu_events_attr *attr;
-       bool    (*test)(int idx);
-};
-
-
-/* cstate_core PMU */
-
-static struct pmu cstate_core_pmu;
-static bool has_cstate_core;
-
-enum perf_cstate_core_id {
-       /*
-        * cstate_core events
-        */
-       PERF_CSTATE_CORE_C1_RES = 0,
-       PERF_CSTATE_CORE_C3_RES,
-       PERF_CSTATE_CORE_C6_RES,
-       PERF_CSTATE_CORE_C7_RES,
-
-       PERF_CSTATE_CORE_EVENT_MAX,
-};
-
-bool test_core(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES ||
-                   idx == PERF_CSTATE_CORE_C7_RES)
-                       return true;
-               break;
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_CSTATE_CORE_C1_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
-PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
-
-static struct perf_cstate_msr core_msr[] = {
-       [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1, test_core, },
-       [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3, test_core, },
-       [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6, test_core, },
-       [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7, test_core, },
-};
-
-static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
-       NULL,
-};
-
-static struct attribute_group core_events_attr_group = {
-       .name = "events",
-       .attrs = core_events_attrs,
-};
-
-DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
-static struct attribute *core_format_attrs[] = {
-       &format_attr_core_event.attr,
-       NULL,
-};
-
-static struct attribute_group core_format_attr_group = {
-       .name = "format",
-       .attrs = core_format_attrs,
-};
-
-static cpumask_t cstate_core_cpu_mask;
-static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
-
-static struct attribute *cstate_cpumask_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group cpumask_attr_group = {
-       .attrs = cstate_cpumask_attrs,
-};
-
-static const struct attribute_group *core_attr_groups[] = {
-       &core_events_attr_group,
-       &core_format_attr_group,
-       &cpumask_attr_group,
-       NULL,
-};
-
-/* cstate_core PMU end */
-
-
-/* cstate_pkg PMU */
-
-static struct pmu cstate_pkg_pmu;
-static bool has_cstate_pkg;
-
-enum perf_cstate_pkg_id {
-       /*
-        * cstate_pkg events
-        */
-       PERF_CSTATE_PKG_C2_RES = 0,
-       PERF_CSTATE_PKG_C3_RES,
-       PERF_CSTATE_PKG_C6_RES,
-       PERF_CSTATE_PKG_C7_RES,
-       PERF_CSTATE_PKG_C8_RES,
-       PERF_CSTATE_PKG_C9_RES,
-       PERF_CSTATE_PKG_C10_RES,
-
-       PERF_CSTATE_PKG_EVENT_MAX,
-};
-
-bool test_pkg(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               if (idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES)
-                       return true;
-               break;
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_CSTATE_PKG_C2_RES ||
-                   idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES)
-                       return true;
-               break;
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_CSTATE_PKG_C6_RES)
-                       return true;
-               break;
-       case 69: /* 22nm Haswell ULT */
-               if (idx == PERF_CSTATE_PKG_C2_RES ||
-                   idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES ||
-                   idx == PERF_CSTATE_PKG_C8_RES ||
-                   idx == PERF_CSTATE_PKG_C9_RES ||
-                   idx == PERF_CSTATE_PKG_C10_RES)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
-PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
-PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
-PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
-PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
-
-static struct perf_cstate_msr pkg_msr[] = {
-       [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2,  test_pkg, },
-       [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3,  test_pkg, },
-       [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6,  test_pkg, },
-       [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7,  test_pkg, },
-       [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8,  test_pkg, },
-       [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9,  test_pkg, },
-       [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10, test_pkg, },
-};
-
-static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
-       NULL,
-};
-
-static struct attribute_group pkg_events_attr_group = {
-       .name = "events",
-       .attrs = pkg_events_attrs,
-};
-
-DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
-static struct attribute *pkg_format_attrs[] = {
-       &format_attr_pkg_event.attr,
-       NULL,
-};
-static struct attribute_group pkg_format_attr_group = {
-       .name = "format",
-       .attrs = pkg_format_attrs,
-};
-
-static cpumask_t cstate_pkg_cpu_mask;
-
-static const struct attribute_group *pkg_attr_groups[] = {
-       &pkg_events_attr_group,
-       &pkg_format_attr_group,
-       &cpumask_attr_group,
-       NULL,
-};
-
-/* cstate_pkg PMU end */
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
-                                      struct device_attribute *attr,
-                                      char *buf)
-{
-       struct pmu *pmu = dev_get_drvdata(dev);
-
-       if (pmu == &cstate_core_pmu)
-               return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
-       else if (pmu == &cstate_pkg_pmu)
-               return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
-       else
-               return 0;
-}
-
-static int cstate_pmu_event_init(struct perf_event *event)
-{
-       u64 cfg = event->attr.config;
-       int ret = 0;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       if (event->pmu == &cstate_core_pmu) {
-               if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
-                       return -EINVAL;
-               if (!core_msr[cfg].attr)
-                       return -EINVAL;
-               event->hw.event_base = core_msr[cfg].msr;
-       } else if (event->pmu == &cstate_pkg_pmu) {
-               if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
-                       return -EINVAL;
-               if (!pkg_msr[cfg].attr)
-                       return -EINVAL;
-               event->hw.event_base = pkg_msr[cfg].msr;
-       } else
-               return -ENOENT;
-
-       /* must be done before validate_group */
-       event->hw.config = cfg;
-       event->hw.idx = -1;
-
-       return ret;
-}
-
-static inline u64 cstate_pmu_read_counter(struct perf_event *event)
-{
-       u64 val;
-
-       rdmsrl(event->hw.event_base, val);
-       return val;
-}
-
-static void cstate_pmu_event_update(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 prev_raw_count, new_raw_count;
-
-again:
-       prev_raw_count = local64_read(&hwc->prev_count);
-       new_raw_count = cstate_pmu_read_counter(event);
-
-       /* Retry if prev_count was updated concurrently by another reader. */
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                           new_raw_count) != prev_raw_count)
-               goto again;
-
-       local64_add(new_raw_count - prev_raw_count, &event->count);
-}
-
-static void cstate_pmu_event_start(struct perf_event *event, int mode)
-{
-       local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
-}
-
-static void cstate_pmu_event_stop(struct perf_event *event, int mode)
-{
-       cstate_pmu_event_update(event);
-}
-
-static void cstate_pmu_event_del(struct perf_event *event, int mode)
-{
-       cstate_pmu_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int cstate_pmu_event_add(struct perf_event *event, int mode)
-{
-       if (mode & PERF_EF_START)
-               cstate_pmu_event_start(event, mode);
-
-       return 0;
-}
-
-static void cstate_cpu_exit(int cpu)
-{
-       int i, id, target;
-
-       /* cpu exit for cstate core */
-       if (has_cstate_core) {
-               id = topology_core_id(cpu);
-               target = -1;
-
-               for_each_online_cpu(i) {
-                       if (i == cpu)
-                               continue;
-                       if (id == topology_core_id(i)) {
-                               target = i;
-                               break;
-                       }
-               }
-               if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
-                       cpumask_set_cpu(target, &cstate_core_cpu_mask);
-               WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
-               if (target >= 0)
-                       perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
-       }
-
-       /* cpu exit for cstate pkg */
-       if (has_cstate_pkg) {
-               id = topology_physical_package_id(cpu);
-               target = -1;
-
-               for_each_online_cpu(i) {
-                       if (i == cpu)
-                               continue;
-                       if (id == topology_physical_package_id(i)) {
-                               target = i;
-                               break;
-                       }
-               }
-               if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
-                       cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
-               WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
-               if (target >= 0)
-                       perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
-       }
-}
-
-static void cstate_cpu_init(int cpu)
-{
-       int i, id;
-
-       /* cpu init for cstate core */
-       if (has_cstate_core) {
-               id = topology_core_id(cpu);
-               for_each_cpu(i, &cstate_core_cpu_mask) {
-                       if (id == topology_core_id(i))
-                               break;
-               }
-               if (i >= nr_cpu_ids)
-                       cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-       }
-
-       /* cpu init for cstate pkg */
-       if (has_cstate_pkg) {
-               id = topology_physical_package_id(cpu);
-               for_each_cpu(i, &cstate_pkg_cpu_mask) {
-                       if (id == topology_physical_package_id(i))
-                               break;
-               }
-               if (i >= nr_cpu_ids)
-                       cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-       }
-}
-
-static int cstate_cpu_notifier(struct notifier_block *self,
-                                 unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               break;
-       case CPU_STARTING:
-               cstate_cpu_init(cpu);
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               break;
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               break;
-       case CPU_DOWN_PREPARE:
-               cstate_cpu_exit(cpu);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-/*
- * Probe the cstate events and insert the available ones into the sysfs attrs.
- * Return false if there are no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
-                            struct attribute   **events_attrs,
-                            int max_event_nr)
-{
-       int i, j = 0;
-       u64 val;
-
-       /* Probe the cstate events. */
-       for (i = 0; i < max_event_nr; i++) {
-               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-                       msr[i].attr = NULL;
-       }
-
-       /* List remaining events in the sysfs attrs. */
-       for (i = 0; i < max_event_nr; i++) {
-               if (msr[i].attr)
-                       events_attrs[j++] = &msr[i].attr->attr.attr;
-       }
-       events_attrs[j] = NULL;
-
-       return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
-       /* SLM has different MSR for PKG C6 */
-       switch (boot_cpu_data.x86_model) {
-       case 55:
-       case 76:
-       case 77:
-               pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
-       }
-
-       if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
-               has_cstate_core = true;
-
-       if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
-               has_cstate_pkg = true;
-
-       return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
-       int cpu;
-
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu)
-               cstate_cpu_init(cpu);
-
-       __perf_cpu_notifier(cstate_cpu_notifier);
-
-       cpu_notifier_register_done();
-}
-
-static struct pmu cstate_core_pmu = {
-       .attr_groups    = core_attr_groups,
-       .name           = "cstate_core",
-       .task_ctx_nr    = perf_invalid_context,
-       .event_init     = cstate_pmu_event_init,
-       .add            = cstate_pmu_event_add, /* must have */
-       .del            = cstate_pmu_event_del, /* must have */
-       .start          = cstate_pmu_event_start,
-       .stop           = cstate_pmu_event_stop,
-       .read           = cstate_pmu_event_update,
-       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static struct pmu cstate_pkg_pmu = {
-       .attr_groups    = pkg_attr_groups,
-       .name           = "cstate_pkg",
-       .task_ctx_nr    = perf_invalid_context,
-       .event_init     = cstate_pmu_event_init,
-       .add            = cstate_pmu_event_add, /* must have */
-       .del            = cstate_pmu_event_del, /* must have */
-       .start          = cstate_pmu_event_start,
-       .stop           = cstate_pmu_event_stop,
-       .read           = cstate_pmu_event_update,
-       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static void __init cstate_pmus_register(void)
-{
-       int err;
-
-       if (has_cstate_core) {
-               err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
-               if (WARN_ON(err))
-                       pr_info("Failed to register PMU %s error %d\n",
-                               cstate_core_pmu.name, err);
-       }
-
-       if (has_cstate_pkg) {
-               err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
-               if (WARN_ON(err))
-                       pr_info("Failed to register PMU %s error %d\n",
-                               cstate_pkg_pmu.name, err);
-       }
-}
-
-static int __init cstate_pmu_init(void)
-{
-       int err;
-
-       if (cpu_has_hypervisor)
-               return -ENODEV;
-
-       err = cstate_init();
-       if (err)
-               return err;
-
-       cstate_cpumask_init();
-
-       cstate_pmus_register();
-
-       return 0;
-}
-
-device_initcall(cstate_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
deleted file mode 100644 (file)
index 10602f0..0000000
+++ /dev/null
@@ -1,1368 +0,0 @@
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-
-#include <asm/perf_event.h>
-#include <asm/insn.h>
-
-#include "perf_event.h"
-
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE                24
-
-#define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
-#define PEBS_FIXUP_SIZE                PAGE_SIZE
-
-/*
- * pebs_record_32 for p4 and core not supported
-
-struct pebs_record_32 {
-       u32 flags, ip;
-       u32 ax, bc, cx, dx;
-       u32 si, di, bp, sp;
-};
-
- */
-
-union intel_x86_pebs_dse {
-       u64 val;
-       struct {
-               unsigned int ld_dse:4;
-               unsigned int ld_stlb_miss:1;
-               unsigned int ld_locked:1;
-               unsigned int ld_reserved:26;
-       };
-       struct {
-               unsigned int st_l1d_hit:1;
-               unsigned int st_reserved1:3;
-               unsigned int st_stlb_miss:1;
-               unsigned int st_locked:1;
-               unsigned int st_reserved2:26;
-       };
-};
-
-
-/*
- * Map PEBS Load Latency Data Source encodings to generic
- * memory data source information
- */
-#define P(a, b) PERF_MEM_S(a, b)
-#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
-#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
-
-static const u64 pebs_data_source[] = {
-       P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
-       OP_LH | P(LVL, L1)  | P(SNOOP, NONE),   /* 0x01: L1 local */
-       OP_LH | P(LVL, LFB) | P(SNOOP, NONE),   /* 0x02: LFB hit */
-       OP_LH | P(LVL, L2)  | P(SNOOP, NONE),   /* 0x03: L2 hit */
-       OP_LH | P(LVL, L3)  | P(SNOOP, NONE),   /* 0x04: L3 hit */
-       OP_LH | P(LVL, L3)  | P(SNOOP, MISS),   /* 0x05: L3 hit, snoop miss */
-       OP_LH | P(LVL, L3)  | P(SNOOP, HIT),    /* 0x06: L3 hit, snoop hit */
-       OP_LH | P(LVL, L3)  | P(SNOOP, HITM),   /* 0x07: L3 hit, snoop hitm */
-       OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
-       OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
-       OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
-       OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
-       OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
-       OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
-       OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
-       OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
-};
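The P() shorthand wraps PERF_MEM_S() from the perf UAPI, which shifts a named field value into its slot of perf_mem_data_src, so each entry in the table above is simply an OR of encoded fields. A hedged illustration of the 0x01 "L1 local" entry:

	/* Based on the PERF_MEM_* definitions in include/uapi/linux/perf_event.h:
	 * PERF_MEM_S(a, s) is ((__u64)PERF_MEM_##a##_##s << PERF_MEM_##a##_SHIFT),
	 * so OP_LH | P(LVL, L1) | P(SNOOP, NONE) is equivalent to: */
	u64 l1_local_hit = (__u64)PERF_MEM_OP_LOAD    << PERF_MEM_OP_SHIFT   |
			   (__u64)PERF_MEM_LVL_HIT    << PERF_MEM_LVL_SHIFT  |
			   (__u64)PERF_MEM_LVL_L1     << PERF_MEM_LVL_SHIFT  |
			   (__u64)PERF_MEM_SNOOP_NONE << PERF_MEM_SNOOP_SHIFT;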
-
-static u64 precise_store_data(u64 status)
-{
-       union intel_x86_pebs_dse dse;
-       u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
-
-       dse.val = status;
-
-       /*
-        * bit 4: TLB access
-        * 1 = store missed the 2nd level TLB
-        *
-        * so the translation either hit the page walker or the OS;
-        * otherwise it hit the 2nd level TLB
-        */
-       if (dse.st_stlb_miss)
-               val |= P(TLB, MISS);
-       else
-               val |= P(TLB, HIT);
-
-       /*
-        * bit 0: hit L1 data cache
-        * if not set, then all we know is that
-        * it missed L1D
-        */
-       if (dse.st_l1d_hit)
-               val |= P(LVL, HIT);
-       else
-               val |= P(LVL, MISS);
-
-       /*
-        * bit 5: Locked prefix
-        */
-       if (dse.st_locked)
-               val |= P(LOCK, LOCKED);
-
-       return val;
-}
-
-static u64 precise_datala_hsw(struct perf_event *event, u64 status)
-{
-       union perf_mem_data_src dse;
-
-       dse.val = PERF_MEM_NA;
-
-       if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
-               dse.mem_op = PERF_MEM_OP_STORE;
-       else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
-               dse.mem_op = PERF_MEM_OP_LOAD;
-
-       /*
-        * L1 info only valid for following events:
-        *
-        * MEM_UOPS_RETIRED.STLB_MISS_STORES
-        * MEM_UOPS_RETIRED.LOCK_STORES
-        * MEM_UOPS_RETIRED.SPLIT_STORES
-        * MEM_UOPS_RETIRED.ALL_STORES
-        */
-       if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
-               if (status & 1)
-                       dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
-               else
-                       dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
-       }
-       return dse.val;
-}
-
-static u64 load_latency_data(u64 status)
-{
-       union intel_x86_pebs_dse dse;
-       u64 val;
-       int model = boot_cpu_data.x86_model;
-       int fam = boot_cpu_data.x86;
-
-       dse.val = status;
-
-       /*
-        * use the mapping table for bit 0-3
-        */
-       val = pebs_data_source[dse.ld_dse];
-
-       /*
-        * Nehalem models do not support TLB, Lock infos
-        */
-       if (fam == 0x6 && (model == 26 || model == 30
-           || model == 31 || model == 46)) {
-               val |= P(TLB, NA) | P(LOCK, NA);
-               return val;
-       }
-       /*
-        * bit 4: TLB access
-        * 0 = did not miss 2nd level TLB
-        * 1 = missed 2nd level TLB
-        */
-       if (dse.ld_stlb_miss)
-               val |= P(TLB, MISS) | P(TLB, L2);
-       else
-               val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
-
-       /*
-        * bit 5: locked prefix
-        */
-       if (dse.ld_locked)
-               val |= P(LOCK, LOCKED);
-
-       return val;
-}
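
Likewise, a sketch of how a load-latency encoding splits into the table lookup plus the TLB/lock bits, assuming a non-Nehalem part and a made-up dse value:

        /* dse 0x27: bits 0-3 = 0x7, bit 4 (sTLB miss) clear, bit 5 (lock) set */
        u64 src = load_latency_data(0x27);

        /*
         * src == pebs_data_source[0x7]                   (L3 hit, snoop hitm)
         *      | P(TLB, HIT) | P(TLB, L1) | P(TLB, L2)   (no sTLB miss)
         *      | P(LOCK, LOCKED)                         (locked access)
         */
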
-
-struct pebs_record_core {
-       u64 flags, ip;
-       u64 ax, bx, cx, dx;
-       u64 si, di, bp, sp;
-       u64 r8,  r9,  r10, r11;
-       u64 r12, r13, r14, r15;
-};
-
-struct pebs_record_nhm {
-       u64 flags, ip;
-       u64 ax, bx, cx, dx;
-       u64 si, di, bp, sp;
-       u64 r8,  r9,  r10, r11;
-       u64 r12, r13, r14, r15;
-       u64 status, dla, dse, lat;
-};
-
-/*
- * Same as pebs_record_nhm, with two additional fields.
- */
-struct pebs_record_hsw {
-       u64 flags, ip;
-       u64 ax, bx, cx, dx;
-       u64 si, di, bp, sp;
-       u64 r8,  r9,  r10, r11;
-       u64 r12, r13, r14, r15;
-       u64 status, dla, dse, lat;
-       u64 real_ip, tsx_tuning;
-};
-
-union hsw_tsx_tuning {
-       struct {
-               u32 cycles_last_block     : 32,
-                   hle_abort             : 1,
-                   rtm_abort             : 1,
-                   instruction_abort     : 1,
-                   non_instruction_abort : 1,
-                   retry                 : 1,
-                   data_conflict         : 1,
-                   capacity_writes       : 1,
-                   capacity_reads        : 1;
-       };
-       u64         value;
-};
-
-#define PEBS_HSW_TSX_FLAGS     0xff00000000ULL
-
-/* Same as HSW, plus TSC */
-
-struct pebs_record_skl {
-       u64 flags, ip;
-       u64 ax, bx, cx, dx;
-       u64 si, di, bp, sp;
-       u64 r8,  r9,  r10, r11;
-       u64 r12, r13, r14, r15;
-       u64 status, dla, dse, lat;
-       u64 real_ip, tsx_tuning;
-       u64 tsc;
-};
-
-void init_debug_store_on_cpu(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds)
-               return;
-
-       wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-                    (u32)((u64)(unsigned long)ds),
-                    (u32)((u64)(unsigned long)ds >> 32));
-}
-
-void fini_debug_store_on_cpu(int cpu)
-{
-       if (!per_cpu(cpu_hw_events, cpu).ds)
-               return;
-
-       wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static DEFINE_PER_CPU(void *, insn_buffer);
-
-static int alloc_pebs_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       int node = cpu_to_node(cpu);
-       int max;
-       void *buffer, *ibuffer;
-
-       if (!x86_pmu.pebs)
-               return 0;
-
-       buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
-       if (unlikely(!buffer))
-               return -ENOMEM;
-
-       /*
-        * HSW+ already provides us the eventing ip; no need to allocate this
-        * buffer then.
-        */
-       if (x86_pmu.intel_cap.pebs_format < 2) {
-               ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
-               if (!ibuffer) {
-                       kfree(buffer);
-                       return -ENOMEM;
-               }
-               per_cpu(insn_buffer, cpu) = ibuffer;
-       }
-
-       max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
-
-       ds->pebs_buffer_base = (u64)(unsigned long)buffer;
-       ds->pebs_index = ds->pebs_buffer_base;
-       ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-               max * x86_pmu.pebs_record_size;
-
-       return 0;
-}
-
-static void release_pebs_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds || !x86_pmu.pebs)
-               return;
-
-       kfree(per_cpu(insn_buffer, cpu));
-       per_cpu(insn_buffer, cpu) = NULL;
-
-       kfree((void *)(unsigned long)ds->pebs_buffer_base);
-       ds->pebs_buffer_base = 0;
-}
-
-static int alloc_bts_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       int node = cpu_to_node(cpu);
-       int max, thresh;
-       void *buffer;
-
-       if (!x86_pmu.bts)
-               return 0;
-
-       buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
-       if (unlikely(!buffer)) {
-               WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
-               return -ENOMEM;
-       }
-
-       max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-       thresh = max / 16;
-
-       ds->bts_buffer_base = (u64)(unsigned long)buffer;
-       ds->bts_index = ds->bts_buffer_base;
-       ds->bts_absolute_maximum = ds->bts_buffer_base +
-               max * BTS_RECORD_SIZE;
-       ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-               thresh * BTS_RECORD_SIZE;
-
-       return 0;
-}
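
To make the sizing concrete, a quick sketch assuming 4 KiB pages, so BTS_BUFFER_SIZE is 64 KiB and each BTS record is 24 bytes:

        max    = 65536 / 24;    /* 2730 records */
        thresh = max / 16;      /*  170 records */
        /* the interrupt threshold is reached after max - thresh ~= 2560 records */
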
-
-static void release_bts_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds || !x86_pmu.bts)
-               return;
-
-       kfree((void *)(unsigned long)ds->bts_buffer_base);
-       ds->bts_buffer_base = 0;
-}
-
-static int alloc_ds_buffer(int cpu)
-{
-       int node = cpu_to_node(cpu);
-       struct debug_store *ds;
-
-       ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-       if (unlikely(!ds))
-               return -ENOMEM;
-
-       per_cpu(cpu_hw_events, cpu).ds = ds;
-
-       return 0;
-}
-
-static void release_ds_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds)
-               return;
-
-       per_cpu(cpu_hw_events, cpu).ds = NULL;
-       kfree(ds);
-}
-
-void release_ds_buffers(void)
-{
-       int cpu;
-
-       if (!x86_pmu.bts && !x86_pmu.pebs)
-               return;
-
-       get_online_cpus();
-       for_each_online_cpu(cpu)
-               fini_debug_store_on_cpu(cpu);
-
-       for_each_possible_cpu(cpu) {
-               release_pebs_buffer(cpu);
-               release_bts_buffer(cpu);
-               release_ds_buffer(cpu);
-       }
-       put_online_cpus();
-}
-
-void reserve_ds_buffers(void)
-{
-       int bts_err = 0, pebs_err = 0;
-       int cpu;
-
-       x86_pmu.bts_active = 0;
-       x86_pmu.pebs_active = 0;
-
-       if (!x86_pmu.bts && !x86_pmu.pebs)
-               return;
-
-       if (!x86_pmu.bts)
-               bts_err = 1;
-
-       if (!x86_pmu.pebs)
-               pebs_err = 1;
-
-       get_online_cpus();
-
-       for_each_possible_cpu(cpu) {
-               if (alloc_ds_buffer(cpu)) {
-                       bts_err = 1;
-                       pebs_err = 1;
-               }
-
-               if (!bts_err && alloc_bts_buffer(cpu))
-                       bts_err = 1;
-
-               if (!pebs_err && alloc_pebs_buffer(cpu))
-                       pebs_err = 1;
-
-               if (bts_err && pebs_err)
-                       break;
-       }
-
-       if (bts_err) {
-               for_each_possible_cpu(cpu)
-                       release_bts_buffer(cpu);
-       }
-
-       if (pebs_err) {
-               for_each_possible_cpu(cpu)
-                       release_pebs_buffer(cpu);
-       }
-
-       if (bts_err && pebs_err) {
-               for_each_possible_cpu(cpu)
-                       release_ds_buffer(cpu);
-       } else {
-               if (x86_pmu.bts && !bts_err)
-                       x86_pmu.bts_active = 1;
-
-               if (x86_pmu.pebs && !pebs_err)
-                       x86_pmu.pebs_active = 1;
-
-               for_each_online_cpu(cpu)
-                       init_debug_store_on_cpu(cpu);
-       }
-
-       put_online_cpus();
-}
-
-/*
- * BTS
- */
-
-struct event_constraint bts_constraint =
-       EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
-
-void intel_pmu_enable_bts(u64 config)
-{
-       unsigned long debugctlmsr;
-
-       debugctlmsr = get_debugctlmsr();
-
-       debugctlmsr |= DEBUGCTLMSR_TR;
-       debugctlmsr |= DEBUGCTLMSR_BTS;
-       if (config & ARCH_PERFMON_EVENTSEL_INT)
-               debugctlmsr |= DEBUGCTLMSR_BTINT;
-
-       if (!(config & ARCH_PERFMON_EVENTSEL_OS))
-               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
-
-       if (!(config & ARCH_PERFMON_EVENTSEL_USR))
-               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
-
-       update_debugctlmsr(debugctlmsr);
-}
-
-void intel_pmu_disable_bts(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       unsigned long debugctlmsr;
-
-       if (!cpuc->ds)
-               return;
-
-       debugctlmsr = get_debugctlmsr();
-
-       debugctlmsr &=
-               ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
-                 DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
-
-       update_debugctlmsr(debugctlmsr);
-}
-
-int intel_pmu_drain_bts_buffer(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct bts_record {
-               u64     from;
-               u64     to;
-               u64     flags;
-       };
-       struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
-       struct bts_record *at, *base, *top;
-       struct perf_output_handle handle;
-       struct perf_event_header header;
-       struct perf_sample_data data;
-       unsigned long skip = 0;
-       struct pt_regs regs;
-
-       if (!event)
-               return 0;
-
-       if (!x86_pmu.bts_active)
-               return 0;
-
-       base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
-       top  = (struct bts_record *)(unsigned long)ds->bts_index;
-
-       if (top <= base)
-               return 0;
-
-       memset(&regs, 0, sizeof(regs));
-
-       ds->bts_index = ds->bts_buffer_base;
-
-       perf_sample_data_init(&data, 0, event->hw.last_period);
-
-       /*
-        * BTS leaks kernel addresses in branches across the cpl boundary,
-        * such as traps or system calls, so unless the user is asking for
-        * kernel tracing (and right now it's not possible), we'd need to
-        * filter them out. But first we need to count how many of those we
-        * have in the current batch. This is an extra O(n) pass; however,
-        * it's much faster than the sample-output pass below, considering that
-        * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
-        * alloc_bts_buffer()).
-        */
-       for (at = base; at < top; at++) {
-               /*
-                * Note that right now *this* BTS code only works if
-                * attr::exclude_kernel is set, but let's keep this extra
-                * check here in case that changes.
-                */
-               if (event->attr.exclude_kernel &&
-                   (kernel_ip(at->from) || kernel_ip(at->to)))
-                       skip++;
-       }
-
-       /*
-        * Prepare a generic sample, i.e. fill in the invariant fields.
-        * We will overwrite the from and to address before we output
-        * the sample.
-        */
-       perf_prepare_sample(&header, &data, event, &regs);
-
-       if (perf_output_begin(&handle, event, header.size *
-                             (top - base - skip)))
-               return 1;
-
-       for (at = base; at < top; at++) {
-               /* Filter out any records that contain kernel addresses. */
-               if (event->attr.exclude_kernel &&
-                   (kernel_ip(at->from) || kernel_ip(at->to)))
-                       continue;
-
-               data.ip         = at->from;
-               data.addr       = at->to;
-
-               perf_output_sample(&handle, &header, &data, event);
-       }
-
-       perf_output_end(&handle);
-
-       /* There's new data available. */
-       event->hw.interrupts++;
-       event->pending_kill = POLL_IN;
-       return 1;
-}
-
-static inline void intel_pmu_drain_pebs_buffer(void)
-{
-       struct pt_regs regs;
-
-       x86_pmu.drain_pebs(&regs);
-}
-
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-       if (!sched_in)
-               intel_pmu_drain_pebs_buffer();
-}
-
-/*
- * PEBS
- */
-struct event_constraint intel_core2_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_atom_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_slm_pebs_event_constraints[] = {
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_nehalem_pebs_event_constraints[] = {
-       INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
-       INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_westmere_pebs_event_constraints[] = {
-       INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
-       INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_snb_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
-       INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
-       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
-       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
-        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
-        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-        INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_ivb_pebs_event_constraints[] = {
-        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
-        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
-       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
-       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
-       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-        EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_hsw_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
-       INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
-       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
-       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_skl_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
-       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
-       /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
-       INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint *intel_pebs_constraints(struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       if (!event->attr.precise_ip)
-               return NULL;
-
-       if (x86_pmu.pebs_constraints) {
-               for_each_event_constraint(c, x86_pmu.pebs_constraints) {
-                       if ((event->hw.config & c->cmask) == c->code) {
-                               event->hw.flags |= c->flags;
-                               return c;
-                       }
-               }
-       }
-
-       return &emptyconstraint;
-}
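
A sketch of one match against the tables above, assuming the usual UEVENT cmask of event-select plus umask:

        /*
         * A hypothetical event with event-select 0xc0 and umask 0x01 has
         * (config & cmask) == 0x01c0, so it hits the
         * INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2) entries above:
         * counter mask 0x2 (counter 1 only), and the constraint's PEBS
         * flags are copied into event->hw.flags.
         */
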
-
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
-{
-       return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
-}
-
-void intel_pmu_pebs_enable(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-       struct debug_store *ds = cpuc->ds;
-       bool first_pebs;
-       u64 threshold;
-
-       hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
-
-       first_pebs = !pebs_is_enabled(cpuc);
-       cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
-       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
-               cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
-       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
-               cpuc->pebs_enabled |= 1ULL << 63;
-
-       /*
-        * When the event is constrained enough we can use a larger
-        * threshold and run the event with less frequent PMI.
-        */
-       if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-               threshold = ds->pebs_absolute_maximum -
-                       x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-               if (first_pebs)
-                       perf_sched_cb_inc(event->ctx->pmu);
-       } else {
-               threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-               /*
-                * If not all events can use the larger buffer,
-                * roll back to threshold = 1
-                */
-               if (!first_pebs &&
-                   (ds->pebs_interrupt_threshold > threshold))
-                       perf_sched_cb_dec(event->ctx->pmu);
-       }
-
-       /* Use auto-reload if possible to save a MSR write in the PMI */
-       if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
-               ds->pebs_event_reset[hwc->idx] =
-                       (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
-       }
-
-       if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-               ds->pebs_interrupt_threshold = threshold;
-}
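
Illustrative threshold arithmetic, assuming a fmt1 record (sizeof(struct pebs_record_nhm) == 176 bytes), the 64 KiB PEBS buffer and max_pebs_events == 4:

        /*
         * free-running: threshold = pebs_absolute_maximum - 4 * 176,
         *               i.e. a PMI only after ~368 records have accumulated;
         * otherwise:    threshold = pebs_buffer_base + 176,
         *               i.e. a PMI after every single record.
         */
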
-
-void intel_pmu_pebs_disable(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-       struct debug_store *ds = cpuc->ds;
-       bool large_pebs = ds->pebs_interrupt_threshold >
-               ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-       if (large_pebs)
-               intel_pmu_drain_pebs_buffer();
-
-       cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
-
-       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
-               cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
-       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
-               cpuc->pebs_enabled &= ~(1ULL << 63);
-
-       if (large_pebs && !pebs_is_enabled(cpuc))
-               perf_sched_cb_dec(event->ctx->pmu);
-
-       if (cpuc->enabled)
-               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-
-       hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-}
-
-void intel_pmu_pebs_enable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (cpuc->pebs_enabled)
-               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-}
-
-void intel_pmu_pebs_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (cpuc->pebs_enabled)
-               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
-}
-
-static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       unsigned long from = cpuc->lbr_entries[0].from;
-       unsigned long old_to, to = cpuc->lbr_entries[0].to;
-       unsigned long ip = regs->ip;
-       int is_64bit = 0;
-       void *kaddr;
-       int size;
-
-       /*
-        * We don't need to fix up the IP if the PEBS assist is fault-like
-        */
-       if (!x86_pmu.intel_cap.pebs_trap)
-               return 1;
-
-       /*
-        * No LBR entry, no basic block, no rewinding
-        */
-       if (!cpuc->lbr_stack.nr || !from || !to)
-               return 0;
-
-       /*
-        * Basic blocks should never cross user/kernel boundaries
-        */
-       if (kernel_ip(ip) != kernel_ip(to))
-               return 0;
-
-       /*
-        * unsigned math, either ip is before the start (impossible) or
-        * the basic block is larger than 1 page (sanity)
-        */
-       if ((ip - to) > PEBS_FIXUP_SIZE)
-               return 0;
-
-       /*
-        * We sampled a branch insn, rewind using the LBR stack
-        */
-       if (ip == to) {
-               set_linear_ip(regs, from);
-               return 1;
-       }
-
-       size = ip - to;
-       if (!kernel_ip(ip)) {
-               int bytes;
-               u8 *buf = this_cpu_read(insn_buffer);
-
-               /* 'size' must fit our buffer, see above */
-               bytes = copy_from_user_nmi(buf, (void __user *)to, size);
-               if (bytes != 0)
-                       return 0;
-
-               kaddr = buf;
-       } else {
-               kaddr = (void *)to;
-       }
-
-       do {
-               struct insn insn;
-
-               old_to = to;
-
-#ifdef CONFIG_X86_64
-               is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
-#endif
-               insn_init(&insn, kaddr, size, is_64bit);
-               insn_get_length(&insn);
-               /*
-                * Make sure there was not a problem decoding the
-                * instruction and getting the length.  This is
-                * doubly important because we have an infinite
-                * loop if insn.length=0.
-                */
-               if (!insn.length)
-                       break;
-
-               to += insn.length;
-               kaddr += insn.length;
-               size -= insn.length;
-       } while (to < ip);
-
-       if (to == ip) {
-               set_linear_ip(regs, old_to);
-               return 1;
-       }
-
-       /*
-        * Even though we decoded the basic block, the instruction stream
-        * never matched the given IP, either the TO or the IP got corrupted.
-        */
-       return 0;
-}
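
A worked (hypothetical) rewind: suppose the LBR recorded a branch to 0x1000 and PEBS reported ip == 0x100a, one instruction past the one that triggered the event:

        /*
         * decode from to = 0x1000:
         *   old_to = 0x1000, insn length 4 -> to = 0x1004  (< ip, continue)
         *   old_to = 0x1004, insn length 6 -> to = 0x100a  (== ip, stop)
         * to == ip, so the reported IP is rewound to old_to = 0x1004.
         */
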
-
-static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
-{
-       if (pebs->tsx_tuning) {
-               union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
-               return tsx.cycles_last_block;
-       }
-       return 0;
-}
-
-static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
-{
-       u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
-
-       /* For RTM XABORTs also log the abort code from AX */
-       if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
-               txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
-       return txn;
-}
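
A hypothetical decode of the TSX fields: with an RTM abort flagged in tsx_tuning and pebs->ax == 0x12000001, bit 0 of AX marks an explicit XABORT and bits 31:24 carry its immediate:

        /*
         * txn  = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;  transaction flags
         * txn |= ((0x12000001 >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
         *        abort code 0x12 is folded into the returned value
         */
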
-
-static void setup_pebs_sample_data(struct perf_event *event,
-                                  struct pt_regs *iregs, void *__pebs,
-                                  struct perf_sample_data *data,
-                                  struct pt_regs *regs)
-{
-#define PERF_X86_EVENT_PEBS_HSW_PREC \
-               (PERF_X86_EVENT_PEBS_ST_HSW | \
-                PERF_X86_EVENT_PEBS_LD_HSW | \
-                PERF_X86_EVENT_PEBS_NA_HSW)
-       /*
-        * We cast to the biggest pebs_record but are careful not to
-        * unconditionally access the 'extra' entries.
-        */
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct pebs_record_skl *pebs = __pebs;
-       u64 sample_type;
-       int fll, fst, dsrc;
-       int fl = event->hw.flags;
-
-       if (pebs == NULL)
-               return;
-
-       sample_type = event->attr.sample_type;
-       dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
-
-       fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
-       fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
-
-       perf_sample_data_init(data, 0, event->hw.last_period);
-
-       data->period = event->hw.last_period;
-
-       /*
-        * Use latency for weight (only avail with PEBS-LL)
-        */
-       if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
-               data->weight = pebs->lat;
-
-       /*
-        * data.data_src encodes the data source
-        */
-       if (dsrc) {
-               u64 val = PERF_MEM_NA;
-               if (fll)
-                       val = load_latency_data(pebs->dse);
-               else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
-                       val = precise_datala_hsw(event, pebs->dse);
-               else if (fst)
-                       val = precise_store_data(pebs->dse);
-               data->data_src.val = val;
-       }
-
-       /*
-        * We use the interrupt regs as a base because the PEBS record
-        * does not contain a full regs set, specifically it seems to
-        * lack segment descriptors, which get used by things like
-        * user_mode().
-        *
-        * In the simple case fix up only the IP and BP,SP regs, for
-        * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
-        * A possible PERF_SAMPLE_REGS will have to transfer all regs.
-        */
-       *regs = *iregs;
-       regs->flags = pebs->flags;
-       set_linear_ip(regs, pebs->ip);
-       regs->bp = pebs->bp;
-       regs->sp = pebs->sp;
-
-       if (sample_type & PERF_SAMPLE_REGS_INTR) {
-               regs->ax = pebs->ax;
-               regs->bx = pebs->bx;
-               regs->cx = pebs->cx;
-               regs->dx = pebs->dx;
-               regs->si = pebs->si;
-               regs->di = pebs->di;
-               regs->bp = pebs->bp;
-               regs->sp = pebs->sp;
-
-               regs->flags = pebs->flags;
-#ifndef CONFIG_X86_32
-               regs->r8 = pebs->r8;
-               regs->r9 = pebs->r9;
-               regs->r10 = pebs->r10;
-               regs->r11 = pebs->r11;
-               regs->r12 = pebs->r12;
-               regs->r13 = pebs->r13;
-               regs->r14 = pebs->r14;
-               regs->r15 = pebs->r15;
-#endif
-       }
-
-       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-               regs->ip = pebs->real_ip;
-               regs->flags |= PERF_EFLAGS_EXACT;
-       } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
-               regs->flags |= PERF_EFLAGS_EXACT;
-       else
-               regs->flags &= ~PERF_EFLAGS_EXACT;
-
-       if ((sample_type & PERF_SAMPLE_ADDR) &&
-           x86_pmu.intel_cap.pebs_format >= 1)
-               data->addr = pebs->dla;
-
-       if (x86_pmu.intel_cap.pebs_format >= 2) {
-               /* Only set the TSX weight when no memory weight. */
-               if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
-                       data->weight = intel_hsw_weight(pebs);
-
-               if (sample_type & PERF_SAMPLE_TRANSACTION)
-                       data->txn = intel_hsw_transaction(pebs);
-       }
-
-       /*
-        * v3 supplies an accurate time stamp, so use it as the
-        * sample time.
-        *
-        * We can only do this for the default trace clock.
-        */
-       if (x86_pmu.intel_cap.pebs_format >= 3 &&
-               event->attr.use_clockid == 0)
-               data->time = native_sched_clock_from_tsc(pebs->tsc);
-
-       if (has_branch_stack(event))
-               data->br_stack = &cpuc->lbr_stack;
-}
-
-static inline void *
-get_next_pebs_record_by_bit(void *base, void *top, int bit)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       void *at;
-       u64 pebs_status;
-
-       /*
-        * fmt0 does not have a status bitfield (does not use
-        * pebs_record_nhm format)
-        */
-       if (x86_pmu.intel_cap.pebs_format < 1)
-               return base;
-
-       if (base == NULL)
-               return NULL;
-
-       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
-               struct pebs_record_nhm *p = at;
-
-               if (test_bit(bit, (unsigned long *)&p->status)) {
-                       /* PEBS v3 has accurate status bits */
-                       if (x86_pmu.intel_cap.pebs_format >= 3)
-                               return at;
-
-                       if (p->status == (1 << bit))
-                               return at;
-
-                       /* clear non-PEBS bits and re-check */
-                       pebs_status = p->status & cpuc->pebs_enabled;
-                       pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
-                       if (pebs_status == (1 << bit))
-                               return at;
-               }
-       }
-       return NULL;
-}
-
-static void __intel_pmu_pebs_event(struct perf_event *event,
-                                  struct pt_regs *iregs,
-                                  void *base, void *top,
-                                  int bit, int count)
-{
-       struct perf_sample_data data;
-       struct pt_regs regs;
-       void *at = get_next_pebs_record_by_bit(base, top, bit);
-
-       if (!intel_pmu_save_and_restart(event) &&
-           !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
-               return;
-
-       while (count > 1) {
-               setup_pebs_sample_data(event, iregs, at, &data, &regs);
-               perf_event_output(event, &data, &regs);
-               at += x86_pmu.pebs_record_size;
-               at = get_next_pebs_record_by_bit(at, top, bit);
-               count--;
-       }
-
-       setup_pebs_sample_data(event, iregs, at, &data, &regs);
-
-       /*
-        * All but the last records are processed.
-        * The last one is left to be able to call the overflow handler.
-        */
-       if (perf_event_overflow(event, &data, &regs)) {
-               x86_pmu_stop(event, 0);
-               return;
-       }
-
-}
-
-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct perf_event *event = cpuc->events[0]; /* PMC0 only */
-       struct pebs_record_core *at, *top;
-       int n;
-
-       if (!x86_pmu.pebs_active)
-               return;
-
-       at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
-
-       /*
-        * Whatever else happens, drain the thing
-        */
-       ds->pebs_index = ds->pebs_buffer_base;
-
-       if (!test_bit(0, cpuc->active_mask))
-               return;
-
-       WARN_ON_ONCE(!event);
-
-       if (!event->attr.precise_ip)
-               return;
-
-       n = top - at;
-       if (n <= 0)
-               return;
-
-       __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
-}
-
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct perf_event *event;
-       void *base, *at, *top;
-       short counts[MAX_PEBS_EVENTS] = {};
-       short error[MAX_PEBS_EVENTS] = {};
-       int bit, i;
-
-       if (!x86_pmu.pebs_active)
-               return;
-
-       base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-       ds->pebs_index = ds->pebs_buffer_base;
-
-       if (unlikely(base >= top))
-               return;
-
-       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
-               struct pebs_record_nhm *p = at;
-               u64 pebs_status;
-
-               /* PEBS v3 has accurate status bits */
-               if (x86_pmu.intel_cap.pebs_format >= 3) {
-                       for_each_set_bit(bit, (unsigned long *)&p->status,
-                                        MAX_PEBS_EVENTS)
-                               counts[bit]++;
-
-                       continue;
-               }
-
-               pebs_status = p->status & cpuc->pebs_enabled;
-               pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
-
-               /*
-                * On some CPUs the PEBS status can be zero when PEBS is
-                * racing with clearing of GLOBAL_STATUS.
-                *
-                * Normally we would drop that record, but in the
-                * case when there is only a single active PEBS event
-                * we can assume it's for that event.
-                */
-               if (!pebs_status && cpuc->pebs_enabled &&
-                       !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
-                       pebs_status = cpuc->pebs_enabled;
-
-               bit = find_first_bit((unsigned long *)&pebs_status,
-                                       x86_pmu.max_pebs_events);
-               if (bit >= x86_pmu.max_pebs_events)
-                       continue;
-
-               /*
-                * The PEBS hardware does not deal well with the situation
-                * when events happen near to each other and multiple bits
-                * are set. But it should happen rarely.
-                *
-                * If these events include one PEBS and multiple non-PEBS
-                * events, it doesn't impact the PEBS record. The record will
-                * be handled normally. (slow path)
-                *
-                * If these events include two or more PEBS events, the
-                * records for the events can be collapsed into a single
-                * one, and it's not possible to reconstruct all events
-                * that caused the PEBS record. This is called a collision.
-                * If collision happened, the record will be dropped.
-                */
-               if (p->status != (1ULL << bit)) {
-                       for_each_set_bit(i, (unsigned long *)&pebs_status,
-                                        x86_pmu.max_pebs_events)
-                               error[i]++;
-                       continue;
-               }
-
-               counts[bit]++;
-       }
-
-       for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
-               if ((counts[bit] == 0) && (error[bit] == 0))
-                       continue;
-
-               event = cpuc->events[bit];
-               WARN_ON_ONCE(!event);
-               WARN_ON_ONCE(!event->attr.precise_ip);
-
-               /* log dropped samples number */
-               if (error[bit])
-                       perf_log_lost_samples(event, error[bit]);
-
-               if (counts[bit]) {
-                       __intel_pmu_pebs_event(event, iregs, base,
-                                              top, bit, counts[bit]);
-               }
-       }
-}
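
To illustrate the status handling with made-up values, assume cpuc->pebs_enabled == 0x3 (two PEBS events, on counters 0 and 1):

        /*
         * status 0x1 -> counts[0]++              handled normally
         * status 0x3 -> error[0]++, error[1]++   collision, record dropped
         * status 0x0 -> cannot be attributed with two events active; it is
         *               only rescued when exactly one PEBS event is enabled
         */
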
-
-/*
- * BTS, PEBS probe and setup
- */
-
-void __init intel_ds_init(void)
-{
-       /*
-        * No support for 32bit formats
-        */
-       if (!boot_cpu_has(X86_FEATURE_DTES64))
-               return;
-
-       x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
-       x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
-       if (x86_pmu.pebs) {
-               char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
-               int format = x86_pmu.intel_cap.pebs_format;
-
-               switch (format) {
-               case 0:
-                       printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
-                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
-                       break;
-
-               case 1:
-                       printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
-                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
-                       break;
-
-               case 2:
-                       pr_cont("PEBS fmt2%c, ", pebs_type);
-                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
-                       break;
-
-               case 3:
-                       pr_cont("PEBS fmt3%c, ", pebs_type);
-                       x86_pmu.pebs_record_size =
-                                               sizeof(struct pebs_record_skl);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
-                       x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
-                       break;
-
-               default:
-                       printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
-                       x86_pmu.pebs = 0;
-               }
-       }
-}
-
-void perf_restore_debug_store(void)
-{
-       struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
-
-       if (!x86_pmu.bts && !x86_pmu.pebs)
-               return;
-
-       wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
-}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
deleted file mode 100644 (file)
index 653f88d..0000000
+++ /dev/null
@@ -1,1062 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include <asm/perf_event.h>
-#include <asm/msr.h>
-#include <asm/insn.h>
-
-#include "perf_event.h"
-
-enum {
-       LBR_FORMAT_32           = 0x00,
-       LBR_FORMAT_LIP          = 0x01,
-       LBR_FORMAT_EIP          = 0x02,
-       LBR_FORMAT_EIP_FLAGS    = 0x03,
-       LBR_FORMAT_EIP_FLAGS2   = 0x04,
-       LBR_FORMAT_INFO         = 0x05,
-       LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
-};
-
-static enum {
-       LBR_EIP_FLAGS           = 1,
-       LBR_TSX                 = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
-       [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
-       [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
-/*
- * Intel LBR_SELECT bits
- * Intel Vol3a, April 2011, Section 16.7 Table 16-10
- *
- * Hardware branch filter (not available on all CPUs)
- */
-#define LBR_KERNEL_BIT         0 /* do not capture at ring0 */
-#define LBR_USER_BIT           1 /* do not capture at ring > 0 */
-#define LBR_JCC_BIT            2 /* do not capture conditional branches */
-#define LBR_REL_CALL_BIT       3 /* do not capture relative calls */
-#define LBR_IND_CALL_BIT       4 /* do not capture indirect calls */
-#define LBR_RETURN_BIT         5 /* do not capture near returns */
-#define LBR_IND_JMP_BIT                6 /* do not capture indirect jumps */
-#define LBR_REL_JMP_BIT                7 /* do not capture relative jumps */
-#define LBR_FAR_BIT            8 /* do not capture far branches */
-#define LBR_CALL_STACK_BIT     9 /* enable call stack */
-
-/*
- * Following bit only exists in Linux; we mask it out before writing it to
- * the actual MSR. But it helps the constraint perf code to understand
- * that this is a separate configuration.
- */
-#define LBR_NO_INFO_BIT               63 /* don't read LBR_INFO. */
-
-#define LBR_KERNEL     (1 << LBR_KERNEL_BIT)
-#define LBR_USER       (1 << LBR_USER_BIT)
-#define LBR_JCC                (1 << LBR_JCC_BIT)
-#define LBR_REL_CALL   (1 << LBR_REL_CALL_BIT)
-#define LBR_IND_CALL   (1 << LBR_IND_CALL_BIT)
-#define LBR_RETURN     (1 << LBR_RETURN_BIT)
-#define LBR_REL_JMP    (1 << LBR_REL_JMP_BIT)
-#define LBR_IND_JMP    (1 << LBR_IND_JMP_BIT)
-#define LBR_FAR                (1 << LBR_FAR_BIT)
-#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
-#define LBR_NO_INFO    (1ULL << LBR_NO_INFO_BIT)
-
-#define LBR_PLM (LBR_KERNEL | LBR_USER)
-
-#define LBR_SEL_MASK   0x1ff   /* valid bits in LBR_SELECT */
-#define LBR_NOT_SUPP   -1      /* LBR filter not supported */
-#define LBR_IGN                0       /* ignored */
-
-#define LBR_ANY                 \
-       (LBR_JCC        |\
-        LBR_REL_CALL   |\
-        LBR_IND_CALL   |\
-        LBR_RETURN     |\
-        LBR_REL_JMP    |\
-        LBR_IND_JMP    |\
-        LBR_FAR)
-
-#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
-#define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
-#define LBR_FROM_FLAG_ABORT    (1ULL << 61)
-
-/*
- * x86 control flow change classification
- * x86 control flow changes include branches, interrupts, traps, faults
- */
-enum {
-       X86_BR_NONE             = 0,      /* unknown */
-
-       X86_BR_USER             = 1 << 0, /* branch target is user */
-       X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */
-
-       X86_BR_CALL             = 1 << 2, /* call */
-       X86_BR_RET              = 1 << 3, /* return */
-       X86_BR_SYSCALL          = 1 << 4, /* syscall */
-       X86_BR_SYSRET           = 1 << 5, /* syscall return */
-       X86_BR_INT              = 1 << 6, /* sw interrupt */
-       X86_BR_IRET             = 1 << 7, /* return from interrupt */
-       X86_BR_JCC              = 1 << 8, /* conditional */
-       X86_BR_JMP              = 1 << 9, /* jump */
-       X86_BR_IRQ              = 1 << 10,/* hw interrupt or trap or fault */
-       X86_BR_IND_CALL         = 1 << 11,/* indirect calls */
-       X86_BR_ABORT            = 1 << 12,/* transaction abort */
-       X86_BR_IN_TX            = 1 << 13,/* in transaction */
-       X86_BR_NO_TX            = 1 << 14,/* not in transaction */
-       X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
-       X86_BR_CALL_STACK       = 1 << 16,/* call stack */
-       X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
-};
-
-#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
-#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
-
-#define X86_BR_ANY       \
-       (X86_BR_CALL    |\
-        X86_BR_RET     |\
-        X86_BR_SYSCALL |\
-        X86_BR_SYSRET  |\
-        X86_BR_INT     |\
-        X86_BR_IRET    |\
-        X86_BR_JCC     |\
-        X86_BR_JMP      |\
-        X86_BR_IRQ      |\
-        X86_BR_ABORT    |\
-        X86_BR_IND_CALL |\
-        X86_BR_IND_JMP  |\
-        X86_BR_ZERO_CALL)
-
-#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
-
-#define X86_BR_ANY_CALL                 \
-       (X86_BR_CALL            |\
-        X86_BR_IND_CALL        |\
-        X86_BR_ZERO_CALL       |\
-        X86_BR_SYSCALL         |\
-        X86_BR_IRQ             |\
-        X86_BR_INT)
-
-static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
-
-/*
- * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
- * otherwise it becomes near impossible to get a reliable stack.
- */
-
-static void __intel_pmu_lbr_enable(bool pmi)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       u64 debugctl, lbr_select = 0, orig_debugctl;
-
-       /*
-        * No need to unfreeze manually, as v4 can do that as part
-        * of the GLOBAL_STATUS ack.
-        */
-       if (pmi && x86_pmu.version >= 4)
-               return;
-
-       /*
-        * No need to reprogram LBR_SELECT in a PMI, as it
-        * did not change.
-        */
-       if (cpuc->lbr_sel)
-               lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
-       if (!pmi && cpuc->lbr_sel)
-               wrmsrl(MSR_LBR_SELECT, lbr_select);
-
-       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-       orig_debugctl = debugctl;
-       debugctl |= DEBUGCTLMSR_LBR;
-       /*
-        * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
-        * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
-        * may cause superfluous increase/decrease of LBR_TOS.
-        */
-       if (!(lbr_select & LBR_CALL_STACK))
-               debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
-       if (orig_debugctl != debugctl)
-               wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-}
-
-static void __intel_pmu_lbr_disable(void)
-{
-       u64 debugctl;
-
-       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-       debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-}
-
-static void intel_pmu_lbr_reset_32(void)
-{
-       int i;
-
-       for (i = 0; i < x86_pmu.lbr_nr; i++)
-               wrmsrl(x86_pmu.lbr_from + i, 0);
-}
-
-static void intel_pmu_lbr_reset_64(void)
-{
-       int i;
-
-       for (i = 0; i < x86_pmu.lbr_nr; i++) {
-               wrmsrl(x86_pmu.lbr_from + i, 0);
-               wrmsrl(x86_pmu.lbr_to   + i, 0);
-               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       wrmsrl(MSR_LBR_INFO_0 + i, 0);
-       }
-}
-
-void intel_pmu_lbr_reset(void)
-{
-       if (!x86_pmu.lbr_nr)
-               return;
-
-       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
-               intel_pmu_lbr_reset_32();
-       else
-               intel_pmu_lbr_reset_64();
-}
-
-/*
- * TOS = most recently recorded branch
- */
-static inline u64 intel_pmu_lbr_tos(void)
-{
-       u64 tos;
-
-       rdmsrl(x86_pmu.lbr_tos, tos);
-       return tos;
-}
-
-enum {
-       LBR_NONE,
-       LBR_VALID,
-};
-
-static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
-{
-       int i;
-       unsigned lbr_idx, mask;
-       u64 tos;
-
-       if (task_ctx->lbr_callstack_users == 0 ||
-           task_ctx->lbr_stack_state == LBR_NONE) {
-               intel_pmu_lbr_reset();
-               return;
-       }
-
-       mask = x86_pmu.lbr_nr - 1;
-       tos = task_ctx->tos;
-       for (i = 0; i < tos; i++) {
-               lbr_idx = (tos - i) & mask;
-               wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
-               wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
-               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
-       }
-       wrmsrl(x86_pmu.lbr_tos, tos);
-       task_ctx->lbr_stack_state = LBR_NONE;
-}
-
-static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
-{
-       int i;
-       unsigned lbr_idx, mask;
-       u64 tos;
-
-       if (task_ctx->lbr_callstack_users == 0) {
-               task_ctx->lbr_stack_state = LBR_NONE;
-               return;
-       }
-
-       mask = x86_pmu.lbr_nr - 1;
-       tos = intel_pmu_lbr_tos();
-       for (i = 0; i < tos; i++) {
-               lbr_idx = (tos - i) & mask;
-               rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
-               rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
-               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
-       }
-       task_ctx->tos = tos;
-       task_ctx->lbr_stack_state = LBR_VALID;
-}
-
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
-
-       /*
-        * If LBR callstack feature is enabled and the stack was saved when
-        * the task was scheduled out, restore the stack. Otherwise flush
-        * the LBR stack.
-        */
-       task_ctx = ctx ? ctx->task_ctx_data : NULL;
-       if (task_ctx) {
-               if (sched_in) {
-                       __intel_pmu_lbr_restore(task_ctx);
-                       cpuc->lbr_context = ctx;
-               } else {
-                       __intel_pmu_lbr_save(task_ctx);
-               }
-               return;
-       }
-
-       /*
-        * When sampling the branch stack in system-wide mode, it may be
-        * necessary to flush the stack on context switch. This happens
-        * when the branch stack does not tag its entries with the pid
-        * of the current task. Otherwise it becomes impossible to
-        * associate a branch entry with a task. This ambiguity is more
-        * likely to appear when the branch stack supports priv level
-        * filtering and the user sets it to monitor only at the user
-        * level (which could be a useful measurement in system-wide
-        * mode). In that case, the risk is high of having a branch
-        * stack with branches from multiple tasks.
-        */
-       if (sched_in) {
-               intel_pmu_lbr_reset();
-               cpuc->lbr_context = ctx;
-       }
-}
-
-static inline bool branch_user_callstack(unsigned br_sel)
-{
-       return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
-}
-
-void intel_pmu_lbr_enable(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
-
-       if (!x86_pmu.lbr_nr)
-               return;
-
-       /*
-        * Reset the LBR stack if we changed task context to
-        * avoid data leaks.
-        */
-       if (event->ctx->task && cpuc->lbr_context != event->ctx) {
-               intel_pmu_lbr_reset();
-               cpuc->lbr_context = event->ctx;
-       }
-       cpuc->br_sel = event->hw.branch_reg.reg;
-
-       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-                                       event->ctx->task_ctx_data) {
-               task_ctx = event->ctx->task_ctx_data;
-               task_ctx->lbr_callstack_users++;
-       }
-
-       cpuc->lbr_users++;
-       perf_sched_cb_inc(event->ctx->pmu);
-}
-
-void intel_pmu_lbr_disable(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
-
-       if (!x86_pmu.lbr_nr)
-               return;
-
-       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-                                       event->ctx->task_ctx_data) {
-               task_ctx = event->ctx->task_ctx_data;
-               task_ctx->lbr_callstack_users--;
-       }
-
-       cpuc->lbr_users--;
-       WARN_ON_ONCE(cpuc->lbr_users < 0);
-       perf_sched_cb_dec(event->ctx->pmu);
-
-       if (cpuc->enabled && !cpuc->lbr_users) {
-               __intel_pmu_lbr_disable();
-               /* avoid stale pointer */
-               cpuc->lbr_context = NULL;
-       }
-}
-
-void intel_pmu_lbr_enable_all(bool pmi)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (cpuc->lbr_users)
-               __intel_pmu_lbr_enable(pmi);
-}
-
-void intel_pmu_lbr_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (cpuc->lbr_users)
-               __intel_pmu_lbr_disable();
-}
-
-static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
-{
-       unsigned long mask = x86_pmu.lbr_nr - 1;
-       u64 tos = intel_pmu_lbr_tos();
-       int i;
-
-       for (i = 0; i < x86_pmu.lbr_nr; i++) {
-               unsigned long lbr_idx = (tos - i) & mask;
-               union {
-                       struct {
-                               u32 from;
-                               u32 to;
-                       };
-                       u64     lbr;
-               } msr_lastbranch;
-
-               rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
-
-               cpuc->lbr_entries[i].from       = msr_lastbranch.from;
-               cpuc->lbr_entries[i].to         = msr_lastbranch.to;
-               cpuc->lbr_entries[i].mispred    = 0;
-               cpuc->lbr_entries[i].predicted  = 0;
-               cpuc->lbr_entries[i].reserved   = 0;
-       }
-       cpuc->lbr_stack.nr = i;
-}
-
-/*
- * Due to lack of segmentation in Linux the effective address (offset)
- * is the same as the linear address, allowing us to merge the LIP and EIP
- * LBR formats.
- */
-static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
-{
-       bool need_info = false;
-       unsigned long mask = x86_pmu.lbr_nr - 1;
-       int lbr_format = x86_pmu.intel_cap.lbr_format;
-       u64 tos = intel_pmu_lbr_tos();
-       int i;
-       int out = 0;
-       int num = x86_pmu.lbr_nr;
-
-       if (cpuc->lbr_sel) {
-               need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
-               if (cpuc->lbr_sel->config & LBR_CALL_STACK)
-                       num = tos;
-       }
-
-       for (i = 0; i < num; i++) {
-               unsigned long lbr_idx = (tos - i) & mask;
-               u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
-               int skip = 0;
-               u16 cycles = 0;
-               int lbr_flags = lbr_desc[lbr_format];
-
-               rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
-               rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
-
-               if (lbr_format == LBR_FORMAT_INFO && need_info) {
-                       u64 info;
-
-                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
-                       mis = !!(info & LBR_INFO_MISPRED);
-                       pred = !mis;
-                       in_tx = !!(info & LBR_INFO_IN_TX);
-                       abort = !!(info & LBR_INFO_ABORT);
-                       cycles = (info & LBR_INFO_CYCLES);
-               }
-               if (lbr_flags & LBR_EIP_FLAGS) {
-                       mis = !!(from & LBR_FROM_FLAG_MISPRED);
-                       pred = !mis;
-                       skip = 1;
-               }
-               if (lbr_flags & LBR_TSX) {
-                       in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
-                       abort = !!(from & LBR_FROM_FLAG_ABORT);
-                       skip = 3;
-               }
-               from = (u64)((((s64)from) << skip) >> skip);
-
-               /*
-                * Some CPUs report duplicated abort records,
-                * with the second entry not having an abort bit set.
-                * Skip them here. This loop runs backwards,
-                * so we need to undo the previous record.
-                * If the abort just happened outside the window
-                * the extra entry cannot be removed.
-                */
-               if (abort && x86_pmu.lbr_double_abort && out > 0)
-                       out--;
-
-               cpuc->lbr_entries[out].from      = from;
-               cpuc->lbr_entries[out].to        = to;
-               cpuc->lbr_entries[out].mispred   = mis;
-               cpuc->lbr_entries[out].predicted = pred;
-               cpuc->lbr_entries[out].in_tx     = in_tx;
-               cpuc->lbr_entries[out].abort     = abort;
-               cpuc->lbr_entries[out].cycles    = cycles;
-               cpuc->lbr_entries[out].reserved  = 0;
-               out++;
-       }
-       cpuc->lbr_stack.nr = out;
-}
-
-void intel_pmu_lbr_read(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (!cpuc->lbr_users)
-               return;
-
-       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
-               intel_pmu_lbr_read_32(cpuc);
-       else
-               intel_pmu_lbr_read_64(cpuc);
-
-       intel_pmu_lbr_filter(cpuc);
-}
-
-/*
- * SW filter is used:
- * - in case there is no HW filter
- * - in case the HW filter has errata or limitations
- */
-static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
-{
-       u64 br_type = event->attr.branch_sample_type;
-       int mask = 0;
-
-       if (br_type & PERF_SAMPLE_BRANCH_USER)
-               mask |= X86_BR_USER;
-
-       if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
-               mask |= X86_BR_KERNEL;
-
-       /* we ignore BRANCH_HV here */
-
-       if (br_type & PERF_SAMPLE_BRANCH_ANY)
-               mask |= X86_BR_ANY;
-
-       if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
-               mask |= X86_BR_ANY_CALL;
-
-       if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
-               mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
-
-       if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
-               mask |= X86_BR_IND_CALL;
-
-       if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
-               mask |= X86_BR_ABORT;
-
-       if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
-               mask |= X86_BR_IN_TX;
-
-       if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
-               mask |= X86_BR_NO_TX;
-
-       if (br_type & PERF_SAMPLE_BRANCH_COND)
-               mask |= X86_BR_JCC;
-
-       if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
-               if (!x86_pmu_has_lbr_callstack())
-                       return -EOPNOTSUPP;
-               if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
-                       return -EINVAL;
-               mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
-                       X86_BR_CALL_STACK;
-       }
-
-       if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
-               mask |= X86_BR_IND_JMP;
-
-       if (br_type & PERF_SAMPLE_BRANCH_CALL)
-               mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
-       /*
-        * stash actual user request into reg, it may
-        * be used by fixup code for some CPU
-        */
-       event->hw.branch_reg.reg = mask;
-       return 0;
-}
-
-/*
- * Set up the HW LBR filter.
- * Used only when available; it may not be enough to disambiguate
- * all branches and may need the help of the SW filter.
- */
-static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg;
-       u64 br_type = event->attr.branch_sample_type;
-       u64 mask = 0, v;
-       int i;
-
-       for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
-               if (!(br_type & (1ULL << i)))
-                       continue;
-
-               v = x86_pmu.lbr_sel_map[i];
-               if (v == LBR_NOT_SUPP)
-                       return -EOPNOTSUPP;
-
-               if (v != LBR_IGN)
-                       mask |= v;
-       }
-
-       reg = &event->hw.branch_reg;
-       reg->idx = EXTRA_REG_LBR;
-
-       /*
-        * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
-        * in suppress mode. So LBR_SELECT should be set to
-        * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
-        */
-       reg->config = mask ^ x86_pmu.lbr_sel_mask;
-
-       if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
-           (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
-           (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
-               reg->config |= LBR_NO_INFO;
-
-       return 0;
-}
-
-int intel_pmu_setup_lbr_filter(struct perf_event *event)
-{
-       int ret = 0;
-
-       /*
-        * no LBR on this PMU
-        */
-       if (!x86_pmu.lbr_nr)
-               return -EOPNOTSUPP;
-
-       /*
-        * setup SW LBR filter
-        */
-       ret = intel_pmu_setup_sw_lbr_filter(event);
-       if (ret)
-               return ret;
-
-       /*
-        * setup HW LBR filter, if any
-        */
-       if (x86_pmu.lbr_sel_map)
-               ret = intel_pmu_setup_hw_lbr_filter(event);
-
-       return ret;
-}
-
-/*
- * Return the type of control flow change at address "from".
- * The instruction is not necessarily a branch (e.g., in case of an interrupt).
- *
- * The branch type returned also includes the priv level of the
- * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
- *
- * If a branch type is unknown OR the instruction cannot be
- * decoded (e.g., text page not present), then X86_BR_NONE is
- * returned.
- */
-static int branch_type(unsigned long from, unsigned long to, int abort)
-{
-       struct insn insn;
-       void *addr;
-       int bytes_read, bytes_left;
-       int ret = X86_BR_NONE;
-       int ext, to_plm, from_plm;
-       u8 buf[MAX_INSN_SIZE];
-       int is64 = 0;
-
-       to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
-       from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
-
-       /*
-        * may be zero if the LBR did not fill up after a reset by the time
-        * we get a PMU interrupt
-        */
-       if (from == 0 || to == 0)
-               return X86_BR_NONE;
-
-       if (abort)
-               return X86_BR_ABORT | to_plm;
-
-       if (from_plm == X86_BR_USER) {
-               /*
-                * can happen if measuring at the user level only
-                * and we interrupt in a kernel thread, e.g., idle.
-                */
-               if (!current->mm)
-                       return X86_BR_NONE;
-
-               /* may fail if text not present */
-               bytes_left = copy_from_user_nmi(buf, (void __user *)from,
-                                               MAX_INSN_SIZE);
-               bytes_read = MAX_INSN_SIZE - bytes_left;
-               if (!bytes_read)
-                       return X86_BR_NONE;
-
-               addr = buf;
-       } else {
-               /*
-                * The LBR logs any address in the IP, even if the IP just
-                * faulted. This means userspace can control the from address.
-        * Ensure we don't blindly read any address by validating it is
-                * a known text address.
-                */
-               if (kernel_text_address(from)) {
-                       addr = (void *)from;
-                       /*
-                        * Assume we can get the maximum possible size
-                        * when grabbing kernel data.  This is not
-                        * _strictly_ true since we could possibly be
-                        * executing up next to a memory hole, but
-                        * it is very unlikely to be a problem.
-                        */
-                       bytes_read = MAX_INSN_SIZE;
-               } else {
-                       return X86_BR_NONE;
-               }
-       }
-
-       /*
-        * decoder needs to know the ABI especially
-        * on 64-bit systems running 32-bit apps
-        */
-#ifdef CONFIG_X86_64
-       is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
-#endif
-       insn_init(&insn, addr, bytes_read, is64);
-       insn_get_opcode(&insn);
-       if (!insn.opcode.got)
-               return X86_BR_ABORT;
-
-       switch (insn.opcode.bytes[0]) {
-       case 0xf:
-               switch (insn.opcode.bytes[1]) {
-               case 0x05: /* syscall */
-               case 0x34: /* sysenter */
-                       ret = X86_BR_SYSCALL;
-                       break;
-               case 0x07: /* sysret */
-               case 0x35: /* sysexit */
-                       ret = X86_BR_SYSRET;
-                       break;
-               case 0x80 ... 0x8f: /* conditional */
-                       ret = X86_BR_JCC;
-                       break;
-               default:
-                       ret = X86_BR_NONE;
-               }
-               break;
-       case 0x70 ... 0x7f: /* conditional */
-               ret = X86_BR_JCC;
-               break;
-       case 0xc2: /* near ret */
-       case 0xc3: /* near ret */
-       case 0xca: /* far ret */
-       case 0xcb: /* far ret */
-               ret = X86_BR_RET;
-               break;
-       case 0xcf: /* iret */
-               ret = X86_BR_IRET;
-               break;
-       case 0xcc ... 0xce: /* int */
-               ret = X86_BR_INT;
-               break;
-       case 0xe8: /* call near rel */
-               insn_get_immediate(&insn);
-               if (insn.immediate1.value == 0) {
-                       /* zero length call */
-                       ret = X86_BR_ZERO_CALL;
-                       break;
-               }
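-               /* non-zero displacement: fall through to the regular call case */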
-       case 0x9a: /* call far absolute */
-               ret = X86_BR_CALL;
-               break;
-       case 0xe0 ... 0xe3: /* loop jmp */
-               ret = X86_BR_JCC;
-               break;
-       case 0xe9 ... 0xeb: /* jmp */
-               ret = X86_BR_JMP;
-               break;
-       case 0xff: /* call near absolute, call far absolute ind */
-               insn_get_modrm(&insn);
-               ext = (insn.modrm.bytes[0] >> 3) & 0x7;
-               switch (ext) {
-               case 2: /* near ind call */
-               case 3: /* far ind call */
-                       ret = X86_BR_IND_CALL;
-                       break;
-               case 4:
-               case 5:
-                       ret = X86_BR_IND_JMP;
-                       break;
-               }
-               break;
-       default:
-               ret = X86_BR_NONE;
-       }
-       /*
-        * Interrupts, traps and faults (and thus ring transitions) may
-        * occur on any instruction. Thus, to classify them correctly,
-        * we need to first look at the from and to priv levels. If they
-        * differ and the target is in the kernel, then it indicates
-        * a ring transition. If the from instruction is not a ring
-        * transition instruction (syscall, sysenter, int), then it means
-        * it was an irq, trap or fault.
-        *
-        * We have no way of detecting kernel-to-kernel faults.
-        */
-       if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
-           && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
-               ret = X86_BR_IRQ;
-
-       /*
-        * branch priv level determined by target as
-        * is done by HW when LBR_SELECT is implemented
-        */
-       if (ret != X86_BR_NONE)
-               ret |= to_plm;
-
-       return ret;
-}
-
-/*
- * implement actual branch filter based on user demand.
- * Hardware may not exactly satisfy that request, thus
- * we need to inspect opcodes. Mismatched branches are
- * discarded. Therefore, the number of branches returned
- * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
- */
-static void
-intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
-{
-       u64 from, to;
-       int br_sel = cpuc->br_sel;
-       int i, j, type;
-       bool compress = false;
-
-       /* if sampling all branches, then nothing to filter */
-       if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
-               return;
-
-       for (i = 0; i < cpuc->lbr_stack.nr; i++) {
-
-               from = cpuc->lbr_entries[i].from;
-               to = cpuc->lbr_entries[i].to;
-
-               type = branch_type(from, to, cpuc->lbr_entries[i].abort);
-               if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
-                       if (cpuc->lbr_entries[i].in_tx)
-                               type |= X86_BR_IN_TX;
-                       else
-                               type |= X86_BR_NO_TX;
-               }
-
-               /* if type does not correspond, then discard */
-               if (type == X86_BR_NONE || (br_sel & type) != type) {
-                       cpuc->lbr_entries[i].from = 0;
-                       compress = true;
-               }
-       }
-
-       if (!compress)
-               return;
-
-       /* remove all entries with from=0 */
-       for (i = 0; i < cpuc->lbr_stack.nr; ) {
-               if (!cpuc->lbr_entries[i].from) {
-                       j = i;
-                       while (++j < cpuc->lbr_stack.nr)
-                               cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
-                       cpuc->lbr_stack.nr--;
-                       if (!cpuc->lbr_entries[i].from)
-                               continue;
-               }
-               i++;
-       }
-}
-
-/*
- * Map interface branch filters onto LBR filters
- */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
-       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
-       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
-       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
-       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
-       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
-                                               | LBR_IND_JMP | LBR_FAR,
-       /*
-        * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
-        */
-       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
-        LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
-       /*
-        * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
-        */
-       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
-       [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
-       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
-};
-
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
-       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
-       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
-       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
-       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
-       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
-                                               | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
-       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
-       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
-       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
-};
-
-static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
-       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
-       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
-       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
-       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
-       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
-                                               | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
-       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
-       [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
-                                               | LBR_RETURN | LBR_CALL_STACK,
-       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
-       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
-};
-
-/* core */
-void __init intel_pmu_lbr_init_core(void)
-{
-       x86_pmu.lbr_nr     = 4;
-       x86_pmu.lbr_tos    = MSR_LBR_TOS;
-       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
-       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
-
-       /*
-        * SW branch filter usage:
-        * - compensate for lack of HW filter
-        */
-       pr_cont("4-deep LBR, ");
-}
-
-/* nehalem/westmere */
-void __init intel_pmu_lbr_init_nhm(void)
-{
-       x86_pmu.lbr_nr     = 16;
-       x86_pmu.lbr_tos    = MSR_LBR_TOS;
-       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
-
-       /*
-        * SW branch filter usage:
-        * - workaround LBR_SEL errata (see above)
-        * - support syscall, sysret capture.
-        *   That requires LBR_FAR but that means far
-        *   jmps need to be filtered out
-        */
-       pr_cont("16-deep LBR, ");
-}
-
-/* sandy bridge */
-void __init intel_pmu_lbr_init_snb(void)
-{
-       x86_pmu.lbr_nr   = 16;
-       x86_pmu.lbr_tos  = MSR_LBR_TOS;
-       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
-
-       /*
-        * SW branch filter usage:
-        * - support syscall, sysret capture.
-        *   That requires LBR_FAR but that means far
-        *   jmps need to be filtered out
-        */
-       pr_cont("16-deep LBR, ");
-}
-
-/* haswell */
-void intel_pmu_lbr_init_hsw(void)
-{
-       x86_pmu.lbr_nr   = 16;
-       x86_pmu.lbr_tos  = MSR_LBR_TOS;
-       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
-
-       pr_cont("16-deep LBR, ");
-}
-
-/* skylake */
-__init void intel_pmu_lbr_init_skl(void)
-{
-       x86_pmu.lbr_nr   = 32;
-       x86_pmu.lbr_tos  = MSR_LBR_TOS;
-       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
-
-       /*
-        * SW branch filter usage:
-        * - support syscall, sysret capture.
-        *   That requires LBR_FAR but that means far
-        *   jmps need to be filtered out
-        */
-       pr_cont("32-deep LBR, ");
-}
-
-/* atom */
-void __init intel_pmu_lbr_init_atom(void)
-{
-       /*
-        * only models starting at stepping 10 seem
-        * to have an operational LBR which can freeze
-        * on PMU interrupt
-        */
-       if (boot_cpu_data.x86_model == 28
-           && boot_cpu_data.x86_mask < 10) {
-               pr_cont("LBR disabled due to erratum");
-               return;
-       }
-
-       x86_pmu.lbr_nr     = 8;
-       x86_pmu.lbr_tos    = MSR_LBR_TOS;
-       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
-       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
-
-       /*
-        * SW branch filter usage:
-        * - compensate for lack of HW filter
-        */
-       pr_cont("8-deep LBR, ");
-}
-
-/* Knights Landing */
-void intel_pmu_lbr_init_knl(void)
-{
-       x86_pmu.lbr_nr     = 8;
-       x86_pmu.lbr_tos    = MSR_LBR_TOS;
-       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
-
-       pr_cont("8-deep LBR, ");
-}
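
The LBR code above is driven from userspace through perf's branch-stack
sampling ABI. Below is a minimal sketch (not part of this patch) of a consumer
requesting user-level LBR call-stack samples, which exercises the
PERF_SAMPLE_BRANCH_CALL_STACK handling in intel_pmu_setup_sw_lbr_filter() and
intel_pmu_setup_hw_lbr_filter(); the event choice and sample period are
arbitrary illustrative values:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	/* no glibc wrapper exists, call the raw syscall */
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/*
	 * user-level call-stack mode; the SW filter above maps this onto
	 * X86_BR_USER and X86_BR_CALL_STACK
	 */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_CALL_STACK;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/*
	 * samples, including the filtered branch stack, would be read from
	 * the mmap'ed ring buffer here
	 */
	close(fd);
	return 0;
}
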
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
deleted file mode 100644 (file)
index c0bbd10..0000000
+++ /dev/null
@@ -1,1188 +0,0 @@
-/*
- * Intel(R) Processor Trace PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * Intel PT is specified in the Intel Architecture Instruction Set Extensions
- * Programming Reference:
- * http://software.intel.com/en-us/intel-isa-extensions
- */
-
-#undef DEBUG
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-
-#include <asm/perf_event.h>
-#include <asm/insn.h>
-#include <asm/io.h>
-#include <asm/intel_pt.h>
-
-#include "perf_event.h"
-#include "intel_pt.h"
-
-static DEFINE_PER_CPU(struct pt, pt_ctx);
-
-static struct pt_pmu pt_pmu;
-
-enum cpuid_regs {
-       CR_EAX = 0,
-       CR_ECX,
-       CR_EDX,
-       CR_EBX
-};
-
-/*
- * Capabilities of Intel PT hardware, such as number of address bits or
- * supported output schemes, are cached and exported to userspace as "caps"
- * attribute group of pt pmu device
- * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
- * relevant bits together with intel_pt traces.
- *
- * These are necessary for both trace decoding (payloads_lip, contains address
- * width encoded in IP-related packets), and event configuration (bitmasks with
- * permitted values for certain bit fields).
- */
-#define PT_CAP(_n, _l, _r, _m)                                         \
-       [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l,        \
-                           .reg = _r, .mask = _m }
-
-static struct pt_cap_desc {
-       const char      *name;
-       u32             leaf;
-       u8              reg;
-       u32             mask;
-} pt_caps[] = {
-       PT_CAP(max_subleaf,             0, CR_EAX, 0xffffffff),
-       PT_CAP(cr3_filtering,           0, CR_EBX, BIT(0)),
-       PT_CAP(psb_cyc,                 0, CR_EBX, BIT(1)),
-       PT_CAP(mtc,                     0, CR_EBX, BIT(3)),
-       PT_CAP(topa_output,             0, CR_ECX, BIT(0)),
-       PT_CAP(topa_multiple_entries,   0, CR_ECX, BIT(1)),
-       PT_CAP(single_range_output,     0, CR_ECX, BIT(2)),
-       PT_CAP(payloads_lip,            0, CR_ECX, BIT(31)),
-       PT_CAP(mtc_periods,             1, CR_EAX, 0xffff0000),
-       PT_CAP(cycle_thresholds,        1, CR_EBX, 0xffff),
-       PT_CAP(psb_periods,             1, CR_EBX, 0xffff0000),
-};
-
-static u32 pt_cap_get(enum pt_capabilities cap)
-{
-       struct pt_cap_desc *cd = &pt_caps[cap];
-       u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
-       unsigned int shift = __ffs(cd->mask);
-
-       return (c & cd->mask) >> shift;
-}
-
-static ssize_t pt_cap_show(struct device *cdev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct dev_ext_attribute *ea =
-               container_of(attr, struct dev_ext_attribute, attr);
-       enum pt_capabilities cap = (long)ea->var;
-
-       return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
-}
-
-static struct attribute_group pt_cap_group = {
-       .name   = "caps",
-};
-
-PMU_FORMAT_ATTR(cyc,           "config:1"      );
-PMU_FORMAT_ATTR(mtc,           "config:9"      );
-PMU_FORMAT_ATTR(tsc,           "config:10"     );
-PMU_FORMAT_ATTR(noretcomp,     "config:11"     );
-PMU_FORMAT_ATTR(mtc_period,    "config:14-17"  );
-PMU_FORMAT_ATTR(cyc_thresh,    "config:19-22"  );
-PMU_FORMAT_ATTR(psb_period,    "config:24-27"  );
-
-static struct attribute *pt_formats_attr[] = {
-       &format_attr_cyc.attr,
-       &format_attr_mtc.attr,
-       &format_attr_tsc.attr,
-       &format_attr_noretcomp.attr,
-       &format_attr_mtc_period.attr,
-       &format_attr_cyc_thresh.attr,
-       &format_attr_psb_period.attr,
-       NULL,
-};
-
-static struct attribute_group pt_format_group = {
-       .name   = "format",
-       .attrs  = pt_formats_attr,
-};
-
-static const struct attribute_group *pt_attr_groups[] = {
-       &pt_cap_group,
-       &pt_format_group,
-       NULL,
-};
-
-static int __init pt_pmu_hw_init(void)
-{
-       struct dev_ext_attribute *de_attrs;
-       struct attribute **attrs;
-       size_t size;
-       int ret;
-       long i;
-
-       attrs = NULL;
-
-       for (i = 0; i < PT_CPUID_LEAVES; i++) {
-               cpuid_count(20, i,
-                           &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
-       }
-
-       ret = -ENOMEM;
-       size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
-       attrs = kzalloc(size, GFP_KERNEL);
-       if (!attrs)
-               goto fail;
-
-       size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
-       de_attrs = kzalloc(size, GFP_KERNEL);
-       if (!de_attrs)
-               goto fail;
-
-       for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
-               struct dev_ext_attribute *de_attr = de_attrs + i;
-
-               de_attr->attr.attr.name = pt_caps[i].name;
-
-               sysfs_attr_init(&de_attr->attr.attr);
-
-               de_attr->attr.attr.mode         = S_IRUGO;
-               de_attr->attr.show              = pt_cap_show;
-               de_attr->var                    = (void *)i;
-
-               attrs[i] = &de_attr->attr.attr;
-       }
-
-       pt_cap_group.attrs = attrs;
-
-       return 0;
-
-fail:
-       kfree(attrs);
-
-       return ret;
-}
-
-#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC    | \
-                         RTIT_CTL_CYC_THRESH   | \
-                         RTIT_CTL_PSB_FREQ)
-
-#define RTIT_CTL_MTC   (RTIT_CTL_MTC_EN        | \
-                        RTIT_CTL_MTC_RANGE)
-
-#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN                | \
-                       RTIT_CTL_DISRETC        | \
-                       RTIT_CTL_CYC_PSB        | \
-                       RTIT_CTL_MTC)
-
-static bool pt_event_valid(struct perf_event *event)
-{
-       u64 config = event->attr.config;
-       u64 allowed, requested;
-
-       if ((config & PT_CONFIG_MASK) != config)
-               return false;
-
-       if (config & RTIT_CTL_CYC_PSB) {
-               if (!pt_cap_get(PT_CAP_psb_cyc))
-                       return false;
-
-               allowed = pt_cap_get(PT_CAP_psb_periods);
-               requested = (config & RTIT_CTL_PSB_FREQ) >>
-                       RTIT_CTL_PSB_FREQ_OFFSET;
-               if (requested && (!(allowed & BIT(requested))))
-                       return false;
-
-               allowed = pt_cap_get(PT_CAP_cycle_thresholds);
-               requested = (config & RTIT_CTL_CYC_THRESH) >>
-                       RTIT_CTL_CYC_THRESH_OFFSET;
-               if (requested && (!(allowed & BIT(requested))))
-                       return false;
-       }
-
-       if (config & RTIT_CTL_MTC) {
-               /*
-                * In the unlikely case that CPUID lists valid mtc periods,
-                * but not the mtc capability, drop out here.
-                *
-                * Spec says that setting mtc period bits while mtc bit in
-                * CPUID is 0 will #GP, so better safe than sorry.
-                */
-               if (!pt_cap_get(PT_CAP_mtc))
-                       return false;
-
-               allowed = pt_cap_get(PT_CAP_mtc_periods);
-               if (!allowed)
-                       return false;
-
-               requested = (config & RTIT_CTL_MTC_RANGE) >>
-                       RTIT_CTL_MTC_RANGE_OFFSET;
-
-               if (!(allowed & BIT(requested)))
-                       return false;
-       }
-
-       return true;
-}
-
-/*
- * PT configuration helpers
- * These all are cpu affine and operate on a local PT
- */
-
-static void pt_config(struct perf_event *event)
-{
-       u64 reg;
-
-       if (!event->hw.itrace_started) {
-               event->hw.itrace_started = 1;
-               wrmsrl(MSR_IA32_RTIT_STATUS, 0);
-       }
-
-       reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
-
-       if (!event->attr.exclude_kernel)
-               reg |= RTIT_CTL_OS;
-       if (!event->attr.exclude_user)
-               reg |= RTIT_CTL_USR;
-
-       reg |= (event->attr.config & PT_CONFIG_MASK);
-
-       wrmsrl(MSR_IA32_RTIT_CTL, reg);
-}
-
-static void pt_config_start(bool start)
-{
-       u64 ctl;
-
-       rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-       if (start)
-               ctl |= RTIT_CTL_TRACEEN;
-       else
-               ctl &= ~RTIT_CTL_TRACEEN;
-       wrmsrl(MSR_IA32_RTIT_CTL, ctl);
-
-       /*
-        * A wrmsr that disables trace generation serializes other PT
-        * registers and causes all data packets to be written to memory,
-        * but a fence is required for the data to become globally visible.
-        *
-        * The below WMB, separating data store and aux_head store matches
-        * the consumer's RMB that separates aux_head load and data load.
-        */
-       if (!start)
-               wmb();
-}
-
-static void pt_config_buffer(void *buf, unsigned int topa_idx,
-                            unsigned int output_off)
-{
-       u64 reg;
-
-       wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
-
-       reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
-
-       wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
-}
-
-/*
- * Keep ToPA table-related metadata on the same page as the actual table,
- * taking up a few words from the top
- */
-
-#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
-
-/**
- * struct topa - page-sized ToPA table with metadata at the top
- * @table:     actual ToPA table entries, as understood by PT hardware
- * @list:      linkage to struct pt_buffer's list of tables
- * @phys:      physical address of this page
- * @offset:    offset of the first entry in this table in the buffer
- * @size:      total size of all entries in this table
- * @last:      index of the last initialized entry in this table
- */
-struct topa {
-       struct topa_entry       table[TENTS_PER_PAGE];
-       struct list_head        list;
-       u64                     phys;
-       u64                     offset;
-       size_t                  size;
-       int                     last;
-};
-
-/* make -1 stand for the last table entry */
-#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
-
-/**
- * topa_alloc() - allocate page-sized ToPA table
- * @cpu:       CPU on which to allocate.
- * @gfp:       Allocation flags.
- *
- * Return:     On success, return the pointer to ToPA table page.
- */
-static struct topa *topa_alloc(int cpu, gfp_t gfp)
-{
-       int node = cpu_to_node(cpu);
-       struct topa *topa;
-       struct page *p;
-
-       p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
-       if (!p)
-               return NULL;
-
-       topa = page_address(p);
-       topa->last = 0;
-       topa->phys = page_to_phys(p);
-
-       /*
-        * In case of single-entry ToPA, always put the self-referencing END
-        * link as the 2nd entry in the table
-        */
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
-               TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
-               TOPA_ENTRY(topa, 1)->end = 1;
-       }
-
-       return topa;
-}
-
-/**
- * topa_free() - free a page-sized ToPA table
- * @topa:      Table to deallocate.
- */
-static void topa_free(struct topa *topa)
-{
-       free_page((unsigned long)topa);
-}
-
-/**
- * topa_insert_table() - insert a ToPA table into a buffer
- * @buf:        PT buffer that's being extended.
- * @topa:       New topa table to be inserted.
- *
- * If it's the first table in this buffer, set up buffer's pointers
- * accordingly; otherwise, add an END=1 link entry pointing to @topa in the
- * current "last" table and make @topa the new last table.
- */
-static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
-{
-       struct topa *last = buf->last;
-
-       list_add_tail(&topa->list, &buf->tables);
-
-       if (!buf->first) {
-               buf->first = buf->last = buf->cur = topa;
-               return;
-       }
-
-       topa->offset = last->offset + last->size;
-       buf->last = topa;
-
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-               return;
-
-       BUG_ON(last->last != TENTS_PER_PAGE - 1);
-
-       TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
-       TOPA_ENTRY(last, -1)->end = 1;
-}
-
-/**
- * topa_table_full() - check if a ToPA table is filled up
- * @topa:      ToPA table.
- */
-static bool topa_table_full(struct topa *topa)
-{
-       /* single-entry ToPA is a special case */
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-               return !!topa->last;
-
-       return topa->last == TENTS_PER_PAGE - 1;
-}
-
-/**
- * topa_insert_pages() - create a list of ToPA tables
- * @buf:       PT buffer being initialized.
- * @gfp:       Allocation flags.
- *
- * This initializes a list of ToPA tables with entries from
- * the data_pages provided by rb_alloc_aux().
- *
- * Return:     0 on success or error code.
- */
-static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
-{
-       struct topa *topa = buf->last;
-       int order = 0;
-       struct page *p;
-
-       p = virt_to_page(buf->data_pages[buf->nr_pages]);
-       if (PagePrivate(p))
-               order = page_private(p);
-
-       if (topa_table_full(topa)) {
-               topa = topa_alloc(buf->cpu, gfp);
-               if (!topa)
-                       return -ENOMEM;
-
-               topa_insert_table(buf, topa);
-       }
-
-       TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
-       TOPA_ENTRY(topa, -1)->size = order;
-       if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
-               TOPA_ENTRY(topa, -1)->intr = 1;
-               TOPA_ENTRY(topa, -1)->stop = 1;
-       }
-
-       topa->last++;
-       topa->size += sizes(order);
-
-       buf->nr_pages += 1ul << order;
-
-       return 0;
-}
-
-/**
- * pt_topa_dump() - print ToPA tables and their entries
- * @buf:       PT buffer.
- */
-static void pt_topa_dump(struct pt_buffer *buf)
-{
-       struct topa *topa;
-
-       list_for_each_entry(topa, &buf->tables, list) {
-               int i;
-
-               pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
-                        topa->phys, topa->offset, topa->size);
-               for (i = 0; i < TENTS_PER_PAGE; i++) {
-                       pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
-                                &topa->table[i],
-                                (unsigned long)topa->table[i].base << TOPA_SHIFT,
-                                sizes(topa->table[i].size),
-                                topa->table[i].end ?  'E' : ' ',
-                                topa->table[i].intr ? 'I' : ' ',
-                                topa->table[i].stop ? 'S' : ' ',
-                                *(u64 *)&topa->table[i]);
-                       if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
-                            topa->table[i].stop) ||
-                           topa->table[i].end)
-                               break;
-               }
-       }
-}
-
-/**
- * pt_buffer_advance() - advance to the next output region
- * @buf:       PT buffer.
- *
- * Advance the current pointers in the buffer to the next ToPA entry.
- */
-static void pt_buffer_advance(struct pt_buffer *buf)
-{
-       buf->output_off = 0;
-       buf->cur_idx++;
-
-       if (buf->cur_idx == buf->cur->last) {
-               if (buf->cur == buf->last)
-                       buf->cur = buf->first;
-               else
-                       buf->cur = list_entry(buf->cur->list.next, struct topa,
-                                             list);
-               buf->cur_idx = 0;
-       }
-}
-
-/**
- * pt_update_head() - calculate current offsets and sizes
- * @pt:                Per-cpu pt context.
- *
- * Update buffer's current write pointer position and data size.
- */
-static void pt_update_head(struct pt *pt)
-{
-       struct pt_buffer *buf = perf_get_aux(&pt->handle);
-       u64 topa_idx, base, old;
-
-       /* offset of the first region in this table from the beginning of buf */
-       base = buf->cur->offset + buf->output_off;
-
-       /* offset of the current output region within this table */
-       for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
-               base += sizes(buf->cur->table[topa_idx].size);
-
-       if (buf->snapshot) {
-               local_set(&buf->data_size, base);
-       } else {
-               old = (local64_xchg(&buf->head, base) &
-                      ((buf->nr_pages << PAGE_SHIFT) - 1));
-               if (base < old)
-                       base += buf->nr_pages << PAGE_SHIFT;
-
-               local_add(base - old, &buf->data_size);
-       }
-}
-
-/**
- * pt_buffer_region() - obtain current output region's address
- * @buf:       PT buffer.
- */
-static void *pt_buffer_region(struct pt_buffer *buf)
-{
-       return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
-}
-
-/**
- * pt_buffer_region_size() - obtain current output region's size
- * @buf:       PT buffer.
- */
-static size_t pt_buffer_region_size(struct pt_buffer *buf)
-{
-       return sizes(buf->cur->table[buf->cur_idx].size);
-}
-
-/**
- * pt_handle_status() - take care of possible status conditions
- * @pt:                Per-cpu pt context.
- */
-static void pt_handle_status(struct pt *pt)
-{
-       struct pt_buffer *buf = perf_get_aux(&pt->handle);
-       int advance = 0;
-       u64 status;
-
-       rdmsrl(MSR_IA32_RTIT_STATUS, status);
-
-       if (status & RTIT_STATUS_ERROR) {
-               pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
-               pt_topa_dump(buf);
-               status &= ~RTIT_STATUS_ERROR;
-       }
-
-       if (status & RTIT_STATUS_STOPPED) {
-               status &= ~RTIT_STATUS_STOPPED;
-
-               /*
-                * On systems that only do single-entry ToPA, hitting STOP
-                * means we are already losing data; need to let the decoder
-                * know.
-                */
-               if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
-                   buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
-                       local_inc(&buf->lost);
-                       advance++;
-               }
-       }
-
-       /*
-        * Also, on single-entry ToPA implementations the interrupt will come
-        * before the output reaches its output region's boundary.
-        */
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
-           pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
-               void *head = pt_buffer_region(buf);
-
-               /* everything within this margin needs to be zeroed out */
-               memset(head + buf->output_off, 0,
-                      pt_buffer_region_size(buf) -
-                      buf->output_off);
-               advance++;
-       }
-
-       if (advance)
-               pt_buffer_advance(buf);
-
-       wrmsrl(MSR_IA32_RTIT_STATUS, status);
-}
-
-/**
- * pt_read_offset() - translate registers into buffer pointers
- * @buf:       PT buffer.
- *
- * Set buffer's output pointers from MSR values.
- */
-static void pt_read_offset(struct pt_buffer *buf)
-{
-       u64 offset, base_topa;
-
-       rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
-       buf->cur = phys_to_virt(base_topa);
-
-       rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
-       /* offset within current output region */
-       buf->output_off = offset >> 32;
-       /* index of current output region within this table */
-       buf->cur_idx = (offset & 0xffffff80) >> 7;
-}
-
-/**
- * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
- * @buf:       PT buffer.
- * @pg:                Page offset in the buffer.
- *
- * When advancing to the next output region (ToPA entry), given a page offset
- * into the buffer, we need to find the offset of the first page in the next
- * region.
- */
-static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
-{
-       struct topa_entry *te = buf->topa_index[pg];
-
-       /* one region */
-       if (buf->first == buf->last && buf->first->last == 1)
-               return pg;
-
-       do {
-               pg++;
-               pg &= buf->nr_pages - 1;
-       } while (buf->topa_index[pg] == te);
-
-       return pg;
-}
-
-/**
- * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
- * @buf:       PT buffer.
- * @handle:    Current output handle.
- *
- * Place INT and STOP marks to prevent overwriting old data that the consumer
- * hasn't yet collected and waking up the consumer after a certain fraction of
- * the buffer has filled up. Only needed and sensible for non-snapshot counters.
- *
- * This obviously relies on buf::head to figure out buffer markers, so it has
- * to be called after pt_buffer_reset_offsets() and before the hardware tracing
- * is enabled.
- */
-static int pt_buffer_reset_markers(struct pt_buffer *buf,
-                                  struct perf_output_handle *handle)
-
-{
-       unsigned long head = local64_read(&buf->head);
-       unsigned long idx, npages, wakeup;
-
-       /* can't stop in the middle of an output region */
-       if (buf->output_off + handle->size + 1 <
-           sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
-               return -EINVAL;
-
-
-       /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-               return 0;
-
-       /* clear STOP and INT from current entry */
-       buf->topa_index[buf->stop_pos]->stop = 0;
-       buf->topa_index[buf->intr_pos]->intr = 0;
-
-       /* how many pages till the STOP marker */
-       npages = handle->size >> PAGE_SHIFT;
-
-       /* if it's on a page boundary, fill up one more page */
-       if (!offset_in_page(head + handle->size + 1))
-               npages++;
-
-       idx = (head >> PAGE_SHIFT) + npages;
-       idx &= buf->nr_pages - 1;
-       buf->stop_pos = idx;
-
-       wakeup = handle->wakeup >> PAGE_SHIFT;
-
-       /* in the worst case, wake up the consumer one page before hard stop */
-       idx = (head >> PAGE_SHIFT) + npages - 1;
-       if (idx > wakeup)
-               idx = wakeup;
-
-       idx &= buf->nr_pages - 1;
-       buf->intr_pos = idx;
-
-       buf->topa_index[buf->stop_pos]->stop = 1;
-       buf->topa_index[buf->intr_pos]->intr = 1;
-
-       return 0;
-}
-
-/**
- * pt_buffer_setup_topa_index() - build topa_index[] table of regions
- * @buf:       PT buffer.
- *
- * topa_index[] references output regions indexed by offset into the
- * buffer for purposes of quick reverse lookup.
- */
-static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
-{
-       struct topa *cur = buf->first, *prev = buf->last;
-       struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
-               *te_prev = TOPA_ENTRY(prev, prev->last - 1);
-       int pg = 0, idx = 0;
-
-       while (pg < buf->nr_pages) {
-               int tidx;
-
-               /* pages within one topa entry */
-               for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
-                       buf->topa_index[pg] = te_prev;
-
-               te_prev = te_cur;
-
-               if (idx == cur->last - 1) {
-                       /* advance to next topa table */
-                       idx = 0;
-                       cur = list_entry(cur->list.next, struct topa, list);
-               } else {
-                       idx++;
-               }
-               te_cur = TOPA_ENTRY(cur, idx);
-       }
-
-}
-
-/**
- * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
- * @buf:       PT buffer.
- * @head:      Write pointer (aux_head) from AUX buffer.
- *
- * Find the ToPA table and entry corresponding to given @head and set buffer's
- * "current" pointers accordingly. This is done after we have obtained the
- * current aux_head position from a successful call to perf_aux_output_begin()
- * to make sure the hardware is writing to the right place.
- *
- * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
- * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
- * which are used to determine INT and STOP markers' locations by a subsequent
- * call to pt_buffer_reset_markers().
- */
-static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
-{
-       int pg;
-
-       if (buf->snapshot)
-               head &= (buf->nr_pages << PAGE_SHIFT) - 1;
-
-       pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
-       pg = pt_topa_next_entry(buf, pg);
-
-       buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
-       buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
-                       (unsigned long)buf->cur) / sizeof(struct topa_entry);
-       buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
-
-       local64_set(&buf->head, head);
-       local_set(&buf->data_size, 0);
-}
-
-/**
- * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
- * @buf:       PT buffer.
- */
-static void pt_buffer_fini_topa(struct pt_buffer *buf)
-{
-       struct topa *topa, *iter;
-
-       list_for_each_entry_safe(topa, iter, &buf->tables, list) {
-               /*
-                * right now, this is in free_aux() path only, so
-                * no need to unlink this table from the list
-                */
-               topa_free(topa);
-       }
-}
-
-/**
- * pt_buffer_init_topa() - initialize ToPA table for pt buffer
- * @buf:       PT buffer.
- * @size:      Total size of all regions within this ToPA.
- * @gfp:       Allocation flags.
- */
-static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
-                              gfp_t gfp)
-{
-       struct topa *topa;
-       int err;
-
-       topa = topa_alloc(buf->cpu, gfp);
-       if (!topa)
-               return -ENOMEM;
-
-       topa_insert_table(buf, topa);
-
-       while (buf->nr_pages < nr_pages) {
-               err = topa_insert_pages(buf, gfp);
-               if (err) {
-                       pt_buffer_fini_topa(buf);
-                       return -ENOMEM;
-               }
-       }
-
-       pt_buffer_setup_topa_index(buf);
-
-       /* link last table to the first one, unless we're double buffering */
-       if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
-               TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
-               TOPA_ENTRY(buf->last, -1)->end = 1;
-       }
-
-       pt_topa_dump(buf);
-       return 0;
-}
-
-/**
- * pt_buffer_setup_aux() - set up topa tables for a PT buffer
- * @cpu:       Cpu on which to allocate, -1 means current.
- * @pages:     Array of pointers to buffer pages passed from perf core.
- * @nr_pages:  Number of pages in the buffer.
- * @snapshot:  If this is a snapshot/overwrite counter.
- *
- * This is a pmu::setup_aux callback that sets up ToPA tables and all the
- * bookkeeping for an AUX buffer.
- *
- * Return:     Our private PT buffer structure.
- */
-static void *
-pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
-{
-       struct pt_buffer *buf;
-       int node, ret;
-
-       if (!nr_pages)
-               return NULL;
-
-       if (cpu == -1)
-               cpu = raw_smp_processor_id();
-       node = cpu_to_node(cpu);
-
-       buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
-                          GFP_KERNEL, node);
-       if (!buf)
-               return NULL;
-
-       buf->cpu = cpu;
-       buf->snapshot = snapshot;
-       buf->data_pages = pages;
-
-       INIT_LIST_HEAD(&buf->tables);
-
-       ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
-       if (ret) {
-               kfree(buf);
-               return NULL;
-       }
-
-       return buf;
-}
-
-/**
- * pt_buffer_free_aux() - perf AUX deallocation path callback
- * @data:      PT buffer.
- */
-static void pt_buffer_free_aux(void *data)
-{
-       struct pt_buffer *buf = data;
-
-       pt_buffer_fini_topa(buf);
-       kfree(buf);
-}
-
-/**
- * pt_buffer_is_full() - check if the buffer is full
- * @buf:       PT buffer.
- * @pt:                Per-cpu pt handle.
- *
- * If the user hasn't read data from the output region that aux_head
- * points to, the buffer is considered full: the user needs to read at
- * least this region and update aux_tail to point past it.
- */
-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
-{
-       if (buf->snapshot)
-               return false;
-
-       if (local_read(&buf->data_size) >= pt->handle.size)
-               return true;
-
-       return false;
-}
-
-/**
- * intel_pt_interrupt() - PT PMI handler
- */
-void intel_pt_interrupt(void)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct pt_buffer *buf;
-       struct perf_event *event = pt->handle.event;
-
-       /*
-        * There may be a dangling PT bit in the interrupt status register
-        * after PT has been disabled by pt_event_stop(). Make sure we don't
-        * do anything (particularly, re-enable) for this event here.
-        */
-       if (!ACCESS_ONCE(pt->handle_nmi))
-               return;
-
-       pt_config_start(false);
-
-       if (!event)
-               return;
-
-       buf = perf_get_aux(&pt->handle);
-       if (!buf)
-               return;
-
-       pt_read_offset(buf);
-
-       pt_handle_status(pt);
-
-       pt_update_head(pt);
-
-       perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
-                           local_xchg(&buf->lost, 0));
-
-       if (!event->hw.state) {
-               int ret;
-
-               buf = perf_aux_output_begin(&pt->handle, event);
-               if (!buf) {
-                       event->hw.state = PERF_HES_STOPPED;
-                       return;
-               }
-
-               pt_buffer_reset_offsets(buf, pt->handle.head);
-               /* snapshot counters don't use PMI, so it's safe */
-               ret = pt_buffer_reset_markers(buf, &pt->handle);
-               if (ret) {
-                       perf_aux_output_end(&pt->handle, 0, true);
-                       return;
-               }
-
-               pt_config_buffer(buf->cur->table, buf->cur_idx,
-                                buf->output_off);
-               pt_config(event);
-       }
-}
-
-/*
- * PMU callbacks
- */
-
-static void pt_event_start(struct perf_event *event, int mode)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct pt_buffer *buf = perf_get_aux(&pt->handle);
-
-       if (!buf || pt_buffer_is_full(buf, pt)) {
-               event->hw.state = PERF_HES_STOPPED;
-               return;
-       }
-
-       ACCESS_ONCE(pt->handle_nmi) = 1;
-       event->hw.state = 0;
-
-       pt_config_buffer(buf->cur->table, buf->cur_idx,
-                        buf->output_off);
-       pt_config(event);
-}
-
-static void pt_event_stop(struct perf_event *event, int mode)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-
-       /*
-        * Protect against the PMI racing with disabling wrmsr,
-        * see comment in intel_pt_interrupt().
-        */
-       ACCESS_ONCE(pt->handle_nmi) = 0;
-       pt_config_start(false);
-
-       if (event->hw.state == PERF_HES_STOPPED)
-               return;
-
-       event->hw.state = PERF_HES_STOPPED;
-
-       if (mode & PERF_EF_UPDATE) {
-               struct pt_buffer *buf = perf_get_aux(&pt->handle);
-
-               if (!buf)
-                       return;
-
-               if (WARN_ON_ONCE(pt->handle.event != event))
-                       return;
-
-               pt_read_offset(buf);
-
-               pt_handle_status(pt);
-
-               pt_update_head(pt);
-       }
-}
-
-static void pt_event_del(struct perf_event *event, int mode)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct pt_buffer *buf;
-
-       pt_event_stop(event, PERF_EF_UPDATE);
-
-       buf = perf_get_aux(&pt->handle);
-
-       if (buf) {
-               if (buf->snapshot)
-                       pt->handle.head =
-                               local_xchg(&buf->data_size,
-                                          buf->nr_pages << PAGE_SHIFT);
-               perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
-                                   local_xchg(&buf->lost, 0));
-       }
-}
-
-static int pt_event_add(struct perf_event *event, int mode)
-{
-       struct pt_buffer *buf;
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct hw_perf_event *hwc = &event->hw;
-       int ret = -EBUSY;
-
-       if (pt->handle.event)
-               goto fail;
-
-       buf = perf_aux_output_begin(&pt->handle, event);
-       ret = -EINVAL;
-       if (!buf)
-               goto fail_stop;
-
-       pt_buffer_reset_offsets(buf, pt->handle.head);
-       if (!buf->snapshot) {
-               ret = pt_buffer_reset_markers(buf, &pt->handle);
-               if (ret)
-                       goto fail_end_stop;
-       }
-
-       if (mode & PERF_EF_START) {
-               pt_event_start(event, 0);
-               ret = -EBUSY;
-               if (hwc->state == PERF_HES_STOPPED)
-                       goto fail_end_stop;
-       } else {
-               hwc->state = PERF_HES_STOPPED;
-       }
-
-       return 0;
-
-fail_end_stop:
-       perf_aux_output_end(&pt->handle, 0, true);
-fail_stop:
-       hwc->state = PERF_HES_STOPPED;
-fail:
-       return ret;
-}
-
-static void pt_event_read(struct perf_event *event)
-{
-}
-
-static void pt_event_destroy(struct perf_event *event)
-{
-       x86_del_exclusive(x86_lbr_exclusive_pt);
-}
-
-static int pt_event_init(struct perf_event *event)
-{
-       if (event->attr.type != pt_pmu.pmu.type)
-               return -ENOENT;
-
-       if (!pt_event_valid(event))
-               return -EINVAL;
-
-       if (x86_add_exclusive(x86_lbr_exclusive_pt))
-               return -EBUSY;
-
-       event->destroy = pt_event_destroy;
-
-       return 0;
-}
-
-void cpu_emergency_stop_pt(void)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-
-       if (pt->handle.event)
-               pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
-}
-
-static __init int pt_init(void)
-{
-       int ret, cpu, prior_warn = 0;
-
-       BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
-
-       if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
-               return -ENODEV;
-
-       get_online_cpus();
-       for_each_online_cpu(cpu) {
-               u64 ctl;
-
-               ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
-               if (!ret && (ctl & RTIT_CTL_TRACEEN))
-                       prior_warn++;
-       }
-       put_online_cpus();
-
-       if (prior_warn) {
-               x86_add_exclusive(x86_lbr_exclusive_pt);
-               pr_warn("PT is enabled at boot time, doing nothing\n");
-
-               return -EBUSY;
-       }
-
-       ret = pt_pmu_hw_init();
-       if (ret)
-               return ret;
-
-       if (!pt_cap_get(PT_CAP_topa_output)) {
-               pr_warn("ToPA output is not supported on this CPU\n");
-               return -ENODEV;
-       }
-
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-               pt_pmu.pmu.capabilities =
-                       PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
-
-       pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
-       pt_pmu.pmu.attr_groups  = pt_attr_groups;
-       pt_pmu.pmu.task_ctx_nr  = perf_sw_context;
-       pt_pmu.pmu.event_init   = pt_event_init;
-       pt_pmu.pmu.add          = pt_event_add;
-       pt_pmu.pmu.del          = pt_event_del;
-       pt_pmu.pmu.start        = pt_event_start;
-       pt_pmu.pmu.stop         = pt_event_stop;
-       pt_pmu.pmu.read         = pt_event_read;
-       pt_pmu.pmu.setup_aux    = pt_buffer_setup_aux;
-       pt_pmu.pmu.free_aux     = pt_buffer_free_aux;
-       ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
-
-       return ret;
-}
-arch_initcall(pt_init);
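
For context on the registration above: the "intel_pt" PMU becomes a dynamic perf event source, so it is normally driven through perf_event_open() using the type value exported in sysfs (this is what the perf tool does for `perf record -e intel_pt//`). The sketch below is illustrative userspace code, not part of this file; the helper names are made up, and a real consumer would additionally set config bits and mmap an AUX buffer to actually collect trace data.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

/* Open an intel_pt event for @pid; returns a perf fd or -1 (sketch only). */
int open_intel_pt_event(pid_t pid)
{
        struct perf_event_attr attr;
        FILE *f;
        int type = -1;

        f = fopen("/sys/bus/event_source/devices/intel_pt/type", "r");
        if (f) {
                if (fscanf(f, "%d", &type) != 1)
                        type = -1;
                fclose(f);
        }
        if (type < 0)
                return -1;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;       /* dynamic type registered by pt_init() */

        return perf_event_open(&attr, pid, -1, -1, 0);
}
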
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
deleted file mode 100644 (file)
index 24a351a..0000000
+++ /dev/null
@@ -1,783 +0,0 @@
-/*
- * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
- * Copyright (C) 2013 Google, Inc., Stephane Eranian
- *
- * Intel RAPL interface is specified in the IA-32 Manual Vol3b
- * section 14.7.1 (September 2013)
- *
- * RAPL provides more controls than just reporting energy consumption;
- * here we only expose the free running energy consumption counters
- * (pp0, pkg, dram, pp1).
- *
- * Each of those counters increments in a power unit defined by the
- * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
- * but it can vary.
- *
- * Counter to rapl events mappings:
- *
- *  pp0 counter: consumption of all physical cores (power plane 0)
- *       event: rapl_energy_cores
- *    perf code: 0x1
- *
- *  pkg counter: consumption of the whole processor package
- *       event: rapl_energy_pkg
- *    perf code: 0x2
- *
- * dram counter: consumption of the dram domain (servers only)
- *       event: rapl_energy_dram
- *    perf code: 0x3
- *
- *  gpu counter: consumption of the builtin-gpu domain (client only)
- *       event: rapl_energy_gpu
- *    perf code: 0x4
- *
- * We manage those counters as free running (read-only). They may be
- * used simultaneously by other tools, such as turbostat.
- *
- * The events only support system-wide mode counting. There is no
- * sampling support because it does not make sense and is not
- * supported by the RAPL hardware.
- *
- * Because we want to avoid floating-point operations in the kernel,
- * the events are all reported in fixed point arithmetic (32.32).
- * Tools convert the counts to Joules, e.g. with ldexp(raw_count, -32),
- * and divide by the duration of the measurement to obtain Watts.
- */
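
The 32.32 fixed point convention described above is easy to get wrong in consumers, so here is a small illustrative userspace helper (not part of this file; the function names are made up for the example):

#include <math.h>

/* Convert a 32.32 fixed point energy count to Joules. */
static double rapl_count_to_joules(unsigned long long count)
{
        return ldexp((double)count, -32);       /* count * 2^-32 */
}

/* Average power in Watts over an interval of @seconds. */
static double rapl_count_to_watts(unsigned long long count, double seconds)
{
        return rapl_count_to_joules(count) / seconds;
}

The scale strings exported further down (2.3283064365386962890625e-10) are exactly 2^-32, i.e. roughly 0.23 nJ per increment, so count * scale yields Joules directly as well.
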
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/perf_event.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-/*
- * RAPL energy status counters
- */
-#define RAPL_IDX_PP0_NRG_STAT  0       /* all cores */
-#define INTEL_RAPL_PP0         0x1     /* pseudo-encoding */
-#define RAPL_IDX_PKG_NRG_STAT  1       /* entire package */
-#define INTEL_RAPL_PKG         0x2     /* pseudo-encoding */
-#define RAPL_IDX_RAM_NRG_STAT  2       /* DRAM */
-#define INTEL_RAPL_RAM         0x3     /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT  3       /* gpu */
-#define INTEL_RAPL_PP1         0x4     /* pseudo-encoding */
-
-#define NR_RAPL_DOMAINS         0x4
-static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
-       "pp0-core",
-       "package",
-       "dram",
-       "pp1-gpu",
-};
-
-/* Clients have PP0, PKG, PP1 */
-#define RAPL_IDX_CLN   (1<<RAPL_IDX_PP0_NRG_STAT|\
-                        1<<RAPL_IDX_PKG_NRG_STAT|\
-                        1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM */
-#define RAPL_IDX_SRV   (1<<RAPL_IDX_PP0_NRG_STAT|\
-                        1<<RAPL_IDX_PKG_NRG_STAT|\
-                        1<<RAPL_IDX_RAM_NRG_STAT)
-
-/* Haswell/Broadwell clients have PP0, PKG, RAM, PP1 */
-#define RAPL_IDX_HSW   (1<<RAPL_IDX_PP0_NRG_STAT|\
-                        1<<RAPL_IDX_PKG_NRG_STAT|\
-                        1<<RAPL_IDX_RAM_NRG_STAT|\
-                        1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Knights Landing has PKG, RAM */
-#define RAPL_IDX_KNL   (1<<RAPL_IDX_PKG_NRG_STAT|\
-                        1<<RAPL_IDX_RAM_NRG_STAT)
-
-/*
- * event code: LSB 8 bits, passed in attr->config
- * any other bit is reserved
- */
-#define RAPL_EVENT_MASK        0xFFULL
-
-#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)          \
-static ssize_t __rapl_##_var##_show(struct kobject *kobj,      \
-                               struct kobj_attribute *attr,    \
-                               char *page)                     \
-{                                                              \
-       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
-       return sprintf(page, _format "\n");                     \
-}                                                              \
-static struct kobj_attribute format_attr_##_var =              \
-       __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
-
-#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
-
-#define RAPL_EVENT_ATTR_STR(_name, v, str)                                     \
-static struct perf_pmu_events_attr event_attr_##v = {                          \
-       .attr           = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),     \
-       .id             = 0,                                                    \
-       .event_str      = str,                                                  \
-};
-
-struct rapl_pmu {
-       spinlock_t       lock;
-       int              n_active; /* number of active events */
-       struct list_head active_list;
-       struct pmu       *pmu; /* pointer to rapl_pmu_class */
-       ktime_t          timer_interval; /* in ktime_t unit */
-       struct hrtimer   hrtimer;
-};
-
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
-static struct pmu rapl_pmu_class;
-static cpumask_t rapl_cpu_mask;
-static int rapl_cntr_mask;
-
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
-
-static struct x86_pmu_quirk *rapl_quirks;
-static inline u64 rapl_read_counter(struct perf_event *event)
-{
-       u64 raw;
-       rdmsrl(event->hw.event_base, raw);
-       return raw;
-}
-
-#define rapl_add_quirk(func_)                                          \
-do {                                                                   \
-       static struct x86_pmu_quirk __quirk __initdata = {              \
-               .func = func_,                                          \
-       };                                                              \
-       __quirk.next = rapl_quirks;                                     \
-       rapl_quirks = &__quirk;                                         \
-} while (0)
-
-static inline u64 rapl_scale(u64 v, int cfg)
-{
-       if (cfg > NR_RAPL_DOMAINS) {
-               pr_warn("invalid domain %d, failed to scale data\n", cfg);
-               return v;
-       }
-       /*
-        * scale delta to smallest unit (1/2^32)
-        * users must then scale back: count * 2^-32 to get Joules,
-        * or use ldexp(count, -32).
-        * Watts = Joules/Time delta
-        */
-       return v << (32 - rapl_hw_unit[cfg - 1]);
-}
-
-static u64 rapl_event_update(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 prev_raw_count, new_raw_count;
-       s64 delta, sdelta;
-       int shift = RAPL_CNTR_WIDTH;
-
-again:
-       prev_raw_count = local64_read(&hwc->prev_count);
-       rdmsrl(event->hw.event_base, new_raw_count);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                           new_raw_count) != prev_raw_count) {
-               cpu_relax();
-               goto again;
-       }
-
-       /*
-        * Now we have the new raw value and have updated the prev
-        * timestamp already. We can now calculate the elapsed delta
-        * (event-)time and add that to the generic event.
-        *
-        * Careful, not all hw sign-extends above the physical width
-        * of the count.
-        */
-       delta = (new_raw_count << shift) - (prev_raw_count << shift);
-       delta >>= shift;
-
-       sdelta = rapl_scale(delta, event->hw.config);
-
-       local64_add(sdelta, &event->count);
-
-       return new_raw_count;
-}
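
The shift by (64 - RAPL_CNTR_WIDTH) above is the usual trick for computing a wrap-safe delta of a 32-bit free running counter. A standalone sketch of the same idea, illustrative only and mirroring what the kernel code does:

/* Wrap-safe delta of a 32-bit counter value held in 64-bit variables. */
static long long delta32(unsigned long long prev, unsigned long long now)
{
        int shift = 64 - 32;
        long long d = (long long)(now << shift) - (long long)(prev << shift);

        return d >> shift;      /* arithmetic shift restores the magnitude */
}

With prev = 0xffffff00 and now = 0x00000010, for example, this yields 0x110 even though now < prev, because the subtraction effectively happens modulo 2^32.
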
-
-static void rapl_start_hrtimer(struct rapl_pmu *pmu)
-{
-       hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
-                    HRTIMER_MODE_REL_PINNED);
-}
-
-static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
-{
-       hrtimer_cancel(&pmu->hrtimer);
-}
-
-static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
-{
-       struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
-       struct perf_event *event;
-       unsigned long flags;
-
-       if (!pmu->n_active)
-               return HRTIMER_NORESTART;
-
-       spin_lock_irqsave(&pmu->lock, flags);
-
-       list_for_each_entry(event, &pmu->active_list, active_entry) {
-               rapl_event_update(event);
-       }
-
-       spin_unlock_irqrestore(&pmu->lock, flags);
-
-       hrtimer_forward_now(hrtimer, pmu->timer_interval);
-
-       return HRTIMER_RESTART;
-}
-
-static void rapl_hrtimer_init(struct rapl_pmu *pmu)
-{
-       struct hrtimer *hr = &pmu->hrtimer;
-
-       hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       hr->function = rapl_hrtimer_handle;
-}
-
-static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
-                                  struct perf_event *event)
-{
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
-       event->hw.state = 0;
-
-       list_add_tail(&event->active_entry, &pmu->active_list);
-
-       local64_set(&event->hw.prev_count, rapl_read_counter(event));
-
-       pmu->n_active++;
-       if (pmu->n_active == 1)
-               rapl_start_hrtimer(pmu);
-}
-
-static void rapl_pmu_event_start(struct perf_event *event, int mode)
-{
-       struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
-       unsigned long flags;
-
-       spin_lock_irqsave(&pmu->lock, flags);
-       __rapl_pmu_event_start(pmu, event);
-       spin_unlock_irqrestore(&pmu->lock, flags);
-}
-
-static void rapl_pmu_event_stop(struct perf_event *event, int mode)
-{
-       struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned long flags;
-
-       spin_lock_irqsave(&pmu->lock, flags);
-
-       /* mark event as deactivated and stopped */
-       if (!(hwc->state & PERF_HES_STOPPED)) {
-               WARN_ON_ONCE(pmu->n_active <= 0);
-               pmu->n_active--;
-               if (pmu->n_active == 0)
-                       rapl_stop_hrtimer(pmu);
-
-               list_del(&event->active_entry);
-
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-       }
-
-       /* check if update of sw counter is necessary */
-       if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               /*
-                * Drain the remaining delta count out of an event
-                * that we are disabling:
-                */
-               rapl_event_update(event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-
-       spin_unlock_irqrestore(&pmu->lock, flags);
-}
-
-static int rapl_pmu_event_add(struct perf_event *event, int mode)
-{
-       struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned long flags;
-
-       spin_lock_irqsave(&pmu->lock, flags);
-
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-       if (mode & PERF_EF_START)
-               __rapl_pmu_event_start(pmu, event);
-
-       spin_unlock_irqrestore(&pmu->lock, flags);
-
-       return 0;
-}
-
-static void rapl_pmu_event_del(struct perf_event *event, int flags)
-{
-       rapl_pmu_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int rapl_pmu_event_init(struct perf_event *event)
-{
-       u64 cfg = event->attr.config & RAPL_EVENT_MASK;
-       int bit, msr, ret = 0;
-
-       /* only look at RAPL events */
-       if (event->attr.type != rapl_pmu_class.type)
-               return -ENOENT;
-
-       /* check only supported bits are set */
-       if (event->attr.config & ~RAPL_EVENT_MASK)
-               return -EINVAL;
-
-       /*
-        * check event is known (determines counter)
-        */
-       switch (cfg) {
-       case INTEL_RAPL_PP0:
-               bit = RAPL_IDX_PP0_NRG_STAT;
-               msr = MSR_PP0_ENERGY_STATUS;
-               break;
-       case INTEL_RAPL_PKG:
-               bit = RAPL_IDX_PKG_NRG_STAT;
-               msr = MSR_PKG_ENERGY_STATUS;
-               break;
-       case INTEL_RAPL_RAM:
-               bit = RAPL_IDX_RAM_NRG_STAT;
-               msr = MSR_DRAM_ENERGY_STATUS;
-               break;
-       case INTEL_RAPL_PP1:
-               bit = RAPL_IDX_PP1_NRG_STAT;
-               msr = MSR_PP1_ENERGY_STATUS;
-               break;
-       default:
-               return -EINVAL;
-       }
-       /* check event supported */
-       if (!(rapl_cntr_mask & (1 << bit)))
-               return -EINVAL;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       /* must be done before validate_group */
-       event->hw.event_base = msr;
-       event->hw.config = cfg;
-       event->hw.idx = bit;
-
-       return ret;
-}
-
-static void rapl_pmu_event_read(struct perf_event *event)
-{
-       rapl_event_update(event);
-}
-
-static ssize_t rapl_get_attr_cpumask(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
-
-static struct attribute *rapl_pmu_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group rapl_pmu_attr_group = {
-       .attrs = rapl_pmu_attrs,
-};
-
-RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
-RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
-RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
-RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
-
-RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
-
-/*
- * we report in 0.23 nJ (2^-32 J) increments regardless of the MSR unit
- */
-RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
-
-static struct attribute *rapl_events_srv_attr[] = {
-       EVENT_PTR(rapl_cores),
-       EVENT_PTR(rapl_pkg),
-       EVENT_PTR(rapl_ram),
-
-       EVENT_PTR(rapl_cores_unit),
-       EVENT_PTR(rapl_pkg_unit),
-       EVENT_PTR(rapl_ram_unit),
-
-       EVENT_PTR(rapl_cores_scale),
-       EVENT_PTR(rapl_pkg_scale),
-       EVENT_PTR(rapl_ram_scale),
-       NULL,
-};
-
-static struct attribute *rapl_events_cln_attr[] = {
-       EVENT_PTR(rapl_cores),
-       EVENT_PTR(rapl_pkg),
-       EVENT_PTR(rapl_gpu),
-
-       EVENT_PTR(rapl_cores_unit),
-       EVENT_PTR(rapl_pkg_unit),
-       EVENT_PTR(rapl_gpu_unit),
-
-       EVENT_PTR(rapl_cores_scale),
-       EVENT_PTR(rapl_pkg_scale),
-       EVENT_PTR(rapl_gpu_scale),
-       NULL,
-};
-
-static struct attribute *rapl_events_hsw_attr[] = {
-       EVENT_PTR(rapl_cores),
-       EVENT_PTR(rapl_pkg),
-       EVENT_PTR(rapl_gpu),
-       EVENT_PTR(rapl_ram),
-
-       EVENT_PTR(rapl_cores_unit),
-       EVENT_PTR(rapl_pkg_unit),
-       EVENT_PTR(rapl_gpu_unit),
-       EVENT_PTR(rapl_ram_unit),
-
-       EVENT_PTR(rapl_cores_scale),
-       EVENT_PTR(rapl_pkg_scale),
-       EVENT_PTR(rapl_gpu_scale),
-       EVENT_PTR(rapl_ram_scale),
-       NULL,
-};
-
-static struct attribute *rapl_events_knl_attr[] = {
-       EVENT_PTR(rapl_pkg),
-       EVENT_PTR(rapl_ram),
-
-       EVENT_PTR(rapl_pkg_unit),
-       EVENT_PTR(rapl_ram_unit),
-
-       EVENT_PTR(rapl_pkg_scale),
-       EVENT_PTR(rapl_ram_scale),
-       NULL,
-};
-
-static struct attribute_group rapl_pmu_events_group = {
-       .name = "events",
-       .attrs = NULL, /* patched at runtime */
-};
-
-DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
-static struct attribute *rapl_formats_attr[] = {
-       &format_attr_event.attr,
-       NULL,
-};
-
-static struct attribute_group rapl_pmu_format_group = {
-       .name = "format",
-       .attrs = rapl_formats_attr,
-};
-
-const struct attribute_group *rapl_attr_groups[] = {
-       &rapl_pmu_attr_group,
-       &rapl_pmu_format_group,
-       &rapl_pmu_events_group,
-       NULL,
-};
-
-static struct pmu rapl_pmu_class = {
-       .attr_groups    = rapl_attr_groups,
-       .task_ctx_nr    = perf_invalid_context, /* system-wide only */
-       .event_init     = rapl_pmu_event_init,
-       .add            = rapl_pmu_event_add, /* must have */
-       .del            = rapl_pmu_event_del, /* must have */
-       .start          = rapl_pmu_event_start,
-       .stop           = rapl_pmu_event_stop,
-       .read           = rapl_pmu_event_read,
-};
-
-static void rapl_cpu_exit(int cpu)
-{
-       struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-       int i, phys_id = topology_physical_package_id(cpu);
-       int target = -1;
-
-       /* find a new cpu on same package */
-       for_each_online_cpu(i) {
-               if (i == cpu)
-                       continue;
-               if (phys_id == topology_physical_package_id(i)) {
-                       target = i;
-                       break;
-               }
-       }
-       /*
-        * clear the cpu from the cpumask; if it was set and another cpu
-        * on the same package is still online, move to that cpu
-        */
-       if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
-               cpumask_set_cpu(target, &rapl_cpu_mask);
-
-       WARN_ON(cpumask_empty(&rapl_cpu_mask));
-       /*
-        * migrate events and context to new cpu
-        */
-       if (target >= 0)
-               perf_pmu_migrate_context(pmu->pmu, cpu, target);
-
-       /* cancel overflow polling timer for CPU */
-       rapl_stop_hrtimer(pmu);
-}
-
-static void rapl_cpu_init(int cpu)
-{
-       int i, phys_id = topology_physical_package_id(cpu);
-
-       /* check if phys_id is already covered */
-       for_each_cpu(i, &rapl_cpu_mask) {
-               if (phys_id == topology_physical_package_id(i))
-                       return;
-       }
-       /* was not found, so add it */
-       cpumask_set_cpu(cpu, &rapl_cpu_mask);
-}
-
-static __init void rapl_hsw_server_quirk(void)
-{
-       /*
-        * The DRAM domain on HSW servers has a fixed energy unit which can
-        * be different from the unit read from the power unit MSR.
-        * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
-        * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
-        */
-       rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
-}
-
-static int rapl_cpu_prepare(int cpu)
-{
-       struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-       int phys_id = topology_physical_package_id(cpu);
-       u64 ms;
-
-       if (pmu)
-               return 0;
-
-       if (phys_id < 0)
-               return -1;
-
-       pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
-       if (!pmu)
-               return -1;
-       spin_lock_init(&pmu->lock);
-
-       INIT_LIST_HEAD(&pmu->active_list);
-
-       pmu->pmu = &rapl_pmu_class;
-
-       /*
-        * Use a reference of 200 W (= 200 Joules/sec) for scaling the
-        * timeout so that counter overflows are not missed. The interval
-        * is divided by 2 to avoid lockstep (hence the 2 * 100).
-        * If the hw unit is 32, fall back to 2 ms.
-        */
-       if (rapl_hw_unit[0] < 32)
-               ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
-       else
-               ms = 2;
-
-       pmu->timer_interval = ms_to_ktime(ms);
-
-       rapl_hrtimer_init(pmu);
-
-       /* set RAPL pmu for this cpu for now */
-       per_cpu(rapl_pmu, cpu) = pmu;
-       per_cpu(rapl_pmu_to_free, cpu) = NULL;
-
-       return 0;
-}
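
To make the timeout formula above concrete: with the common energy unit of 2^-16 J (rapl_hw_unit[0] == 16), a 32-bit counter wraps after 2^32 * 2^-16 = 65536 J, which at the 200 W reference takes about 328 seconds. The code programs half of that: (1000 / (2 * 100)) * 2^(32 - 16 - 1) ms = 5 * 32768 ms ≈ 164 seconds, so an overflow cannot be missed even at a sustained 200 W.
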
-
-static void rapl_cpu_kfree(int cpu)
-{
-       struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
-
-       kfree(pmu);
-
-       per_cpu(rapl_pmu_to_free, cpu) = NULL;
-}
-
-static int rapl_cpu_dying(int cpu)
-{
-       struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-
-       if (!pmu)
-               return 0;
-
-       per_cpu(rapl_pmu, cpu) = NULL;
-
-       per_cpu(rapl_pmu_to_free, cpu) = pmu;
-
-       return 0;
-}
-
-static int rapl_cpu_notifier(struct notifier_block *self,
-                            unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               rapl_cpu_prepare(cpu);
-               break;
-       case CPU_STARTING:
-               rapl_cpu_init(cpu);
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               rapl_cpu_dying(cpu);
-               break;
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               rapl_cpu_kfree(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               rapl_cpu_exit(cpu);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static int rapl_check_hw_unit(void)
-{
-       u64 msr_rapl_power_unit_bits;
-       int i;
-
-       /* use rdmsrl_safe() to handle e.g. virtualized MSRs gracefully */
-       if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
-               return -1;
-       for (i = 0; i < NR_RAPL_DOMAINS; i++)
-               rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
-
-       return 0;
-}
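
The (msr >> 8) & 0x1F extraction above reads the Energy Status Units field of MSR_RAPL_POWER_UNIT (bits 12:8). A value of 16, typical of SandyBridge as noted in the file header, means one counter increment is 1/2^16 J, roughly 15.3 microJoules. The Haswell server quirk later forces the DRAM domain to a fixed unit of 16 regardless of what this MSR reports.
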
-
-static const struct x86_cpu_id rapl_cpu_match[] = {
-       [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
-       [1] = {},
-};
-
-static int __init rapl_pmu_init(void)
-{
-       struct rapl_pmu *pmu;
-       int cpu, ret;
-       struct x86_pmu_quirk *quirk;
-       int i;
-
-       /*
-        * check for Intel processor family 6
-        */
-       if (!x86_match_cpu(rapl_cpu_match))
-               return 0;
-
-       /* check supported CPU */
-       switch (boot_cpu_data.x86_model) {
-       case 42: /* Sandy Bridge */
-       case 58: /* Ivy Bridge */
-               rapl_cntr_mask = RAPL_IDX_CLN;
-               rapl_pmu_events_group.attrs = rapl_events_cln_attr;
-               break;
-       case 63: /* Haswell-Server */
-               rapl_add_quirk(rapl_hsw_server_quirk);
-               rapl_cntr_mask = RAPL_IDX_SRV;
-               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-               break;
-       case 60: /* Haswell */
-       case 69: /* Haswell-Celeron */
-       case 61: /* Broadwell */
-               rapl_cntr_mask = RAPL_IDX_HSW;
-               rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
-               break;
-       case 45: /* Sandy Bridge-EP */
-       case 62: /* IvyTown */
-               rapl_cntr_mask = RAPL_IDX_SRV;
-               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-               break;
-       case 87: /* Knights Landing */
-               rapl_add_quirk(rapl_hsw_server_quirk);
-               rapl_cntr_mask = RAPL_IDX_KNL;
-               rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-               break;
-
-       default:
-               /* unsupported */
-               return 0;
-       }
-       ret = rapl_check_hw_unit();
-       if (ret)
-               return ret;
-
-       /* run cpu model quirks */
-       for (quirk = rapl_quirks; quirk; quirk = quirk->next)
-               quirk->func();
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu) {
-               ret = rapl_cpu_prepare(cpu);
-               if (ret)
-                       goto out;
-               rapl_cpu_init(cpu);
-       }
-
-       __perf_cpu_notifier(rapl_cpu_notifier);
-
-       ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
-       if (WARN_ON(ret)) {
-               pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
-               cpu_notifier_register_done();
-               return -1;
-       }
-
-       pmu = __this_cpu_read(rapl_pmu);
-
-       pr_info("RAPL PMU detected,"
-               " API unit is 2^-32 Joules,"
-               " %d fixed counters"
-               " %llu ms ovfl timer\n",
-               hweight32(rapl_cntr_mask),
-               ktime_to_ms(pmu->timer_interval));
-       for (i = 0; i < NR_RAPL_DOMAINS; i++) {
-               if (rapl_cntr_mask & (1 << i)) {
-                       pr_info("hw unit of domain %s 2^-%d Joules\n",
-                               rapl_domain_names[i], rapl_hw_unit[i]);
-               }
-       }
-out:
-       cpu_notifier_register_done();
-
-       return 0;
-}
-device_initcall(rapl_pmu_init);
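
The PMU registered above shows up as "power" under /sys/bus/event_source/devices/, with the event, unit and scale strings defined earlier exposed in its events directory, so a system-wide reading can be taken with something like `perf stat -a -e power/energy-pkg/ sleep 1` (illustrative invocation; the exact event set available depends on the rapl_cntr_mask selected for the CPU model).
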
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
deleted file mode 100644 (file)
index 3bf41d4..0000000
+++ /dev/null
@@ -1,1401 +0,0 @@
-#include "perf_event_intel_uncore.h"
-
-static struct intel_uncore_type *empty_uncore[] = { NULL, };
-struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
-struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
-
-static bool pcidrv_registered;
-struct pci_driver *uncore_pci_driver;
-/* pci bus to socket mapping */
-DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
-struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
-struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
-
-static DEFINE_RAW_SPINLOCK(uncore_box_lock);
-/* mask of cpus that collect uncore events */
-static cpumask_t uncore_cpu_mask;
-
-/* constraint for the fixed counter */
-static struct event_constraint uncore_constraint_fixed =
-       EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
-struct event_constraint uncore_constraint_empty =
-       EVENT_CONSTRAINT(0, 0, 0);
-
-int uncore_pcibus_to_physid(struct pci_bus *bus)
-{
-       struct pci2phy_map *map;
-       int phys_id = -1;
-
-       raw_spin_lock(&pci2phy_map_lock);
-       list_for_each_entry(map, &pci2phy_map_head, list) {
-               if (map->segment == pci_domain_nr(bus)) {
-                       phys_id = map->pbus_to_physid[bus->number];
-                       break;
-               }
-       }
-       raw_spin_unlock(&pci2phy_map_lock);
-
-       return phys_id;
-}
-
-struct pci2phy_map *__find_pci2phy_map(int segment)
-{
-       struct pci2phy_map *map, *alloc = NULL;
-       int i;
-
-       lockdep_assert_held(&pci2phy_map_lock);
-
-lookup:
-       list_for_each_entry(map, &pci2phy_map_head, list) {
-               if (map->segment == segment)
-                       goto end;
-       }
-
-       if (!alloc) {
-               raw_spin_unlock(&pci2phy_map_lock);
-               alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
-               raw_spin_lock(&pci2phy_map_lock);
-
-               if (!alloc)
-                       return NULL;
-
-               goto lookup;
-       }
-
-       map = alloc;
-       alloc = NULL;
-       map->segment = segment;
-       for (i = 0; i < 256; i++)
-               map->pbus_to_physid[i] = -1;
-       list_add_tail(&map->list, &pci2phy_map_head);
-
-end:
-       kfree(alloc);
-       return map;
-}
-
-ssize_t uncore_event_show(struct kobject *kobj,
-                         struct kobj_attribute *attr, char *buf)
-{
-       struct uncore_event_desc *event =
-               container_of(attr, struct uncore_event_desc, attr);
-       return sprintf(buf, "%s", event->config);
-}
-
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
-{
-       return container_of(event->pmu, struct intel_uncore_pmu, pmu);
-}
-
-struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
-{
-       struct intel_uncore_box *box;
-
-       box = *per_cpu_ptr(pmu->box, cpu);
-       if (box)
-               return box;
-
-       raw_spin_lock(&uncore_box_lock);
-       /* Recheck in lock to handle races. */
-       if (*per_cpu_ptr(pmu->box, cpu))
-               goto out;
-       list_for_each_entry(box, &pmu->box_list, list) {
-               if (box->phys_id == topology_physical_package_id(cpu)) {
-                       atomic_inc(&box->refcnt);
-                       *per_cpu_ptr(pmu->box, cpu) = box;
-                       break;
-               }
-       }
-out:
-       raw_spin_unlock(&uncore_box_lock);
-
-       return *per_cpu_ptr(pmu->box, cpu);
-}
-
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
-{
-       /*
-        * The perf core schedules events on a per-cpu basis; uncore events
-        * are collected by one of the cpus inside a physical package.
-        */
-       return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
-}
-
-u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       u64 count;
-
-       rdmsrl(event->hw.event_base, count);
-
-       return count;
-}
-
-/*
- * generic get constraint function for shared match/mask registers.
- */
-struct event_constraint *
-uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_extra_reg *er;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-       unsigned long flags;
-       bool ok = false;
-
-       /*
-        * reg->alloc can be set due to existing state, so for fake box we
-        * need to ignore this, otherwise we might fail to allocate proper
-        * fake state for this extra reg constraint.
-        */
-       if (reg1->idx == EXTRA_REG_NONE ||
-           (!uncore_box_is_fake(box) && reg1->alloc))
-               return NULL;
-
-       er = &box->shared_regs[reg1->idx];
-       raw_spin_lock_irqsave(&er->lock, flags);
-       if (!atomic_read(&er->ref) ||
-           (er->config1 == reg1->config && er->config2 == reg2->config)) {
-               atomic_inc(&er->ref);
-               er->config1 = reg1->config;
-               er->config2 = reg2->config;
-               ok = true;
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       if (ok) {
-               if (!uncore_box_is_fake(box))
-                       reg1->alloc = 1;
-               return NULL;
-       }
-
-       return &uncore_constraint_empty;
-}
-
-void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_extra_reg *er;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-
-       /*
-        * Only put the constraint if the extra reg was actually allocated.
-        * This also takes care of events which do not use an extra shared reg.
-        *
-        * Also, if this is a fake box we shouldn't touch any event state
-        * (reg->alloc) and we don't care about leaving inconsistent box
-        * state either since it will be thrown out.
-        */
-       if (uncore_box_is_fake(box) || !reg1->alloc)
-               return;
-
-       er = &box->shared_regs[reg1->idx];
-       atomic_dec(&er->ref);
-       reg1->alloc = 0;
-}
-
-u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
-{
-       struct intel_uncore_extra_reg *er;
-       unsigned long flags;
-       u64 config;
-
-       er = &box->shared_regs[idx];
-
-       raw_spin_lock_irqsave(&er->lock, flags);
-       config = er->config;
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       return config;
-}
-
-static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       hwc->idx = idx;
-       hwc->last_tag = ++box->tags[idx];
-
-       if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
-               hwc->event_base = uncore_fixed_ctr(box);
-               hwc->config_base = uncore_fixed_ctl(box);
-               return;
-       }
-
-       hwc->config_base = uncore_event_ctl(box, hwc->idx);
-       hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
-}
-
-void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
-{
-       u64 prev_count, new_count, delta;
-       int shift;
-
-       if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
-               shift = 64 - uncore_fixed_ctr_bits(box);
-       else
-               shift = 64 - uncore_perf_ctr_bits(box);
-
-       /* the hrtimer might modify the previous event value */
-again:
-       prev_count = local64_read(&event->hw.prev_count);
-       new_count = uncore_read_counter(box, event);
-       if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
-               goto again;
-
-       delta = (new_count << shift) - (prev_count << shift);
-       delta >>= shift;
-
-       local64_add(delta, &event->count);
-}
-
-/*
- * The overflow interrupt is unavailable for SandyBridge-EP and is broken
- * for SandyBridge, so we use an hrtimer to poll the counters periodically
- * and avoid overflow.
- */
-static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
-{
-       struct intel_uncore_box *box;
-       struct perf_event *event;
-       unsigned long flags;
-       int bit;
-
-       box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
-       if (!box->n_active || box->cpu != smp_processor_id())
-               return HRTIMER_NORESTART;
-       /*
-        * disable local interrupts to prevent uncore_pmu_event_start/stop
-        * from interrupting the update process
-        */
-       local_irq_save(flags);
-
-       /*
-        * handle boxes with an active event list as opposed to active
-        * counters
-        */
-       list_for_each_entry(event, &box->active_list, active_entry) {
-               uncore_perf_event_update(box, event);
-       }
-
-       for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
-               uncore_perf_event_update(box, box->events[bit]);
-
-       local_irq_restore(flags);
-
-       hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
-       return HRTIMER_RESTART;
-}
-
-void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
-{
-       hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
-                     HRTIMER_MODE_REL_PINNED);
-}
-
-void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
-{
-       hrtimer_cancel(&box->hrtimer);
-}
-
-static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
-{
-       hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       box->hrtimer.function = uncore_pmu_hrtimer;
-}
-
-static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
-{
-       struct intel_uncore_box *box;
-       int i, size;
-
-       size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
-
-       box = kzalloc_node(size, GFP_KERNEL, node);
-       if (!box)
-               return NULL;
-
-       for (i = 0; i < type->num_shared_regs; i++)
-               raw_spin_lock_init(&box->shared_regs[i].lock);
-
-       uncore_pmu_init_hrtimer(box);
-       atomic_set(&box->refcnt, 1);
-       box->cpu = -1;
-       box->phys_id = -1;
-
-       /* set default hrtimer timeout */
-       box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
-
-       INIT_LIST_HEAD(&box->active_list);
-
-       return box;
-}
-
-/*
- * Use the uncore_pmu_event_init pmu event_init callback
- * as a detection point for uncore events.
- */
-static int uncore_pmu_event_init(struct perf_event *event);
-
-static bool is_uncore_event(struct perf_event *event)
-{
-       return event->pmu->event_init == uncore_pmu_event_init;
-}
-
-static int
-uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
-{
-       struct perf_event *event;
-       int n, max_count;
-
-       max_count = box->pmu->type->num_counters;
-       if (box->pmu->type->fixed_ctl)
-               max_count++;
-
-       if (box->n_events >= max_count)
-               return -EINVAL;
-
-       n = box->n_events;
-
-       if (is_uncore_event(leader)) {
-               box->event_list[n] = leader;
-               n++;
-       }
-
-       if (!dogrp)
-               return n;
-
-       list_for_each_entry(event, &leader->sibling_list, group_entry) {
-               if (!is_uncore_event(event) ||
-                   event->state <= PERF_EVENT_STATE_OFF)
-                       continue;
-
-               if (n >= max_count)
-                       return -EINVAL;
-
-               box->event_list[n] = event;
-               n++;
-       }
-       return n;
-}
-
-static struct event_constraint *
-uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_type *type = box->pmu->type;
-       struct event_constraint *c;
-
-       if (type->ops->get_constraint) {
-               c = type->ops->get_constraint(box, event);
-               if (c)
-                       return c;
-       }
-
-       if (event->attr.config == UNCORE_FIXED_EVENT)
-               return &uncore_constraint_fixed;
-
-       if (type->constraints) {
-               for_each_event_constraint(c, type->constraints) {
-                       if ((event->hw.config & c->cmask) == c->code)
-                               return c;
-               }
-       }
-
-       return &type->unconstrainted;
-}
-
-static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       if (box->pmu->type->ops->put_constraint)
-               box->pmu->type->ops->put_constraint(box, event);
-}
-
-static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
-{
-       unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
-       struct event_constraint *c;
-       int i, wmin, wmax, ret = 0;
-       struct hw_perf_event *hwc;
-
-       bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
-
-       for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-               c = uncore_get_event_constraint(box, box->event_list[i]);
-               box->event_constraint[i] = c;
-               wmin = min(wmin, c->weight);
-               wmax = max(wmax, c->weight);
-       }
-
-       /* fastpath, try to reuse previous register */
-       for (i = 0; i < n; i++) {
-               hwc = &box->event_list[i]->hw;
-               c = box->event_constraint[i];
-
-               /* never assigned */
-               if (hwc->idx == -1)
-                       break;
-
-               /* constraint still honored */
-               if (!test_bit(hwc->idx, c->idxmsk))
-                       break;
-
-               /* not already used */
-               if (test_bit(hwc->idx, used_mask))
-                       break;
-
-               __set_bit(hwc->idx, used_mask);
-               if (assign)
-                       assign[i] = hwc->idx;
-       }
-       /* slow path */
-       if (i != n)
-               ret = perf_assign_events(box->event_constraint, n,
-                                        wmin, wmax, n, assign);
-
-       if (!assign || ret) {
-               for (i = 0; i < n; i++)
-                       uncore_put_event_constraint(box, box->event_list[i]);
-       }
-       return ret ? -EINVAL : 0;
-}
-
-static void uncore_pmu_event_start(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       int idx = event->hw.idx;
-
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
-       if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
-               return;
-
-       event->hw.state = 0;
-       box->events[idx] = event;
-       box->n_active++;
-       __set_bit(idx, box->active_mask);
-
-       local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
-       uncore_enable_event(box, event);
-
-       if (box->n_active == 1) {
-               uncore_enable_box(box);
-               uncore_pmu_start_hrtimer(box);
-       }
-}
-
-static void uncore_pmu_event_stop(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
-               uncore_disable_event(box, event);
-               box->n_active--;
-               box->events[hwc->idx] = NULL;
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-
-               if (box->n_active == 0) {
-                       uncore_disable_box(box);
-                       uncore_pmu_cancel_hrtimer(box);
-               }
-       }
-
-       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               /*
-                * Drain the remaining delta count out of a event
-                * Drain the remaining delta count out of an event
-                */
-               uncore_perf_event_update(box, event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-}
-
-static int uncore_pmu_event_add(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       struct hw_perf_event *hwc = &event->hw;
-       int assign[UNCORE_PMC_IDX_MAX];
-       int i, n, ret;
-
-       if (!box)
-               return -ENODEV;
-
-       ret = n = uncore_collect_events(box, event, false);
-       if (ret < 0)
-               return ret;
-
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-       if (!(flags & PERF_EF_START))
-               hwc->state |= PERF_HES_ARCH;
-
-       ret = uncore_assign_events(box, assign, n);
-       if (ret)
-               return ret;
-
-       /* save events moving to new counters */
-       for (i = 0; i < box->n_events; i++) {
-               event = box->event_list[i];
-               hwc = &event->hw;
-
-               if (hwc->idx == assign[i] &&
-                       hwc->last_tag == box->tags[assign[i]])
-                       continue;
-               /*
-                * Ensure we don't accidentally enable a stopped
-                * counter simply because we rescheduled.
-                */
-               if (hwc->state & PERF_HES_STOPPED)
-                       hwc->state |= PERF_HES_ARCH;
-
-               uncore_pmu_event_stop(event, PERF_EF_UPDATE);
-       }
-
-       /* reprogram moved events into new counters */
-       for (i = 0; i < n; i++) {
-               event = box->event_list[i];
-               hwc = &event->hw;
-
-               if (hwc->idx != assign[i] ||
-                       hwc->last_tag != box->tags[assign[i]])
-                       uncore_assign_hw_event(box, event, assign[i]);
-               else if (i < box->n_events)
-                       continue;
-
-               if (hwc->state & PERF_HES_ARCH)
-                       continue;
-
-               uncore_pmu_event_start(event, 0);
-       }
-       box->n_events = n;
-
-       return 0;
-}
-
-static void uncore_pmu_event_del(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       int i;
-
-       uncore_pmu_event_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < box->n_events; i++) {
-               if (event == box->event_list[i]) {
-                       uncore_put_event_constraint(box, event);
-
-                       while (++i < box->n_events)
-                               box->event_list[i - 1] = box->event_list[i];
-
-                       --box->n_events;
-                       break;
-               }
-       }
-
-       event->hw.idx = -1;
-       event->hw.last_tag = ~0ULL;
-}
-
-void uncore_pmu_event_read(struct perf_event *event)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       uncore_perf_event_update(box, event);
-}
-
-/*
- * validation ensures the group can be loaded onto the
- * PMU if it was the only group available.
- */
-static int uncore_validate_group(struct intel_uncore_pmu *pmu,
-                               struct perf_event *event)
-{
-       struct perf_event *leader = event->group_leader;
-       struct intel_uncore_box *fake_box;
-       int ret = -EINVAL, n;
-
-       fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
-       if (!fake_box)
-               return -ENOMEM;
-
-       fake_box->pmu = pmu;
-       /*
-        * the event is not yet connected with its siblings;
-        * therefore we must first collect the existing siblings,
-        * then add the new event before we can simulate the
-        * scheduling
-        */
-       n = uncore_collect_events(fake_box, leader, true);
-       if (n < 0)
-               goto out;
-
-       fake_box->n_events = n;
-       n = uncore_collect_events(fake_box, event, false);
-       if (n < 0)
-               goto out;
-
-       fake_box->n_events = n;
-
-       ret = uncore_assign_events(fake_box, NULL, n);
-out:
-       kfree(fake_box);
-       return ret;
-}
-
-static int uncore_pmu_event_init(struct perf_event *event)
-{
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       struct hw_perf_event *hwc = &event->hw;
-       int ret;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       pmu = uncore_event_to_pmu(event);
-       /* no device found for this pmu */
-       if (pmu->func_id < 0)
-               return -ENOENT;
-
-       /*
-        * The uncore PMU measures at all privilege levels all the time,
-        * so it doesn't make sense to specify any exclude bits.
-        */
-       if (event->attr.exclude_user || event->attr.exclude_kernel ||
-                       event->attr.exclude_hv || event->attr.exclude_idle)
-               return -EINVAL;
-
-       /* Sampling not supported yet */
-       if (hwc->sample_period)
-               return -EINVAL;
-
-       /*
-        * Place all uncore events for a particular physical package
-        * onto a single cpu
-        */
-       if (event->cpu < 0)
-               return -EINVAL;
-       box = uncore_pmu_to_box(pmu, event->cpu);
-       if (!box || box->cpu < 0)
-               return -EINVAL;
-       event->cpu = box->cpu;
-
-       event->hw.idx = -1;
-       event->hw.last_tag = ~0ULL;
-       event->hw.extra_reg.idx = EXTRA_REG_NONE;
-       event->hw.branch_reg.idx = EXTRA_REG_NONE;
-
-       if (event->attr.config == UNCORE_FIXED_EVENT) {
-               /* no fixed counter */
-               if (!pmu->type->fixed_ctl)
-                       return -EINVAL;
-               /*
-                * if there is only one fixed counter, only the first pmu
-                * can access the fixed counter
-                */
-               if (pmu->type->single_fixed && pmu->pmu_idx > 0)
-                       return -EINVAL;
-
-               /* fixed counters have event field hardcoded to zero */
-               hwc->config = 0ULL;
-       } else {
-               hwc->config = event->attr.config & pmu->type->event_mask;
-               if (pmu->type->ops->hw_config) {
-                       ret = pmu->type->ops->hw_config(box, event);
-                       if (ret)
-                               return ret;
-               }
-       }
-
-       if (event->group_leader != event)
-               ret = uncore_validate_group(pmu, event);
-       else
-               ret = 0;
-
-       return ret;
-}
-
-static ssize_t uncore_get_attr_cpumask(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
-
-static struct attribute *uncore_pmu_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group uncore_pmu_attr_group = {
-       .attrs = uncore_pmu_attrs,
-};
-
-static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
-{
-       int ret;
-
-       if (!pmu->type->pmu) {
-               pmu->pmu = (struct pmu) {
-                       .attr_groups    = pmu->type->attr_groups,
-                       .task_ctx_nr    = perf_invalid_context,
-                       .event_init     = uncore_pmu_event_init,
-                       .add            = uncore_pmu_event_add,
-                       .del            = uncore_pmu_event_del,
-                       .start          = uncore_pmu_event_start,
-                       .stop           = uncore_pmu_event_stop,
-                       .read           = uncore_pmu_event_read,
-               };
-       } else {
-               pmu->pmu = *pmu->type->pmu;
-               pmu->pmu.attr_groups = pmu->type->attr_groups;
-       }
-
-       if (pmu->type->num_boxes == 1) {
-               if (strlen(pmu->type->name) > 0)
-                       sprintf(pmu->name, "uncore_%s", pmu->type->name);
-               else
-                       sprintf(pmu->name, "uncore");
-       } else {
-               sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
-                       pmu->pmu_idx);
-       }
-
-       ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
-       return ret;
-}
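
As with the RAPL PMU, each uncore PMU registered here appears under /sys/bus/event_source/devices/ using the "uncore", "uncore_<type>" or "uncore_<type>_<index>" name built above, and types that provide event_descs also expose an events directory, so counting works through the usual tooling (e.g. `perf stat -a -e uncore_<type>/<event>/ ...`); the exact PMU and event names depend on the CPU model and are shown here only as an illustration.
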
-
-static void __init uncore_type_exit(struct intel_uncore_type *type)
-{
-       int i;
-
-       for (i = 0; i < type->num_boxes; i++)
-               free_percpu(type->pmus[i].box);
-       kfree(type->pmus);
-       type->pmus = NULL;
-       kfree(type->events_group);
-       type->events_group = NULL;
-}
-
-static void __init uncore_types_exit(struct intel_uncore_type **types)
-{
-       int i;
-       for (i = 0; types[i]; i++)
-               uncore_type_exit(types[i]);
-}
-
-static int __init uncore_type_init(struct intel_uncore_type *type)
-{
-       struct intel_uncore_pmu *pmus;
-       struct attribute_group *attr_group;
-       struct attribute **attrs;
-       int i, j;
-
-       pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
-       if (!pmus)
-               return -ENOMEM;
-
-       type->pmus = pmus;
-
-       type->unconstrainted = (struct event_constraint)
-               __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
-                               0, type->num_counters, 0, 0);
-
-       for (i = 0; i < type->num_boxes; i++) {
-               pmus[i].func_id = -1;
-               pmus[i].pmu_idx = i;
-               pmus[i].type = type;
-               INIT_LIST_HEAD(&pmus[i].box_list);
-               pmus[i].box = alloc_percpu(struct intel_uncore_box *);
-               if (!pmus[i].box)
-                       goto fail;
-       }
-
-       if (type->event_descs) {
-               i = 0;
-               while (type->event_descs[i].attr.attr.name)
-                       i++;
-
-               attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
-                                       sizeof(*attr_group), GFP_KERNEL);
-               if (!attr_group)
-                       goto fail;
-
-               attrs = (struct attribute **)(attr_group + 1);
-               attr_group->name = "events";
-               attr_group->attrs = attrs;
-
-               for (j = 0; j < i; j++)
-                       attrs[j] = &type->event_descs[j].attr.attr;
-
-               type->events_group = attr_group;
-       }
-
-       type->pmu_group = &uncore_pmu_attr_group;
-       return 0;
-fail:
-       uncore_type_exit(type);
-       return -ENOMEM;
-}
-
-static int __init uncore_types_init(struct intel_uncore_type **types)
-{
-       int i, ret;
-
-       for (i = 0; types[i]; i++) {
-               ret = uncore_type_init(types[i]);
-               if (ret)
-                       goto fail;
-       }
-       return 0;
-fail:
-       while (--i >= 0)
-               uncore_type_exit(types[i]);
-       return ret;
-}
-
-/*
- * add a pci uncore device
- */
-static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       struct intel_uncore_type *type;
-       int phys_id;
-       bool first_box = false;
-
-       phys_id = uncore_pcibus_to_physid(pdev->bus);
-       if (phys_id < 0)
-               return -ENODEV;
-
-       if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
-               int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
-               uncore_extra_pci_dev[phys_id][idx] = pdev;
-               pci_set_drvdata(pdev, NULL);
-               return 0;
-       }
-
-       type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
-       box = uncore_alloc_box(type, NUMA_NO_NODE);
-       if (!box)
-               return -ENOMEM;
-
-       /*
-        * for performance monitoring unit with multiple boxes,
-        * each box has a different function id.
-        */
-       pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
-       /* Knights Landing uses a common PCI device ID for multiple instances of
-        * an uncore PMU device type. There is only one entry per device type in
-        * the knl_uncore_pci_ids table in spite of multiple devices present for
-        * some device types. Hence PCI device idx would be 0 for all devices.
-        * So increment pmu pointer to point to an unused array element.
-        */
-       if (boot_cpu_data.x86_model == 87)
-               while (pmu->func_id >= 0)
-                       pmu++;
-       if (pmu->func_id < 0)
-               pmu->func_id = pdev->devfn;
-       else
-               WARN_ON_ONCE(pmu->func_id != pdev->devfn);
-
-       box->phys_id = phys_id;
-       box->pci_dev = pdev;
-       box->pmu = pmu;
-       uncore_box_init(box);
-       pci_set_drvdata(pdev, box);
-
-       raw_spin_lock(&uncore_box_lock);
-       if (list_empty(&pmu->box_list))
-               first_box = true;
-       list_add_tail(&box->list, &pmu->box_list);
-       raw_spin_unlock(&uncore_box_lock);
-
-       if (first_box)
-               uncore_pmu_register(pmu);
-       return 0;
-}
-
-static void uncore_pci_remove(struct pci_dev *pdev)
-{
-       struct intel_uncore_box *box = pci_get_drvdata(pdev);
-       struct intel_uncore_pmu *pmu;
-       int i, cpu, phys_id;
-       bool last_box = false;
-
-       phys_id = uncore_pcibus_to_physid(pdev->bus);
-       box = pci_get_drvdata(pdev);
-       if (!box) {
-               for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
-                       if (uncore_extra_pci_dev[phys_id][i] == pdev) {
-                               uncore_extra_pci_dev[phys_id][i] = NULL;
-                               break;
-                       }
-               }
-               WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
-               return;
-       }
-
-       pmu = box->pmu;
-       if (WARN_ON_ONCE(phys_id != box->phys_id))
-               return;
-
-       pci_set_drvdata(pdev, NULL);
-
-       raw_spin_lock(&uncore_box_lock);
-       list_del(&box->list);
-       if (list_empty(&pmu->box_list))
-               last_box = true;
-       raw_spin_unlock(&uncore_box_lock);
-
-       for_each_possible_cpu(cpu) {
-               if (*per_cpu_ptr(pmu->box, cpu) == box) {
-                       *per_cpu_ptr(pmu->box, cpu) = NULL;
-                       atomic_dec(&box->refcnt);
-               }
-       }
-
-       WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
-       kfree(box);
-
-       if (last_box)
-               perf_pmu_unregister(&pmu->pmu);
-}
-
-static int __init uncore_pci_init(void)
-{
-       int ret;
-
-       switch (boot_cpu_data.x86_model) {
-       case 45: /* Sandy Bridge-EP */
-               ret = snbep_uncore_pci_init();
-               break;
-       case 62: /* Ivy Bridge-EP */
-               ret = ivbep_uncore_pci_init();
-               break;
-       case 63: /* Haswell-EP */
-               ret = hswep_uncore_pci_init();
-               break;
-       case 79: /* BDX-EP */
-       case 86: /* BDX-DE */
-               ret = bdx_uncore_pci_init();
-               break;
-       case 42: /* Sandy Bridge */
-               ret = snb_uncore_pci_init();
-               break;
-       case 58: /* Ivy Bridge */
-               ret = ivb_uncore_pci_init();
-               break;
-       case 60: /* Haswell */
-       case 69: /* Haswell Celeron */
-               ret = hsw_uncore_pci_init();
-               break;
-       case 61: /* Broadwell */
-               ret = bdw_uncore_pci_init();
-               break;
-       case 87: /* Knights Landing */
-               ret = knl_uncore_pci_init();
-               break;
-       case 94: /* SkyLake */
-               ret = skl_uncore_pci_init();
-               break;
-       default:
-               return 0;
-       }
-
-       if (ret)
-               return ret;
-
-       ret = uncore_types_init(uncore_pci_uncores);
-       if (ret)
-               return ret;
-
-       uncore_pci_driver->probe = uncore_pci_probe;
-       uncore_pci_driver->remove = uncore_pci_remove;
-
-       ret = pci_register_driver(uncore_pci_driver);
-       if (ret == 0)
-               pcidrv_registered = true;
-       else
-               uncore_types_exit(uncore_pci_uncores);
-
-       return ret;
-}
-
-static void __init uncore_pci_exit(void)
-{
-       if (pcidrv_registered) {
-               pcidrv_registered = false;
-               pci_unregister_driver(uncore_pci_driver);
-               uncore_types_exit(uncore_pci_uncores);
-       }
-}
-
-/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
-static LIST_HEAD(boxes_to_free);
-
-static void uncore_kfree_boxes(void)
-{
-       struct intel_uncore_box *box;
-
-       while (!list_empty(&boxes_to_free)) {
-               box = list_entry(boxes_to_free.next,
-                                struct intel_uncore_box, list);
-               list_del(&box->list);
-               kfree(box);
-       }
-}
-
-static void uncore_cpu_dying(int cpu)
-{
-       struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       int i, j;
-
-       for (i = 0; uncore_msr_uncores[i]; i++) {
-               type = uncore_msr_uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       box = *per_cpu_ptr(pmu->box, cpu);
-                       *per_cpu_ptr(pmu->box, cpu) = NULL;
-                       if (box && atomic_dec_and_test(&box->refcnt))
-                               list_add(&box->list, &boxes_to_free);
-               }
-       }
-}
-
-static int uncore_cpu_starting(int cpu)
-{
-       struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box, *exist;
-       int i, j, k, phys_id;
-
-       phys_id = topology_physical_package_id(cpu);
-
-       for (i = 0; uncore_msr_uncores[i]; i++) {
-               type = uncore_msr_uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       box = *per_cpu_ptr(pmu->box, cpu);
-                       /* called by uncore_cpu_init? */
-                       if (box && box->phys_id >= 0) {
-                               uncore_box_init(box);
-                               continue;
-                       }
-
-                       for_each_online_cpu(k) {
-                               exist = *per_cpu_ptr(pmu->box, k);
-                               if (exist && exist->phys_id == phys_id) {
-                                       atomic_inc(&exist->refcnt);
-                                       *per_cpu_ptr(pmu->box, cpu) = exist;
-                                       if (box) {
-                                               list_add(&box->list,
-                                                        &boxes_to_free);
-                                               box = NULL;
-                                       }
-                                       break;
-                               }
-                       }
-
-                       if (box) {
-                               box->phys_id = phys_id;
-                               uncore_box_init(box);
-                       }
-               }
-       }
-       return 0;
-}
-
-static int uncore_cpu_prepare(int cpu, int phys_id)
-{
-       struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       int i, j;
-
-       for (i = 0; uncore_msr_uncores[i]; i++) {
-               type = uncore_msr_uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       if (pmu->func_id < 0)
-                               pmu->func_id = j;
-
-                       box = uncore_alloc_box(type, cpu_to_node(cpu));
-                       if (!box)
-                               return -ENOMEM;
-
-                       box->pmu = pmu;
-                       box->phys_id = phys_id;
-                       *per_cpu_ptr(pmu->box, cpu) = box;
-               }
-       }
-       return 0;
-}
-
-static void
-uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
-{
-       struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       int i, j;
-
-       for (i = 0; uncores[i]; i++) {
-               type = uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       if (old_cpu < 0)
-                               box = uncore_pmu_to_box(pmu, new_cpu);
-                       else
-                               box = uncore_pmu_to_box(pmu, old_cpu);
-                       if (!box)
-                               continue;
-
-                       if (old_cpu < 0) {
-                               WARN_ON_ONCE(box->cpu != -1);
-                               box->cpu = new_cpu;
-                               continue;
-                       }
-
-                       WARN_ON_ONCE(box->cpu != old_cpu);
-                       if (new_cpu >= 0) {
-                               uncore_pmu_cancel_hrtimer(box);
-                               perf_pmu_migrate_context(&pmu->pmu,
-                                               old_cpu, new_cpu);
-                               box->cpu = new_cpu;
-                       } else {
-                               box->cpu = -1;
-                       }
-               }
-       }
-}
-
-static void uncore_event_exit_cpu(int cpu)
-{
-       int i, phys_id, target;
-
-       /* if exiting cpu is used for collecting uncore events */
-       if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-               return;
-
-       /* find a new cpu to collect uncore events */
-       phys_id = topology_physical_package_id(cpu);
-       target = -1;
-       for_each_online_cpu(i) {
-               if (i == cpu)
-                       continue;
-               if (phys_id == topology_physical_package_id(i)) {
-                       target = i;
-                       break;
-               }
-       }
-
-       /* migrate uncore events to the new cpu */
-       if (target >= 0)
-               cpumask_set_cpu(target, &uncore_cpu_mask);
-
-       uncore_change_context(uncore_msr_uncores, cpu, target);
-       uncore_change_context(uncore_pci_uncores, cpu, target);
-}
-
-static void uncore_event_init_cpu(int cpu)
-{
-       int i, phys_id;
-
-       phys_id = topology_physical_package_id(cpu);
-       for_each_cpu(i, &uncore_cpu_mask) {
-               if (phys_id == topology_physical_package_id(i))
-                       return;
-       }
-
-       cpumask_set_cpu(cpu, &uncore_cpu_mask);
-
-       uncore_change_context(uncore_msr_uncores, -1, cpu);
-       uncore_change_context(uncore_pci_uncores, -1, cpu);
-}
-
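uncore_event_init_cpu() and uncore_event_exit_cpu() above implement one policy: exactly one online CPU per physical package sits in uncore_cpu_mask and collects all uncore events for that package, and when it goes offline a sibling from the same package takes over. A standalone sketch of the selection step, using a fixed array in place of topology_physical_package_id():

    #include <stdio.h>
    #include <stdbool.h>

    /* hypothetical topology: package id of each of 8 online CPUs */
    static const int pkg_of_cpu[8] = { 0, 0, 0, 0, 1, 1, 1, 1 };

    int main(void)
    {
            bool pkg_has_reader[2] = { false, false };

            for (int cpu = 0; cpu < 8; cpu++) {
                    int pkg = pkg_of_cpu[cpu];

                    if (pkg_has_reader[pkg])
                            continue;       /* this package already has a collecting CPU */
                    pkg_has_reader[pkg] = true;
                    printf("cpu %d collects uncore events for package %d\n", cpu, pkg);
            }
            return 0;
    }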
-static int uncore_cpu_notifier(struct notifier_block *self,
-                              unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-
-       /* allocate/free data structure for uncore box */
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               uncore_cpu_prepare(cpu, -1);
-               break;
-       case CPU_STARTING:
-               uncore_cpu_starting(cpu);
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               uncore_cpu_dying(cpu);
-               break;
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               uncore_kfree_boxes();
-               break;
-       default:
-               break;
-       }
-
-       /* select the cpu that collects uncore events */
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_DOWN_FAILED:
-       case CPU_STARTING:
-               uncore_event_init_cpu(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               uncore_event_exit_cpu(cpu);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block uncore_cpu_nb = {
-       .notifier_call  = uncore_cpu_notifier,
-       /*
-        * to migrate uncore events, our notifier should be executed
-        * before perf core's notifier.
-        */
-       .priority       = CPU_PRI_PERF + 1,
-};
-
-static void __init uncore_cpu_setup(void *dummy)
-{
-       uncore_cpu_starting(smp_processor_id());
-}
-
-static int __init uncore_cpu_init(void)
-{
-       int ret;
-
-       switch (boot_cpu_data.x86_model) {
-       case 26: /* Nehalem */
-       case 30:
-       case 37: /* Westmere */
-       case 44:
-               nhm_uncore_cpu_init();
-               break;
-       case 42: /* Sandy Bridge */
-       case 58: /* Ivy Bridge */
-       case 60: /* Haswell */
-       case 69: /* Haswell */
-       case 70: /* Haswell */
-       case 61: /* Broadwell */
-       case 71: /* Broadwell */
-               snb_uncore_cpu_init();
-               break;
-       case 45: /* Sandy Bridge-EP */
-               snbep_uncore_cpu_init();
-               break;
-       case 46: /* Nehalem-EX */
-       case 47: /* Westmere-EX aka. Xeon E7 */
-               nhmex_uncore_cpu_init();
-               break;
-       case 62: /* Ivy Bridge-EP */
-               ivbep_uncore_cpu_init();
-               break;
-       case 63: /* Haswell-EP */
-               hswep_uncore_cpu_init();
-               break;
-       case 79: /* BDX-EP */
-       case 86: /* BDX-DE */
-               bdx_uncore_cpu_init();
-               break;
-       case 87: /* Knights Landing */
-               knl_uncore_cpu_init();
-               break;
-       default:
-               return 0;
-       }
-
-       ret = uncore_types_init(uncore_msr_uncores);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-static int __init uncore_pmus_register(void)
-{
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_type *type;
-       int i, j;
-
-       for (i = 0; uncore_msr_uncores[i]; i++) {
-               type = uncore_msr_uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       uncore_pmu_register(pmu);
-               }
-       }
-
-       return 0;
-}
-
-static void __init uncore_cpumask_init(void)
-{
-       int cpu;
-
-       /*
-        * only invoke once from msr or pci init code
-        */
-       if (!cpumask_empty(&uncore_cpu_mask))
-               return;
-
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu) {
-               int i, phys_id = topology_physical_package_id(cpu);
-
-               for_each_cpu(i, &uncore_cpu_mask) {
-                       if (phys_id == topology_physical_package_id(i)) {
-                               phys_id = -1;
-                               break;
-                       }
-               }
-               if (phys_id < 0)
-                       continue;
-
-               uncore_cpu_prepare(cpu, phys_id);
-               uncore_event_init_cpu(cpu);
-       }
-       on_each_cpu(uncore_cpu_setup, NULL, 1);
-
-       __register_cpu_notifier(&uncore_cpu_nb);
-
-       cpu_notifier_register_done();
-}
-
-
-static int __init intel_uncore_init(void)
-{
-       int ret;
-
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
-               return -ENODEV;
-
-       if (cpu_has_hypervisor)
-               return -ENODEV;
-
-       ret = uncore_pci_init();
-       if (ret)
-               goto fail;
-       ret = uncore_cpu_init();
-       if (ret) {
-               uncore_pci_exit();
-               goto fail;
-       }
-       uncore_cpumask_init();
-
-       uncore_pmus_register();
-       return 0;
-fail:
-       return ret;
-}
-device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
deleted file mode 100644 (file)
index a7086b8..0000000
+++ /dev/null
@@ -1,357 +0,0 @@
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/perf_event.h>
-#include "perf_event.h"
-
-#define UNCORE_PMU_NAME_LEN            32
-#define UNCORE_PMU_HRTIMER_INTERVAL    (60LL * NSEC_PER_SEC)
-#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
-
-#define UNCORE_FIXED_EVENT             0xff
-#define UNCORE_PMC_IDX_MAX_GENERIC     8
-#define UNCORE_PMC_IDX_FIXED           UNCORE_PMC_IDX_MAX_GENERIC
-#define UNCORE_PMC_IDX_MAX             (UNCORE_PMC_IDX_FIXED + 1)
-
-#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
-#define UNCORE_PCI_DEV_TYPE(data)      ((data >> 8) & 0xff)
-#define UNCORE_PCI_DEV_IDX(data)       (data & 0xff)
-#define UNCORE_EXTRA_PCI_DEV           0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX       3
-
-/* support up to 8 sockets */
-#define UNCORE_SOCKET_MAX              8
-
-#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
-
-struct intel_uncore_ops;
-struct intel_uncore_pmu;
-struct intel_uncore_box;
-struct uncore_event_desc;
-
-struct intel_uncore_type {
-       const char *name;
-       int num_counters;
-       int num_boxes;
-       int perf_ctr_bits;
-       int fixed_ctr_bits;
-       unsigned perf_ctr;
-       unsigned event_ctl;
-       unsigned event_mask;
-       unsigned fixed_ctr;
-       unsigned fixed_ctl;
-       unsigned box_ctl;
-       unsigned msr_offset;
-       unsigned num_shared_regs:8;
-       unsigned single_fixed:1;
-       unsigned pair_ctr_ctl:1;
-       unsigned *msr_offsets;
-       struct event_constraint unconstrainted;
-       struct event_constraint *constraints;
-       struct intel_uncore_pmu *pmus;
-       struct intel_uncore_ops *ops;
-       struct uncore_event_desc *event_descs;
-       const struct attribute_group *attr_groups[4];
-       struct pmu *pmu; /* for custom pmu ops */
-};
-
-#define pmu_group attr_groups[0]
-#define format_group attr_groups[1]
-#define events_group attr_groups[2]
-
-struct intel_uncore_ops {
-       void (*init_box)(struct intel_uncore_box *);
-       void (*disable_box)(struct intel_uncore_box *);
-       void (*enable_box)(struct intel_uncore_box *);
-       void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
-       void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
-       u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
-       int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
-       struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
-                                                  struct perf_event *);
-       void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
-};
-
-struct intel_uncore_pmu {
-       struct pmu pmu;
-       char name[UNCORE_PMU_NAME_LEN];
-       int pmu_idx;
-       int func_id;
-       struct intel_uncore_type *type;
-       struct intel_uncore_box ** __percpu box;
-       struct list_head box_list;
-};
-
-struct intel_uncore_extra_reg {
-       raw_spinlock_t lock;
-       u64 config, config1, config2;
-       atomic_t ref;
-};
-
-struct intel_uncore_box {
-       int phys_id;
-       int n_active;   /* number of active events */
-       int n_events;
-       int cpu;        /* cpu to collect events */
-       unsigned long flags;
-       atomic_t refcnt;
-       struct perf_event *events[UNCORE_PMC_IDX_MAX];
-       struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
-       struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
-       unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
-       u64 tags[UNCORE_PMC_IDX_MAX];
-       struct pci_dev *pci_dev;
-       struct intel_uncore_pmu *pmu;
-       u64 hrtimer_duration; /* hrtimer timeout for this box */
-       struct hrtimer hrtimer;
-       struct list_head list;
-       struct list_head active_list;
-       void *io_addr;
-       struct intel_uncore_extra_reg shared_regs[0];
-};
-
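struct intel_uncore_box above ends with a zero-length shared_regs[] member, the flexible-array idiom: the box and its num_shared_regs trailing intel_uncore_extra_reg entries come out of a single allocation. The sketch below shows the idiom in isolation with plain malloc() and a toy struct; it is not a claim about what uncore_alloc_box() does beyond using this layout:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct extra_reg { unsigned long config; };

    struct box {
            int id;
            struct extra_reg shared_regs[];         /* flexible array member */
    };

    int main(void)
    {
            size_t nregs = 4;       /* e.g. one slot per shared register */
            size_t size = sizeof(struct box) + nregs * sizeof(struct extra_reg);
            struct box *b = malloc(size);

            if (!b)
                    return 1;
            memset(b, 0, size);
            b->shared_regs[3].config = 0xabc;       /* trailing storage is directly indexable */
            printf("config = 0x%lx\n", b->shared_regs[3].config);
            free(b);
            return 0;
    }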
-#define UNCORE_BOX_FLAG_INITIATED      0
-
-struct uncore_event_desc {
-       struct kobj_attribute attr;
-       const char *config;
-};
-
-struct pci2phy_map {
-       struct list_head list;
-       int segment;
-       int pbus_to_physid[256];
-};
-
-int uncore_pcibus_to_physid(struct pci_bus *bus);
-struct pci2phy_map *__find_pci2phy_map(int segment);
-
-ssize_t uncore_event_show(struct kobject *kobj,
-                         struct kobj_attribute *attr, char *buf);
-
-#define INTEL_UNCORE_EVENT_DESC(_name, _config)                        \
-{                                                              \
-       .attr   = __ATTR(_name, 0444, uncore_event_show, NULL), \
-       .config = _config,                                      \
-}
-
-#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)                        \
-static ssize_t __uncore_##_var##_show(struct kobject *kobj,            \
-                               struct kobj_attribute *attr,            \
-                               char *page)                             \
-{                                                                      \
-       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                     \
-       return sprintf(page, _format "\n");                             \
-}                                                                      \
-static struct kobj_attribute format_attr_##_var =                      \
-       __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
-
-static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
-{
-       return box->pmu->type->box_ctl;
-}
-
-static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
-{
-       return box->pmu->type->fixed_ctl;
-}
-
-static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
-{
-       return box->pmu->type->fixed_ctr;
-}
-
-static inline
-unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
-{
-       return idx * 4 + box->pmu->type->event_ctl;
-}
-
-static inline
-unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
-{
-       return idx * 8 + box->pmu->type->perf_ctr;
-}
-
-static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
-{
-       struct intel_uncore_pmu *pmu = box->pmu;
-       return pmu->type->msr_offsets ?
-               pmu->type->msr_offsets[pmu->pmu_idx] :
-               pmu->type->msr_offset * pmu->pmu_idx;
-}
-
-static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
-{
-       if (!box->pmu->type->box_ctl)
-               return 0;
-       return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
-}
-
-static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
-{
-       if (!box->pmu->type->fixed_ctl)
-               return 0;
-       return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
-}
-
-static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
-{
-       return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
-}
-
-static inline
-unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
-{
-       return box->pmu->type->event_ctl +
-               (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
-               uncore_msr_box_offset(box);
-}
-
-static inline
-unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
-{
-       return box->pmu->type->perf_ctr +
-               (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
-               uncore_msr_box_offset(box);
-}
-
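All the uncore_msr_*() helpers above compute an MSR address the same way: type-wide base register, plus a per-box offset (an explicit msr_offsets[] entry or msr_offset * pmu_idx), plus the counter index spaced by 1 or 2 depending on pair_ctr_ctl. A short sketch that just reproduces the arithmetic with invented base and offset values:

    #include <stdio.h>

    /* invented register layout, purely to illustrate the arithmetic */
    #define BASE_EVENT_CTL  0xd10
    #define MSR_OFFSET      0x20

    static unsigned int event_ctl(unsigned int box_idx, unsigned int ctr_idx,
                                  int pair_ctr_ctl)
    {
            unsigned int box_off = MSR_OFFSET * box_idx;    /* uncore_msr_box_offset() */

            /* control and counter MSRs are interleaved when pair_ctr_ctl is set */
            return BASE_EVENT_CTL + (pair_ctr_ctl ? 2 * ctr_idx : ctr_idx) + box_off;
    }

    int main(void)
    {
            printf("box 2, counter 3: 0x%x\n", event_ctl(2, 3, 1));
            return 0;
    }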
-static inline
-unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
-{
-       if (box->pci_dev)
-               return uncore_pci_fixed_ctl(box);
-       else
-               return uncore_msr_fixed_ctl(box);
-}
-
-static inline
-unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
-{
-       if (box->pci_dev)
-               return uncore_pci_fixed_ctr(box);
-       else
-               return uncore_msr_fixed_ctr(box);
-}
-
-static inline
-unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
-{
-       if (box->pci_dev)
-               return uncore_pci_event_ctl(box, idx);
-       else
-               return uncore_msr_event_ctl(box, idx);
-}
-
-static inline
-unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
-{
-       if (box->pci_dev)
-               return uncore_pci_perf_ctr(box, idx);
-       else
-               return uncore_msr_perf_ctr(box, idx);
-}
-
-static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
-{
-       return box->pmu->type->perf_ctr_bits;
-}
-
-static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
-{
-       return box->pmu->type->fixed_ctr_bits;
-}
-
-static inline int uncore_num_counters(struct intel_uncore_box *box)
-{
-       return box->pmu->type->num_counters;
-}
-
-static inline void uncore_disable_box(struct intel_uncore_box *box)
-{
-       if (box->pmu->type->ops->disable_box)
-               box->pmu->type->ops->disable_box(box);
-}
-
-static inline void uncore_enable_box(struct intel_uncore_box *box)
-{
-       if (box->pmu->type->ops->enable_box)
-               box->pmu->type->ops->enable_box(box);
-}
-
-static inline void uncore_disable_event(struct intel_uncore_box *box,
-                               struct perf_event *event)
-{
-       box->pmu->type->ops->disable_event(box, event);
-}
-
-static inline void uncore_enable_event(struct intel_uncore_box *box,
-                               struct perf_event *event)
-{
-       box->pmu->type->ops->enable_event(box, event);
-}
-
-static inline u64 uncore_read_counter(struct intel_uncore_box *box,
-                               struct perf_event *event)
-{
-       return box->pmu->type->ops->read_counter(box, event);
-}
-
-static inline void uncore_box_init(struct intel_uncore_box *box)
-{
-       if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
-               if (box->pmu->type->ops->init_box)
-                       box->pmu->type->ops->init_box(box);
-       }
-}
-
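uncore_box_init() above relies on test_and_set_bit() returning the previous bit value atomically, so the type's init_box callback runs at most once per box no matter how many CPUs race into it. A userspace analogue of the same once-only pattern, using C11 atomics rather than the kernel bitops:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_flag initiated = ATOMIC_FLAG_INIT;

    static void box_init_once(void)
    {
            /* only the first caller observes the flag as previously clear */
            if (!atomic_flag_test_and_set(&initiated))
                    printf("init_box callback runs exactly once\n");
    }

    int main(void)
    {
            box_init_once();
            box_init_once();        /* second call is a no-op */
            return 0;
    }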
-static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
-{
-       return (box->phys_id < 0);
-}
-
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
-struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
-u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
-void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
-void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
-void uncore_pmu_event_read(struct perf_event *event);
-void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
-struct event_constraint *
-uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
-void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
-u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
-
-extern struct intel_uncore_type **uncore_msr_uncores;
-extern struct intel_uncore_type **uncore_pci_uncores;
-extern struct pci_driver *uncore_pci_driver;
-extern raw_spinlock_t pci2phy_map_lock;
-extern struct list_head pci2phy_map_head;
-extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
-extern struct event_constraint uncore_constraint_empty;
-
-/* perf_event_intel_uncore_snb.c */
-int snb_uncore_pci_init(void);
-int ivb_uncore_pci_init(void);
-int hsw_uncore_pci_init(void);
-int bdw_uncore_pci_init(void);
-int skl_uncore_pci_init(void);
-void snb_uncore_cpu_init(void);
-void nhm_uncore_cpu_init(void);
-int snb_pci2phy_map_init(int devid);
-
-/* perf_event_intel_uncore_snbep.c */
-int snbep_uncore_pci_init(void);
-void snbep_uncore_cpu_init(void);
-int ivbep_uncore_pci_init(void);
-void ivbep_uncore_cpu_init(void);
-int hswep_uncore_pci_init(void);
-void hswep_uncore_cpu_init(void);
-int bdx_uncore_pci_init(void);
-void bdx_uncore_cpu_init(void);
-int knl_uncore_pci_init(void);
-void knl_uncore_cpu_init(void);
-
-/* perf_event_intel_uncore_nhmex.c */
-void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
deleted file mode 100644 (file)
index 2749965..0000000
+++ /dev/null
@@ -1,1221 +0,0 @@
-/* Nehalem-EX/Westmere-EX uncore support */
-#include "perf_event_intel_uncore.h"
-
-/* NHM-EX event control */
-#define NHMEX_PMON_CTL_EV_SEL_MASK     0x000000ff
-#define NHMEX_PMON_CTL_UMASK_MASK      0x0000ff00
-#define NHMEX_PMON_CTL_EN_BIT0         (1 << 0)
-#define NHMEX_PMON_CTL_EDGE_DET                (1 << 18)
-#define NHMEX_PMON_CTL_PMI_EN          (1 << 20)
-#define NHMEX_PMON_CTL_EN_BIT22                (1 << 22)
-#define NHMEX_PMON_CTL_INVERT          (1 << 23)
-#define NHMEX_PMON_CTL_TRESH_MASK      0xff000000
-#define NHMEX_PMON_RAW_EVENT_MASK      (NHMEX_PMON_CTL_EV_SEL_MASK | \
-                                        NHMEX_PMON_CTL_UMASK_MASK | \
-                                        NHMEX_PMON_CTL_EDGE_DET | \
-                                        NHMEX_PMON_CTL_INVERT | \
-                                        NHMEX_PMON_CTL_TRESH_MASK)
-
-/* NHM-EX Ubox */
-#define NHMEX_U_MSR_PMON_GLOBAL_CTL            0xc00
-#define NHMEX_U_MSR_PMON_CTR                   0xc11
-#define NHMEX_U_MSR_PMON_EV_SEL                        0xc10
-
-#define NHMEX_U_PMON_GLOBAL_EN                 (1 << 0)
-#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL       0x0000001e
-#define NHMEX_U_PMON_GLOBAL_EN_ALL             (1 << 28)
-#define NHMEX_U_PMON_GLOBAL_RST_ALL            (1 << 29)
-#define NHMEX_U_PMON_GLOBAL_FRZ_ALL            (1 << 31)
-
-#define NHMEX_U_PMON_RAW_EVENT_MASK            \
-               (NHMEX_PMON_CTL_EV_SEL_MASK |   \
-                NHMEX_PMON_CTL_EDGE_DET)
-
-/* NHM-EX Cbox */
-#define NHMEX_C0_MSR_PMON_GLOBAL_CTL           0xd00
-#define NHMEX_C0_MSR_PMON_CTR0                 0xd11
-#define NHMEX_C0_MSR_PMON_EV_SEL0              0xd10
-#define NHMEX_C_MSR_OFFSET                     0x20
-
-/* NHM-EX Bbox */
-#define NHMEX_B0_MSR_PMON_GLOBAL_CTL           0xc20
-#define NHMEX_B0_MSR_PMON_CTR0                 0xc31
-#define NHMEX_B0_MSR_PMON_CTL0                 0xc30
-#define NHMEX_B_MSR_OFFSET                     0x40
-#define NHMEX_B0_MSR_MATCH                     0xe45
-#define NHMEX_B0_MSR_MASK                      0xe46
-#define NHMEX_B1_MSR_MATCH                     0xe4d
-#define NHMEX_B1_MSR_MASK                      0xe4e
-
-#define NHMEX_B_PMON_CTL_EN                    (1 << 0)
-#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT          1
-#define NHMEX_B_PMON_CTL_EV_SEL_MASK           \
-               (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
-#define NHMEX_B_PMON_CTR_SHIFT         6
-#define NHMEX_B_PMON_CTR_MASK          \
-               (0x3 << NHMEX_B_PMON_CTR_SHIFT)
-#define NHMEX_B_PMON_RAW_EVENT_MASK            \
-               (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
-                NHMEX_B_PMON_CTR_MASK)
-
-/* NHM-EX Sbox */
-#define NHMEX_S0_MSR_PMON_GLOBAL_CTL           0xc40
-#define NHMEX_S0_MSR_PMON_CTR0                 0xc51
-#define NHMEX_S0_MSR_PMON_CTL0                 0xc50
-#define NHMEX_S_MSR_OFFSET                     0x80
-#define NHMEX_S0_MSR_MM_CFG                    0xe48
-#define NHMEX_S0_MSR_MATCH                     0xe49
-#define NHMEX_S0_MSR_MASK                      0xe4a
-#define NHMEX_S1_MSR_MM_CFG                    0xe58
-#define NHMEX_S1_MSR_MATCH                     0xe59
-#define NHMEX_S1_MSR_MASK                      0xe5a
-
-#define NHMEX_S_PMON_MM_CFG_EN                 (0x1ULL << 63)
-#define NHMEX_S_EVENT_TO_R_PROG_EV             0
-
-/* NHM-EX Mbox */
-#define NHMEX_M0_MSR_GLOBAL_CTL                        0xca0
-#define NHMEX_M0_MSR_PMU_DSP                   0xca5
-#define NHMEX_M0_MSR_PMU_ISS                   0xca6
-#define NHMEX_M0_MSR_PMU_MAP                   0xca7
-#define NHMEX_M0_MSR_PMU_MSC_THR               0xca8
-#define NHMEX_M0_MSR_PMU_PGT                   0xca9
-#define NHMEX_M0_MSR_PMU_PLD                   0xcaa
-#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC           0xcab
-#define NHMEX_M0_MSR_PMU_CTL0                  0xcb0
-#define NHMEX_M0_MSR_PMU_CNT0                  0xcb1
-#define NHMEX_M_MSR_OFFSET                     0x40
-#define NHMEX_M0_MSR_PMU_MM_CFG                        0xe54
-#define NHMEX_M1_MSR_PMU_MM_CFG                        0xe5c
-
-#define NHMEX_M_PMON_MM_CFG_EN                 (1ULL << 63)
-#define NHMEX_M_PMON_ADDR_MATCH_MASK           0x3ffffffffULL
-#define NHMEX_M_PMON_ADDR_MASK_MASK            0x7ffffffULL
-#define NHMEX_M_PMON_ADDR_MASK_SHIFT           34
-
-#define NHMEX_M_PMON_CTL_EN                    (1 << 0)
-#define NHMEX_M_PMON_CTL_PMI_EN                        (1 << 1)
-#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT      2
-#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK       \
-       (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
-#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT    4
-#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK     \
-       (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
-#define NHMEX_M_PMON_CTL_WRAP_MODE             (1 << 6)
-#define NHMEX_M_PMON_CTL_FLAG_MODE             (1 << 7)
-#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT         9
-#define NHMEX_M_PMON_CTL_INC_SEL_MASK          \
-       (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
-#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT    19
-#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK     \
-       (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
-#define NHMEX_M_PMON_RAW_EVENT_MASK                    \
-               (NHMEX_M_PMON_CTL_COUNT_MODE_MASK |     \
-                NHMEX_M_PMON_CTL_STORAGE_MODE_MASK |   \
-                NHMEX_M_PMON_CTL_WRAP_MODE |           \
-                NHMEX_M_PMON_CTL_FLAG_MODE |           \
-                NHMEX_M_PMON_CTL_INC_SEL_MASK |        \
-                NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
-
-#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK          (((1 << 11) - 1) | (1 << 23))
-#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
-
-#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK          (((1 << 12) - 1) | (1 << 24))
-#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
-
-/*
- * use bits 9~13 to select the event if the 7th bit is not set,
- * otherwise use bits 19~21 to select the event.
- */
-#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
-#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
-                               NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
-                          NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
-                               NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_INC_SEL_EXTAR_REG(c, r) \
-               EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
-                               MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
-#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
-               EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
-                               MBOX_SET_FLAG_SEL_MASK, \
-                               (u64)-1, NHMEX_M_##r)
-
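The MBOX_INC_SEL()/MBOX_SET_FLAG_SEL() helpers above encode the rule in the preceding comment: without the flag-mode bit (bit 7) the event selector lives in bits 9~13, with it the selector lives in bits 19~21. A throwaway sketch that prints both encodings so the bit positions are easy to check:

    #include <stdio.h>

    #define CTL_FLAG_MODE           (1u << 7)
    #define CTL_INC_SEL_SHIFT       9
    #define CTL_SET_FLAG_SEL_SHIFT  19

    #define INC_SEL(x)      ((x) << CTL_INC_SEL_SHIFT)
    #define SET_FLAG_SEL(x) (((x) << CTL_SET_FLAG_SEL_SHIFT) | CTL_FLAG_MODE)

    int main(void)
    {
            /* event 0xa via inc_sel vs. event 0x5 via set_flag_sel */
            printf("inc_sel(0xa)      = 0x%08x\n", INC_SEL(0xau));
            printf("set_flag_sel(0x5) = 0x%08x\n", SET_FLAG_SEL(0x5u));
            return 0;
    }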
-/* NHM-EX Rbox */
-#define NHMEX_R_MSR_GLOBAL_CTL                 0xe00
-#define NHMEX_R_MSR_PMON_CTL0                  0xe10
-#define NHMEX_R_MSR_PMON_CNT0                  0xe11
-#define NHMEX_R_MSR_OFFSET                     0x20
-
-#define NHMEX_R_MSR_PORTN_QLX_CFG(n)           \
-               ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
-#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n)                (0xe04 + (n))
-#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n)                (0xe24 + (n))
-#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n)                \
-               (((n) < 4 ? 0 : 0x10) + (n) * 4)
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n)   \
-               (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n)    \
-               (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n)     \
-               (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n)   \
-               (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n)    \
-               (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n)     \
-               (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
-
-#define NHMEX_R_PMON_CTL_EN                    (1 << 0)
-#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT          1
-#define NHMEX_R_PMON_CTL_EV_SEL_MASK           \
-               (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
-#define NHMEX_R_PMON_CTL_PMI_EN                        (1 << 6)
-#define NHMEX_R_PMON_RAW_EVENT_MASK            NHMEX_R_PMON_CTL_EV_SEL_MASK
-
-/* NHM-EX Wbox */
-#define NHMEX_W_MSR_GLOBAL_CTL                 0xc80
-#define NHMEX_W_MSR_PMON_CNT0                  0xc90
-#define NHMEX_W_MSR_PMON_EVT_SEL0              0xc91
-#define NHMEX_W_MSR_PMON_FIXED_CTR             0x394
-#define NHMEX_W_MSR_PMON_FIXED_CTL             0x395
-
-#define NHMEX_W_PMON_GLOBAL_FIXED_EN           (1ULL << 31)
-
-#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
-                               ((1ULL << (n)) - 1)))
-
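__BITS_VALUE(x, i, n) above extracts the i-th n-bit field of x. The Mbox code uses it both to split two 32-bit MSR configs out of config1 and, with n = 8, to read the four per-field reference counts packed into a single atomic_t (see the atomic_add(1 << (idx * 8), ...) calls further down). A self-contained illustration of that packing and extraction:

    #include <stdio.h>

    #define BITS_VALUE(x, i, n)  (((x) >> ((i) * (n))) & ((1ULL << (n)) - 1))

    int main(void)
    {
            unsigned long long refs = 0;

            refs += 1ULL << (2 * 8);        /* take a reference on field 2 */
            refs += 1ULL << (2 * 8);        /* and another one */
            refs += 1ULL << (0 * 8);        /* one reference on field 0 */

            for (int i = 0; i < 4; i++)
                    printf("field %d: %llu refs\n", i, BITS_VALUE(refs, i, 8));
            return 0;
    }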
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
-DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
-
-static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
-{
-       wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
-}
-
-static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-       u64 config;
-
-       if (msr) {
-               rdmsrl(msr, config);
-               config &= ~((1ULL << uncore_num_counters(box)) - 1);
-               /* WBox has a fixed counter */
-               if (uncore_msr_fixed_ctl(box))
-                       config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
-               wrmsrl(msr, config);
-       }
-}
-
-static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-       u64 config;
-
-       if (msr) {
-               rdmsrl(msr, config);
-               config |= (1ULL << uncore_num_counters(box)) - 1;
-               /* WBox has a fixed counter */
-               if (uncore_msr_fixed_ctl(box))
-                       config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
-               wrmsrl(msr, config);
-       }
-}
-
-static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       wrmsrl(event->hw.config_base, 0);
-}
-
-static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
-               wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
-       else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
-               wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
-       else
-               wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
-}
-
-#define NHMEX_UNCORE_OPS_COMMON_INIT()                         \
-       .init_box       = nhmex_uncore_msr_init_box,            \
-       .disable_box    = nhmex_uncore_msr_disable_box,         \
-       .enable_box     = nhmex_uncore_msr_enable_box,          \
-       .disable_event  = nhmex_uncore_msr_disable_event,       \
-       .read_counter   = uncore_msr_read_counter
-
-static struct intel_uncore_ops nhmex_uncore_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event   = nhmex_uncore_msr_enable_event,
-};
-
-static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_edge.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_ubox_format_group = {
-       .name           = "format",
-       .attrs          = nhmex_uncore_ubox_formats_attr,
-};
-
-static struct intel_uncore_type nhmex_uncore_ubox = {
-       .name           = "ubox",
-       .num_counters   = 1,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .event_ctl      = NHMEX_U_MSR_PMON_EV_SEL,
-       .perf_ctr       = NHMEX_U_MSR_PMON_CTR,
-       .event_mask     = NHMEX_U_PMON_RAW_EVENT_MASK,
-       .box_ctl        = NHMEX_U_MSR_PMON_GLOBAL_CTL,
-       .ops            = &nhmex_uncore_ops,
-       .format_group   = &nhmex_uncore_ubox_format_group
-};
-
-static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_cbox_format_group = {
-       .name = "format",
-       .attrs = nhmex_uncore_cbox_formats_attr,
-};
-
-/* msr offset for each instance of cbox */
-static unsigned nhmex_cbox_msr_offsets[] = {
-       0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
-};
-
-static struct intel_uncore_type nhmex_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 6,
-       .num_boxes              = 10,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_C0_MSR_PMON_EV_SEL0,
-       .perf_ctr               = NHMEX_C0_MSR_PMON_CTR0,
-       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
-       .msr_offsets            = nhmex_cbox_msr_offsets,
-       .pair_ctr_ctl           = 1,
-       .ops                    = &nhmex_uncore_ops,
-       .format_group           = &nhmex_uncore_cbox_format_group
-};
-
-static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_type nhmex_uncore_wbox = {
-       .name                   = "wbox",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_W_MSR_PMON_CNT0,
-       .perf_ctr               = NHMEX_W_MSR_PMON_EVT_SEL0,
-       .fixed_ctr              = NHMEX_W_MSR_PMON_FIXED_CTR,
-       .fixed_ctl              = NHMEX_W_MSR_PMON_FIXED_CTL,
-       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_W_MSR_GLOBAL_CTL,
-       .pair_ctr_ctl           = 1,
-       .event_descs            = nhmex_uncore_wbox_events,
-       .ops                    = &nhmex_uncore_ops,
-       .format_group           = &nhmex_uncore_cbox_format_group
-};
-
-static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-       int ctr, ev_sel;
-
-       ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
-               NHMEX_B_PMON_CTR_SHIFT;
-       ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
-                 NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
-
-       /* events that do not use the match/mask registers */
-       if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
-           (ctr == 2 && ev_sel != 0x4) || ctr == 3)
-               return 0;
-
-       if (box->pmu->pmu_idx == 0)
-               reg1->reg = NHMEX_B0_MSR_MATCH;
-       else
-               reg1->reg = NHMEX_B1_MSR_MATCH;
-       reg1->idx = 0;
-       reg1->config = event->attr.config1;
-       reg2->config = event->attr.config2;
-       return 0;
-}
-
-static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               wrmsrl(reg1->reg, reg1->config);
-               wrmsrl(reg1->reg + 1, reg2->config);
-       }
-       wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
-               (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
-}
-
-/*
- * The Bbox has 4 counters, but each counter monitors different events.
- * Use bits 6-7 in the event config to select counter.
- */
-static struct event_constraint nhmex_uncore_bbox_constraints[] = {
-       EVENT_CONSTRAINT(0 , 1, 0xc0),
-       EVENT_CONSTRAINT(0x40, 2, 0xc0),
-       EVENT_CONSTRAINT(0x80, 4, 0xc0),
-       EVENT_CONSTRAINT(0xc0, 8, 0xc0),
-       EVENT_CONSTRAINT_END,
-};
-
-static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
-       &format_attr_event5.attr,
-       &format_attr_counter.attr,
-       &format_attr_match.attr,
-       &format_attr_mask.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_bbox_format_group = {
-       .name = "format",
-       .attrs = nhmex_uncore_bbox_formats_attr,
-};
-
-static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event           = nhmex_bbox_msr_enable_event,
-       .hw_config              = nhmex_bbox_hw_config,
-       .get_constraint         = uncore_get_constraint,
-       .put_constraint         = uncore_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_bbox = {
-       .name                   = "bbox",
-       .num_counters           = 4,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_B0_MSR_PMON_CTL0,
-       .perf_ctr               = NHMEX_B0_MSR_PMON_CTR0,
-       .event_mask             = NHMEX_B_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
-       .msr_offset             = NHMEX_B_MSR_OFFSET,
-       .pair_ctr_ctl           = 1,
-       .num_shared_regs        = 1,
-       .constraints            = nhmex_uncore_bbox_constraints,
-       .ops                    = &nhmex_uncore_bbox_ops,
-       .format_group           = &nhmex_uncore_bbox_format_group
-};
-
-static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       /* only TO_R_PROG_EV event uses the match/mask register */
-       if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
-           NHMEX_S_EVENT_TO_R_PROG_EV)
-               return 0;
-
-       if (box->pmu->pmu_idx == 0)
-               reg1->reg = NHMEX_S0_MSR_MM_CFG;
-       else
-               reg1->reg = NHMEX_S1_MSR_MM_CFG;
-       reg1->idx = 0;
-       reg1->config = event->attr.config1;
-       reg2->config = event->attr.config2;
-       return 0;
-}
-
-static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               wrmsrl(reg1->reg, 0);
-               wrmsrl(reg1->reg + 1, reg1->config);
-               wrmsrl(reg1->reg + 2, reg2->config);
-               wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
-       }
-       wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
-}
-
-static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_match.attr,
-       &format_attr_mask.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_sbox_format_group = {
-       .name                   = "format",
-       .attrs                  = nhmex_uncore_sbox_formats_attr,
-};
-
-static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event           = nhmex_sbox_msr_enable_event,
-       .hw_config              = nhmex_sbox_hw_config,
-       .get_constraint         = uncore_get_constraint,
-       .put_constraint         = uncore_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_sbox = {
-       .name                   = "sbox",
-       .num_counters           = 4,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_S0_MSR_PMON_CTL0,
-       .perf_ctr               = NHMEX_S0_MSR_PMON_CTR0,
-       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
-       .msr_offset             = NHMEX_S_MSR_OFFSET,
-       .pair_ctr_ctl           = 1,
-       .num_shared_regs        = 1,
-       .ops                    = &nhmex_uncore_sbox_ops,
-       .format_group           = &nhmex_uncore_sbox_format_group
-};
-
-enum {
-       EXTRA_REG_NHMEX_M_FILTER,
-       EXTRA_REG_NHMEX_M_DSP,
-       EXTRA_REG_NHMEX_M_ISS,
-       EXTRA_REG_NHMEX_M_MAP,
-       EXTRA_REG_NHMEX_M_MSC_THR,
-       EXTRA_REG_NHMEX_M_PGT,
-       EXTRA_REG_NHMEX_M_PLD,
-       EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
-};
-
-static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
-       MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
-       MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
-       MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
-       MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
-       /* event 0xa uses two extra registers */
-       MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
-       MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
-       MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
-       /* events 0xd ~ 0x10 use the same extra register */
-       MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
-       MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
-       MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
-       MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
-       MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
-       MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
-       MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
-       MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
-       MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
-       EVENT_EXTRA_END
-};
-
-/* Nehalem-EX or Westmere-EX ? */
-static bool uncore_nhmex;
-
-static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
-{
-       struct intel_uncore_extra_reg *er;
-       unsigned long flags;
-       bool ret = false;
-       u64 mask;
-
-       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
-               er = &box->shared_regs[idx];
-               raw_spin_lock_irqsave(&er->lock, flags);
-               if (!atomic_read(&er->ref) || er->config == config) {
-                       atomic_inc(&er->ref);
-                       er->config = config;
-                       ret = true;
-               }
-               raw_spin_unlock_irqrestore(&er->lock, flags);
-
-               return ret;
-       }
-       /*
-        * The ZDP_CTL_FVC MSR has 4 fields which are used to control
-        * events 0xd ~ 0x10. Besides these 4 fields, there are additional
-        * fields which are shared.
-        */
-       idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-       if (WARN_ON_ONCE(idx >= 4))
-               return false;
-
-       /* mask of the shared fields */
-       if (uncore_nhmex)
-               mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
-       else
-               mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
-       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
-
-       raw_spin_lock_irqsave(&er->lock, flags);
-       /* add mask of the non-shared field if it's in use */
-       if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
-               if (uncore_nhmex)
-                       mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-               else
-                       mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-       }
-
-       if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
-               atomic_add(1 << (idx * 8), &er->ref);
-               if (uncore_nhmex)
-                       mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
-                               NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-               else
-                       mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
-                               WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-               er->config &= ~mask;
-               er->config |= (config & mask);
-               ret = true;
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       return ret;
-}
-
-static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
-{
-       struct intel_uncore_extra_reg *er;
-
-       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
-               er = &box->shared_regs[idx];
-               atomic_dec(&er->ref);
-               return;
-       }
-
-       idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
-       atomic_sub(1 << (idx * 8), &er->ref);
-}
-
-static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
-       u64 config = reg1->config;
-
-       /* get the non-shared control bits and shift them */
-       idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-       if (uncore_nhmex)
-               config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-       else
-               config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-       if (new_idx > orig_idx) {
-               idx = new_idx - orig_idx;
-               config <<= 3 * idx;
-       } else {
-               idx = orig_idx - new_idx;
-               config >>= 3 * idx;
-       }
-
-       /* add the shared control bits back */
-       if (uncore_nhmex)
-               config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
-       else
-               config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
-       config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
-       if (modify) {
-               /* adjust the main event selector */
-               if (new_idx > orig_idx)
-                       hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
-               else
-                       hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
-               reg1->config = config;
-               reg1->idx = ~0xff | new_idx;
-       }
-       return config;
-}
-
-static struct event_constraint *
-nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-       int i, idx[2], alloc = 0;
-       u64 config1 = reg1->config;
-
-       idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
-       idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
-again:
-       for (i = 0; i < 2; i++) {
-               if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
-                       idx[i] = 0xff;
-
-               if (idx[i] == 0xff)
-                       continue;
-
-               if (!nhmex_mbox_get_shared_reg(box, idx[i],
-                               __BITS_VALUE(config1, i, 32)))
-                       goto fail;
-               alloc |= (0x1 << i);
-       }
-
-       /* for the match/mask registers */
-       if (reg2->idx != EXTRA_REG_NONE &&
-           (uncore_box_is_fake(box) || !reg2->alloc) &&
-           !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
-               goto fail;
-
-       /*
-        * If it's a fake box -- as per validate_{group,event}() we
-        * shouldn't touch event state and we can avoid doing so
-        * since both will only call get_event_constraints() once
-        * on each event, this avoids the need for reg->alloc.
-        */
-       if (!uncore_box_is_fake(box)) {
-               if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
-                       nhmex_mbox_alter_er(event, idx[0], true);
-               reg1->alloc |= alloc;
-               if (reg2->idx != EXTRA_REG_NONE)
-                       reg2->alloc = 1;
-       }
-       return NULL;
-fail:
-       if (idx[0] != 0xff && !(alloc & 0x1) &&
-           idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
-               /*
-                * events 0xd ~ 0x10 are functionally identical, but are
-                * controlled by different fields in the ZDP_CTL_FVC
-                * register. If we failed to take one field, try the
-                * remaining 3 choices.
-                */
-               BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
-               idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-               idx[0] = (idx[0] + 1) % 4;
-               idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-               if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
-                       config1 = nhmex_mbox_alter_er(event, idx[0], false);
-                       goto again;
-               }
-       }
-
-       if (alloc & 0x1)
-               nhmex_mbox_put_shared_reg(box, idx[0]);
-       if (alloc & 0x2)
-               nhmex_mbox_put_shared_reg(box, idx[1]);
-       return &uncore_constraint_empty;
-}
-
-static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-
-       if (uncore_box_is_fake(box))
-               return;
-
-       if (reg1->alloc & 0x1)
-               nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
-       if (reg1->alloc & 0x2)
-               nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
-       reg1->alloc = 0;
-
-       if (reg2->alloc) {
-               nhmex_mbox_put_shared_reg(box, reg2->idx);
-               reg2->alloc = 0;
-       }
-}
-
-static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
-{
-       if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
-               return er->idx;
-       return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
-}
-
-static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_type *type = box->pmu->type;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-       struct extra_reg *er;
-       unsigned msr;
-       int reg_idx = 0;
-       /*
-        * The mbox events may require 2 extra MSRs at most. But only
-        * the lower 32 bits in these MSRs are significant, so we can use
-        * config1 to pass both MSRs' config.
-        */
-       for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               if (event->attr.config1 & ~er->valid_mask)
-                       return -EINVAL;
-
-               msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
-               if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
-                       return -EINVAL;
-
-               /* always use bits 32~63 to pass the PLD config */
-               if (er->idx == EXTRA_REG_NHMEX_M_PLD)
-                       reg_idx = 1;
-               else if (WARN_ON_ONCE(reg_idx > 0))
-                       return -EINVAL;
-
-               reg1->idx &= ~(0xff << (reg_idx * 8));
-               reg1->reg &= ~(0xffff << (reg_idx * 16));
-               reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
-               reg1->reg |= msr << (reg_idx * 16);
-               reg1->config = event->attr.config1;
-               reg_idx++;
-       }
-       /*
-        * The mbox only provides the ability to perform address matching
-        * for the PLD events.
-        */
-       if (reg_idx == 2) {
-               reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
-               if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
-                       reg2->config = event->attr.config2;
-               else
-                       reg2->config = ~0ULL;
-               if (box->pmu->pmu_idx == 0)
-                       reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
-               else
-                       reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
-       }
-       return 0;
-}
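
The hw_config loop above packs up to two sub-configurations into a single extra_reg: two 8-bit indices into reg1->idx, two 16-bit MSR addresses into reg1->reg, and two 32-bit values into config1, which __BITS_VALUE() later unpacks. A minimal standalone sketch of that packing convention follows; it is illustrative only, not part of the deleted file, and the values, MSR addresses, and names used here (BITS_VALUE, the slot contents) are made up for the example.

#include <stdio.h>
#include <stdint.h>

/* simplified version of the kernel's __BITS_VALUE(): field i of width n */
#define BITS_VALUE(x, i, n)  (((x) >> ((i) * (n))) & ((1ULL << (n)) - 1))

int main(void)
{
	uint64_t config1 = 0;
	uint32_t idx = ~0u;                    /* 0xff per byte means "unused" */
	uint32_t reg = 0;

	uint32_t cfg[2]   = { 0x2800, 0x1234 }; /* arbitrary example configs */
	uint32_t msr[2]   = { 0x0ca4, 0x0ca5 }; /* made-up MSR addresses */
	uint8_t  eridx[2] = { 0x0d, 0x08 };     /* made-up extra-reg indices */

	for (int i = 0; i < 2; i++) {
		idx &= ~(0xffu << (i * 8));
		reg &= ~(0xffffu << (i * 16));
		idx |= (uint32_t)eridx[i] << (i * 8);
		reg |= msr[i] << (i * 16);
		config1 |= (uint64_t)cfg[i] << (i * 32);
	}

	for (int i = 0; i < 2; i++)
		printf("slot %d: idx=0x%02llx msr=0x%04llx cfg=0x%08llx\n", i,
		       BITS_VALUE(idx, i, 8), BITS_VALUE(reg, i, 16),
		       BITS_VALUE(config1, i, 32));
	return 0;
}
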
-
-static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
-{
-       struct intel_uncore_extra_reg *er;
-       unsigned long flags;
-       u64 config;
-
-       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
-               return box->shared_regs[idx].config;
-
-       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
-       raw_spin_lock_irqsave(&er->lock, flags);
-       config = er->config;
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-       return config;
-}
-
-static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-       int idx;
-
-       idx = __BITS_VALUE(reg1->idx, 0, 8);
-       if (idx != 0xff)
-               wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
-                       nhmex_mbox_shared_reg_config(box, idx));
-       idx = __BITS_VALUE(reg1->idx, 1, 8);
-       if (idx != 0xff)
-               wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
-                       nhmex_mbox_shared_reg_config(box, idx));
-
-       if (reg2->idx != EXTRA_REG_NONE) {
-               wrmsrl(reg2->reg, 0);
-               if (reg2->config != ~0ULL) {
-                       wrmsrl(reg2->reg + 1,
-                               reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
-                       wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
-                               (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
-                       wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
-               }
-       }
-
-       wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
-}
-
-DEFINE_UNCORE_FORMAT_ATTR(count_mode,          count_mode,     "config:2-3");
-DEFINE_UNCORE_FORMAT_ATTR(storage_mode,                storage_mode,   "config:4-5");
-DEFINE_UNCORE_FORMAT_ATTR(wrap_mode,           wrap_mode,      "config:6");
-DEFINE_UNCORE_FORMAT_ATTR(flag_mode,           flag_mode,      "config:7");
-DEFINE_UNCORE_FORMAT_ATTR(inc_sel,             inc_sel,        "config:9-13");
-DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel,                set_flag_sel,   "config:19-21");
-DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en,       filter_cfg_en,  "config2:63");
-DEFINE_UNCORE_FORMAT_ATTR(filter_match,                filter_match,   "config2:0-33");
-DEFINE_UNCORE_FORMAT_ATTR(filter_mask,         filter_mask,    "config2:34-61");
-DEFINE_UNCORE_FORMAT_ATTR(dsp,                 dsp,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(thr,                 thr,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(fvc,                 fvc,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pgt,                 pgt,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(map,                 map,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(iss,                 iss,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pld,                 pld,            "config1:32-63");
-
-static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
-       &format_attr_count_mode.attr,
-       &format_attr_storage_mode.attr,
-       &format_attr_wrap_mode.attr,
-       &format_attr_flag_mode.attr,
-       &format_attr_inc_sel.attr,
-       &format_attr_set_flag_sel.attr,
-       &format_attr_filter_cfg_en.attr,
-       &format_attr_filter_match.attr,
-       &format_attr_filter_mask.attr,
-       &format_attr_dsp.attr,
-       &format_attr_thr.attr,
-       &format_attr_fvc.attr,
-       &format_attr_pgt.attr,
-       &format_attr_map.attr,
-       &format_attr_iss.attr,
-       &format_attr_pld.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_mbox_format_group = {
-       .name           = "format",
-       .attrs          = nhmex_uncore_mbox_formats_attr,
-};
-
-static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
-       INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
-       INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
-       { /* end: all zeroes */ },
-};
-
-static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
-       INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
-       INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event   = nhmex_mbox_msr_enable_event,
-       .hw_config      = nhmex_mbox_hw_config,
-       .get_constraint = nhmex_mbox_get_constraint,
-       .put_constraint = nhmex_mbox_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_mbox = {
-       .name                   = "mbox",
-       .num_counters           = 6,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_M0_MSR_PMU_CTL0,
-       .perf_ctr               = NHMEX_M0_MSR_PMU_CNT0,
-       .event_mask             = NHMEX_M_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_M0_MSR_GLOBAL_CTL,
-       .msr_offset             = NHMEX_M_MSR_OFFSET,
-       .pair_ctr_ctl           = 1,
-       .num_shared_regs        = 8,
-       .event_descs            = nhmex_uncore_mbox_events,
-       .ops                    = &nhmex_uncore_mbox_ops,
-       .format_group           = &nhmex_uncore_mbox_format_group,
-};
-
-static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
-       /* adjust the main event selector and extra register index */
-       if (reg1->idx % 2) {
-               reg1->idx--;
-               hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
-       } else {
-               reg1->idx++;
-               hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
-       }
-
-       /* adjust extra register config */
-       switch (reg1->idx % 6) {
-       case 2:
-               /* shift bits 8~15 down to bits 0~7 */
-               reg1->config >>= 8;
-               break;
-       case 3:
-               /* shift bits 0~7 up to bits 8~15 */
-               reg1->config <<= 8;
-               break;
-       }
-}
-
-/*
- * Each rbox has 4 event sets, which monitor QPI ports 0~3 or 4~7.
- * An event set consists of 6 events; the 3rd and 4th events in
- * an event set use the same extra register, so an event set uses
- * 5 extra registers.
- */
-static struct event_constraint *
-nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-       struct intel_uncore_extra_reg *er;
-       unsigned long flags;
-       int idx, er_idx;
-       u64 config1;
-       bool ok = false;
-
-       if (!uncore_box_is_fake(box) && reg1->alloc)
-               return NULL;
-
-       idx = reg1->idx % 6;
-       config1 = reg1->config;
-again:
-       er_idx = idx;
-       /* the 3rd and 4th events use the same extra register */
-       if (er_idx > 2)
-               er_idx--;
-       er_idx += (reg1->idx / 6) * 5;
-
-       er = &box->shared_regs[er_idx];
-       raw_spin_lock_irqsave(&er->lock, flags);
-       if (idx < 2) {
-               if (!atomic_read(&er->ref) || er->config == reg1->config) {
-                       atomic_inc(&er->ref);
-                       er->config = reg1->config;
-                       ok = true;
-               }
-       } else if (idx == 2 || idx == 3) {
-               /*
-                * these two events use different fields in an extra register,
-                * bits 0~7 and bits 8~15 respectively.
-                */
-               u64 mask = 0xff << ((idx - 2) * 8);
-               if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
-                               !((er->config ^ config1) & mask)) {
-                       atomic_add(1 << ((idx - 2) * 8), &er->ref);
-                       er->config &= ~mask;
-                       er->config |= config1 & mask;
-                       ok = true;
-               }
-       } else {
-               if (!atomic_read(&er->ref) ||
-                               (er->config == (hwc->config >> 32) &&
-                                er->config1 == reg1->config &&
-                                er->config2 == reg2->config)) {
-                       atomic_inc(&er->ref);
-                       er->config = (hwc->config >> 32);
-                       er->config1 = reg1->config;
-                       er->config2 = reg2->config;
-                       ok = true;
-               }
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       if (!ok) {
-               /*
-                * The Rbox events are always in pairs. The paired
-                * events are functionally identical, but use different
-                * extra registers. If we fail to take an extra
-                * register, try the alternative.
-                */
-               idx ^= 1;
-               if (idx != reg1->idx % 6) {
-                       if (idx == 2)
-                               config1 >>= 8;
-                       else if (idx == 3)
-                               config1 <<= 8;
-                       goto again;
-               }
-       } else {
-               if (!uncore_box_is_fake(box)) {
-                       if (idx != reg1->idx % 6)
-                               nhmex_rbox_alter_er(box, event);
-                       reg1->alloc = 1;
-               }
-               return NULL;
-       }
-       return &uncore_constraint_empty;
-}
-
-static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_extra_reg *er;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       int idx, er_idx;
-
-       if (uncore_box_is_fake(box) || !reg1->alloc)
-               return;
-
-       idx = reg1->idx % 6;
-       er_idx = idx;
-       if (er_idx > 2)
-               er_idx--;
-       er_idx += (reg1->idx / 6) * 5;
-
-       er = &box->shared_regs[er_idx];
-       if (idx == 2 || idx == 3)
-               atomic_sub(1 << ((idx - 2) * 8), &er->ref);
-       else
-               atomic_dec(&er->ref);
-
-       reg1->alloc = 0;
-}
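
Both rbox constraint callbacks above derive the shared-register slot from reg1->idx with the same arithmetic, matching the comment before nhmex_rbox_get_constraint(): 6 events per set, the 3rd and 4th share a register, so 5 shared registers per set and 4 * 5 = 20 in total (see num_shared_regs below). A tiny standalone sketch of that mapping, illustrative only and with a made-up helper name rbox_er_idx(), makes the layout explicit:

#include <stdio.h>

/* same arithmetic as nhmex_rbox_{get,put}_constraint() above */
static int rbox_er_idx(int reg_idx)
{
	int idx = reg_idx % 6;     /* position within the event set */
	int er_idx = idx;

	if (er_idx > 2)            /* the 3rd and 4th events share one register */
		er_idx--;
	return er_idx + (reg_idx / 6) * 5;  /* 5 shared registers per set */
}

int main(void)
{
	for (int i = 0; i < 24; i++)        /* 4 event sets x 6 events */
		printf("event idx %2d -> shared reg %2d\n", i, rbox_er_idx(i));
	return 0;
}
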
-
-static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-       int idx;
-
-       idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
-               NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
-       if (idx >= 0x18)
-               return -EINVAL;
-
-       reg1->idx = idx;
-       reg1->config = event->attr.config1;
-
-       switch (idx % 6) {
-       case 4:
-       case 5:
-               hwc->config |= event->attr.config & (~0ULL << 32);
-               reg2->config = event->attr.config2;
-               break;
-       }
-       return 0;
-}
-
-static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-       int idx, port;
-
-       idx = reg1->idx;
-       port = idx / 6 + box->pmu->pmu_idx * 4;
-
-       switch (idx % 6) {
-       case 0:
-               wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
-               break;
-       case 1:
-               wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
-               break;
-       case 2:
-       case 3:
-               wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
-                       uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
-               break;
-       case 4:
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
-                       hwc->config >> 32);
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
-               break;
-       case 5:
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
-                       hwc->config >> 32);
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
-               break;
-       }
-
-       wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
-               (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
-}
-
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
-DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
-
-static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
-       &format_attr_event5.attr,
-       &format_attr_xbr_mm_cfg.attr,
-       &format_attr_xbr_match.attr,
-       &format_attr_xbr_mask.attr,
-       &format_attr_qlx_cfg.attr,
-       &format_attr_iperf_cfg.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_rbox_format_group = {
-       .name = "format",
-       .attrs = nhmex_uncore_rbox_formats_attr,
-};
-
-static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
-       INTEL_UNCORE_EVENT_DESC(qpi0_flit_send,         "event=0x0,iperf_cfg=0x80000000"),
-       INTEL_UNCORE_EVENT_DESC(qpi1_filt_send,         "event=0x6,iperf_cfg=0x80000000"),
-       INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt,         "event=0x0,iperf_cfg=0x40000000"),
-       INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt,         "event=0x6,iperf_cfg=0x40000000"),
-       INTEL_UNCORE_EVENT_DESC(qpi0_date_response,     "event=0x0,iperf_cfg=0xc4"),
-       INTEL_UNCORE_EVENT_DESC(qpi1_date_response,     "event=0x6,iperf_cfg=0xc4"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event           = nhmex_rbox_msr_enable_event,
-       .hw_config              = nhmex_rbox_hw_config,
-       .get_constraint         = nhmex_rbox_get_constraint,
-       .put_constraint         = nhmex_rbox_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_rbox = {
-       .name                   = "rbox",
-       .num_counters           = 8,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_R_MSR_PMON_CTL0,
-       .perf_ctr               = NHMEX_R_MSR_PMON_CNT0,
-       .event_mask             = NHMEX_R_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_R_MSR_GLOBAL_CTL,
-       .msr_offset             = NHMEX_R_MSR_OFFSET,
-       .pair_ctr_ctl           = 1,
-       .num_shared_regs        = 20,
-       .event_descs            = nhmex_uncore_rbox_events,
-       .ops                    = &nhmex_uncore_rbox_ops,
-       .format_group           = &nhmex_uncore_rbox_format_group
-};
-
-static struct intel_uncore_type *nhmex_msr_uncores[] = {
-       &nhmex_uncore_ubox,
-       &nhmex_uncore_cbox,
-       &nhmex_uncore_bbox,
-       &nhmex_uncore_sbox,
-       &nhmex_uncore_mbox,
-       &nhmex_uncore_rbox,
-       &nhmex_uncore_wbox,
-       NULL,
-};
-
-void nhmex_uncore_cpu_init(void)
-{
-       if (boot_cpu_data.x86_model == 46)
-               uncore_nhmex = true;
-       else
-               nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
-       if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-       uncore_msr_uncores = nhmex_msr_uncores;
-}
-/* end of Nehalem-EX uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
deleted file mode 100644 (file)
index 2bd030d..0000000
+++ /dev/null
@@ -1,717 +0,0 @@
-/* Nehalem/SandyBridge/Haswell uncore support */
-#include "perf_event_intel_uncore.h"
-
-/* Uncore IMC PCI IDs */
-#define PCI_DEVICE_ID_INTEL_SNB_IMC    0x0100
-#define PCI_DEVICE_ID_INTEL_IVB_IMC    0x0154
-#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
-#define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
-#define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
-#define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
-#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
-
-/* SNB event control */
-#define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
-#define SNB_UNC_CTL_UMASK_MASK                 0x0000ff00
-#define SNB_UNC_CTL_EDGE_DET                   (1 << 18)
-#define SNB_UNC_CTL_EN                         (1 << 22)
-#define SNB_UNC_CTL_INVERT                     (1 << 23)
-#define SNB_UNC_CTL_CMASK_MASK                 0x1f000000
-#define NHM_UNC_CTL_CMASK_MASK                 0xff000000
-#define NHM_UNC_FIXED_CTR_CTL_EN               (1 << 0)
-
-#define SNB_UNC_RAW_EVENT_MASK                 (SNB_UNC_CTL_EV_SEL_MASK | \
-                                                SNB_UNC_CTL_UMASK_MASK | \
-                                                SNB_UNC_CTL_EDGE_DET | \
-                                                SNB_UNC_CTL_INVERT | \
-                                                SNB_UNC_CTL_CMASK_MASK)
-
-#define NHM_UNC_RAW_EVENT_MASK                 (SNB_UNC_CTL_EV_SEL_MASK | \
-                                                SNB_UNC_CTL_UMASK_MASK | \
-                                                SNB_UNC_CTL_EDGE_DET | \
-                                                SNB_UNC_CTL_INVERT | \
-                                                NHM_UNC_CTL_CMASK_MASK)
-
-/* SNB global control register */
-#define SNB_UNC_PERF_GLOBAL_CTL                 0x391
-#define SNB_UNC_FIXED_CTR_CTRL                  0x394
-#define SNB_UNC_FIXED_CTR                       0x395
-
-/* SNB uncore global control */
-#define SNB_UNC_GLOBAL_CTL_CORE_ALL             ((1 << 4) - 1)
-#define SNB_UNC_GLOBAL_CTL_EN                   (1 << 29)
-
-/* SNB Cbo register */
-#define SNB_UNC_CBO_0_PERFEVTSEL0               0x700
-#define SNB_UNC_CBO_0_PER_CTR0                  0x706
-#define SNB_UNC_CBO_MSR_OFFSET                  0x10
-
-/* SNB ARB register */
-#define SNB_UNC_ARB_PER_CTR0                   0x3b0
-#define SNB_UNC_ARB_PERFEVTSEL0                        0x3b2
-#define SNB_UNC_ARB_MSR_OFFSET                 0x10
-
-/* NHM global control register */
-#define NHM_UNC_PERF_GLOBAL_CTL                 0x391
-#define NHM_UNC_FIXED_CTR                       0x394
-#define NHM_UNC_FIXED_CTR_CTRL                  0x395
-
-/* NHM uncore global control */
-#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL            ((1ULL << 8) - 1)
-#define NHM_UNC_GLOBAL_CTL_EN_FC                (1ULL << 32)
-
-/* NHM uncore register */
-#define NHM_UNC_PERFEVTSEL0                     0x3c0
-#define NHM_UNC_UNCORE_PMC0                     0x3b0
-
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
-DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
-
-/* Sandy Bridge uncore support */
-static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (hwc->idx < UNCORE_PMC_IDX_FIXED)
-               wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
-       else
-               wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
-}
-
-static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       wrmsrl(event->hw.config_base, 0);
-}
-
-static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
-{
-       if (box->pmu->pmu_idx == 0) {
-               wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
-                       SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
-       }
-}
-
-static struct uncore_event_desc snb_uncore_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
-       { /* end: all zeroes */ },
-};
-
-static struct attribute *snb_uncore_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask5.attr,
-       NULL,
-};
-
-static struct attribute_group snb_uncore_format_group = {
-       .name           = "format",
-       .attrs          = snb_uncore_formats_attr,
-};
-
-static struct intel_uncore_ops snb_uncore_msr_ops = {
-       .init_box       = snb_uncore_msr_init_box,
-       .disable_event  = snb_uncore_msr_disable_event,
-       .enable_event   = snb_uncore_msr_enable_event,
-       .read_counter   = uncore_msr_read_counter,
-};
-
-static struct event_constraint snb_uncore_arb_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type snb_uncore_cbox = {
-       .name           = "cbox",
-       .num_counters   = 2,
-       .num_boxes      = 4,
-       .perf_ctr_bits  = 44,
-       .fixed_ctr_bits = 48,
-       .perf_ctr       = SNB_UNC_CBO_0_PER_CTR0,
-       .event_ctl      = SNB_UNC_CBO_0_PERFEVTSEL0,
-       .fixed_ctr      = SNB_UNC_FIXED_CTR,
-       .fixed_ctl      = SNB_UNC_FIXED_CTR_CTRL,
-       .single_fixed   = 1,
-       .event_mask     = SNB_UNC_RAW_EVENT_MASK,
-       .msr_offset     = SNB_UNC_CBO_MSR_OFFSET,
-       .ops            = &snb_uncore_msr_ops,
-       .format_group   = &snb_uncore_format_group,
-       .event_descs    = snb_uncore_events,
-};
-
-static struct intel_uncore_type snb_uncore_arb = {
-       .name           = "arb",
-       .num_counters   = 2,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .perf_ctr       = SNB_UNC_ARB_PER_CTR0,
-       .event_ctl      = SNB_UNC_ARB_PERFEVTSEL0,
-       .event_mask     = SNB_UNC_RAW_EVENT_MASK,
-       .msr_offset     = SNB_UNC_ARB_MSR_OFFSET,
-       .constraints    = snb_uncore_arb_constraints,
-       .ops            = &snb_uncore_msr_ops,
-       .format_group   = &snb_uncore_format_group,
-};
-
-static struct intel_uncore_type *snb_msr_uncores[] = {
-       &snb_uncore_cbox,
-       &snb_uncore_arb,
-       NULL,
-};
-
-void snb_uncore_cpu_init(void)
-{
-       uncore_msr_uncores = snb_msr_uncores;
-       if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-}
-
-enum {
-       SNB_PCI_UNCORE_IMC,
-};
-
-static struct uncore_event_desc snb_uncore_imc_events[] = {
-       INTEL_UNCORE_EVENT_DESC(data_reads,  "event=0x01"),
-       INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
-
-       INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
-       INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
-
-       { /* end: all zeroes */ },
-};
-
-#define SNB_UNCORE_PCI_IMC_EVENT_MASK          0xff
-#define SNB_UNCORE_PCI_IMC_BAR_OFFSET          0x48
-
-/* page size multiple covering all config regs */
-#define SNB_UNCORE_PCI_IMC_MAP_SIZE            0x6000
-
-#define SNB_UNCORE_PCI_IMC_DATA_READS          0x1
-#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE     0x5050
-#define SNB_UNCORE_PCI_IMC_DATA_WRITES         0x2
-#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE    0x5054
-#define SNB_UNCORE_PCI_IMC_CTR_BASE            SNB_UNCORE_PCI_IMC_DATA_READS_BASE
-
-static struct attribute *snb_uncore_imc_formats_attr[] = {
-       &format_attr_event.attr,
-       NULL,
-};
-
-static struct attribute_group snb_uncore_imc_format_group = {
-       .name = "format",
-       .attrs = snb_uncore_imc_formats_attr,
-};
-
-static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
-       resource_size_t addr;
-       u32 pci_dword;
-
-       pci_read_config_dword(pdev, where, &pci_dword);
-       addr = pci_dword;
-
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-       pci_read_config_dword(pdev, where + 4, &pci_dword);
-       addr |= ((resource_size_t)pci_dword << 32);
-#endif
-
-       addr &= ~(PAGE_SIZE - 1);
-
-       box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
-       box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
-}
-
-static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
-{}
-
-static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
-{}
-
-static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{}
-
-static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{}
-
-static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
-}
-
-/*
- * Custom event_init() function because we define our own fixed,
- * free-running counters, so we do not want to conflict with the
- * generic uncore logic. This also simplifies processing.
- */
-static int snb_uncore_imc_event_init(struct perf_event *event)
-{
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
-       int idx, base;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       pmu = uncore_event_to_pmu(event);
-       /* no device found for this pmu */
-       if (pmu->func_id < 0)
-               return -ENOENT;
-
-       /* Sampling not supported yet */
-       if (hwc->sample_period)
-               return -EINVAL;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       /*
-        * Place all uncore events for a particular physical package
-        * onto a single cpu
-        */
-       if (event->cpu < 0)
-               return -EINVAL;
-
-       /* check only supported bits are set */
-       if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
-               return -EINVAL;
-
-       box = uncore_pmu_to_box(pmu, event->cpu);
-       if (!box || box->cpu < 0)
-               return -EINVAL;
-
-       event->cpu = box->cpu;
-
-       event->hw.idx = -1;
-       event->hw.last_tag = ~0ULL;
-       event->hw.extra_reg.idx = EXTRA_REG_NONE;
-       event->hw.branch_reg.idx = EXTRA_REG_NONE;
-       /*
-        * check event is known (whitelist, determines counter)
-        */
-       switch (cfg) {
-       case SNB_UNCORE_PCI_IMC_DATA_READS:
-               base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
-               idx = UNCORE_PMC_IDX_FIXED;
-               break;
-       case SNB_UNCORE_PCI_IMC_DATA_WRITES:
-               base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
-               idx = UNCORE_PMC_IDX_FIXED + 1;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       /* must be done before validate_group */
-       event->hw.event_base = base;
-       event->hw.config = cfg;
-       event->hw.idx = idx;
-
-       /* no group validation needed, we have free running counters */
-
-       return 0;
-}
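
The counters whitelisted here are 32-bit free-running MMIO counters (snb_uncore_imc sets fixed_ctr_bits = 32 further down), so the update path only has to accumulate deltas between successive raw reads. The sketch below is illustrative only: the real accumulation is done by uncore_perf_event_update(), which is not part of this hunk, and the helper name accumulate32() is made up; it merely shows the wraparound-safe arithmetic for a 32-bit counter.

#include <stdio.h>
#include <stdint.h>

/* fold a new 32-bit raw read into a 64-bit running count; *prev holds the
 * previous raw read, and modular subtraction handles counter wraparound */
static void accumulate32(uint64_t *count, uint32_t *prev, uint32_t now)
{
	uint32_t delta = now - *prev;  /* correct even if the counter wrapped */

	*count += delta;
	*prev = now;
}

int main(void)
{
	uint64_t count = 0;
	uint32_t prev = 0xfffffff0u;   /* pretend the counter is about to wrap */

	accumulate32(&count, &prev, 0x00000010u);  /* read taken after a wrap */
	accumulate32(&count, &prev, 0x00000030u);
	printf("accumulated %llu units\n", (unsigned long long)count); /* 64 */
	return 0;
}
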
-
-static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return 0;
-}
-
-static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       u64 count;
-
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
-       event->hw.state = 0;
-       box->n_active++;
-
-       list_add_tail(&event->active_entry, &box->active_list);
-
-       count = snb_uncore_imc_read_counter(box, event);
-       local64_set(&event->hw.prev_count, count);
-
-       if (box->n_active == 1)
-               uncore_pmu_start_hrtimer(box);
-}
-
-static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (!(hwc->state & PERF_HES_STOPPED)) {
-               box->n_active--;
-
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-
-               list_del(&event->active_entry);
-
-               if (box->n_active == 0)
-                       uncore_pmu_cancel_hrtimer(box);
-       }
-
-       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               /*
-                * Drain the remaining delta count out of an event
-                * that we are disabling:
-                */
-               uncore_perf_event_update(box, event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-}
-
-static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (!box)
-               return -ENODEV;
-
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-       if (!(flags & PERF_EF_START))
-               hwc->state |= PERF_HES_ARCH;
-
-       snb_uncore_imc_event_start(event, 0);
-
-       box->n_events++;
-
-       return 0;
-}
-
-static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       int i;
-
-       snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < box->n_events; i++) {
-               if (event == box->event_list[i]) {
-                       --box->n_events;
-                       break;
-               }
-       }
-}
-
-int snb_pci2phy_map_init(int devid)
-{
-       struct pci_dev *dev = NULL;
-       struct pci2phy_map *map;
-       int bus, segment;
-
-       dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
-       if (!dev)
-               return -ENOTTY;
-
-       bus = dev->bus->number;
-       segment = pci_domain_nr(dev->bus);
-
-       raw_spin_lock(&pci2phy_map_lock);
-       map = __find_pci2phy_map(segment);
-       if (!map) {
-               raw_spin_unlock(&pci2phy_map_lock);
-               pci_dev_put(dev);
-               return -ENOMEM;
-       }
-       map->pbus_to_physid[bus] = 0;
-       raw_spin_unlock(&pci2phy_map_lock);
-
-       pci_dev_put(dev);
-
-       return 0;
-}
-
-static struct pmu snb_uncore_imc_pmu = {
-       .task_ctx_nr    = perf_invalid_context,
-       .event_init     = snb_uncore_imc_event_init,
-       .add            = snb_uncore_imc_event_add,
-       .del            = snb_uncore_imc_event_del,
-       .start          = snb_uncore_imc_event_start,
-       .stop           = snb_uncore_imc_event_stop,
-       .read           = uncore_pmu_event_read,
-};
-
-static struct intel_uncore_ops snb_uncore_imc_ops = {
-       .init_box       = snb_uncore_imc_init_box,
-       .enable_box     = snb_uncore_imc_enable_box,
-       .disable_box    = snb_uncore_imc_disable_box,
-       .disable_event  = snb_uncore_imc_disable_event,
-       .enable_event   = snb_uncore_imc_enable_event,
-       .hw_config      = snb_uncore_imc_hw_config,
-       .read_counter   = snb_uncore_imc_read_counter,
-};
-
-static struct intel_uncore_type snb_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 2,
-       .num_boxes      = 1,
-       .fixed_ctr_bits = 32,
-       .fixed_ctr      = SNB_UNCORE_PCI_IMC_CTR_BASE,
-       .event_descs    = snb_uncore_imc_events,
-       .format_group   = &snb_uncore_imc_format_group,
-       .perf_ctr       = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
-       .event_mask     = SNB_UNCORE_PCI_IMC_EVENT_MASK,
-       .ops            = &snb_uncore_imc_ops,
-       .pmu            = &snb_uncore_imc_pmu,
-};
-
-static struct intel_uncore_type *snb_pci_uncores[] = {
-       [SNB_PCI_UNCORE_IMC]    = &snb_uncore_imc,
-       NULL,
-};
-
-static const struct pci_device_id snb_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id ivb_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id hsw_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id bdw_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id skl_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static struct pci_driver snb_uncore_pci_driver = {
-       .name           = "snb_uncore",
-       .id_table       = snb_uncore_pci_ids,
-};
-
-static struct pci_driver ivb_uncore_pci_driver = {
-       .name           = "ivb_uncore",
-       .id_table       = ivb_uncore_pci_ids,
-};
-
-static struct pci_driver hsw_uncore_pci_driver = {
-       .name           = "hsw_uncore",
-       .id_table       = hsw_uncore_pci_ids,
-};
-
-static struct pci_driver bdw_uncore_pci_driver = {
-       .name           = "bdw_uncore",
-       .id_table       = bdw_uncore_pci_ids,
-};
-
-static struct pci_driver skl_uncore_pci_driver = {
-       .name           = "skl_uncore",
-       .id_table       = skl_uncore_pci_ids,
-};
-
-struct imc_uncore_pci_dev {
-       __u32 pci_id;
-       struct pci_driver *driver;
-};
-#define IMC_DEV(a, d) \
-       { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
-
-static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
-       IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
-       IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
-       IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
-       IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
-       IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
-       IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
-       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
-       {  /* end marker */ }
-};
-
-
-#define for_each_imc_pci_id(x, t) \
-       for (x = (t); (x)->pci_id; x++)
-
-static struct pci_driver *imc_uncore_find_dev(void)
-{
-       const struct imc_uncore_pci_dev *p;
-       int ret;
-
-       for_each_imc_pci_id(p, desktop_imc_pci_ids) {
-               ret = snb_pci2phy_map_init(p->pci_id);
-               if (ret == 0)
-                       return p->driver;
-       }
-       return NULL;
-}
-
-static int imc_uncore_pci_init(void)
-{
-       struct pci_driver *imc_drv = imc_uncore_find_dev();
-
-       if (!imc_drv)
-               return -ENODEV;
-
-       uncore_pci_uncores = snb_pci_uncores;
-       uncore_pci_driver = imc_drv;
-
-       return 0;
-}
-
-int snb_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-
-int ivb_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-int hsw_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-
-int bdw_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-
-int skl_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-
-/* end of Sandy Bridge uncore support */
-
-/* Nehalem uncore support */
-static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
-       wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
-}
-
-static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
-       wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
-}
-
-static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (hwc->idx < UNCORE_PMC_IDX_FIXED)
-               wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
-       else
-               wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
-}
-
-static struct attribute *nhm_uncore_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask8.attr,
-       NULL,
-};
-
-static struct attribute_group nhm_uncore_format_group = {
-       .name = "format",
-       .attrs = nhm_uncore_formats_attr,
-};
-
-static struct uncore_event_desc nhm_uncore_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"),
-       INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
-       INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhm_uncore_msr_ops = {
-       .disable_box    = nhm_uncore_msr_disable_box,
-       .enable_box     = nhm_uncore_msr_enable_box,
-       .disable_event  = snb_uncore_msr_disable_event,
-       .enable_event   = nhm_uncore_msr_enable_event,
-       .read_counter   = uncore_msr_read_counter,
-};
-
-static struct intel_uncore_type nhm_uncore = {
-       .name           = "",
-       .num_counters   = 8,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .event_ctl      = NHM_UNC_PERFEVTSEL0,
-       .perf_ctr       = NHM_UNC_UNCORE_PMC0,
-       .fixed_ctr      = NHM_UNC_FIXED_CTR,
-       .fixed_ctl      = NHM_UNC_FIXED_CTR_CTRL,
-       .event_mask     = NHM_UNC_RAW_EVENT_MASK,
-       .event_descs    = nhm_uncore_events,
-       .ops            = &nhm_uncore_msr_ops,
-       .format_group   = &nhm_uncore_format_group,
-};
-
-static struct intel_uncore_type *nhm_msr_uncores[] = {
-       &nhm_uncore,
-       NULL,
-};
-
-void nhm_uncore_cpu_init(void)
-{
-       uncore_msr_uncores = nhm_msr_uncores;
-}
-
-/* end of Nehalem uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
deleted file mode 100644 (file)
index 33acb88..0000000
+++ /dev/null
@@ -1,3126 +0,0 @@
-/* SandyBridge-EP/IvyTown uncore support */
-#include "perf_event_intel_uncore.h"
-
-
-/* SNB-EP Box level control */
-#define SNBEP_PMON_BOX_CTL_RST_CTRL    (1 << 0)
-#define SNBEP_PMON_BOX_CTL_RST_CTRS    (1 << 1)
-#define SNBEP_PMON_BOX_CTL_FRZ         (1 << 8)
-#define SNBEP_PMON_BOX_CTL_FRZ_EN      (1 << 16)
-#define SNBEP_PMON_BOX_CTL_INT         (SNBEP_PMON_BOX_CTL_RST_CTRL | \
-                                        SNBEP_PMON_BOX_CTL_RST_CTRS | \
-                                        SNBEP_PMON_BOX_CTL_FRZ_EN)
-/* SNB-EP event control */
-#define SNBEP_PMON_CTL_EV_SEL_MASK     0x000000ff
-#define SNBEP_PMON_CTL_UMASK_MASK      0x0000ff00
-#define SNBEP_PMON_CTL_RST             (1 << 17)
-#define SNBEP_PMON_CTL_EDGE_DET                (1 << 18)
-#define SNBEP_PMON_CTL_EV_SEL_EXT      (1 << 21)
-#define SNBEP_PMON_CTL_EN              (1 << 22)
-#define SNBEP_PMON_CTL_INVERT          (1 << 23)
-#define SNBEP_PMON_CTL_TRESH_MASK      0xff000000
-#define SNBEP_PMON_RAW_EVENT_MASK      (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                        SNBEP_PMON_CTL_UMASK_MASK | \
-                                        SNBEP_PMON_CTL_EDGE_DET | \
-                                        SNBEP_PMON_CTL_INVERT | \
-                                        SNBEP_PMON_CTL_TRESH_MASK)
-
-/* SNB-EP Ubox event control */
-#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK                0x1f000000
-#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK                \
-                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                SNBEP_PMON_CTL_UMASK_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_PMON_CTL_INVERT | \
-                                SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
-
-#define SNBEP_CBO_PMON_CTL_TID_EN              (1 << 19)
-#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK      (SNBEP_PMON_RAW_EVENT_MASK | \
-                                                SNBEP_CBO_PMON_CTL_TID_EN)
-
-/* SNB-EP PCU event control */
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK    0x0000c000
-#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK      0x1f000000
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT      (1 << 30)
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET    (1 << 31)
-#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK      \
-                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT | \
-                                SNBEP_PMON_CTL_INVERT | \
-                                SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-
-#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK      \
-                               (SNBEP_PMON_RAW_EVENT_MASK | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT)
-
-/* SNB-EP pci control register */
-#define SNBEP_PCI_PMON_BOX_CTL                 0xf4
-#define SNBEP_PCI_PMON_CTL0                    0xd8
-/* SNB-EP pci counter register */
-#define SNBEP_PCI_PMON_CTR0                    0xa0
-
-/* SNB-EP home agent register */
-#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0       0x40
-#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1       0x44
-#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH      0x48
-/* SNB-EP memory controller register */
-#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL                0xf0
-#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR                0xd0
-/* SNB-EP QPI register */
-#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0         0x228
-#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1         0x22c
-#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0          0x238
-#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1          0x23c
-
-/* SNB-EP Ubox register */
-#define SNBEP_U_MSR_PMON_CTR0                  0xc16
-#define SNBEP_U_MSR_PMON_CTL0                  0xc10
-
-#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL                0xc08
-#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR                0xc09
-
-/* SNB-EP Cbo register */
-#define SNBEP_C0_MSR_PMON_CTR0                 0xd16
-#define SNBEP_C0_MSR_PMON_CTL0                 0xd10
-#define SNBEP_C0_MSR_PMON_BOX_CTL              0xd04
-#define SNBEP_C0_MSR_PMON_BOX_FILTER           0xd14
-#define SNBEP_CBO_MSR_OFFSET                   0x20
-
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID      0x1f
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID      0x3fc00
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE    0x7c0000
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC      0xff800000
-
-#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) {   \
-       .event = (e),                           \
-       .msr = SNBEP_C0_MSR_PMON_BOX_FILTER,    \
-       .config_mask = (m),                     \
-       .idx = (i)                              \
-}
-
-/* SNB-EP PCU register */
-#define SNBEP_PCU_MSR_PMON_CTR0                        0xc36
-#define SNBEP_PCU_MSR_PMON_CTL0                        0xc30
-#define SNBEP_PCU_MSR_PMON_BOX_CTL             0xc24
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER          0xc34
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK     0xffffffff
-#define SNBEP_PCU_MSR_CORE_C3_CTR              0x3fc
-#define SNBEP_PCU_MSR_CORE_C6_CTR              0x3fd
-
-/* IVBEP event control */
-#define IVBEP_PMON_BOX_CTL_INT         (SNBEP_PMON_BOX_CTL_RST_CTRL | \
-                                        SNBEP_PMON_BOX_CTL_RST_CTRS)
-#define IVBEP_PMON_RAW_EVENT_MASK              (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                        SNBEP_PMON_CTL_UMASK_MASK | \
-                                        SNBEP_PMON_CTL_EDGE_DET | \
-                                        SNBEP_PMON_CTL_TRESH_MASK)
-/* IVBEP Ubox */
-#define IVBEP_U_MSR_PMON_GLOBAL_CTL            0xc00
-#define IVBEP_U_PMON_GLOBAL_FRZ_ALL            (1 << 31)
-#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL          (1 << 29)
-
-#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK        \
-                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                SNBEP_PMON_CTL_UMASK_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
-/* IVBEP Cbo */
-#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK              (IVBEP_PMON_RAW_EVENT_MASK | \
-                                                SNBEP_CBO_PMON_CTL_TID_EN)
-
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID              (0x1fULL << 0)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK     (0xfULL << 5)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE    (0x3fULL << 17)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID              (0xffffULL << 32)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC              (0x1ffULL << 52)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6               (0x1ULL << 61)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC               (0x1ULL << 62)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC     (0x1ULL << 63)
-
-/* IVBEP home agent */
-#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST                (1 << 16)
-#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK               \
-                               (IVBEP_PMON_RAW_EVENT_MASK | \
-                                IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST)
-/* IVBEP PCU */
-#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK      \
-                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-/* IVBEP QPI */
-#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK      \
-                               (IVBEP_PMON_RAW_EVENT_MASK | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT)
-
-#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
-                               ((1ULL << (n)) - 1)))
-
-/* Haswell-EP Ubox */
-#define HSWEP_U_MSR_PMON_CTR0                  0x709
-#define HSWEP_U_MSR_PMON_CTL0                  0x705
-#define HSWEP_U_MSR_PMON_FILTER                        0x707
-
-#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL                0x703
-#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR                0x704
-
-#define HSWEP_U_MSR_PMON_BOX_FILTER_TID                (0x1 << 0)
-#define HSWEP_U_MSR_PMON_BOX_FILTER_CID                (0x1fULL << 1)
-#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \
-                                       (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \
-                                        HSWEP_U_MSR_PMON_BOX_FILTER_CID)
-
-/* Haswell-EP CBo */
-#define HSWEP_C0_MSR_PMON_CTR0                 0xe08
-#define HSWEP_C0_MSR_PMON_CTL0                 0xe01
-#define HSWEP_C0_MSR_PMON_BOX_CTL                      0xe00
-#define HSWEP_C0_MSR_PMON_BOX_FILTER0          0xe05
-#define HSWEP_CBO_MSR_OFFSET                   0x10
-
-
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID              (0x3fULL << 0)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK     (0xfULL << 6)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE    (0x7fULL << 17)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID              (0xffffULL << 32)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC              (0x1ffULL << 52)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6               (0x1ULL << 61)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC               (0x1ULL << 62)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC     (0x1ULL << 63)
-
-
-/* Haswell-EP Sbox */
-#define HSWEP_S0_MSR_PMON_CTR0                 0x726
-#define HSWEP_S0_MSR_PMON_CTL0                 0x721
-#define HSWEP_S0_MSR_PMON_BOX_CTL                      0x720
-#define HSWEP_SBOX_MSR_OFFSET                  0xa
-#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK                (SNBEP_PMON_RAW_EVENT_MASK | \
-                                                SNBEP_CBO_PMON_CTL_TID_EN)
-
-/* Haswell-EP PCU */
-#define HSWEP_PCU_MSR_PMON_CTR0                        0x717
-#define HSWEP_PCU_MSR_PMON_CTL0                        0x711
-#define HSWEP_PCU_MSR_PMON_BOX_CTL             0x710
-#define HSWEP_PCU_MSR_PMON_BOX_FILTER          0x715
-
-/* KNL Ubox */
-#define KNL_U_MSR_PMON_RAW_EVENT_MASK \
-                                       (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \
-                                               SNBEP_CBO_PMON_CTL_TID_EN)
-/* KNL CHA */
-#define KNL_CHA_MSR_OFFSET                     0xc
-#define KNL_CHA_MSR_PMON_CTL_QOR               (1 << 16)
-#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \
-                                       (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \
-                                        KNL_CHA_MSR_PMON_CTL_QOR)
-#define KNL_CHA_MSR_PMON_BOX_FILTER_TID                0x1ff
-#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE      (7 << 18)
-#define KNL_CHA_MSR_PMON_BOX_FILTER_OP         (0xfffffe2aULL << 32)
-
-/* KNL EDC/MC UCLK */
-#define KNL_UCLK_MSR_PMON_CTR0_LOW             0x400
-#define KNL_UCLK_MSR_PMON_CTL0                 0x420
-#define KNL_UCLK_MSR_PMON_BOX_CTL              0x430
-#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW       0x44c
-#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL       0x454
-#define KNL_PMON_FIXED_CTL_EN                  0x1
-
-/* KNL EDC */
-#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW                0xa00
-#define KNL_EDC0_ECLK_MSR_PMON_CTL0            0xa20
-#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL         0xa30
-#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW  0xa3c
-#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL  0xa44
-
-/* KNL MC */
-#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW          0xb00
-#define KNL_MC0_CH0_MSR_PMON_CTL0              0xb20
-#define KNL_MC0_CH0_MSR_PMON_BOX_CTL           0xb30
-#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW         0xb3c
-#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL         0xb44
-
-/* KNL IRP */
-#define KNL_IRP_PCI_PMON_BOX_CTL               0xf0
-#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK                (SNBEP_PMON_RAW_EVENT_MASK | \
-                                                KNL_CHA_MSR_PMON_CTL_QOR)
-/* KNL PCU */
-#define KNL_PCU_PMON_CTL_EV_SEL_MASK           0x0000007f
-#define KNL_PCU_PMON_CTL_USE_OCC_CTR           (1 << 7)
-#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK                0x3f000000
-#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK        \
-                               (KNL_PCU_PMON_CTL_EV_SEL_MASK | \
-                                KNL_PCU_PMON_CTL_USE_OCC_CTR | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_CBO_PMON_CTL_TID_EN | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT | \
-                                SNBEP_PMON_CTL_INVERT | \
-                                KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
-DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
-DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
-DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
-DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
-DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
-DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
-DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
-DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
-DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
-DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
-DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
-DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
-DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
-DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
-DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
-DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
-DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
-DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
-DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
-DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
-DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
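Each DEFINE_UNCORE_FORMAT_ATTR() above becomes a file under the PMU's sysfs format/ directory describing where that named field lives in the event's config words, so the perf tool can assemble strings such as event=0x34,filter_opc=0x182 into raw register values. For example (path assumed from the usual uncore PMU sysfs layout), /sys/bus/event_source/devices/uncore_cbox_0/format/filter_opc would simply contain:

        config1:23-31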
-
-static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int box_ctl = uncore_pci_box_ctl(box);
-       u32 config = 0;
-
-       if (!pci_read_config_dword(pdev, box_ctl, &config)) {
-               config |= SNBEP_PMON_BOX_CTL_FRZ;
-               pci_write_config_dword(pdev, box_ctl, config);
-       }
-}
-
-static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int box_ctl = uncore_pci_box_ctl(box);
-       u32 config = 0;
-
-       if (!pci_read_config_dword(pdev, box_ctl, &config)) {
-               config &= ~SNBEP_PMON_BOX_CTL_FRZ;
-               pci_write_config_dword(pdev, box_ctl, config);
-       }
-}
-
-static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       pci_write_config_dword(pdev, hwc->config_base, hwc->config);
-}
-
-static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 count = 0;
-
-       pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
-       pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
-
-       return count;
-}
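The two config reads above fill the low and high 32-bit halves of count through pointer aliasing, which gives the right answer because x86 is little-endian. A minimal equivalent sketch without the aliasing, using the same event_base offsets:

        u32 lo = 0, hi = 0;

        pci_read_config_dword(pdev, hwc->event_base, &lo);
        pci_read_config_dword(pdev, hwc->event_base + 4, &hi);
        return ((u64)hi << 32) | lo;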
-
-static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int box_ctl = uncore_pci_box_ctl(box);
-
-       pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT);
-}
-
-static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
-       u64 config;
-       unsigned msr;
-
-       msr = uncore_msr_box_ctl(box);
-       if (msr) {
-               rdmsrl(msr, config);
-               config |= SNBEP_PMON_BOX_CTL_FRZ;
-               wrmsrl(msr, config);
-       }
-}
-
-static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
-       u64 config;
-       unsigned msr;
-
-       msr = uncore_msr_box_ctl(box);
-       if (msr) {
-               rdmsrl(msr, config);
-               config &= ~SNBEP_PMON_BOX_CTL_FRZ;
-               wrmsrl(msr, config);
-       }
-}
-
-static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE)
-               wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
-
-       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
-                                       struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       wrmsrl(hwc->config_base, hwc->config);
-}
-
-static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-
-       if (msr)
-               wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
-}
-
-static struct attribute *snbep_uncore_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute *snbep_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       NULL,
-};
-
-static struct attribute *snbep_uncore_cbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_filter_tid.attr,
-       &format_attr_filter_nid.attr,
-       &format_attr_filter_state.attr,
-       &format_attr_filter_opc.attr,
-       NULL,
-};
-
-static struct attribute *snbep_uncore_pcu_formats_attr[] = {
-       &format_attr_event_ext.attr,
-       &format_attr_occ_sel.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       &format_attr_occ_invert.attr,
-       &format_attr_occ_edge.attr,
-       &format_attr_filter_band0.attr,
-       &format_attr_filter_band1.attr,
-       &format_attr_filter_band2.attr,
-       &format_attr_filter_band3.attr,
-       NULL,
-};
-
-static struct attribute *snbep_uncore_qpi_formats_attr[] = {
-       &format_attr_event_ext.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_match_rds.attr,
-       &format_attr_match_rnid30.attr,
-       &format_attr_match_rnid4.attr,
-       &format_attr_match_dnid.attr,
-       &format_attr_match_mc.attr,
-       &format_attr_match_opc.attr,
-       &format_attr_match_vnw.attr,
-       &format_attr_match0.attr,
-       &format_attr_match1.attr,
-       &format_attr_mask_rds.attr,
-       &format_attr_mask_rnid30.attr,
-       &format_attr_mask_rnid4.attr,
-       &format_attr_mask_dnid.attr,
-       &format_attr_mask_mc.attr,
-       &format_attr_mask_opc.attr,
-       &format_attr_mask_vnw.attr,
-       &format_attr_mask0.attr,
-       &format_attr_mask1.attr,
-       NULL,
-};
-
-static struct uncore_event_desc snbep_uncore_imc_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0x00"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
-       { /* end: all zeroes */ },
-};
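The .scale of 6.103515625e-5 on the CAS events is 64 / 2^20: each CAS transfers one 64-byte cache line, so scaling the raw count reports the traffic in MiB, matching the .unit string. Once registered, the descriptors appear as named perf events; on a typical SNB-EP system (PMU instance names assumed) per-socket memory bandwidth can be read with something like:

        # perf stat -a -e uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ sleep 1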
-
-static struct uncore_event_desc snbep_uncore_qpi_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"),
-       INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
-       INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x102,umask=0x08"),
-       INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x103,umask=0x04"),
-       { /* end: all zeroes */ },
-};
-
-static struct attribute_group snbep_uncore_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_ubox_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_ubox_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_cbox_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_cbox_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_pcu_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_pcu_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_qpi_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_qpi_formats_attr,
-};
-
-#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT()                   \
-       .disable_box    = snbep_uncore_msr_disable_box,         \
-       .enable_box     = snbep_uncore_msr_enable_box,          \
-       .disable_event  = snbep_uncore_msr_disable_event,       \
-       .enable_event   = snbep_uncore_msr_enable_event,        \
-       .read_counter   = uncore_msr_read_counter
-
-#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT()                     \
-       __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),                   \
-       .init_box       = snbep_uncore_msr_init_box             \
-
-static struct intel_uncore_ops snbep_uncore_msr_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-};
-
-#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT()                     \
-       .init_box       = snbep_uncore_pci_init_box,            \
-       .disable_box    = snbep_uncore_pci_disable_box,         \
-       .enable_box     = snbep_uncore_pci_enable_box,          \
-       .disable_event  = snbep_uncore_pci_disable_event,       \
-       .read_counter   = snbep_uncore_pci_read_counter
-
-static struct intel_uncore_ops snbep_uncore_pci_ops = {
-       SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
-       .enable_event   = snbep_uncore_pci_enable_event,
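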
-};
-
-static struct event_constraint snbep_uncore_cbox_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
-       UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
-       UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
-       UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
-       EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
-       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2a, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type snbep_uncore_ubox = {
-       .name           = "ubox",
-       .num_counters   = 2,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .fixed_ctr_bits = 48,
-       .perf_ctr       = SNBEP_U_MSR_PMON_CTR0,
-       .event_ctl      = SNBEP_U_MSR_PMON_CTL0,
-       .event_mask     = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .ops            = &snbep_uncore_msr_ops,
-       .format_group   = &snbep_uncore_ubox_format_group,
-};
-
-static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
-       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
-                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2),
-       EVENT_EXTRA_END
-};
-
-static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-       int i;
-
-       if (uncore_box_is_fake(box))
-               return;
-
-       for (i = 0; i < 5; i++) {
-               if (reg1->alloc & (0x1 << i))
-                       atomic_sub(1 << (i * 6), &er->ref);
-       }
-       reg1->alloc = 0;
-}
-
-static struct event_constraint *
-__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event,
-                           u64 (*cbox_filter_mask)(int fields))
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-       int i, alloc = 0;
-       unsigned long flags;
-       u64 mask;
-
-       if (reg1->idx == EXTRA_REG_NONE)
-               return NULL;
-
-       raw_spin_lock_irqsave(&er->lock, flags);
-       for (i = 0; i < 5; i++) {
-               if (!(reg1->idx & (0x1 << i)))
-                       continue;
-               if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
-                       continue;
-
-               mask = cbox_filter_mask(0x1 << i);
-               if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) ||
-                   !((reg1->config ^ er->config) & mask)) {
-                       atomic_add(1 << (i * 6), &er->ref);
-                       er->config &= ~mask;
-                       er->config |= reg1->config & mask;
-                       alloc |= (0x1 << i);
-               } else {
-                       break;
-               }
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-       if (i < 5)
-               goto fail;
-
-       if (!uncore_box_is_fake(box))
-               reg1->alloc |= alloc;
-
-       return NULL;
-fail:
-       for (; i >= 0; i--) {
-               if (alloc & (0x1 << i))
-                       atomic_sub(1 << (i * 6), &er->ref);
-       }
-       return &uncore_constraint_empty;
-}
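All counters in a Cbox share one filter register, so each of the (up to) five filter fields is reference-counted rather than exclusively owned: er->ref packs five 6-bit counters, one per field, which is why references are taken and released in units of 1 << (i * 6) and read back with __BITS_VALUE(..., i, 6). A field is granted either when its refcount is zero or when the requested value matches what is already programmed. A standalone sketch of the packing arithmetic (plain C, illustrative only):

        #include <stdio.h>

        /* same extraction as __BITS_VALUE(ref, i, 6) in the driver */
        static unsigned int field_refcnt(unsigned int ref, int i)
        {
                return (ref >> (i * 6)) & 0x3f;
        }

        int main(void)
        {
                unsigned int ref = 0;

                ref += 1u << (2 * 6);   /* two users of field 2 (the state filter) */
                ref += 1u << (2 * 6);
                printf("%u\n", field_refcnt(ref, 2));   /* prints 2 */
                return 0;
        }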
-
-static u64 snbep_cbox_filter_mask(int fields)
-{
-       u64 mask = 0;
-
-       if (fields & 0x1)
-               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID;
-       if (fields & 0x2)
-               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID;
-       if (fields & 0x4)
-               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
-       if (fields & 0x8)
-               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
-
-       return mask;
-}
-
-static struct event_constraint *
-snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask);
-}
-
-static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct extra_reg *er;
-       int idx = 0;
-
-       for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               idx |= er->idx;
-       }
-
-       if (idx) {
-               reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
-                       SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
-               reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx);
-               reg1->idx = idx;
-       }
-       return 0;
-}
-
-static struct intel_uncore_ops snbep_uncore_cbox_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = snbep_cbox_hw_config,
-       .get_constraint         = snbep_cbox_get_constraint,
-       .put_constraint         = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type snbep_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 4,
-       .num_boxes              = 8,
-       .perf_ctr_bits          = 44,
-       .event_ctl              = SNBEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = SNBEP_C0_MSR_PMON_CTR0,
-       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = SNBEP_CBO_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = snbep_uncore_cbox_constraints,
-       .ops                    = &snbep_uncore_cbox_ops,
-       .format_group           = &snbep_uncore_cbox_format_group,
-};
-
-static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       u64 config = reg1->config;
-
-       if (new_idx > reg1->idx)
-               config <<= 8 * (new_idx - reg1->idx);
-       else
-               config >>= 8 * (reg1->idx - new_idx);
-
-       if (modify) {
-               hwc->config += new_idx - reg1->idx;
-               reg1->config = config;
-               reg1->idx = new_idx;
-       }
-       return config;
-}
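Each PCU band filter occupies one byte of the shared filter register, so migrating an event from band reg1->idx to new_idx just slides its 8-bit filter value to the new byte and bumps the event select by the same delta. Worked example with made-up values: a filter of 0x30 programmed for idx 1 sits in config as 0x3000; moving it to idx 3 shifts it left by 8 * (3 - 1) = 16 bits to 0x30000000, and the event select in hwc->config goes from 0xc to 0xe.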
-
-static struct event_constraint *
-snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-       unsigned long flags;
-       int idx = reg1->idx;
-       u64 mask, config1 = reg1->config;
-       bool ok = false;
-
-       if (reg1->idx == EXTRA_REG_NONE ||
-           (!uncore_box_is_fake(box) && reg1->alloc))
-               return NULL;
-again:
-       mask = 0xffULL << (idx * 8);
-       raw_spin_lock_irqsave(&er->lock, flags);
-       if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
-           !((config1 ^ er->config) & mask)) {
-               atomic_add(1 << (idx * 8), &er->ref);
-               er->config &= ~mask;
-               er->config |= config1 & mask;
-               ok = true;
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       if (!ok) {
-               idx = (idx + 1) % 4;
-               if (idx != reg1->idx) {
-                       config1 = snbep_pcu_alter_er(event, idx, false);
-                       goto again;
-               }
-               return &uncore_constraint_empty;
-       }
-
-       if (!uncore_box_is_fake(box)) {
-               if (idx != reg1->idx)
-                       snbep_pcu_alter_er(event, idx, true);
-               reg1->alloc = 1;
-       }
-       return NULL;
-}
-
-static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-
-       if (uncore_box_is_fake(box) || !reg1->alloc)
-               return;
-
-       atomic_sub(1 << (reg1->idx * 8), &er->ref);
-       reg1->alloc = 0;
-}
-
-static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
-
-       if (ev_sel >= 0xb && ev_sel <= 0xe) {
-               reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
-               reg1->idx = ev_sel - 0xb;
-               reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
-       }
-       return 0;
-}
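Event selects 0xb through 0xe each use one of the four band filters, and reg1->idx = ev_sel - 0xb picks which byte of config1 (filter_band0..filter_band3 in the format group above) carries that event's threshold. An illustrative invocation, with the event encoding and threshold value assumed rather than taken from this patch:

        # perf stat -a -e 'uncore_pcu/event=0xb,filter_band0=0x20/' sleep 1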
-
-static struct intel_uncore_ops snbep_uncore_pcu_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = snbep_pcu_hw_config,
-       .get_constraint         = snbep_pcu_get_constraint,
-       .put_constraint         = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type snbep_uncore_pcu = {
-       .name                   = "pcu",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCU_MSR_PMON_CTR0,
-       .event_ctl              = SNBEP_PCU_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCU_MSR_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &snbep_uncore_pcu_ops,
-       .format_group           = &snbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *snbep_msr_uncores[] = {
-       &snbep_uncore_ubox,
-       &snbep_uncore_cbox,
-       &snbep_uncore_pcu,
-       NULL,
-};
-
-void snbep_uncore_cpu_init(void)
-{
-       if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-       uncore_msr_uncores = snbep_msr_uncores;
-}
-
-enum {
-       SNBEP_PCI_QPI_PORT0_FILTER,
-       SNBEP_PCI_QPI_PORT1_FILTER,
-       HSWEP_PCI_PCU_3,
-};
-
-static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
-               reg1->idx = 0;
-               reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
-               reg1->config = event->attr.config1;
-               reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
-               reg2->config = event->attr.config2;
-       }
-       return 0;
-}
-
-static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
-               struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx];
-               if (filter_pdev) {
-                       pci_write_config_dword(filter_pdev, reg1->reg,
-                                               (u32)reg1->config);
-                       pci_write_config_dword(filter_pdev, reg1->reg + 4,
-                                               (u32)(reg1->config >> 32));
-                       pci_write_config_dword(filter_pdev, reg2->reg,
-                                               (u32)reg2->config);
-                       pci_write_config_dword(filter_pdev, reg2->reg + 4,
-                                               (u32)(reg2->config >> 32));
-               }
-       }
-
-       pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
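The QPI match/mask registers do not live in the PMON function itself but in a companion filter PCI device per port (the 0x3c86/0x3c96 entries in the id table below), which the core records in uncore_extra_pci_dev[] at probe time; when the packet-match event (event select 0x38) is enabled, the 64-bit match and mask values from config1/config2 are written to that device as two dwords each. Illustrative usage, with the match/mask values made up and relying on the match0/mask0 format attributes defined above:

        # perf stat -a -e 'uncore_qpi_0/event=0x38,match0=0x102,mask0=0x1ff/' sleep 1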
-
-static struct intel_uncore_ops snbep_uncore_qpi_ops = {
-       SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
-       .enable_event           = snbep_qpi_enable_event,
-       .hw_config              = snbep_qpi_hw_config,
-       .get_constraint         = uncore_get_constraint,
-       .put_constraint         = uncore_put_constraint,
-};
-
-#define SNBEP_UNCORE_PCI_COMMON_INIT()                         \
-       .perf_ctr       = SNBEP_PCI_PMON_CTR0,                  \
-       .event_ctl      = SNBEP_PCI_PMON_CTL0,                  \
-       .event_mask     = SNBEP_PMON_RAW_EVENT_MASK,            \
-       .box_ctl        = SNBEP_PCI_PMON_BOX_CTL,               \
-       .ops            = &snbep_uncore_pci_ops,                \
-       .format_group   = &snbep_uncore_format_group
-
-static struct intel_uncore_type snbep_uncore_ha = {
-       .name           = "ha",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 4,
-       .num_boxes      = 4,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
-       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
-       .event_descs    = snbep_uncore_imc_events,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_qpi = {
-       .name                   = "qpi",
-       .num_counters           = 4,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &snbep_uncore_qpi_ops,
-       .event_descs            = snbep_uncore_qpi_events,
-       .format_group           = &snbep_uncore_qpi_format_group,
-};
-
-
-static struct intel_uncore_type snbep_uncore_r2pcie = {
-       .name           = "r2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .constraints    = snbep_uncore_r2pcie_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_r3qpi = {
-       .name           = "r3qpi",
-       .num_counters   = 3,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 44,
-       .constraints    = snbep_uncore_r3qpi_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
-       SNBEP_PCI_UNCORE_HA,
-       SNBEP_PCI_UNCORE_IMC,
-       SNBEP_PCI_UNCORE_QPI,
-       SNBEP_PCI_UNCORE_R2PCIE,
-       SNBEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *snbep_pci_uncores[] = {
-       [SNBEP_PCI_UNCORE_HA]           = &snbep_uncore_ha,
-       [SNBEP_PCI_UNCORE_IMC]          = &snbep_uncore_imc,
-       [SNBEP_PCI_UNCORE_QPI]          = &snbep_uncore_qpi,
-       [SNBEP_PCI_UNCORE_R2PCIE]       = &snbep_uncore_r2pcie,
-       [SNBEP_PCI_UNCORE_R3QPI]        = &snbep_uncore_r3qpi,
-       NULL,
-};
-
-static const struct pci_device_id snbep_uncore_pci_ids[] = {
-       { /* Home Agent */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
-       },
-       { /* MC Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
-       },
-       { /* MC Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
-       },
-       { /* MC Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
-       },
-       { /* MC Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
-       },
-       { /* QPI Port 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
-       },
-       { /* QPI Port 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
-       },
-       { /* R2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
-       },
-       { /* R3QPI Link 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
-       },
-       { /* R3QPI Link 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
-       },
-       { /* QPI Port 0 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT0_FILTER),
-       },
-       { /* QPI Port 1 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT1_FILTER),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver snbep_uncore_pci_driver = {
-       .name           = "snbep_uncore",
-       .id_table       = snbep_uncore_pci_ids,
-};
-
-/*
- * build pci bus to socket mapping
- */
-static int snbep_pci2phy_map_init(int devid)
-{
-       struct pci_dev *ubox_dev = NULL;
-       int i, bus, nodeid, segment;
-       struct pci2phy_map *map;
-       int err = 0;
-       u32 config = 0;
-
-       while (1) {
-               /* find the UBOX device */
-               ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev);
-               if (!ubox_dev)
-                       break;
-               bus = ubox_dev->bus->number;
-               /* get the Node ID of the local register */
-               err = pci_read_config_dword(ubox_dev, 0x40, &config);
-               if (err)
-                       break;
-               nodeid = config;
-               /* get the Node ID mapping */
-               err = pci_read_config_dword(ubox_dev, 0x54, &config);
-               if (err)
-                       break;
-
-               segment = pci_domain_nr(ubox_dev->bus);
-               raw_spin_lock(&pci2phy_map_lock);
-               map = __find_pci2phy_map(segment);
-               if (!map) {
-                       raw_spin_unlock(&pci2phy_map_lock);
-                       err = -ENOMEM;
-                       break;
-               }
-
-               /*
-                * every three bits in the Node ID mapping register maps
-                * to a particular node.
-                */
-               for (i = 0; i < 8; i++) {
-                       if (nodeid == ((config >> (3 * i)) & 0x7)) {
-                               map->pbus_to_physid[bus] = i;
-                               break;
-                       }
-               }
-               raw_spin_unlock(&pci2phy_map_lock);
-       }
-
-       if (!err) {
-               /*
-                * For PCI bus with no UBOX device, find the next bus
-                * that has UBOX device and use its mapping.
-                */
-               raw_spin_lock(&pci2phy_map_lock);
-               list_for_each_entry(map, &pci2phy_map_head, list) {
-                       i = -1;
-                       for (bus = 255; bus >= 0; bus--) {
-                               if (map->pbus_to_physid[bus] >= 0)
-                                       i = map->pbus_to_physid[bus];
-                               else
-                                       map->pbus_to_physid[bus] = i;
-                       }
-               }
-               raw_spin_unlock(&pci2phy_map_lock);
-       }
-
-       pci_dev_put(ubox_dev);
-
-       return err ? pcibios_err_to_errno(err) : 0;
-}
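Config offset 0x40 of the UBOX device holds the socket's node id and offset 0x54 holds a node-id-to-slot mapping, three bits per slot; the slot whose 3-bit field equals the local node id becomes the physical package id recorded for that bus, and the second pass copies each mapping onto nearby buses that have no UBOX of their own. Worked example with assumed register values: a node id of 1 and a mapping register of 0x8 (slot 0 holds node 0, slot 1 holds node 1) match at i = 1, so devices on that bus are attributed to physical socket 1.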
-
-int snbep_uncore_pci_init(void)
-{
-       int ret = snbep_pci2phy_map_init(0x3ce0);
-       if (ret)
-               return ret;
-       uncore_pci_uncores = snbep_pci_uncores;
-       uncore_pci_driver = &snbep_uncore_pci_driver;
-       return 0;
-}
-/* end of Sandy Bridge-EP uncore support */
-
-/* IvyTown uncore support */
-static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-       if (msr)
-               wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT);
-}
-
-static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-
-       pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
-}
-
-#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT()                     \
-       .init_box       = ivbep_uncore_msr_init_box,            \
-       .disable_box    = snbep_uncore_msr_disable_box,         \
-       .enable_box     = snbep_uncore_msr_enable_box,          \
-       .disable_event  = snbep_uncore_msr_disable_event,       \
-       .enable_event   = snbep_uncore_msr_enable_event,        \
-       .read_counter   = uncore_msr_read_counter
-
-static struct intel_uncore_ops ivbep_uncore_msr_ops = {
-       IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-};
-
-static struct intel_uncore_ops ivbep_uncore_pci_ops = {
-       .init_box       = ivbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = snbep_uncore_pci_enable_box,
-       .disable_event  = snbep_uncore_pci_disable_event,
-       .enable_event   = snbep_uncore_pci_enable_event,
-       .read_counter   = snbep_uncore_pci_read_counter,
-};
-
-#define IVBEP_UNCORE_PCI_COMMON_INIT()                         \
-       .perf_ctr       = SNBEP_PCI_PMON_CTR0,                  \
-       .event_ctl      = SNBEP_PCI_PMON_CTL0,                  \
-       .event_mask     = IVBEP_PMON_RAW_EVENT_MASK,            \
-       .box_ctl        = SNBEP_PCI_PMON_BOX_CTL,               \
-       .ops            = &ivbep_uncore_pci_ops,                        \
-       .format_group   = &ivbep_uncore_format_group
-
-static struct attribute *ivbep_uncore_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute *ivbep_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       NULL,
-};
-
-static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_filter_tid.attr,
-       &format_attr_filter_link.attr,
-       &format_attr_filter_state2.attr,
-       &format_attr_filter_nid2.attr,
-       &format_attr_filter_opc2.attr,
-       &format_attr_filter_nc.attr,
-       &format_attr_filter_c6.attr,
-       &format_attr_filter_isoc.attr,
-       NULL,
-};
-
-static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
-       &format_attr_event_ext.attr,
-       &format_attr_occ_sel.attr,
-       &format_attr_edge.attr,
-       &format_attr_thresh5.attr,
-       &format_attr_occ_invert.attr,
-       &format_attr_occ_edge.attr,
-       &format_attr_filter_band0.attr,
-       &format_attr_filter_band1.attr,
-       &format_attr_filter_band2.attr,
-       &format_attr_filter_band3.attr,
-       NULL,
-};
-
-static struct attribute *ivbep_uncore_qpi_formats_attr[] = {
-       &format_attr_event_ext.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_match_rds.attr,
-       &format_attr_match_rnid30.attr,
-       &format_attr_match_rnid4.attr,
-       &format_attr_match_dnid.attr,
-       &format_attr_match_mc.attr,
-       &format_attr_match_opc.attr,
-       &format_attr_match_vnw.attr,
-       &format_attr_match0.attr,
-       &format_attr_match1.attr,
-       &format_attr_mask_rds.attr,
-       &format_attr_mask_rnid30.attr,
-       &format_attr_mask_rnid4.attr,
-       &format_attr_mask_dnid.attr,
-       &format_attr_mask_mc.attr,
-       &format_attr_mask_opc.attr,
-       &format_attr_mask_vnw.attr,
-       &format_attr_mask0.attr,
-       &format_attr_mask1.attr,
-       NULL,
-};
-
-static struct attribute_group ivbep_uncore_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_ubox_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_ubox_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_cbox_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_cbox_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_pcu_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_pcu_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_qpi_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_qpi_formats_attr,
-};
-
-static struct intel_uncore_type ivbep_uncore_ubox = {
-       .name           = "ubox",
-       .num_counters   = 2,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .fixed_ctr_bits = 48,
-       .perf_ctr       = SNBEP_U_MSR_PMON_CTR0,
-       .event_ctl      = SNBEP_U_MSR_PMON_CTL0,
-       .event_mask     = IVBEP_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .ops            = &ivbep_uncore_msr_ops,
-       .format_group   = &ivbep_uncore_ubox_format_group,
-};
-
-static struct extra_reg ivbep_uncore_cbox_extra_regs[] = {
-       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
-                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
-       EVENT_EXTRA_END
-};
-
-static u64 ivbep_cbox_filter_mask(int fields)
-{
-       u64 mask = 0;
-
-       if (fields & 0x1)
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID;
-       if (fields & 0x2)
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK;
-       if (fields & 0x4)
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
-       if (fields & 0x8)
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID;
-       if (fields & 0x10) {
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC;
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6;
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
-       }
-
-       return mask;
-}
-
-static struct event_constraint *
-ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask);
-}
-
-static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct extra_reg *er;
-       int idx = 0;
-
-       for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               idx |= er->idx;
-       }
-
-       if (idx) {
-               reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
-                       SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
-               reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx);
-               reg1->idx = idx;
-       }
-       return 0;
-}
-
-static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               u64 filter = uncore_shared_reg_config(box, 0);
-               wrmsrl(reg1->reg, filter & 0xffffffff);
-               wrmsrl(reg1->reg + 6, filter >> 32);
-       }
-
-       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops ivbep_uncore_cbox_ops = {
-       .init_box               = ivbep_uncore_msr_init_box,
-       .disable_box            = snbep_uncore_msr_disable_box,
-       .enable_box             = snbep_uncore_msr_enable_box,
-       .disable_event          = snbep_uncore_msr_disable_event,
-       .enable_event           = ivbep_cbox_enable_event,
-       .read_counter           = uncore_msr_read_counter,
-       .hw_config              = ivbep_cbox_hw_config,
-       .get_constraint         = ivbep_cbox_get_constraint,
-       .put_constraint         = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 4,
-       .num_boxes              = 15,
-       .perf_ctr_bits          = 44,
-       .event_ctl              = SNBEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = SNBEP_C0_MSR_PMON_CTR0,
-       .event_mask             = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = SNBEP_CBO_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = snbep_uncore_cbox_constraints,
-       .ops                    = &ivbep_uncore_cbox_ops,
-       .format_group           = &ivbep_uncore_cbox_format_group,
-};
-
-static struct intel_uncore_ops ivbep_uncore_pcu_ops = {
-       IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = snbep_pcu_hw_config,
-       .get_constraint         = snbep_pcu_get_constraint,
-       .put_constraint         = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_pcu = {
-       .name                   = "pcu",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCU_MSR_PMON_CTR0,
-       .event_ctl              = SNBEP_PCU_MSR_PMON_CTL0,
-       .event_mask             = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCU_MSR_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &ivbep_uncore_pcu_ops,
-       .format_group           = &ivbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *ivbep_msr_uncores[] = {
-       &ivbep_uncore_ubox,
-       &ivbep_uncore_cbox,
-       &ivbep_uncore_pcu,
-       NULL,
-};
-
-void ivbep_uncore_cpu_init(void)
-{
-       if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-       uncore_msr_uncores = ivbep_msr_uncores;
-}
-
-static struct intel_uncore_type ivbep_uncore_ha = {
-       .name           = "ha",
-       .num_counters   = 4,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 48,
-       IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type ivbep_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 4,
-       .num_boxes      = 8,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
-       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
-       .event_descs    = snbep_uncore_imc_events,
-       IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-/* registers in IRP boxes are not properly aligned */
-static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
-static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
-
-static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx],
-                              hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config);
-}
-
-static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 count = 0;
-
-       pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
-       pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
-
-       return count;
-}
-
-static struct intel_uncore_ops ivbep_uncore_irp_ops = {
-       .init_box       = ivbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = snbep_uncore_pci_enable_box,
-       .disable_event  = ivbep_uncore_irp_disable_event,
-       .enable_event   = ivbep_uncore_irp_enable_event,
-       .read_counter   = ivbep_uncore_irp_read_counter,
-};
-
-static struct intel_uncore_type ivbep_uncore_irp = {
-       .name                   = "irp",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .event_mask             = IVBEP_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .ops                    = &ivbep_uncore_irp_ops,
-       .format_group           = &ivbep_uncore_format_group,
-};
-
-static struct intel_uncore_ops ivbep_uncore_qpi_ops = {
-       .init_box       = ivbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = snbep_uncore_pci_enable_box,
-       .disable_event  = snbep_uncore_pci_disable_event,
-       .enable_event   = snbep_qpi_enable_event,
-       .read_counter   = snbep_uncore_pci_read_counter,
-       .hw_config      = snbep_qpi_hw_config,
-       .get_constraint = uncore_get_constraint,
-       .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_qpi = {
-       .name                   = "qpi",
-       .num_counters           = 4,
-       .num_boxes              = 3,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &ivbep_uncore_qpi_ops,
-       .format_group           = &ivbep_uncore_qpi_format_group,
-};
-
-static struct intel_uncore_type ivbep_uncore_r2pcie = {
-       .name           = "r2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .constraints    = snbep_uncore_r2pcie_constraints,
-       IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type ivbep_uncore_r3qpi = {
-       .name           = "r3qpi",
-       .num_counters   = 3,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 44,
-       .constraints    = snbep_uncore_r3qpi_constraints,
-       IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
-       IVBEP_PCI_UNCORE_HA,
-       IVBEP_PCI_UNCORE_IMC,
-       IVBEP_PCI_UNCORE_IRP,
-       IVBEP_PCI_UNCORE_QPI,
-       IVBEP_PCI_UNCORE_R2PCIE,
-       IVBEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *ivbep_pci_uncores[] = {
-       [IVBEP_PCI_UNCORE_HA]   = &ivbep_uncore_ha,
-       [IVBEP_PCI_UNCORE_IMC]  = &ivbep_uncore_imc,
-       [IVBEP_PCI_UNCORE_IRP]  = &ivbep_uncore_irp,
-       [IVBEP_PCI_UNCORE_QPI]  = &ivbep_uncore_qpi,
-       [IVBEP_PCI_UNCORE_R2PCIE]       = &ivbep_uncore_r2pcie,
-       [IVBEP_PCI_UNCORE_R3QPI]        = &ivbep_uncore_r3qpi,
-       NULL,
-};
-
-static const struct pci_device_id ivbep_uncore_pci_ids[] = {
-       { /* Home Agent 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0),
-       },
-       { /* Home Agent 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1),
-       },
-       { /* MC0 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0),
-       },
-       { /* MC0 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1),
-       },
-       { /* MC0 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2),
-       },
-       { /* MC0 Channel 4 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3),
-       },
-       { /* MC1 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4),
-       },
-       { /* MC1 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5),
-       },
-       { /* MC1 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6),
-       },
-       { /* MC1 Channel 4 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7),
-       },
-       { /* IRP */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0),
-       },
-       { /* QPI0 Port 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0),
-       },
-       { /* QPI0 Port 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1),
-       },
-       { /* QPI1 Port 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2),
-       },
-       { /* R2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0),
-       },
-       { /* R3QPI0 Link 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0),
-       },
-       { /* R3QPI0 Link 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1),
-       },
-       { /* R3QPI1 Link 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2),
-       },
-       { /* QPI Port 0 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT0_FILTER),
-       },
-       { /* QPI Port 1 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT1_FILTER),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver ivbep_uncore_pci_driver = {
-       .name           = "ivbep_uncore",
-       .id_table       = ivbep_uncore_pci_ids,
-};
-
-int ivbep_uncore_pci_init(void)
-{
-       int ret = snbep_pci2phy_map_init(0x0e1e);
-       if (ret)
-               return ret;
-       uncore_pci_uncores = ivbep_pci_uncores;
-       uncore_pci_driver = &ivbep_uncore_pci_driver;
-       return 0;
-}
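-
-/*
- * A quick sketch of how the .driver_data values in ivbep_uncore_pci_ids[]
- * are decoded, assuming the usual UNCORE_PCI_DEV_DATA(type, idx) packing
- * from the common uncore code (uncore type in the upper byte, box index in
- * the lower byte):
- *
- *     UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5)
- *             == (IVBEP_PCI_UNCORE_IMC << 8) | 5
- *
- * so the generic PCI probe path can look up the intel_uncore_type through
- * ivbep_pci_uncores[] and tell which box instance (here MC1 channel 1) the
- * matched device represents.
- */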
-/* end of IvyTown uncore support */
-
-/* KNL uncore support */
-static struct attribute *knl_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       NULL,
-};
-
-static struct attribute_group knl_uncore_ubox_format_group = {
-       .name = "format",
-       .attrs = knl_uncore_ubox_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_ubox = {
-       .name                   = "ubox",
-       .num_counters           = 2,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
-       .event_mask             = KNL_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .ops                    = &snbep_uncore_msr_ops,
-       .format_group           = &knl_uncore_ubox_format_group,
-};
-
-static struct attribute *knl_uncore_cha_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_qor.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_filter_tid4.attr,
-       &format_attr_filter_link3.attr,
-       &format_attr_filter_state4.attr,
-       &format_attr_filter_local.attr,
-       &format_attr_filter_all_op.attr,
-       &format_attr_filter_nnm.attr,
-       &format_attr_filter_opc3.attr,
-       &format_attr_filter_nc.attr,
-       &format_attr_filter_isoc.attr,
-       NULL,
-};
-
-static struct attribute_group knl_uncore_cha_format_group = {
-       .name = "format",
-       .attrs = knl_uncore_cha_formats_attr,
-};
-
-static struct event_constraint knl_uncore_cha_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg knl_uncore_cha_extra_regs[] = {
-       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
-                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4),
-       EVENT_EXTRA_END
-};
-
-static u64 knl_cha_filter_mask(int fields)
-{
-       u64 mask = 0;
-
-       if (fields & 0x1)
-               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID;
-       if (fields & 0x2)
-               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE;
-       if (fields & 0x4)
-               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP;
-       return mask;
-}
-
-static struct event_constraint *
-knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask);
-}
-
-static int knl_cha_hw_config(struct intel_uncore_box *box,
-                            struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct extra_reg *er;
-       int idx = 0;
-
-       for (er = knl_uncore_cha_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               idx |= er->idx;
-       }
-
-       if (idx) {
-               reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
-                           KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
-               reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
-               reg1->idx = idx;
-       }
-       return 0;
-}
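-
-/*
- * Rough worked example for the lookup above: an event with event select
- * 0x36 matches the 0x36 entry in knl_uncore_cha_extra_regs[], so idx picks
- * up 0x4.  knl_cha_filter_mask(0x4) then exposes only the
- * KNL_CHA_MSR_PMON_BOX_FILTER_OP bits, and that subset of attr.config1 is
- * written to the per-box FILTER0 register when the event is enabled.
- */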
-
-static void hswep_cbox_enable_event(struct intel_uncore_box *box,
-                                   struct perf_event *event);
-
-static struct intel_uncore_ops knl_uncore_cha_ops = {
-       .init_box               = snbep_uncore_msr_init_box,
-       .disable_box            = snbep_uncore_msr_disable_box,
-       .enable_box             = snbep_uncore_msr_enable_box,
-       .disable_event          = snbep_uncore_msr_disable_event,
-       .enable_event           = hswep_cbox_enable_event,
-       .read_counter           = uncore_msr_read_counter,
-       .hw_config              = knl_cha_hw_config,
-       .get_constraint         = knl_cha_get_constraint,
-       .put_constraint         = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type knl_uncore_cha = {
-       .name                   = "cha",
-       .num_counters           = 4,
-       .num_boxes              = 38,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
-       .event_mask             = KNL_CHA_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = KNL_CHA_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = knl_uncore_cha_constraints,
-       .ops                    = &knl_uncore_cha_ops,
-       .format_group           = &knl_uncore_cha_format_group,
-};
-
-static struct attribute *knl_uncore_pcu_formats_attr[] = {
-       &format_attr_event2.attr,
-       &format_attr_use_occ_ctr.attr,
-       &format_attr_occ_sel.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh6.attr,
-       &format_attr_occ_invert.attr,
-       &format_attr_occ_edge_det.attr,
-       NULL,
-};
-
-static struct attribute_group knl_uncore_pcu_format_group = {
-       .name = "format",
-       .attrs = knl_uncore_pcu_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_pcu = {
-       .name                   = "pcu",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = HSWEP_PCU_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_PCU_MSR_PMON_CTL0,
-       .event_mask             = KNL_PCU_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_PCU_MSR_PMON_BOX_CTL,
-       .ops                    = &snbep_uncore_msr_ops,
-       .format_group           = &knl_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *knl_msr_uncores[] = {
-       &knl_uncore_ubox,
-       &knl_uncore_cha,
-       &knl_uncore_pcu,
-       NULL,
-};
-
-void knl_uncore_cpu_init(void)
-{
-       uncore_msr_uncores = knl_msr_uncores;
-}
-
-static void knl_uncore_imc_enable_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int box_ctl = uncore_pci_box_ctl(box);
-
-       pci_write_config_dword(pdev, box_ctl, 0);
-}
-
-static void knl_uncore_imc_enable_event(struct intel_uncore_box *box,
-                                       struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK)
-                                                       == UNCORE_FIXED_EVENT)
-               pci_write_config_dword(pdev, hwc->config_base,
-                                      hwc->config | KNL_PMON_FIXED_CTL_EN);
-       else
-               pci_write_config_dword(pdev, hwc->config_base,
-                                      hwc->config | SNBEP_PMON_CTL_EN);
-}
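-
-/*
- * In the helper above, a fixed event (event select == UNCORE_FIXED_EVENT)
- * is armed with KNL_PMON_FIXED_CTL_EN while ordinary events use the common
- * SNBEP_PMON_CTL_EN bit; the rest of hwc->config is written back unchanged.
- */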
-
-static struct intel_uncore_ops knl_uncore_imc_ops = {
-       .init_box       = snbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = knl_uncore_imc_enable_box,
-       .read_counter   = snbep_uncore_pci_read_counter,
-       .enable_event   = knl_uncore_imc_enable_event,
-       .disable_event  = snbep_uncore_pci_disable_event,
-};
-
-static struct intel_uncore_type knl_uncore_imc_uclk = {
-       .name                   = "imc_uclk",
-       .num_counters           = 4,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = KNL_UCLK_MSR_PMON_CTR0_LOW,
-       .event_ctl              = KNL_UCLK_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
-       .fixed_ctl              = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
-       .box_ctl                = KNL_UCLK_MSR_PMON_BOX_CTL,
-       .ops                    = &knl_uncore_imc_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_imc_dclk = {
-       .name                   = "imc",
-       .num_counters           = 4,
-       .num_boxes              = 6,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = KNL_MC0_CH0_MSR_PMON_CTR0_LOW,
-       .event_ctl              = KNL_MC0_CH0_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = KNL_MC0_CH0_MSR_PMON_FIXED_LOW,
-       .fixed_ctl              = KNL_MC0_CH0_MSR_PMON_FIXED_CTL,
-       .box_ctl                = KNL_MC0_CH0_MSR_PMON_BOX_CTL,
-       .ops                    = &knl_uncore_imc_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_edc_uclk = {
-       .name                   = "edc_uclk",
-       .num_counters           = 4,
-       .num_boxes              = 8,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = KNL_UCLK_MSR_PMON_CTR0_LOW,
-       .event_ctl              = KNL_UCLK_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
-       .fixed_ctl              = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
-       .box_ctl                = KNL_UCLK_MSR_PMON_BOX_CTL,
-       .ops                    = &knl_uncore_imc_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_edc_eclk = {
-       .name                   = "edc_eclk",
-       .num_counters           = 4,
-       .num_boxes              = 8,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW,
-       .event_ctl              = KNL_EDC0_ECLK_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW,
-       .fixed_ctl              = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL,
-       .box_ctl                = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL,
-       .ops                    = &knl_uncore_imc_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct event_constraint knl_uncore_m2pcie_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type knl_uncore_m2pcie = {
-       .name           = "m2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .constraints    = knl_uncore_m2pcie_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct attribute *knl_uncore_irp_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_qor.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute_group knl_uncore_irp_format_group = {
-       .name = "format",
-       .attrs = knl_uncore_irp_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_irp = {
-       .name                   = "irp",
-       .num_counters           = 2,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = KNL_IRP_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = KNL_IRP_PCI_PMON_BOX_CTL,
-       .ops                    = &snbep_uncore_pci_ops,
-       .format_group           = &knl_uncore_irp_format_group,
-};
-
-enum {
-       KNL_PCI_UNCORE_MC_UCLK,
-       KNL_PCI_UNCORE_MC_DCLK,
-       KNL_PCI_UNCORE_EDC_UCLK,
-       KNL_PCI_UNCORE_EDC_ECLK,
-       KNL_PCI_UNCORE_M2PCIE,
-       KNL_PCI_UNCORE_IRP,
-};
-
-static struct intel_uncore_type *knl_pci_uncores[] = {
-       [KNL_PCI_UNCORE_MC_UCLK]        = &knl_uncore_imc_uclk,
-       [KNL_PCI_UNCORE_MC_DCLK]        = &knl_uncore_imc_dclk,
-       [KNL_PCI_UNCORE_EDC_UCLK]       = &knl_uncore_edc_uclk,
-       [KNL_PCI_UNCORE_EDC_ECLK]       = &knl_uncore_edc_eclk,
-       [KNL_PCI_UNCORE_M2PCIE]         = &knl_uncore_m2pcie,
-       [KNL_PCI_UNCORE_IRP]            = &knl_uncore_irp,
-       NULL,
-};
-
-/*
- * KNL uses a common PCI device ID for multiple instances of an Uncore PMU
- * device type. Prior to KNL, each instance of a PMU device type had a unique
- * device ID.
- *
- *     PCI Device ID   Uncore PMU Devices
- *     ----------------------------------
- *     0x7841          MC0 UClk, MC1 UClk
- *     0x7843          MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2,
- *                     MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2
- *     0x7833          EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk,
- *                     EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk
- *     0x7835          EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk,
- *                     EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk
- *     0x7817          M2PCIe
- *     0x7814          IRP
- */
-
-static const struct pci_device_id knl_uncore_pci_ids[] = {
-       { /* MC UClk */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
-       },
-       { /* MC DClk Channel */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
-       },
-       { /* EDC UClk */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
-       },
-       { /* EDC EClk */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
-       },
-       { /* M2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0),
-       },
-       { /* IRP */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver knl_uncore_pci_driver = {
-       .name           = "knl_uncore",
-       .id_table       = knl_uncore_pci_ids,
-};
-
-int knl_uncore_pci_init(void)
-{
-       int ret;
-
-       /* All KNL PCI based PMON units are on the same PCI bus except IRP */
-       ret = snb_pci2phy_map_init(0x7814); /* IRP */
-       if (ret)
-               return ret;
-       ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */
-       if (ret)
-               return ret;
-       uncore_pci_uncores = knl_pci_uncores;
-       uncore_pci_driver = &knl_uncore_pci_driver;
-       return 0;
-}
-
-/* end of KNL uncore support */
-
-/* Haswell-EP uncore support */
-static struct attribute *hswep_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       &format_attr_filter_tid2.attr,
-       &format_attr_filter_cid.attr,
-       NULL,
-};
-
-static struct attribute_group hswep_uncore_ubox_format_group = {
-       .name = "format",
-       .attrs = hswep_uncore_ubox_formats_attr,
-};
-
-static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       reg1->reg = HSWEP_U_MSR_PMON_FILTER;
-       reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK;
-       reg1->idx = 0;
-       return 0;
-}
-
-static struct intel_uncore_ops hswep_uncore_ubox_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = hswep_ubox_hw_config,
-       .get_constraint         = uncore_get_constraint,
-       .put_constraint         = uncore_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_ubox = {
-       .name                   = "ubox",
-       .num_counters           = 2,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 44,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &hswep_uncore_ubox_ops,
-       .format_group           = &hswep_uncore_ubox_format_group,
-};
-
-static struct attribute *hswep_uncore_cbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_filter_tid3.attr,
-       &format_attr_filter_link2.attr,
-       &format_attr_filter_state3.attr,
-       &format_attr_filter_nid2.attr,
-       &format_attr_filter_opc2.attr,
-       &format_attr_filter_nc.attr,
-       &format_attr_filter_c6.attr,
-       &format_attr_filter_isoc.attr,
-       NULL,
-};
-
-static struct attribute_group hswep_uncore_cbox_format_group = {
-       .name = "format",
-       .attrs = hswep_uncore_cbox_formats_attr,
-};
-
-static struct event_constraint hswep_uncore_cbox_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg hswep_uncore_cbox_extra_regs[] = {
-       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
-                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
-       EVENT_EXTRA_END
-};
-
-static u64 hswep_cbox_filter_mask(int fields)
-{
-       u64 mask = 0;
-       if (fields & 0x1)
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID;
-       if (fields & 0x2)
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK;
-       if (fields & 0x4)
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE;
-       if (fields & 0x8)
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID;
-       if (fields & 0x10) {
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC;
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC;
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6;
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
-       }
-       return mask;
-}
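-
-/*
- * Note on the 0x10 group above: requesting the opcode-match filter also
- * unmasks the NC, C6 and ISOC match bits, i.e. any extra_reg entry whose
- * idx includes 0x10 lets those neighbouring attr.config1 bits through to
- * the box filter as one unit.
- */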
-
-static struct event_constraint *
-hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask);
-}
-
-static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct extra_reg *er;
-       int idx = 0;
-
-       for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               idx |= er->idx;
-       }
-
-       if (idx) {
-               reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
-                           HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
-               reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx);
-               reg1->idx = idx;
-       }
-       return 0;
-}
-
-static void hswep_cbox_enable_event(struct intel_uncore_box *box,
-                                 struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               u64 filter = uncore_shared_reg_config(box, 0);
-               wrmsrl(reg1->reg, filter & 0xffffffff);
-               wrmsrl(reg1->reg + 1, filter >> 32);
-       }
-
-       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
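-
-/*
- * The shared-reg value is handled as one 64-bit quantity spanning two
- * consecutive filter MSRs: the low 32 bits go to FILTER0 (reg1->reg) and
- * the high 32 bits to the next MSR, which is why hswep_cbox_hw_config()
- * above only needs to record a single reg/config pair per event.
- */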
-
-static struct intel_uncore_ops hswep_uncore_cbox_ops = {
-       .init_box               = snbep_uncore_msr_init_box,
-       .disable_box            = snbep_uncore_msr_disable_box,
-       .enable_box             = snbep_uncore_msr_enable_box,
-       .disable_event          = snbep_uncore_msr_disable_event,
-       .enable_event           = hswep_cbox_enable_event,
-       .read_counter           = uncore_msr_read_counter,
-       .hw_config              = hswep_cbox_hw_config,
-       .get_constraint         = hswep_cbox_get_constraint,
-       .put_constraint         = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 4,
-       .num_boxes              = 18,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
-       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = HSWEP_CBO_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = hswep_uncore_cbox_constraints,
-       .ops                    = &hswep_uncore_cbox_ops,
-       .format_group           = &hswep_uncore_cbox_format_group,
-};
-
-/*
- * Write SBOX Initialization register bit by bit to avoid spurious #GPs
- */
-static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-
-       if (msr) {
-               u64 init = SNBEP_PMON_BOX_CTL_INT;
-               u64 flags = 0;
-               int i;
-
-               for_each_set_bit(i, (unsigned long *)&init, 64) {
-                       flags |= (1ULL << i);
-                       wrmsrl(msr, flags);
-               }
-       }
-}
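-
-/*
- * Rough illustration of the workaround above: if SNBEP_PMON_BOX_CTL_INT is
- * the two reset bits (bit 0 | bit 1), the loop writes 0x1 and then 0x3 to
- * the box control MSR instead of issuing a single combined write, which is
- * the access pattern that avoids the spurious #GPs mentioned above.
- */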
-
-static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
-       __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .init_box               = hswep_uncore_sbox_msr_init_box
-};
-
-static struct attribute *hswep_uncore_sbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute_group hswep_uncore_sbox_format_group = {
-       .name = "format",
-       .attrs = hswep_uncore_sbox_formats_attr,
-};
-
-static struct intel_uncore_type hswep_uncore_sbox = {
-       .name                   = "sbox",
-       .num_counters           = 4,
-       .num_boxes              = 4,
-       .perf_ctr_bits          = 44,
-       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
-       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
-       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
-       .ops                    = &hswep_uncore_sbox_msr_ops,
-       .format_group           = &hswep_uncore_sbox_format_group,
-};
-
-static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
-
-       if (ev_sel >= 0xb && ev_sel <= 0xe) {
-               reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER;
-               reg1->idx = ev_sel - 0xb;
-               reg1->config = event->attr.config1 & (0xff << reg1->idx);
-       }
-       return 0;
-}
-
-static struct intel_uncore_ops hswep_uncore_pcu_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = hswep_pcu_hw_config,
-       .get_constraint         = snbep_pcu_get_constraint,
-       .put_constraint         = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_pcu = {
-       .name                   = "pcu",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = HSWEP_PCU_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_PCU_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_PCU_MSR_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &hswep_uncore_pcu_ops,
-       .format_group           = &snbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *hswep_msr_uncores[] = {
-       &hswep_uncore_ubox,
-       &hswep_uncore_cbox,
-       &hswep_uncore_sbox,
-       &hswep_uncore_pcu,
-       NULL,
-};
-
-void hswep_uncore_cpu_init(void)
-{
-       if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-
-       /* Detect 6-8 core systems with only two SBOXes */
-       if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
-               u32 capid4;
-
-               pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
-                                     0x94, &capid4);
-               if (((capid4 >> 6) & 0x3) == 0)
-                       hswep_uncore_sbox.num_boxes = 2;
-       }
-
-       uncore_msr_uncores = hswep_msr_uncores;
-}
-
-static struct intel_uncore_type hswep_uncore_ha = {
-       .name           = "ha",
-       .num_counters   = 5,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 48,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct uncore_event_desc hswep_uncore_imc_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0x00,umask=0x00"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_type hswep_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 5,
-       .num_boxes      = 8,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
-       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
-       .event_descs    = hswep_uncore_imc_events,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
-
-static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 count = 0;
-
-       pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
-       pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
-
-       return count;
-}
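-
-/*
- * The IRP counters sit in PCI config space rather than behind one 64-bit
- * register, so the reader above assembles the value from two 32-bit config
- * reads: the first fills the low half of 'count', the second (offset +4)
- * fills the high half.
- */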
-
-static struct intel_uncore_ops hswep_uncore_irp_ops = {
-       .init_box       = snbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = snbep_uncore_pci_enable_box,
-       .disable_event  = ivbep_uncore_irp_disable_event,
-       .enable_event   = ivbep_uncore_irp_enable_event,
-       .read_counter   = hswep_uncore_irp_read_counter,
-};
-
-static struct intel_uncore_type hswep_uncore_irp = {
-       .name                   = "irp",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .ops                    = &hswep_uncore_irp_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type hswep_uncore_qpi = {
-       .name                   = "qpi",
-       .num_counters           = 5,
-       .num_boxes              = 3,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &snbep_uncore_qpi_ops,
-       .format_group           = &snbep_uncore_qpi_format_group,
-};
-
-static struct event_constraint hswep_uncore_r2pcie_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x24, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x27, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2a, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type hswep_uncore_r2pcie = {
-       .name           = "r2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .constraints    = hswep_uncore_r2pcie_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x01, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type hswep_uncore_r3qpi = {
-       .name           = "r3qpi",
-       .num_counters   = 4,
-       .num_boxes      = 3,
-       .perf_ctr_bits  = 44,
-       .constraints    = hswep_uncore_r3qpi_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
-       HSWEP_PCI_UNCORE_HA,
-       HSWEP_PCI_UNCORE_IMC,
-       HSWEP_PCI_UNCORE_IRP,
-       HSWEP_PCI_UNCORE_QPI,
-       HSWEP_PCI_UNCORE_R2PCIE,
-       HSWEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *hswep_pci_uncores[] = {
-       [HSWEP_PCI_UNCORE_HA]   = &hswep_uncore_ha,
-       [HSWEP_PCI_UNCORE_IMC]  = &hswep_uncore_imc,
-       [HSWEP_PCI_UNCORE_IRP]  = &hswep_uncore_irp,
-       [HSWEP_PCI_UNCORE_QPI]  = &hswep_uncore_qpi,
-       [HSWEP_PCI_UNCORE_R2PCIE]       = &hswep_uncore_r2pcie,
-       [HSWEP_PCI_UNCORE_R3QPI]        = &hswep_uncore_r3qpi,
-       NULL,
-};
-
-static const struct pci_device_id hswep_uncore_pci_ids[] = {
-       { /* Home Agent 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
-       },
-       { /* Home Agent 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1),
-       },
-       { /* MC0 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0),
-       },
-       { /* MC0 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1),
-       },
-       { /* MC0 Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2),
-       },
-       { /* MC0 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3),
-       },
-       { /* MC1 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4),
-       },
-       { /* MC1 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5),
-       },
-       { /* MC1 Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6),
-       },
-       { /* MC1 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7),
-       },
-       { /* IRP */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0),
-       },
-       { /* QPI0 Port 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0),
-       },
-       { /* QPI0 Port 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1),
-       },
-       { /* QPI1 Port 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2),
-       },
-       { /* R2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0),
-       },
-       { /* R3QPI0 Link 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0),
-       },
-       { /* R3QPI0 Link 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1),
-       },
-       { /* R3QPI1 Link 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2),
-       },
-       { /* QPI Port 0 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT0_FILTER),
-       },
-       { /* QPI Port 1 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT1_FILTER),
-       },
-       { /* PCU.3 (for Capability registers) */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  HSWEP_PCI_PCU_3),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver hswep_uncore_pci_driver = {
-       .name           = "hswep_uncore",
-       .id_table       = hswep_uncore_pci_ids,
-};
-
-int hswep_uncore_pci_init(void)
-{
-       int ret = snbep_pci2phy_map_init(0x2f1e);
-       if (ret)
-               return ret;
-       uncore_pci_uncores = hswep_pci_uncores;
-       uncore_pci_driver = &hswep_uncore_pci_driver;
-       return 0;
-}
-/* end of Haswell-EP uncore support */
-
-/* BDX uncore support */
-
-static struct intel_uncore_type bdx_uncore_ubox = {
-       .name                   = "ubox",
-       .num_counters           = 2,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &ivbep_uncore_msr_ops,
-       .format_group           = &ivbep_uncore_ubox_format_group,
-};
-
-static struct event_constraint bdx_uncore_cbox_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 4,
-       .num_boxes              = 24,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
-       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = HSWEP_CBO_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = bdx_uncore_cbox_constraints,
-       .ops                    = &hswep_uncore_cbox_ops,
-       .format_group           = &hswep_uncore_cbox_format_group,
-};
-
-static struct intel_uncore_type bdx_uncore_sbox = {
-       .name                   = "sbox",
-       .num_counters           = 4,
-       .num_boxes              = 4,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
-       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
-       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
-       .ops                    = &hswep_uncore_sbox_msr_ops,
-       .format_group           = &hswep_uncore_sbox_format_group,
-};
-
-static struct intel_uncore_type *bdx_msr_uncores[] = {
-       &bdx_uncore_ubox,
-       &bdx_uncore_cbox,
-       &bdx_uncore_sbox,
-       &hswep_uncore_pcu,
-       NULL,
-};
-
-void bdx_uncore_cpu_init(void)
-{
-       if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-       uncore_msr_uncores = bdx_msr_uncores;
-}
-
-static struct intel_uncore_type bdx_uncore_ha = {
-       .name           = "ha",
-       .num_counters   = 4,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 48,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type bdx_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 5,
-       .num_boxes      = 8,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
-       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
-       .event_descs    = hswep_uncore_imc_events,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type bdx_uncore_irp = {
-       .name                   = "irp",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .ops                    = &hswep_uncore_irp_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type bdx_uncore_qpi = {
-       .name                   = "qpi",
-       .num_counters           = 4,
-       .num_boxes              = 3,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &snbep_uncore_qpi_ops,
-       .format_group           = &snbep_uncore_qpi_format_group,
-};
-
-static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_r2pcie = {
-       .name           = "r2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .constraints    = bdx_uncore_r2pcie_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct event_constraint bdx_uncore_r3qpi_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x01, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_r3qpi = {
-       .name           = "r3qpi",
-       .num_counters   = 3,
-       .num_boxes      = 3,
-       .perf_ctr_bits  = 48,
-       .constraints    = bdx_uncore_r3qpi_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
-       BDX_PCI_UNCORE_HA,
-       BDX_PCI_UNCORE_IMC,
-       BDX_PCI_UNCORE_IRP,
-       BDX_PCI_UNCORE_QPI,
-       BDX_PCI_UNCORE_R2PCIE,
-       BDX_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *bdx_pci_uncores[] = {
-       [BDX_PCI_UNCORE_HA]     = &bdx_uncore_ha,
-       [BDX_PCI_UNCORE_IMC]    = &bdx_uncore_imc,
-       [BDX_PCI_UNCORE_IRP]    = &bdx_uncore_irp,
-       [BDX_PCI_UNCORE_QPI]    = &bdx_uncore_qpi,
-       [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
-       [BDX_PCI_UNCORE_R3QPI]  = &bdx_uncore_r3qpi,
-       NULL,
-};
-
-static const struct pci_device_id bdx_uncore_pci_ids[] = {
-       { /* Home Agent 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
-       },
-       { /* Home Agent 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1),
-       },
-       { /* MC0 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
-       },
-       { /* MC0 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
-       },
-       { /* MC0 Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2),
-       },
-       { /* MC0 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3),
-       },
-       { /* MC1 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4),
-       },
-       { /* MC1 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5),
-       },
-       { /* MC1 Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6),
-       },
-       { /* MC1 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7),
-       },
-       { /* IRP */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
-       },
-       { /* QPI0 Port 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0),
-       },
-       { /* QPI0 Port 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1),
-       },
-       { /* QPI1 Port 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2),
-       },
-       { /* R2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
-       },
-       { /* R3QPI0 Link 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0),
-       },
-       { /* R3QPI0 Link 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1),
-       },
-       { /* R3QPI1 Link 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2),
-       },
-       { /* QPI Port 0 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
-       },
-       { /* QPI Port 1 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
-       },
-       { /* QPI Port 2 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver bdx_uncore_pci_driver = {
-       .name           = "bdx_uncore",
-       .id_table       = bdx_uncore_pci_ids,
-};
-
-int bdx_uncore_pci_init(void)
-{
-       int ret = snbep_pci2phy_map_init(0x6f1e);
-
-       if (ret)
-               return ret;
-       uncore_pci_uncores = bdx_pci_uncores;
-       uncore_pci_driver = &bdx_uncore_pci_driver;
-       return 0;
-}
-
-/* end of BDX uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
deleted file mode 100644 (file)
index 5b0c232..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
-/* Driver for Intel Xeon Phi "Knights Corner" PMU */
-
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include <asm/hardirq.h>
-
-#include "perf_event.h"
-
-static const u64 knc_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]           = 0x002a,
-  [PERF_COUNT_HW_INSTRUCTIONS]         = 0x0016,
-  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0028,
-  [PERF_COUNT_HW_CACHE_MISSES]         = 0x0029,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x0012,
-  [PERF_COUNT_HW_BRANCH_MISSES]                = 0x002b,
-};
-
-static const u64 __initconst knc_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               /* On Xeon Phi event "0" is a valid DATA_READ          */
-               /*   (L1 Data Cache Reads) Instruction.                */
-               /* We code this as ARCH_PERFMON_EVENTSEL_INT as this   */
-               /* bit will always be set in x86_pmu_hw_config().      */
-               [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
-                                               /* DATA_READ           */
-               [ C(RESULT_MISS)   ] = 0x0003,  /* DATA_READ_MISS      */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE          */
-               [ C(RESULT_MISS)   ] = 0x0004,  /* DATA_WRITE_MISS     */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0011,  /* L1_DATA_PF1         */
-               [ C(RESULT_MISS)   ] = 0x001c,  /* L1_DATA_PF1_MISS    */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ          */
-               [ C(RESULT_MISS)   ] = 0x000e,  /* CODE_CACHE_MISS    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x10cb,  /* L2_READ_MISS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x10cc,  /* L2_WRITE_HIT */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x10fc,  /* L2_DATA_PF2      */
-               [ C(RESULT_MISS)   ] = 0x10fe,  /* L2_DATA_PF2_MISS */
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
-                                               /* DATA_READ */
-                                               /* see note on L1 OP_READ */
-               [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE */
-               [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ */
-               [ C(RESULT_MISS)   ] = 0x000d,  /* CODE_PAGE_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0012,  /* BRANCHES */
-               [ C(RESULT_MISS)   ] = 0x002b,  /* BRANCHES_MISPREDICTED */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-
-static u64 knc_pmu_event_map(int hw_event)
-{
-       return knc_perfmon_event_map[hw_event];
-}
-
-static struct event_constraint knc_event_constraints[] =
-{
-       INTEL_EVENT_CONSTRAINT(0xc3, 0x1),      /* HWP_L2HIT */
-       INTEL_EVENT_CONSTRAINT(0xc4, 0x1),      /* HWP_L2MISS */
-       INTEL_EVENT_CONSTRAINT(0xc8, 0x1),      /* L2_READ_HIT_E */
-       INTEL_EVENT_CONSTRAINT(0xc9, 0x1),      /* L2_READ_HIT_M */
-       INTEL_EVENT_CONSTRAINT(0xca, 0x1),      /* L2_READ_HIT_S */
-       INTEL_EVENT_CONSTRAINT(0xcb, 0x1),      /* L2_READ_MISS */
-       INTEL_EVENT_CONSTRAINT(0xcc, 0x1),      /* L2_WRITE_HIT */
-       INTEL_EVENT_CONSTRAINT(0xce, 0x1),      /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
-       INTEL_EVENT_CONSTRAINT(0xcf, 0x1),      /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
-       INTEL_EVENT_CONSTRAINT(0xd7, 0x1),      /* L2_VICTIM_REQ_WITH_DATA */
-       INTEL_EVENT_CONSTRAINT(0xe3, 0x1),      /* SNP_HITM_BUNIT */
-       INTEL_EVENT_CONSTRAINT(0xe6, 0x1),      /* SNP_HIT_L2 */
-       INTEL_EVENT_CONSTRAINT(0xe7, 0x1),      /* SNP_HITM_L2 */
-       INTEL_EVENT_CONSTRAINT(0xf1, 0x1),      /* L2_DATA_READ_MISS_CACHE_FILL */
-       INTEL_EVENT_CONSTRAINT(0xf2, 0x1),      /* L2_DATA_WRITE_MISS_CACHE_FILL */
-       INTEL_EVENT_CONSTRAINT(0xf6, 0x1),      /* L2_DATA_READ_MISS_MEM_FILL */
-       INTEL_EVENT_CONSTRAINT(0xf7, 0x1),      /* L2_DATA_WRITE_MISS_MEM_FILL */
-       INTEL_EVENT_CONSTRAINT(0xfc, 0x1),      /* L2_DATA_PF2 */
-       INTEL_EVENT_CONSTRAINT(0xfd, 0x1),      /* L2_DATA_PF2_DROP */
-       INTEL_EVENT_CONSTRAINT(0xfe, 0x1),      /* L2_DATA_PF2_MISS */
-       INTEL_EVENT_CONSTRAINT(0xff, 0x1),      /* L2_DATA_HIT_INFLIGHT_PF2 */
-       EVENT_CONSTRAINT_END
-};
-
-#define MSR_KNC_IA32_PERF_GLOBAL_STATUS                0x0000002d
-#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL   0x0000002e
-#define MSR_KNC_IA32_PERF_GLOBAL_CTRL          0x0000002f
-
-#define KNC_ENABLE_COUNTER0                    0x00000001
-#define KNC_ENABLE_COUNTER1                    0x00000002
-
-static void knc_pmu_disable_all(void)
-{
-       u64 val;
-
-       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-       val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
-       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-}
-
-static void knc_pmu_enable_all(int added)
-{
-       u64 val;
-
-       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-       val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
-       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-}
-
-static inline void
-knc_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 val;
-
-       val = hwc->config;
-       val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-
-       (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
-}
-
-static void knc_pmu_enable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 val;
-
-       val = hwc->config;
-       val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-
-       (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
-}
-
-static inline u64 knc_pmu_get_status(void)
-{
-       u64 status;
-
-       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
-
-       return status;
-}
-
-static inline void knc_pmu_ack_status(u64 ack)
-{
-       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
-}
-
-static int knc_pmu_handle_irq(struct pt_regs *regs)
-{
-       struct perf_sample_data data;
-       struct cpu_hw_events *cpuc;
-       int handled = 0;
-       int bit, loops;
-       u64 status;
-
-       cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       knc_pmu_disable_all();
-
-       status = knc_pmu_get_status();
-       if (!status) {
-               knc_pmu_enable_all(0);
-               return handled;
-       }
-
-       loops = 0;
-again:
-       knc_pmu_ack_status(status);
-       if (++loops > 100) {
-               WARN_ONCE(1, "perf: irq loop stuck!\n");
-               perf_event_print_debug();
-               goto done;
-       }
-
-       inc_irq_stat(apic_perf_irqs);
-
-       for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
-               struct perf_event *event = cpuc->events[bit];
-
-               handled++;
-
-               if (!test_bit(bit, cpuc->active_mask))
-                       continue;
-
-               if (!intel_pmu_save_and_restart(event))
-                       continue;
-
-               perf_sample_data_init(&data, 0, event->hw.last_period);
-
-               if (perf_event_overflow(event, &data, regs))
-                       x86_pmu_stop(event, 0);
-       }
-
-       /*
-        * Repeat if there is more work to be done:
-        */
-       status = knc_pmu_get_status();
-       if (status)
-               goto again;
-
-done:
-       knc_pmu_enable_all(0);
-
-       return handled;
-}
-
-
-PMU_FORMAT_ATTR(event, "config:0-7"    );
-PMU_FORMAT_ATTR(umask, "config:8-15"   );
-PMU_FORMAT_ATTR(edge,  "config:18"     );
-PMU_FORMAT_ATTR(inv,   "config:23"     );
-PMU_FORMAT_ATTR(cmask, "config:24-31"  );
-
-static struct attribute *intel_knc_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       NULL,
-};
-
-static const struct x86_pmu knc_pmu __initconst = {
-       .name                   = "knc",
-       .handle_irq             = knc_pmu_handle_irq,
-       .disable_all            = knc_pmu_disable_all,
-       .enable_all             = knc_pmu_enable_all,
-       .enable                 = knc_pmu_enable_event,
-       .disable                = knc_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_KNC_EVNTSEL0,
-       .perfctr                = MSR_KNC_PERFCTR0,
-       .event_map              = knc_pmu_event_map,
-       .max_events             = ARRAY_SIZE(knc_perfmon_event_map),
-       .apic                   = 1,
-       .max_period             = (1ULL << 39) - 1,
-       .version                = 0,
-       .num_counters           = 2,
-       .cntval_bits            = 40,
-       .cntval_mask            = (1ULL << 40) - 1,
-       .get_event_constraints  = x86_get_event_constraints,
-       .event_constraints      = knc_event_constraints,
-       .format_attrs           = intel_knc_formats_attr,
-};
-
-__init int knc_pmu_init(void)
-{
-       x86_pmu = knc_pmu;
-
-       memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, 
-               sizeof(hw_cache_event_ids));
-
-       return 0;
-}
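
For illustration only (not part of this commit): the PMU_FORMAT_ATTR lines above define the raw config layout of the knc PMU -- event in config:0-7, umask in config:8-15, the edge bit at 18, the inv bit at 23 and cmask in config:24-31. A minimal sketch of packing such a value, using a hypothetical helper named knc_pack_config, could look like this:

#include <stdint.h>
#include <stdbool.h>

/* Pack a raw knc config value per the format attributes above. */
static inline uint64_t knc_pack_config(uint8_t event, uint8_t umask,
				       uint8_t cmask, bool edge, bool inv)
{
	return (uint64_t)event        |		/* config:0-7   */
	       ((uint64_t)umask << 8) |		/* config:8-15  */
	       ((uint64_t)edge << 18) |		/* config:18    */
	       ((uint64_t)inv  << 23) |		/* config:23    */
	       ((uint64_t)cmask << 24);		/* config:24-31 */
}

/*
 * For example, knc_pack_config(0x2a, 0, 0, false, false) yields the raw
 * config of the KNC cycles event (0x002a in knc_perfmon_event_map above).
 */
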
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
deleted file mode 100644 (file)
index ec863b9..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-#include <linux/perf_event.h>
-
-enum perf_msr_id {
-       PERF_MSR_TSC                    = 0,
-       PERF_MSR_APERF                  = 1,
-       PERF_MSR_MPERF                  = 2,
-       PERF_MSR_PPERF                  = 3,
-       PERF_MSR_SMI                    = 4,
-
-       PERF_MSR_EVENT_MAX,
-};
-
-static bool test_aperfmperf(int idx)
-{
-       return boot_cpu_has(X86_FEATURE_APERFMPERF);
-}
-
-static bool test_intel(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_MSR_SMI)
-                       return true;
-               break;
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
-struct perf_msr {
-       u64     msr;
-       struct  perf_pmu_events_attr *attr;
-       bool    (*test)(int idx);
-};
-
-PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
-PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
-
-static struct perf_msr msr[] = {
-       [PERF_MSR_TSC]   = { 0,                 &evattr_tsc,    NULL,            },
-       [PERF_MSR_APERF] = { MSR_IA32_APERF,    &evattr_aperf,  test_aperfmperf, },
-       [PERF_MSR_MPERF] = { MSR_IA32_MPERF,    &evattr_mperf,  test_aperfmperf, },
-       [PERF_MSR_PPERF] = { MSR_PPERF,         &evattr_pperf,  test_intel,      },
-       [PERF_MSR_SMI]   = { MSR_SMI_COUNT,     &evattr_smi,    test_intel,      },
-};
-
-static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
-       NULL,
-};
-
-static struct attribute_group events_attr_group = {
-       .name = "events",
-       .attrs = events_attrs,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-63");
-static struct attribute *format_attrs[] = {
-       &format_attr_event.attr,
-       NULL,
-};
-static struct attribute_group format_attr_group = {
-       .name = "format",
-       .attrs = format_attrs,
-};
-
-static const struct attribute_group *attr_groups[] = {
-       &events_attr_group,
-       &format_attr_group,
-       NULL,
-};
-
-static int msr_event_init(struct perf_event *event)
-{
-       u64 cfg = event->attr.config;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       if (cfg >= PERF_MSR_EVENT_MAX)
-               return -EINVAL;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       if (!msr[cfg].attr)
-               return -EINVAL;
-
-       event->hw.idx = -1;
-       event->hw.event_base = msr[cfg].msr;
-       event->hw.config = cfg;
-
-       return 0;
-}
-
-static inline u64 msr_read_counter(struct perf_event *event)
-{
-       u64 now;
-
-       if (event->hw.event_base)
-               rdmsrl(event->hw.event_base, now);
-       else
-               rdtscll(now);
-
-       return now;
-}
-static void msr_event_update(struct perf_event *event)
-{
-       u64 prev, now;
-       s64 delta;
-
-       /* Careful, an NMI might modify the previous event value. */
-again:
-       prev = local64_read(&event->hw.prev_count);
-       now = msr_read_counter(event);
-
-       if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
-               goto again;
-
-       delta = now - prev;
-       if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
-               delta = sign_extend64(delta, 31);
-
-       local64_add(delta, &event->count);
-}
-
-static void msr_event_start(struct perf_event *event, int flags)
-{
-       u64 now;
-
-       now = msr_read_counter(event);
-       local64_set(&event->hw.prev_count, now);
-}
-
-static void msr_event_stop(struct perf_event *event, int flags)
-{
-       msr_event_update(event);
-}
-
-static void msr_event_del(struct perf_event *event, int flags)
-{
-       msr_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int msr_event_add(struct perf_event *event, int flags)
-{
-       if (flags & PERF_EF_START)
-               msr_event_start(event, flags);
-
-       return 0;
-}
-
-static struct pmu pmu_msr = {
-       .task_ctx_nr    = perf_sw_context,
-       .attr_groups    = attr_groups,
-       .event_init     = msr_event_init,
-       .add            = msr_event_add,
-       .del            = msr_event_del,
-       .start          = msr_event_start,
-       .stop           = msr_event_stop,
-       .read           = msr_event_update,
-       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static int __init msr_init(void)
-{
-       int i, j = 0;
-
-       if (!boot_cpu_has(X86_FEATURE_TSC)) {
-               pr_cont("no MSR PMU driver.\n");
-               return 0;
-       }
-
-       /* Probe the MSRs. */
-       for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
-               u64 val;
-
-               /*
-                * Virt is a pain; you cannot tell if an R/O MSR is present :/
-                */
-               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-                       msr[i].attr = NULL;
-       }
-
-       /* List remaining MSRs in the sysfs attrs. */
-       for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
-               if (msr[i].attr)
-                       events_attrs[j++] = &msr[i].attr->attr.attr;
-       }
-       events_attrs[j] = NULL;
-
-       perf_pmu_register(&pmu_msr, "msr", -1);
-
-       return 0;
-}
-device_initcall(msr_init);
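
For illustration only (not part of this commit): the msr PMU above is a counting-only PMU (PERF_PMU_CAP_NO_INTERRUPT, sampling rejected in msr_event_init) whose events are selected by the config values 0x00-0x04 from the PMU_EVENT_ATTR_STRING definitions. A minimal user-space sketch that counts the aperf event could look like the following; the sysfs lookup and error handling here are assumptions of this sketch, not code from the patch.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long count;
	FILE *f;
	int type, fd;

	/* The msr PMU gets a dynamic type id; sysfs exposes it. */
	f = fopen("/sys/bus/event_source/devices/msr/type", "r");
	if (!f)
		return 1;
	if (fscanf(f, "%d", &type) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = type;
	attr.config = 0x01;	/* aperf, per PMU_EVENT_ATTR_STRING above */

	/* counting only: msr_event_init rejects any sample_period */
	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("aperf: %llu\n", count);
	return 0;
}
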
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
deleted file mode 100644 (file)
index f2e5678..0000000
+++ /dev/null
@@ -1,1376 +0,0 @@
-/*
- * Netburst Performance Events (P4, old Xeon)
- *
- *  Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
- *  Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-
-#include <asm/perf_event_p4.h>
-#include <asm/hardirq.h>
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-#define P4_CNTR_LIMIT 3
-/*
- * array indices: 0,1 - HT threads, used on HT-enabled CPUs
- */
-struct p4_event_bind {
-       unsigned int opcode;                    /* Event code and ESCR selector */
-       unsigned int escr_msr[2];               /* ESCR MSR for this event */
-       unsigned int escr_emask;                /* valid ESCR EventMask bits */
-       unsigned int shared;                    /* event is shared across threads */
-       char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on absence */
-};
-
-struct p4_pebs_bind {
-       unsigned int metric_pebs;
-       unsigned int metric_vert;
-};
-
-/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
-#define P4_GEN_PEBS_BIND(name, pebs, vert)                     \
-       [P4_PEBS_METRIC__##name] = {                            \
-               .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,   \
-               .metric_vert = vert,                            \
-       }
-
-/*
- * note we have P4_PEBS_ENABLE_UOP_TAG always set here
- *
- * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
- * event configuration to find out which values are to be
- * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
- * registers
- */
-static struct p4_pebs_bind p4_pebs_bind_map[] = {
-       P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,  0x0000001, 0x0000001),
-       P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,  0x0000002, 0x0000001),
-       P4_GEN_PEBS_BIND(dtlb_load_miss_retired,        0x0000004, 0x0000001),
-       P4_GEN_PEBS_BIND(dtlb_store_miss_retired,       0x0000004, 0x0000002),
-       P4_GEN_PEBS_BIND(dtlb_all_miss_retired,         0x0000004, 0x0000003),
-       P4_GEN_PEBS_BIND(tagged_mispred_branch,         0x0018000, 0x0000010),
-       P4_GEN_PEBS_BIND(mob_load_replay_retired,       0x0000200, 0x0000001),
-       P4_GEN_PEBS_BIND(split_load_retired,            0x0000400, 0x0000001),
-       P4_GEN_PEBS_BIND(split_store_retired,           0x0000400, 0x0000002),
-};
-
-/*
- * Note that we don't use CCCR1 here; there is an
- * exception for P4_BSQ_ALLOCATION but we simply have
- * no workaround for it.
- *
- * Consider this binding a set of resources a particular
- * event may borrow; it doesn't contain the EventMask,
- * Tags and friends -- those are left to the caller
- */
-static struct p4_event_bind p4_event_bind_map[] = {
-       [P4_EVENT_TC_DELIVER_MODE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
-               .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
-               .shared         = 1,
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_BPU_FETCH_REQUEST] = {
-               .opcode         = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
-               .escr_msr       = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_ITLB_REFERENCE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
-               .escr_msr       = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_MEMORY_CANCEL] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
-               .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_MEMORY_COMPLETE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
-               .escr_msr       = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_LOAD_PORT_REPLAY] = {
-               .opcode         = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
-               .escr_msr       = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_STORE_PORT_REPLAY] = {
-               .opcode         = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
-               .escr_msr       = { MSR_P4_SAAT_ESCR0 ,  MSR_P4_SAAT_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_MOB_LOAD_REPLAY] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
-               .escr_msr       = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_PAGE_WALK_TYPE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
-               .escr_msr       = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
-               .shared         = 1,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_BSQ_CACHE_REFERENCE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
-               .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_IOQ_ALLOCATION] = {
-               .opcode         = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER)               |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_IOQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
-               .opcode         = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
-               .escr_msr       = { MSR_P4_FSB_ESCR1,  MSR_P4_FSB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
-               .cntr           = { {2, -1, -1}, {3, -1, -1} },
-       },
-       [P4_EVENT_FSB_DATA_ACTIVITY] = {
-               .opcode         = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
-               .shared         = 1,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_BSQ_ALLOCATION] = {           /* shared ESCR, broken CCCR1 */
-               .opcode         = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
-               .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE)      |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE)      |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
-               .cntr           = { {0, -1, -1}, {1, -1, -1} },
-       },
-       [P4_EVENT_BSQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
-               .opcode         = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
-               .escr_msr       = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE)     |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE)  |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE)  |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE)    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE)    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
-               .cntr           = { {2, -1, -1}, {3, -1, -1} },
-       },
-       [P4_EVENT_SSE_INPUT_ASSIST] = {
-               .opcode         = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_PACKED_SP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_PACKED_DP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_SCALAR_SP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_SCALAR_DP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_64BIT_MMX_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_128BIT_MMX_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_X87_FP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_X87_FP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_TC_MISC] = {
-               .opcode         = P4_OPCODE(P4_EVENT_TC_MISC),
-               .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_GLOBAL_POWER_EVENTS] = {
-               .opcode         = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_TC_MS_XFER] = {
-               .opcode         = P4_OPCODE(P4_EVENT_TC_MS_XFER),
-               .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_UOP_QUEUE_WRITES] = {
-               .opcode         = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
-               .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD)     |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
-               .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL)    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_RETIRED_BRANCH_TYPE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
-               .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_RESOURCE_STALL] = {
-               .opcode         = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
-               .escr_msr       = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_WC_BUFFER] = {
-               .opcode         = P4_OPCODE(P4_EVENT_WC_BUFFER),
-               .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS)               |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_B2B_CYCLES] = {
-               .opcode         = P4_OPCODE(P4_EVENT_B2B_CYCLES),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     = 0,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_BNR] = {
-               .opcode         = P4_OPCODE(P4_EVENT_BNR),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     = 0,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_SNOOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_SNOOP),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     = 0,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_RESPONSE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_RESPONSE),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     = 0,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_FRONT_END_EVENT] = {
-               .opcode         = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_EXECUTION_EVENT] = {
-               .opcode         = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_REPLAY_EVENT] = {
-               .opcode         = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_INSTR_RETIRED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
-               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_UOPS_RETIRED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
-               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_UOP_TYPE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_UOP_TYPE),
-               .escr_msr       = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS)                  |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_BRANCH_RETIRED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
-               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_X87_ASSIST] = {
-               .opcode         = P4_OPCODE(P4_EVENT_X87_ASSIST),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU)                    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO)                    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO)                    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU)                    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_MACHINE_CLEAR] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_INSTR_COMPLETED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
-               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-};
-
-#define P4_GEN_CACHE_EVENT(event, bit, metric)                           \
-       p4_config_pack_escr(P4_ESCR_EVENT(event)                        | \
-                           P4_ESCR_EMASK_BIT(event, bit))              | \
-       p4_config_pack_cccr(metric                                      | \
-                           P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
-
-static __initconst const u64 p4_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_PEBS_METRIC__1stl_cache_load_miss_retired),
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_PEBS_METRIC__dtlb_load_miss_retired),
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_PEBS_METRIC__dtlb_store_miss_retired),
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
-                                               P4_PEBS_METRIC__none),
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
-                                               P4_PEBS_METRIC__none),
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-/*
- * Because Netburst is quite restricted in how many identical events
- * may run simultaneously, we introduce event aliases, i.e. different
- * events which have the same functionality but use non-intersecting
- * resources (ESCR/CCCR/counter registers).
- *
- * This allows us to relax the restrictions a bit and run two or more
- * identical events together.
- *
- * Never set any custom internal bits such as P4_CONFIG_HT,
- * P4_CONFIG_ALIASABLE or the P4_PEBS_METRIC bits; they are either
- * kept up to date automatically or not applicable at all.
- */
-struct p4_event_alias {
-       u64 original;
-       u64 alternative;
-} p4_event_aliases[] = {
-       {
-               /*
-                * Non-halted cycles can be substituted with non-sleeping cycles (see
-                * Intel SDM Vol3b for details). We need this alias to be able
-                * to run nmi-watchdog and 'perf top' (or any other user space tool
-                * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
-                * simultaneously.
-                */
-       .original       =
-               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
-       .alternative    =
-               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT)             |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
-               p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT          |
-                                   P4_CCCR_COMPARE),
-       },
-};
-
-static u64 p4_get_alias_event(u64 config)
-{
-       u64 config_match;
-       int i;
-
-       /*
-        * Only events carrying the special alias mark are allowed;
-        * this makes sure the value did not come in as a malformed
-        * RAW event.
-        */
-       if (!(config & P4_CONFIG_ALIASABLE))
-               return 0;
-
-       config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
-
-       for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
-               if (config_match == p4_event_aliases[i].original) {
-                       config_match = p4_event_aliases[i].alternative;
-                       break;
-               } else if (config_match == p4_event_aliases[i].alternative) {
-                       config_match = p4_event_aliases[i].original;
-                       break;
-               }
-       }
-
-       if (i >= ARRAY_SIZE(p4_event_aliases))
-               return 0;
-
-       return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
-}
-
-static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
-  /* non-halted CPU clocks */
-  [PERF_COUNT_HW_CPU_CYCLES] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING))       |
-               P4_CONFIG_ALIASABLE,
-
-  /*
-   * retired instructions
-   * for the sake of simplicity we don't use FSB tagging
-   */
-  [PERF_COUNT_HW_INSTRUCTIONS] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED)               |
-               P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
-               P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
-
-  /* cache hits */
-  [PERF_COUNT_HW_CACHE_REFERENCES] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
-
-  /* cache misses */
-  [PERF_COUNT_HW_CACHE_MISSES] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
-
-  /* branch instructions retired */
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
-               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
-               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
-
-  /* mispredicted branches retired */
-  [PERF_COUNT_HW_BRANCH_MISSES]        =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED)      |
-               P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
-
-  /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN):  */
-  [PERF_COUNT_HW_BUS_CYCLES] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY)           |
-               P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN))        |
-       p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
-};
-
-static struct p4_event_bind *p4_config_get_bind(u64 config)
-{
-       unsigned int evnt = p4_config_unpack_event(config);
-       struct p4_event_bind *bind = NULL;
-
-       if (evnt < ARRAY_SIZE(p4_event_bind_map))
-               bind = &p4_event_bind_map[evnt];
-
-       return bind;
-}
-
-static u64 p4_pmu_event_map(int hw_event)
-{
-       struct p4_event_bind *bind;
-       unsigned int esel;
-       u64 config;
-
-       config = p4_general_events[hw_event];
-       bind = p4_config_get_bind(config);
-       esel = P4_OPCODE_ESEL(bind->opcode);
-       config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
-
-       return config;
-}
-
-/* check cpu model specifics */
-static bool p4_event_match_cpu_model(unsigned int event_idx)
-{
-       /* The INSTR_COMPLETED event only exists on models 3, 4 and 6 (Prescott) */
-       if (event_idx == P4_EVENT_INSTR_COMPLETED) {
-               if (boot_cpu_data.x86_model != 3 &&
-                       boot_cpu_data.x86_model != 4 &&
-                       boot_cpu_data.x86_model != 6)
-                       return false;
-       }
-
-       /*
-        * For reference:
-        * - IQ_ESCR0 and IQ_ESCR1 exist only on models 1 and 2
-        */
-
-       return true;
-}
-
-static int p4_validate_raw_event(struct perf_event *event)
-{
-       unsigned int v, emask;
-
-       /* User data may contain an out-of-bounds event index */
-       v = p4_config_unpack_event(event->attr.config);
-       if (v >= ARRAY_SIZE(p4_event_bind_map))
-               return -EINVAL;
-
-       /* It may be unsupported: */
-       if (!p4_event_match_cpu_model(v))
-               return -EINVAL;
-
-       /*
-        * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as in
-        * Architectural Performance Monitoring: it selects not _which_
-        * logical cpu to count on but rather _when_, i.e. it depends on
-        * the logical cpu state -- count the event if one cpu is active,
-        * none, both or any -- so we simply allow the user to pass any
-        * desired value.
-        *
-        * In turn we always set the Tx_OS/Tx_USR bits bound to the local
-        * logical cpu, without propagating them to the other cpu
-        */
-
-       /*
-        * if an event is shared across the logical threads
-        * the user needs special permissions to be able to use it
-        */
-       if (p4_ht_active() && p4_event_bind_map[v].shared) {
-               if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-                       return -EACCES;
-       }
-
-       /* ESCR EventMask bits may be invalid */
-       emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
-       if (emask & ~p4_event_bind_map[v].escr_emask)
-               return -EINVAL;
-
-       /*
-        * it may have some invalid PEBS bits
-        */
-       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
-               return -EINVAL;
-
-       v = p4_config_unpack_metric(event->attr.config);
-       if (v >= ARRAY_SIZE(p4_pebs_bind_map))
-               return -EINVAL;
-
-       return 0;
-}
-
-static int p4_hw_config(struct perf_event *event)
-{
-       int cpu = get_cpu();
-       int rc = 0;
-       u32 escr, cccr;
-
-       /*
-        * the reason we grab the cpu this early is that if we get scheduled
-        * onto the same cpu the first time, we will not need to swap the
-        * thread-specific flags in the config (and will save some cpu cycles)
-        */
-
-       cccr = p4_default_cccr_conf(cpu);
-       escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
-                                        event->attr.exclude_user);
-       event->hw.config = p4_config_pack_escr(escr) |
-                          p4_config_pack_cccr(cccr);
-
-       if (p4_ht_active() && p4_ht_thread(cpu))
-               event->hw.config = p4_set_ht_bit(event->hw.config);
-
-       if (event->attr.type == PERF_TYPE_RAW) {
-               struct p4_event_bind *bind;
-               unsigned int esel;
-               /*
-                * Clear the bits we reserve to be managed by the kernel itself
-                * and never allow to be set from user space
-                */
-                event->attr.config &= P4_CONFIG_MASK;
-
-               rc = p4_validate_raw_event(event);
-               if (rc)
-                       goto out;
-
-               /*
-                * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
-                * bits since we keep additional info there (for cache events etc.)
-                */
-               event->hw.config |= event->attr.config;
-               bind = p4_config_get_bind(event->attr.config);
-               if (!bind) {
-                       rc = -EINVAL;
-                       goto out;
-               }
-               esel = P4_OPCODE_ESEL(bind->opcode);
-               event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
-       }
-
-       rc = x86_setup_perfctr(event);
-out:
-       put_cpu();
-       return rc;
-}
-
-static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
-{
-       u64 v;
-
-       /* an official way for overflow indication */
-       rdmsrl(hwc->config_base, v);
-       if (v & P4_CCCR_OVF) {
-               wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
-               return 1;
-       }
-
-       /*
-        * In some circumstances the overflow might issue an NMI but not
-        * set the P4_CCCR_OVF bit. Because a counter holds a negative value
-        * we simply check whether the high bit is set; if it is cleared, the
-        * counter has crossed zero and continued counting before the real
-        * NMI signal was received:
-        */
-       rdmsrl(hwc->event_base, v);
-       if (!(v & ARCH_P4_UNFLAGGED_BIT))
-               return 1;
-
-       return 0;
-}
-
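The fallback check above works because perf seeds a sampling counter with the negative of the remaining period, so the counter's top bit stays set until the count crosses zero; a cleared high bit therefore means the counter already wrapped even though P4_CCCR_OVF was not latched. A stand-alone sketch of that arithmetic, assuming only the 40-bit counter width implied by ARCH_P4_CNTRVAL_BITS (everything else is illustrative):

  #include <stdint.h>
  #include <stdio.h>

  #define CNTRVAL_BITS 40                                /* P4 counter width */
  #define CNTRVAL_MASK ((1ULL << CNTRVAL_BITS) - 1)
  #define HIGH_BIT(v)  (((v) >> (CNTRVAL_BITS - 1)) & 1) /* the "unflagged" bit */

  int main(void)
  {
          uint64_t period = 100000;
          /* Counter is seeded with -period, truncated to the counter width. */
          uint64_t v = (uint64_t)(-(int64_t)period) & CNTRVAL_MASK;

          printf("seeded:  %#llx high bit %llu\n",
                 (unsigned long long)v, (unsigned long long)HIGH_BIT(v));

          /* Count past zero: the high bit clears, which the driver reads as overflow. */
          v = (v + period + 1) & CNTRVAL_MASK;
          printf("wrapped: %#llx high bit %llu\n",
                 (unsigned long long)v, (unsigned long long)HIGH_BIT(v));
          return 0;
  }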
-static void p4_pmu_disable_pebs(void)
-{
-       /*
-        * FIXME
-        *
-        * Two threads are still allowed to set up the same cache
-        * events, so we can't simply clear the metrics until we know
-        * that no one depends on us; we would need some kind of
-        * reference counter for "ReplayEvent" users.
-        *
-        * RAW events are even more complex: if the user (for some
-        * reason) passes a cache event metric with an improper event
-        * opcode, it is fine from the hardware's point of view but
-        * complete nonsense as far as the meaning of such an action goes.
-        *
-        * So for the moment leave the metrics turned on forever -- it's
-        * ok for now but needs to be revisited!
-        *
-        * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
-        * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
-        */
-}
-
-static inline void p4_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       /*
-        * If the event gets disabled while the counter is in the overflowed
-        * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
-        * asserted again and again
-        */
-       (void)wrmsrl_safe(hwc->config_base,
-               p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
-}
-
-static void p4_pmu_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct perf_event *event = cpuc->events[idx];
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-               p4_pmu_disable_event(event);
-       }
-
-       p4_pmu_disable_pebs();
-}
-
-/* configuration must be valid */
-static void p4_pmu_enable_pebs(u64 config)
-{
-       struct p4_pebs_bind *bind;
-       unsigned int idx;
-
-       BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
-
-       idx = p4_config_unpack_metric(config);
-       if (idx == P4_PEBS_METRIC__none)
-               return;
-
-       bind = &p4_pebs_bind_map[idx];
-
-       (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
-       (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT,      (u64)bind->metric_vert);
-}
-
-static void p4_pmu_enable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       int thread = p4_ht_config_thread(hwc->config);
-       u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
-       unsigned int idx = p4_config_unpack_event(hwc->config);
-       struct p4_event_bind *bind;
-       u64 escr_addr, cccr;
-
-       bind = &p4_event_bind_map[idx];
-       escr_addr = bind->escr_msr[thread];
-
-       /*
-        * - we don't support cascaded counters yet
-        * - and counter 1 is broken (erratum)
-        */
-       WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
-       WARN_ON_ONCE(hwc->idx == 1);
-
-       /* we need a real Event value */
-       escr_conf &= ~P4_ESCR_EVENT_MASK;
-       escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
-
-       cccr = p4_config_unpack_cccr(hwc->config);
-
-       /*
-        * it could be a cache event, so we need to write the metrics
-        * into additional MSRs
-        */
-       p4_pmu_enable_pebs(hwc->config);
-
-       (void)wrmsrl_safe(escr_addr, escr_conf);
-       (void)wrmsrl_safe(hwc->config_base,
-                               (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
-}
-
-static void p4_pmu_enable_all(int added)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct perf_event *event = cpuc->events[idx];
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-               p4_pmu_enable_event(event);
-       }
-}
-
-static int p4_pmu_handle_irq(struct pt_regs *regs)
-{
-       struct perf_sample_data data;
-       struct cpu_hw_events *cpuc;
-       struct perf_event *event;
-       struct hw_perf_event *hwc;
-       int idx, handled = 0;
-       u64 val;
-
-       cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               int overflow;
-
-               if (!test_bit(idx, cpuc->active_mask)) {
-                       /* catch in-flight IRQs */
-                       if (__test_and_clear_bit(idx, cpuc->running))
-                               handled++;
-                       continue;
-               }
-
-               event = cpuc->events[idx];
-               hwc = &event->hw;
-
-               WARN_ON_ONCE(hwc->idx != idx);
-
-               /* it might be an unflagged overflow */
-               overflow = p4_pmu_clear_cccr_ovf(hwc);
-
-               val = x86_perf_event_update(event);
-               if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
-                       continue;
-
-               handled += overflow;
-
-               /* event overflow for sure */
-               perf_sample_data_init(&data, 0, hwc->last_period);
-
-               if (!x86_perf_event_set_period(event))
-                       continue;
-
-
-               if (perf_event_overflow(event, &data, regs))
-                       x86_pmu_stop(event, 0);
-       }
-
-       if (handled)
-               inc_irq_stat(apic_perf_irqs);
-
-       /*
-        * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
-        * been observed that the OVF bit flag has to be cleared first _before_
-        * the LVTPC can be unmasked.
-        *
-        * The reason is the NMI line will continue to be asserted while the OVF
-        * bit is set.  This causes a second NMI to be generated if the LVTPC is
-        * unmasked before the OVF bit is cleared, leading to unknown NMI
-        * messages.
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-       return handled;
-}
-
-/*
- * swap thread specific fields according to a thread
- * we are going to run on
- */
-static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
-{
-       u32 escr, cccr;
-
-       /*
-        * either we are lucky and continue on the same cpu, or there is no HT support
-        */
-       if (!p4_should_swap_ts(hwc->config, cpu))
-               return;
-
-       /*
-        * the event has been migrated from another logical
-        * cpu, so we need to swap the thread-specific flags
-        */
-
-       escr = p4_config_unpack_escr(hwc->config);
-       cccr = p4_config_unpack_cccr(hwc->config);
-
-       if (p4_ht_thread(cpu)) {
-               cccr &= ~P4_CCCR_OVF_PMI_T0;
-               cccr |= P4_CCCR_OVF_PMI_T1;
-               if (escr & P4_ESCR_T0_OS) {
-                       escr &= ~P4_ESCR_T0_OS;
-                       escr |= P4_ESCR_T1_OS;
-               }
-               if (escr & P4_ESCR_T0_USR) {
-                       escr &= ~P4_ESCR_T0_USR;
-                       escr |= P4_ESCR_T1_USR;
-               }
-               hwc->config  = p4_config_pack_escr(escr);
-               hwc->config |= p4_config_pack_cccr(cccr);
-               hwc->config |= P4_CONFIG_HT;
-       } else {
-               cccr &= ~P4_CCCR_OVF_PMI_T1;
-               cccr |= P4_CCCR_OVF_PMI_T0;
-               if (escr & P4_ESCR_T1_OS) {
-                       escr &= ~P4_ESCR_T1_OS;
-                       escr |= P4_ESCR_T0_OS;
-               }
-               if (escr & P4_ESCR_T1_USR) {
-                       escr &= ~P4_ESCR_T1_USR;
-                       escr |= P4_ESCR_T0_USR;
-               }
-               hwc->config  = p4_config_pack_escr(escr);
-               hwc->config |= p4_config_pack_cccr(cccr);
-               hwc->config &= ~P4_CONFIG_HT;
-       }
-}
-
-/*
- * ESCR address hashing is tricky: ESCRs are not sequential
- * in memory, but they all start at MSR_P4_BSU_ESCR0 (0x03a0) and
- * their addresses lie within the range [0x3a0, 0x3e1],
- *
- * so the direct-mapped table below ends up roughly 70% filled
- * (46 ESCRs in 66 slots)
- */
-
-#define P4_ESCR_MSR_BASE               0x000003a0
-#define P4_ESCR_MSR_MAX                        0x000003e1
-#define P4_ESCR_MSR_TABLE_SIZE         (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
-#define P4_ESCR_MSR_IDX(msr)           (msr - P4_ESCR_MSR_BASE)
-#define P4_ESCR_MSR_TABLE_ENTRY(msr)   [P4_ESCR_MSR_IDX(msr)] = msr
-
-static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
-};
-
-static int p4_get_escr_idx(unsigned int addr)
-{
-       unsigned int idx = P4_ESCR_MSR_IDX(addr);
-
-       if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE      ||
-                       !p4_escr_table[idx]             ||
-                       p4_escr_table[idx] != addr)) {
-               WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
-               return -1;
-       }
-
-       return idx;
-}
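In other words, the table is direct-mapped: the index is simply the MSR address minus 0x3a0, and a slot that does not store its own address back is a hole. A toy user-space version of the same lookup, assuming only the 0x3a0..0x3e1 address window from the macros above (the populated slots below are illustrative):

  #include <stdio.h>

  #define ESCR_BASE  0x3a0u
  #define ESCR_MAX   0x3e1u
  #define TABLE_SIZE (ESCR_MAX - ESCR_BASE + 1)

  /* Toy table: only two slots are populated for the example. */
  static const unsigned int escr_table[TABLE_SIZE] = {
          [0x3a0 - ESCR_BASE] = 0x3a0,      /* e.g. MSR_P4_BSU_ESCR0 */
          [0x3c8 - ESCR_BASE] = 0x3c8,      /* an arbitrary second slot */
  };

  static int escr_idx(unsigned int addr)
  {
          unsigned int idx = addr - ESCR_BASE;

          /* Below the base, past the end, or a hole in the table: reject. */
          if (idx >= TABLE_SIZE || escr_table[idx] != addr)
                  return -1;
          return (int)idx;
  }

  int main(void)
  {
          printf("%d %d %d\n", escr_idx(0x3a0), escr_idx(0x3c8), escr_idx(0x3b1));
          return 0;
  }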
-
-static int p4_next_cntr(int thread, unsigned long *used_mask,
-                       struct p4_event_bind *bind)
-{
-       int i, j;
-
-       for (i = 0; i < P4_CNTR_LIMIT; i++) {
-               j = bind->cntr[thread][i];
-               if (j != -1 && !test_bit(j, used_mask))
-                       return j;
-       }
-
-       return -1;
-}
-
-static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
-{
-       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
-       int cpu = smp_processor_id();
-       struct hw_perf_event *hwc;
-       struct p4_event_bind *bind;
-       unsigned int i, thread, num;
-       int cntr_idx, escr_idx;
-       u64 config_alias;
-       int pass;
-
-       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-       bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
-
-       for (i = 0, num = n; i < n; i++, num--) {
-
-               hwc = &cpuc->event_list[i]->hw;
-               thread = p4_ht_thread(cpu);
-               pass = 0;
-
-again:
-               /*
-                * It's possible to get stuck in a circular loop
-                * between the original and alternative events
-                * if both are scheduled already.
-                */
-               if (pass > 2)
-                       goto done;
-
-               bind = p4_config_get_bind(hwc->config);
-               escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
-               if (unlikely(escr_idx == -1))
-                       goto done;
-
-               if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
-                       cntr_idx = hwc->idx;
-                       if (assign)
-                               assign[i] = hwc->idx;
-                       goto reserve;
-               }
-
-               cntr_idx = p4_next_cntr(thread, used_mask, bind);
-               if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
-                       /*
-                        * Check whether an event alias is still available.
-                        */
-                       config_alias = p4_get_alias_event(hwc->config);
-                       if (!config_alias)
-                               goto done;
-                       hwc->config = config_alias;
-                       pass++;
-                       goto again;
-               }
-               /*
-                * Perf does test runs to see if a whole group can be assigned
-                * together successfully.  There can be multiple rounds of this.
-                * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
-                * bits, such that the next round of group assignments will
-                * cause the above p4_should_swap_ts to pass instead of fail.
-                * This leads to counters exclusive to thread0 being used by
-                * thread1.
-                *
-                * Solve this with a cheap hack, reset the idx back to -1 to
-                * force a new lookup (p4_next_cntr) to get the right counter
-                * for the right thread.
-                *
-                * This probably doesn't comply with the general spirit of how
-                * perf wants to work, but P4 is special. :-(
-                */
-               if (p4_should_swap_ts(hwc->config, cpu))
-                       hwc->idx = -1;
-               p4_pmu_swap_config_ts(hwc, cpu);
-               if (assign)
-                       assign[i] = cntr_idx;
-reserve:
-               set_bit(cntr_idx, used_mask);
-               set_bit(escr_idx, escr_mask);
-       }
-
-done:
-       return num ? -EINVAL : 0;
-}
-
-PMU_FORMAT_ATTR(cccr, "config:0-31" );
-PMU_FORMAT_ATTR(escr, "config:32-62");
-PMU_FORMAT_ATTR(ht,   "config:63"   );
-
-static struct attribute *intel_p4_formats_attr[] = {
-       &format_attr_cccr.attr,
-       &format_attr_escr.attr,
-       &format_attr_ht.attr,
-       NULL,
-};
-
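For reference, these format strings expose the raw-config layout used throughout this file: bits 0-31 carry the CCCR value, bits 32-62 the ESCR value and bit 63 the HT flag, matching what the p4_config_pack_escr()/p4_config_pack_cccr() helpers build. A small user-space sketch of that packing, assuming only those bit positions (the field values are made up, not a real event encoding):

  #include <stdint.h>
  #include <stdio.h>

  /* cccr -> config:0-31, escr -> config:32-62, ht -> config:63 */
  static uint64_t p4_raw_config(uint32_t cccr, uint32_t escr, int ht)
  {
          return (uint64_t)cccr |
                 (((uint64_t)escr & 0x7fffffffULL) << 32) |
                 ((uint64_t)(ht != 0) << 63);
  }

  int main(void)
  {
          printf("config=%#llx\n",
                 (unsigned long long)p4_raw_config(0x00039000, 0x0000c000, 0));
          return 0;
  }

With these format attributes in sysfs the same fields can also be spelled out symbolically on the command line, along the lines of 'perf stat -e cpu/cccr=0x39000,escr=0xc000/' (values illustrative).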
-static __initconst const struct x86_pmu p4_pmu = {
-       .name                   = "Netburst P4/Xeon",
-       .handle_irq             = p4_pmu_handle_irq,
-       .disable_all            = p4_pmu_disable_all,
-       .enable_all             = p4_pmu_enable_all,
-       .enable                 = p4_pmu_enable_event,
-       .disable                = p4_pmu_disable_event,
-       .eventsel               = MSR_P4_BPU_CCCR0,
-       .perfctr                = MSR_P4_BPU_PERFCTR0,
-       .event_map              = p4_pmu_event_map,
-       .max_events             = ARRAY_SIZE(p4_general_events),
-       .get_event_constraints  = x86_get_event_constraints,
-       /*
-        * If HT is disabled we may need to use all
-        * ARCH_P4_MAX_CCCR counters simultaneously,
-        * but for the moment leave it restricted,
-        * assuming HT is on
-        */
-       .num_counters           = ARCH_P4_MAX_CCCR,
-       .apic                   = 1,
-       .cntval_bits            = ARCH_P4_CNTRVAL_BITS,
-       .cntval_mask            = ARCH_P4_CNTRVAL_MASK,
-       .max_period             = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
-       .hw_config              = p4_hw_config,
-       .schedule_events        = p4_pmu_schedule_events,
-       /*
-        * This handles erratum N15 in Intel doc 249199-029:
-        * the counter may not be updated correctly on write,
-        * so we need a second write operation to do the trick
-        * (the official workaround didn't work)
-        *
-        * the idea is taken from the OProfile code
-        */
-       .perfctr_second_write   = 1,
-
-       .format_attrs           = intel_p4_formats_attr,
-};
-
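The .perfctr_second_write flag above is honoured by the generic x86 code when it re-arms a counter: after writing the new (negated) period it simply writes the same value a second time. A runnable sketch of that workaround with the MSR write stubbed out (the MSR number and values are illustrative; the real logic lives in x86_perf_event_set_period()):

  #include <stdint.h>
  #include <stdio.h>

  /* Stand-in for wrmsrl() so the sketch runs in user space. */
  static void fake_wrmsrl(unsigned int msr, uint64_t val)
  {
          printf("wrmsr %#x <- %#llx\n", msr, (unsigned long long)val);
  }

  static void arm_counter(unsigned int perfctr, int64_t left,
                          uint64_t cntval_mask, int perfctr_second_write)
  {
          uint64_t val = (uint64_t)(-left) & cntval_mask;

          fake_wrmsrl(perfctr, val);

          /* Erratum N15: the first write may not stick, so repeat it. */
          if (perfctr_second_write)
                  fake_wrmsrl(perfctr, val);
  }

  int main(void)
  {
          arm_counter(0x300, 100000, (1ULL << 40) - 1, 1);
          return 0;
  }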
-__init int p4_pmu_init(void)
-{
-       unsigned int low, high;
-       int i, reg;
-
-       /* If we get stripped -- indexing fails */
-       BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
-
-       rdmsr(MSR_IA32_MISC_ENABLE, low, high);
-       if (!(low & (1 << 7))) {
-               pr_cont("unsupported Netburst CPU model %d ",
-                       boot_cpu_data.x86_model);
-               return -ENODEV;
-       }
-
-       memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
-               sizeof(hw_cache_event_ids));
-
-       pr_cont("Netburst events, ");
-
-       x86_pmu = p4_pmu;
-
-       /*
-        * Even though the counters are configured to interrupt a particular
-        * logical processor when an overflow happens, testing has shown that
-        * on kdump kernels (which use a single cpu), thread1's counter
-        * continues to run and will report an NMI on thread0.  Due to the
-        * overflow bug, this leads to a stream of unknown NMIs.
-        *
-        * Solve this by zeroing out the registers to mimic a reset.
-        */
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               reg = x86_pmu_config_addr(i);
-               wrmsrl_safe(reg, 0ULL);
-       }
-
-       return 0;
-}
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
deleted file mode 100644 (file)
index 7c1a0c0..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include "perf_event.h"
-
-/*
- * Not sure about some of these
- */
-static const u64 p6_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]           = 0x0079,       /* CPU_CLK_UNHALTED */
-  [PERF_COUNT_HW_INSTRUCTIONS]         = 0x00c0,       /* INST_RETIRED     */
-  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0f2e,       /* L2_RQSTS:M:E:S:I */
-  [PERF_COUNT_HW_CACHE_MISSES]         = 0x012e,       /* L2_RQSTS:I       */
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x00c4,       /* BR_INST_RETIRED  */
-  [PERF_COUNT_HW_BRANCH_MISSES]                = 0x00c5,       /* BR_MISS_PRED_RETIRED */
-  [PERF_COUNT_HW_BUS_CYCLES]           = 0x0062,       /* BUS_DRDY_CLOCKS  */
-  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2,    /* RESOURCE_STALLS  */
-
-};
-
-static const u64 __initconst p6_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0043,  /* DATA_MEM_REFS       */
-                [ C(RESULT_MISS)   ] = 0x0045, /* DCU_LINES_IN        */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x0f29,  /* L2_LD:M:E:S:I       */
-       },
-        [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-        },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080,  /* IFU_IFETCH         */
-               [ C(RESULT_MISS)   ] = 0x0f28,  /* L2_IFETCH:M:E:S:I  */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x0025,  /* L2_M_LINES_INM     */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0043,  /* DATA_MEM_REFS      */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080,  /* IFU_IFETCH         */
-               [ C(RESULT_MISS)   ] = 0x0085,  /* ITLB_MISS          */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4,  /* BR_INST_RETIRED      */
-               [ C(RESULT_MISS)   ] = 0x00c5,  /* BR_MISS_PRED_RETIRED */
-        },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-static u64 p6_pmu_event_map(int hw_event)
-{
-       return p6_perfmon_event_map[hw_event];
-}
-
-/*
- * Event setting that is specified not to count anything.
- * We use this to effectively disable a counter.
- *
- * L2_RQSTS with 0 MESI unit mask.
- */
-#define P6_NOP_EVENT                   0x0000002EULL
-
-static struct event_constraint p6_event_constraints[] =
-{
-       INTEL_EVENT_CONSTRAINT(0xc1, 0x1),      /* FLOPS */
-       INTEL_EVENT_CONSTRAINT(0x10, 0x1),      /* FP_COMP_OPS_EXE */
-       INTEL_EVENT_CONSTRAINT(0x11, 0x2),      /* FP_ASSIST */
-       INTEL_EVENT_CONSTRAINT(0x12, 0x2),      /* MUL */
-       INTEL_EVENT_CONSTRAINT(0x13, 0x2),      /* DIV */
-       INTEL_EVENT_CONSTRAINT(0x14, 0x1),      /* CYCLES_DIV_BUSY */
-       EVENT_CONSTRAINT_END
-};
-
-static void p6_pmu_disable_all(void)
-{
-       u64 val;
-
-       /* p6 only has one enable register */
-       rdmsrl(MSR_P6_EVNTSEL0, val);
-       val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-       wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static void p6_pmu_enable_all(int added)
-{
-       unsigned long val;
-
-       /* p6 only has one enable register */
-       rdmsrl(MSR_P6_EVNTSEL0, val);
-       val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-       wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static inline void
-p6_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 val = P6_NOP_EVENT;
-
-       (void)wrmsrl_safe(hwc->config_base, val);
-}
-
-static void p6_pmu_enable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 val;
-
-       val = hwc->config;
-
-       /*
-        * p6 only has a global event enable, set on PerfEvtSel0.
-        * We "disable" events by programming P6_NOP_EVENT
-        * and we rely on p6_pmu_enable_all() being called
-        * to actually enable the events.
-        */
-
-       (void)wrmsrl_safe(hwc->config_base, val);
-}
-
-PMU_FORMAT_ATTR(event, "config:0-7"    );
-PMU_FORMAT_ATTR(umask, "config:8-15"   );
-PMU_FORMAT_ATTR(edge,  "config:18"     );
-PMU_FORMAT_ATTR(pc,    "config:19"     );
-PMU_FORMAT_ATTR(inv,   "config:23"     );
-PMU_FORMAT_ATTR(cmask, "config:24-31"  );
-
-static struct attribute *intel_p6_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_pc.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       NULL,
-};
-
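These format strings mirror the classic P6 EVNTSEL layout: event select in bits 0-7, unit mask in bits 8-15, the edge/pc/inv flag bits, and the counter mask in bits 24-31. A user-space sketch of assembling a raw config from those fields, using CPU_CLK_UNHALTED (0x79) from the event map earlier in this file:

  #include <stdint.h>
  #include <stdio.h>

  /* event=config:0-7, umask=config:8-15, edge=18, pc=19, inv=23, cmask=24-31 */
  static uint64_t p6_raw_config(uint8_t event, uint8_t umask,
                                int edge, int pc, int inv, uint8_t cmask)
  {
          return (uint64_t)event |
                 ((uint64_t)umask << 8)        |
                 ((uint64_t)(edge != 0) << 18) |
                 ((uint64_t)(pc   != 0) << 19) |
                 ((uint64_t)(inv  != 0) << 23) |
                 ((uint64_t)cmask << 24);
  }

  int main(void)
  {
          printf("config=%#llx\n",
                 (unsigned long long)p6_raw_config(0x79, 0, 0, 0, 0, 0));
          return 0;
  }

On the command line this corresponds to something like 'perf stat -e cpu/event=0x79,umask=0x0/'.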
-static __initconst const struct x86_pmu p6_pmu = {
-       .name                   = "p6",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = p6_pmu_disable_all,
-       .enable_all             = p6_pmu_enable_all,
-       .enable                 = p6_pmu_enable_event,
-       .disable                = p6_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_P6_EVNTSEL0,
-       .perfctr                = MSR_P6_PERFCTR0,
-       .event_map              = p6_pmu_event_map,
-       .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
-       .apic                   = 1,
-       .max_period             = (1ULL << 31) - 1,
-       .version                = 0,
-       .num_counters           = 2,
-       /*
-        * Events have 40 bits implemented. However, they are designed such
-        * that bits [32-39] are sign extensions of bit 31. As such the
-        * effective width of an event for a P6-like PMU is 32 bits only.
-        *
-        * See the IA-32 Intel Architecture Software Developer's Manual Vol 3B
-        */
-       .cntval_bits            = 32,
-       .cntval_mask            = (1ULL << 32) - 1,
-       .get_event_constraints  = x86_get_event_constraints,
-       .event_constraints      = p6_event_constraints,
-
-       .format_attrs           = intel_p6_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-
-};
-
-static __init void p6_pmu_rdpmc_quirk(void)
-{
-       if (boot_cpu_data.x86_mask < 9) {
-               /*
-                * PPro erratum 26; fixed in stepping 9 and above.
-                */
-               pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
-               x86_pmu.attr_rdpmc_broken = 1;
-               x86_pmu.attr_rdpmc = 0;
-       }
-}
-
-__init int p6_pmu_init(void)
-{
-       x86_pmu = p6_pmu;
-
-       switch (boot_cpu_data.x86_model) {
-       case  1: /* Pentium Pro */
-               x86_add_quirk(p6_pmu_rdpmc_quirk);
-               break;
-
-       case  3: /* Pentium II - Klamath */
-       case  5: /* Pentium II - Deschutes */
-       case  6: /* Pentium II - Mendocino */
-               break;
-
-       case  7: /* Pentium III - Katmai */
-       case  8: /* Pentium III - Coppermine */
-       case 10: /* Pentium III Xeon */
-       case 11: /* Pentium III - Tualatin */
-               break;
-
-       case  9: /* Pentium M - Banias */
-       case 13: /* Pentium M - Dothan */
-               break;
-
-       default:
-               pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
-               return -ENODEV;
-       }
-
-       memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
-               sizeof(hw_cache_event_ids));
-
-       return 0;
-}
index 819d94982e078b8597f084f8409fcd3106361293..f6f50c4ceaeceef16170d14d6d55376823bbd63f 100644 (file)
@@ -51,7 +51,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
        for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
                if (!rdrand_long(&tmp)) {
                        clear_cpu_cap(c, X86_FEATURE_RDRAND);
-                       printk_once(KERN_WARNING "rdrand: disabled\n");
+                       pr_warn_once("rdrand: disabled\n");
                        return;
                }
        }
index 4c60eaf0571c2fb1a6d73258861398fc6bcc963e..cd531355e8386b4177d4dcecaf1031ba8c2086c8 100644 (file)
@@ -87,10 +87,10 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
        c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
        if (!printed) {
-               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+               pr_info("CPU: Physical Processor ID: %d\n",
                       c->phys_proc_id);
                if (c->x86_max_cores > 1)
-                       printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+                       pr_info("CPU: Processor Core ID: %d\n",
                               c->cpu_core_id);
                printed = 1;
        }
index 252da7aceca67ff580e8189ed267b2ce44d83373..e3b4d18411751c50b02967d372d21687d8854307 100644 (file)
@@ -33,7 +33,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
        if (max >= 0x80860001) {
                cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
                if (cpu_rev != 0x02000000) {
-                       printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+                       pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
                                (cpu_rev >> 24) & 0xff,
                                (cpu_rev >> 16) & 0xff,
                                (cpu_rev >> 8) & 0xff,
@@ -44,10 +44,10 @@ static void init_transmeta(struct cpuinfo_x86 *c)
        if (max >= 0x80860002) {
                cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
                if (cpu_rev == 0x02000000) {
-                       printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+                       pr_info("CPU: Processor revision %08X, %u MHz\n",
                                new_cpu_rev, cpu_freq);
                }
-               printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+               pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
                       (cms_rev1 >> 24) & 0xff,
                       (cms_rev1 >> 16) & 0xff,
                       (cms_rev1 >> 8) & 0xff,
@@ -76,7 +76,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
                      (void *)&cpu_info[56],
                      (void *)&cpu_info[60]);
                cpu_info[64] = '\0';
-               printk(KERN_INFO "CPU: %s\n", cpu_info);
+               pr_info("CPU: %s\n", cpu_info);
        }
 
        /* Unhide possibly hidden capability flags */
index 628a059a9a0663ef7cbdf07e4750c9e1e4f53006..364e5834689753fc7da34c6dc8a34cca22ab92db 100644 (file)
@@ -62,7 +62,7 @@ static unsigned long vmware_get_tsc_khz(void)
        tsc_hz = eax | (((uint64_t)ebx) << 32);
        do_div(tsc_hz, 1000);
        BUG_ON(tsc_hz >> 32);
-       printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+       pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
                         (unsigned long) tsc_hz / 1000,
                         (unsigned long) tsc_hz % 1000);
 
@@ -84,8 +84,7 @@ static void __init vmware_platform_setup(void)
        if (ebx != UINT_MAX)
                x86_platform.calibrate_tsc = vmware_get_tsc_khz;
        else
-               printk(KERN_WARNING
-                      "Failed to get TSC freq from the hypervisor\n");
+               pr_warn("Failed to get TSC freq from the hypervisor\n");
 }
 
 /*
index 30ca7607cbbbbcae4793aa5c14d8f73bbd784d71..97340f2c437c64def7a83f75a8f9b0bc6a968451 100644 (file)
@@ -408,7 +408,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
        processor.cpuflag = CPU_ENABLED;
        processor.cpufeature = (boot_cpu_data.x86 << 8) |
            (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-       processor.featureflag = boot_cpu_data.x86_capability[0];
+       processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
        processor.reserved[0] = 0;
        processor.reserved[1] = 0;
        for (i = 0; i < 2; i++) {
index 8a2cdd736fa4da82374fa9392e76b5716cc0f89a..04b132a767f116e8ff35efcbbc036e331a145a4b 100644 (file)
@@ -30,6 +30,7 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
+#include <asm/cache.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -69,7 +70,7 @@ struct nmi_stats {
 
 static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
 
-static int ignore_nmis;
+static int ignore_nmis __read_mostly;
 
 int unknown_nmi_panic;
 /*
index 24d57f77b3c19615840ac4f09c8c0fd299864698..3bf1e0b5f827ae43dca2b2c0c3c4040e56d15c56 100644 (file)
@@ -97,6 +97,14 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
+/* Logical package management. We might want to allocate that dynamically */
+static int *physical_to_logical_pkg __read_mostly;
+static unsigned long *physical_package_map __read_mostly;
+static unsigned long *logical_package_map  __read_mostly;
+static unsigned int max_physical_pkg_id __read_mostly;
+unsigned int __max_logical_packages __read_mostly;
+EXPORT_SYMBOL(__max_logical_packages);
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
        unsigned long flags;
@@ -251,6 +259,97 @@ static void notrace start_secondary(void *unused)
        cpu_startup_entry(CPUHP_ONLINE);
 }
 
+int topology_update_package_map(unsigned int apicid, unsigned int cpu)
+{
+       unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+       /* Called from early boot ? */
+       if (!physical_package_map)
+               return 0;
+
+       if (pkg >= max_physical_pkg_id)
+               return -EINVAL;
+
+       /* Set the logical package id */
+       if (test_and_set_bit(pkg, physical_package_map))
+               goto found;
+
+       if (pkg < __max_logical_packages) {
+               set_bit(pkg, logical_package_map);
+               physical_to_logical_pkg[pkg] = pkg;
+               goto found;
+       }
+       new = find_first_zero_bit(logical_package_map, __max_logical_packages);
+       if (new >= __max_logical_packages) {
+               physical_to_logical_pkg[pkg] = -1;
+               pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+                       apicid, pkg);
+               return -ENOSPC;
+       }
+       set_bit(new, logical_package_map);
+       pr_info("APIC(%x) Converting physical %u to logical package %u\n",
+               apicid, pkg, new);
+       physical_to_logical_pkg[pkg] = new;
+
+found:
+       cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+       return 0;
+}
+
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+       if (phys_pkg >= max_physical_pkg_id)
+               return -1;
+       return physical_to_logical_pkg[phys_pkg];
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
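The new helper gives drivers a dense, zero-based package index bounded by __max_logical_packages, which is what makes simple per-package arrays workable even when the physical package ids are sparse. A hedged kernel-side sketch of a typical caller (struct pkg_state and its allocation are illustrative; topology_physical_package_id() is the usual topology accessor):

  /* Illustrative per-package state, sized by __max_logical_packages. */
  static struct pkg_state *pkg_state;

  static struct pkg_state *pkg_state_for_cpu(unsigned int cpu)
  {
          int pkg = topology_phys_to_logical_pkg(topology_physical_package_id(cpu));

          /* -1 means this physical package was never enumerated. */
          if (pkg < 0)
                  return NULL;
          return &pkg_state[pkg];
  }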
+
+static void __init smp_init_package_map(void)
+{
+       unsigned int ncpus, cpu;
+       size_t size;
+
+       /*
+        * Today neither Intel nor AMD supports heterogeneous systems. That
+        * might change in the future....
+        */
+       ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
+       __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+
+       /*
+        * Possibly larger than what we need as the number of apic ids per
+        * package can be smaller than the actual used apic ids.
+        */
+       max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
+       size = max_physical_pkg_id * sizeof(unsigned int);
+       physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
+       memset(physical_to_logical_pkg, 0xff, size);
+       size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
+       physical_package_map = kzalloc(size, GFP_KERNEL);
+       size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
+       logical_package_map = kzalloc(size, GFP_KERNEL);
+
+       pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+       for_each_present_cpu(cpu) {
+               unsigned int apicid = apic->cpu_present_to_apicid(cpu);
+
+               if (apicid == BAD_APICID || !apic->apic_id_valid(apicid))
+                       continue;
+               if (!topology_update_package_map(apicid, cpu))
+                       continue;
+               pr_warn("CPU %u APICId %x disabled\n", cpu, apicid);
+               per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID;
+               set_cpu_possible(cpu, false);
+               set_cpu_present(cpu, false);
+       }
+}
+
 void __init smp_store_boot_cpu_info(void)
 {
        int id = 0; /* CPU 0 */
@@ -258,6 +357,7 @@ void __init smp_store_boot_cpu_info(void)
 
        *c = boot_cpu_data;
        c->cpu_index = id;
+       smp_init_package_map();
 }
 
 /*
index f56cc418c87de8c783f3071bf598be3878b17acb..fd57d3ae7e16daf24f8cfdb4e708c5e4e9a9f3f9 100644 (file)
@@ -1529,7 +1529,7 @@ __init void lguest_init(void)
         */
        cpu_detect(&new_cpu_data);
        /* head.S usually sets up the first capability word, so do it here. */
-       new_cpu_data.x86_capability[0] = cpuid_edx(1);
+       new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 
        /* Math is always hard! */
        set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
index d09e4c9d7cc5b4044c4421bde8692aaa6f8b21be..2c261082eadf82f693d062e7f600660fcd56817f 100644 (file)
@@ -1654,7 +1654,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
        cpu_detect(&new_cpu_data);
        set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
        new_cpu_data.wp_works_ok = 1;
-       new_cpu_data.x86_capability[0] = cpuid_edx(1);
+       new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 #endif
 
        if (xen_start_info->mod_start) {
index 724a08740a04b4a255ca080c103ec3b7381bc4f7..9466354d3e4962f14cdae33b09378a73f9c51a5d 100644 (file)
@@ -11,7 +11,7 @@
 #include "pmu.h"
 
 /* x86_pmu.handle_irq definition */
-#include "../kernel/cpu/perf_event.h"
+#include "../events/perf_event.h"
 
 #define XENPMU_IRQ_PROCESSING    1
 struct xenpmu {
index f5c5a3fa2c8101cc37ea917d29dec30713ec0699..a9d8cab18b00ffe1cd91b71104c82ed6422392d9 100644 (file)
@@ -468,6 +468,7 @@ struct perf_event {
        int                             group_flags;
        struct perf_event               *group_leader;
        struct pmu                      *pmu;
+       void                            *pmu_private;
 
        enum perf_event_active_state    state;
        unsigned int                    attach_state;
index 614614821f00a02928439b068903904d81233808..b7231498de47f79787e0e46b95cf99fb93737b0a 100644 (file)
@@ -6785,7 +6785,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
        kfree_rcu(hlist, rcu_head);
 }
 
-static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+static void swevent_hlist_put_cpu(int cpu)
 {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -6797,15 +6797,15 @@ static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
        mutex_unlock(&swhash->hlist_mutex);
 }
 
-static void swevent_hlist_put(struct perf_event *event)
+static void swevent_hlist_put(void)
 {
        int cpu;
 
        for_each_possible_cpu(cpu)
-               swevent_hlist_put_cpu(event, cpu);
+               swevent_hlist_put_cpu(cpu);
 }
 
-static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+static int swevent_hlist_get_cpu(int cpu)
 {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
        int err = 0;
@@ -6828,14 +6828,13 @@ exit:
        return err;
 }
 
-static int swevent_hlist_get(struct perf_event *event)
+static int swevent_hlist_get(void)
 {
-       int err;
-       int cpu, failed_cpu;
+       int err, cpu, failed_cpu;
 
        get_online_cpus();
        for_each_possible_cpu(cpu) {
-               err = swevent_hlist_get_cpu(event, cpu);
+               err = swevent_hlist_get_cpu(cpu);
                if (err) {
                        failed_cpu = cpu;
                        goto fail;
@@ -6848,7 +6847,7 @@ fail:
        for_each_possible_cpu(cpu) {
                if (cpu == failed_cpu)
                        break;
-               swevent_hlist_put_cpu(event, cpu);
+               swevent_hlist_put_cpu(cpu);
        }
 
        put_online_cpus();
@@ -6864,7 +6863,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
        WARN_ON(event->parent);
 
        static_key_slow_dec(&perf_swevent_enabled[event_id]);
-       swevent_hlist_put(event);
+       swevent_hlist_put();
 }
 
 static int perf_swevent_init(struct perf_event *event)
@@ -6895,7 +6894,7 @@ static int perf_swevent_init(struct perf_event *event)
        if (!event->parent) {
                int err;
 
-               err = swevent_hlist_get(event);
+               err = swevent_hlist_get();
                if (err)
                        return err;
 
@@ -8001,6 +8000,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                }
        }
 
+       /* symmetric to unaccount_event() in _free_event() */
+       account_event(event);
+
        return event;
 
 err_per_task:
@@ -8364,8 +8366,6 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        }
 
-       account_event(event);
-
        /*
         * Special case software events and allow them to be part of
         * any hardware group.
@@ -8662,8 +8662,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        /* Mark owner so we could distinguish it from user events. */
        event->owner = TASK_TOMBSTONE;
 
-       account_event(event);
-
        ctx = find_get_context(event->pmu, task, event);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
@@ -9447,6 +9445,7 @@ ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_sysfs_show);
 
 static int __init perf_event_sysfs_init(void)
 {
index c9956440d0e609c51e2032f6f625c4061ba764a4..21b81a41dae572a3263c7b731efa97a2d25872e9 100644 (file)
@@ -30,7 +30,7 @@
 struct trace_kprobe {
        struct list_head        list;
        struct kretprobe        rp;     /* Use rp.kp for kprobe use */
-       unsigned long           nhit;
+       unsigned long __percpu *nhit;
        const char              *symbol;        /* symbol name */
        struct trace_probe      tp;
 };
@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
        if (!tk)
                return ERR_PTR(ret);
 
+       tk->nhit = alloc_percpu(unsigned long);
+       if (!tk->nhit)
+               goto error;
+
        if (symbol) {
                tk->symbol = kstrdup(symbol, GFP_KERNEL);
                if (!tk->symbol)
@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 error:
        kfree(tk->tp.call.name);
        kfree(tk->symbol);
+       free_percpu(tk->nhit);
        kfree(tk);
        return ERR_PTR(ret);
 }
@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
        kfree(tk->tp.call.class->system);
        kfree(tk->tp.call.name);
        kfree(tk->symbol);
+       free_percpu(tk->nhit);
        kfree(tk);
 }
 
@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
        struct trace_kprobe *tk = v;
+       unsigned long nhit = 0;
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               nhit += *per_cpu_ptr(tk->nhit, cpu);
 
        seq_printf(m, "  %-44s %15lu %15lu\n",
-                  trace_event_name(&tk->tp.call), tk->nhit,
+                  trace_event_name(&tk->tp.call), nhit,
                   tk->rp.kp.nmissed);
 
        return 0;
@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
        struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
-       tk->nhit++;
+       raw_cpu_inc(*tk->nhit);
 
        if (tk->tp.flags & TP_FLAG_TRACE)
                kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
        struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
-       tk->nhit++;
+       raw_cpu_inc(*tk->nhit);
 
        if (tk->tp.flags & TP_FLAG_TRACE)
                kretprobe_trace_func(tk, ri, regs);
index 0655afbea83f596ded953d6017e0f25a640bc073..d1663083d903aaad3ce4b359cfdb20068004b237 100644 (file)
@@ -186,11 +186,11 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
 
 extern char *__bad_type_size(void);
 
-#define SYSCALL_FIELD(type, name)                                      \
-       sizeof(type) != sizeof(trace.name) ?                            \
+#define SYSCALL_FIELD(type, field, name)                               \
+       sizeof(type) != sizeof(trace.field) ?                           \
                __bad_type_size() :                                     \
-               #type, #name, offsetof(typeof(trace), name),            \
-               sizeof(trace.name), is_signed_type(type)
+               #type, #name, offsetof(typeof(trace), field),           \
+               sizeof(trace.field), is_signed_type(type)
 
 static int __init
 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
@@ -261,7 +261,8 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
        int i;
        int offset = offsetof(typeof(trace), args);
 
-       ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+       ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+                                FILTER_OTHER);
        if (ret)
                return ret;
 
@@ -281,11 +282,12 @@ static int __init syscall_exit_define_fields(struct trace_event_call *call)
        struct syscall_trace_exit trace;
        int ret;
 
-       ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+       ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+                                FILTER_OTHER);
        if (ret)
                return ret;
 
-       ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
+       ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
                                 FILTER_OTHER);
 
        return ret;
index 5a70f6196f577a071ae0a31e9da7fa0e1dd1bc68..81dedaab36ccfed3150281a0f915a4070f667bfd 100644 (file)
@@ -41,6 +41,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
                        break;
        return i;
 }
+EXPORT_SYMBOL(cpumask_any_but);
 
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
index 4a96473b180f938b038d5e698f3578876eb7c70c..ee566e8bd1cff56efde9200bf4a76caa840669f1 100644 (file)
@@ -85,7 +85,7 @@ $(OUTPUT)%.i: %.c FORCE
        $(call rule_mkdir)
        $(call if_changed_dep,cc_i_c)
 
-$(OUTPUT)%.i: %.S FORCE
+$(OUTPUT)%.s: %.S FORCE
        $(call rule_mkdir)
        $(call if_changed_dep,cc_i_c)
 
index 02db3cdff20ff7653c8ca0db21d28ef7b508a9eb..6b7707270aa3b19791c8b6248f90ecddeabc1fdd 100644 (file)
@@ -27,7 +27,7 @@ endef
 #   the rule that uses them - an example for that is the 'bionic'
 #   feature check. ]
 #
-FEATURE_TESTS ?=                       \
+FEATURE_TESTS_BASIC :=                 \
        backtrace                       \
        dwarf                           \
        fortify-source                  \
@@ -46,6 +46,7 @@ FEATURE_TESTS ?=                      \
        libpython                       \
        libpython-version               \
        libslang                        \
+       libcrypto                       \
        libunwind                       \
        pthread-attr-setaffinity-np     \
        stackprotector-all              \
@@ -56,6 +57,25 @@ FEATURE_TESTS ?=                     \
        get_cpuid                       \
        bpf
 
+# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
+# of all feature tests
+FEATURE_TESTS_EXTRA :=                 \
+       bionic                          \
+       compile-32                      \
+       compile-x32                     \
+       cplus-demangle                  \
+       hello                           \
+       libbabeltrace                   \
+       liberty                         \
+       liberty-z                       \
+       libunwind-debug-frame
+
+FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
+
+ifeq ($(FEATURE_TESTS),all)
+  FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA)
+endif
+
 FEATURE_DISPLAY ?=                     \
        dwarf                           \
        glibc                           \
@@ -68,6 +88,7 @@ FEATURE_DISPLAY ?=                    \
        libperl                         \
        libpython                       \
        libslang                        \
+       libcrypto                       \
        libunwind                       \
        libdw-dwarf-unwind              \
        zlib                            \
@@ -100,6 +121,14 @@ ifeq ($(feature-all), 1)
   # test-all.c passed - just set all the core feature flags to 1:
   #
   $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
+  #
+  # test-all.c does not include these tests, so we need to
+  # run them individually here to get proper feature values
+  #
+  $(call feature_check,compile-32)
+  $(call feature_check,compile-x32)
+  $(call feature_check,bionic)
+  $(call feature_check,libbabeltrace)
 else
   $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
 endif
index bf8f0352264dcc8a5a1286bc4add2ca9de481357..c5f4c417428d7099fbe4f487a179b0663f478611 100644 (file)
@@ -23,6 +23,7 @@ FILES=                                        \
        test-libpython.bin              \
        test-libpython-version.bin      \
        test-libslang.bin               \
+       test-libcrypto.bin              \
        test-libunwind.bin              \
        test-libunwind-debug-frame.bin  \
        test-pthread-attr-setaffinity-np.bin    \
@@ -105,6 +106,9 @@ $(OUTPUT)test-libaudit.bin:
 $(OUTPUT)test-libslang.bin:
        $(BUILD) -I/usr/include/slang -lslang
 
+$(OUTPUT)test-libcrypto.bin:
+       $(BUILD) -lcrypto
+
 $(OUTPUT)test-gtk2.bin:
        $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
 
index 81025cade45fa9a3fcc6f8f0aacc8f403e6d9211..e499a36c1e4a9e21e9c355309b53a7dc5901664a 100644 (file)
 # include "test-bpf.c"
 #undef main
 
+#define main main_test_libcrypto
+# include "test-libcrypto.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
        main_test_libpython();
@@ -158,6 +162,7 @@ int main(int argc, char *argv[])
        main_test_lzma();
        main_test_get_cpuid();
        main_test_bpf();
+       main_test_libcrypto();
 
        return 0;
 }
index 31dbf45bf99c5996ca0ee62cba186d912b08e666..c54e6551ae4c54cd6e9f418e6e8ff97280444efd 100644 (file)
@@ -1,4 +1,6 @@
+#include <stdio.h>
 int main(void)
 {
+       printf("Hello World!\n");
        return 0;
 }
diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c
new file mode 100644 (file)
index 0000000..bd79dc7
--- /dev/null
@@ -0,0 +1,17 @@
+#include <openssl/sha.h>
+#include <openssl/md5.h>
+
+int main(void)
+{
+       MD5_CTX context;
+       unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
+       unsigned char dat[] = "12345";
+
+       MD5_Init(&context);
+       MD5_Update(&context, &dat[0], sizeof(dat));
+       MD5_Final(&md[0], &context);
+
+       SHA1(&dat[0], sizeof(dat), &md[0]);
+
+       return 0;
+}
index e8b8a23b9bf4cdac8ccfcf95690f8dfd93e1d6e6..954c644f7ad9e5ee0f54bdf16cf2b68766f1e2a5 100644 (file)
@@ -1,3 +1,4 @@
 libapi-y += fd/
 libapi-y += fs/
 libapi-y += cpu.o
+libapi-y += debug.o
index d85904dc9b38747dc0feda60f819cf93ceabbdcf..bbc82c614bee62eec8563c5b1dd9fd222eafa5de 100644 (file)
@@ -18,6 +18,7 @@ LIBFILE = $(OUTPUT)libapi.a
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS += -I$(srctree)/tools/lib/api
 
 RM = rm -f
 
diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h
new file mode 100644 (file)
index 0000000..188f788
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef __API_DEBUG_INTERNAL_H__
+#define __API_DEBUG_INTERNAL_H__
+
+#include "debug.h"
+
+#define __pr(func, fmt, ...)   \
+do {                           \
+       if ((func))             \
+               (func)("libapi: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+extern libapi_print_fn_t __pr_warning;
+extern libapi_print_fn_t __pr_info;
+extern libapi_print_fn_t __pr_debug;
+
+#define pr_warning(fmt, ...)   __pr(__pr_warning, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...)      __pr(__pr_info, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...)     __pr(__pr_debug, fmt, ##__VA_ARGS__)
+
+#endif /* __API_DEBUG_INTERNAL_H__ */
diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c
new file mode 100644 (file)
index 0000000..5fa5cf5
--- /dev/null
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include "debug.h"
+#include "debug-internal.h"
+
+static int __base_pr(const char *format, ...)
+{
+       va_list args;
+       int err;
+
+       va_start(args, format);
+       err = vfprintf(stderr, format, args);
+       va_end(args);
+       return err;
+}
+
+libapi_print_fn_t __pr_warning = __base_pr;
+libapi_print_fn_t __pr_info    = __base_pr;
+libapi_print_fn_t __pr_debug;
+
+void libapi_set_print(libapi_print_fn_t warn,
+                     libapi_print_fn_t info,
+                     libapi_print_fn_t debug)
+{
+       __pr_warning = warn;
+       __pr_info    = info;
+       __pr_debug   = debug;
+}
diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h
new file mode 100644 (file)
index 0000000..a0872f6
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __API_DEBUG_H__
+#define __API_DEBUG_H__
+
+typedef int (*libapi_print_fn_t)(const char *, ...);
+
+void libapi_set_print(libapi_print_fn_t warn,
+                     libapi_print_fn_t info,
+                     libapi_print_fn_t debug);
+
+#endif /* __API_DEBUG_H__ */
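libapi_set_print() lets the host tool replace libapi's defaults, which send warnings and info to stderr and leave debug output disabled (__pr_debug starts out NULL). A minimal sketch of a tool installing its own handlers (the handler names and prefixes are illustrative):

  #include <stdarg.h>
  #include <stdio.h>
  #include "debug.h"              /* the tools/lib/api/debug.h added above */

  static int tool_warn(const char *fmt, ...)
  {
          va_list ap;
          int ret;

          va_start(ap, fmt);
          fputs("mytool: ", stderr);
          ret = vfprintf(stderr, fmt, ap);
          va_end(ap);
          return ret;
  }

  static int tool_info(const char *fmt, ...)
  {
          va_list ap;
          int ret;

          va_start(ap, fmt);
          ret = vfprintf(stdout, fmt, ap);
          va_end(ap);
          return ret;
  }

  int main(void)
  {
          /* Route libapi warnings and info through the tool; keep debug silent. */
          libapi_set_print(tool_warn, tool_info, NULL);
          return 0;
  }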
index 459599d1b6c410b7b41333c13f248ee685c2dd4a..ef78c22ff44d4142f01a1f5aea3bbe4e15fd0101 100644 (file)
@@ -13,6 +13,7 @@
 #include <sys/mount.h>
 
 #include "fs.h"
+#include "debug-internal.h"
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
@@ -300,6 +301,56 @@ int filename__read_ull(const char *filename, unsigned long long *value)
        return err;
 }
 
+#define STRERR_BUFSIZE  128     /* For the buffer size of strerror_r */
+
+int filename__read_str(const char *filename, char **buf, size_t *sizep)
+{
+       size_t size = 0, alloc_size = 0;
+       void *bf = NULL, *nbf;
+       int fd, n, err = 0;
+       char sbuf[STRERR_BUFSIZE];
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0)
+               return -errno;
+
+       do {
+               if (size == alloc_size) {
+                       alloc_size += BUFSIZ;
+                       nbf = realloc(bf, alloc_size);
+                       if (!nbf) {
+                               err = -ENOMEM;
+                               break;
+                       }
+
+                       bf = nbf;
+               }
+
+               n = read(fd, bf + size, alloc_size - size);
+               if (n < 0) {
+                       if (size) {
+                               pr_warning("read failed %d: %s\n", errno,
+                                        strerror_r(errno, sbuf, sizeof(sbuf)));
+                               err = 0;
+                       } else
+                               err = -errno;
+
+                       break;
+               }
+
+               size += n;
+       } while (n > 0);
+
+       if (!err) {
+               *sizep = size;
+               *buf   = bf;
+       } else
+               free(bf);
+
+       close(fd);
+       return err;
+}
+
 int sysfs__read_ull(const char *entry, unsigned long long *value)
 {
        char path[PATH_MAX];
@@ -326,6 +377,19 @@ int sysfs__read_int(const char *entry, int *value)
        return filename__read_int(path, value);
 }
 
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+       char path[PATH_MAX];
+       const char *sysfs = sysfs__mountpoint();
+
+       if (!sysfs)
+               return -1;
+
+       snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+       return filename__read_str(path, buf, sizep);
+}
+
 int sysctl__read_int(const char *sysctl, int *value)
 {
        char path[PATH_MAX];
index d024a7f682f69b27a4bba1aed7b8f1894d07b50f..9f6598098dc5804a5bf660013ad5b07c499b6169 100644 (file)
@@ -2,6 +2,7 @@
 #define __API_FS__
 
 #include <stdbool.h>
+#include <unistd.h>
 
 /*
  * On most systems <limits.h> would have given us this, but  not on some systems
@@ -26,8 +27,10 @@ FS(tracefs)
 
 int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
+int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
 int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
 #endif /* __API_FS__ */
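A rough usage sketch for the new sysfs__read_str() helper, using only the declarations above; the sysfs entry name is just an example, and on success the caller owns the buffer, which is heap-allocated and not NUL-terminated:

	#include <stdio.h>
	#include <stdlib.h>
	#include "fs.h"			/* tools/lib/api/fs/fs.h */

	static int print_online_cpus(void)
	{
		char *buf = NULL;
		size_t size = 0;
		int err;

		/* reads e.g. /sys/devices/system/cpu/online into a malloc'd buffer */
		err = sysfs__read_str("devices/system/cpu/online", &buf, &size);
		if (err < 0)
			return err;

		printf("online cpus: %.*s", (int)size, buf);	/* %.*s: not NUL-terminated */
		free(buf);
		return 0;
	}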
index 8334a5a9d5d7f55e97144e1d7462c52deec02e02..7e543c3102d4118a09717bb6fb4c0f779532ae91 100644 (file)
@@ -201,6 +201,7 @@ struct bpf_object {
                        Elf_Data *data;
                } *reloc;
                int nr_reloc;
+               int maps_shndx;
        } efile;
        /*
         * All loaded bpf_object is linked in a list, which is
@@ -350,6 +351,7 @@ static struct bpf_object *bpf_object__new(const char *path,
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
+       obj->efile.maps_shndx = -1;
 
        obj->loaded = false;
 
@@ -529,12 +531,12 @@ bpf_object__init_maps(struct bpf_object *obj, void *data,
 }
 
 static int
-bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
+bpf_object__init_maps_name(struct bpf_object *obj)
 {
        int i;
        Elf_Data *symbols = obj->efile.symbols;
 
-       if (!symbols || maps_shndx < 0)
+       if (!symbols || obj->efile.maps_shndx < 0)
                return -EINVAL;
 
        for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
@@ -544,7 +546,7 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
 
                if (!gelf_getsym(symbols, i, &sym))
                        continue;
-               if (sym.st_shndx != maps_shndx)
+               if (sym.st_shndx != obj->efile.maps_shndx)
                        continue;
 
                map_name = elf_strptr(obj->efile.elf,
@@ -572,7 +574,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
        Elf *elf = obj->efile.elf;
        GElf_Ehdr *ep = &obj->efile.ehdr;
        Elf_Scn *scn = NULL;
-       int idx = 0, err = 0, maps_shndx = -1;
+       int idx = 0, err = 0;
 
        /* Elf is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
@@ -625,7 +627,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
                else if (strcmp(name, "maps") == 0) {
                        err = bpf_object__init_maps(obj, data->d_buf,
                                                    data->d_size);
-                       maps_shndx = idx;
+                       obj->efile.maps_shndx = idx;
                } else if (sh.sh_type == SHT_SYMTAB) {
                        if (obj->efile.symbols) {
                                pr_warning("bpf: multiple SYMTAB in %s\n",
@@ -674,8 +676,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
                pr_warning("Corrupted ELF file: index of strtab invalid\n");
                return LIBBPF_ERRNO__FORMAT;
        }
-       if (maps_shndx >= 0)
-               err = bpf_object__init_maps_name(obj, maps_shndx);
+       if (obj->efile.maps_shndx >= 0)
+               err = bpf_object__init_maps_name(obj);
 out:
        return err;
 }
@@ -697,7 +699,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
 static int
 bpf_program__collect_reloc(struct bpf_program *prog,
                           size_t nr_maps, GElf_Shdr *shdr,
-                          Elf_Data *data, Elf_Data *symbols)
+                          Elf_Data *data, Elf_Data *symbols,
+                          int maps_shndx)
 {
        int i, nrels;
 
@@ -724,9 +727,6 @@ bpf_program__collect_reloc(struct bpf_program *prog,
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
-               insn_idx = rel.r_offset / sizeof(struct bpf_insn);
-               pr_debug("relocation: insn_idx=%u\n", insn_idx);
-
                if (!gelf_getsym(symbols,
                                 GELF_R_SYM(rel.r_info),
                                 &sym)) {
@@ -735,6 +735,15 @@ bpf_program__collect_reloc(struct bpf_program *prog,
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
+               if (sym.st_shndx != maps_shndx) {
+                       pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
+                                  prog->section_name, sym.st_shndx);
+                       return -LIBBPF_ERRNO__RELOC;
+               }
+
+               insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+               pr_debug("relocation: insn_idx=%u\n", insn_idx);
+
                if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
                        pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
                                   insn_idx, insns[insn_idx].code);
@@ -863,7 +872,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 
                err = bpf_program__collect_reloc(prog, nr_maps,
                                                 shdr, data,
-                                                obj->efile.symbols);
+                                                obj->efile.symbols,
+                                                obj->efile.maps_shndx);
                if (err)
                        return err;
        }
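The net effect of the libbpf changes above is that every relocation in a program section must now resolve against the 'maps' ELF section recorded in obj->efile.maps_shndx; anything else is rejected with LIBBPF_ERRNO__RELOC instead of being silently treated as a map load. For reference, a sketch of how a map is typically laid out so that this holds; the SEC() macro and struct bpf_map_def here follow the samples/bpf convention of the time and are assumptions for illustration, not part of this patch:

	/* Place the map definition in the "maps" section so that instruction
	 * relocations referring to 'my_map' point at the maps section index. */
	#define SEC(name) __attribute__((section(name), used))

	struct bpf_map_def {
		unsigned int type;
		unsigned int key_size;
		unsigned int value_size;
		unsigned int max_entries;
	};

	struct bpf_map_def SEC("maps") my_map = {
		.type        = 1,		/* e.g. BPF_MAP_TYPE_HASH */
		.key_size    = sizeof(int),
		.value_size  = sizeof(long),
		.max_entries = 1024,
	};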
index c3bd294a63d1f7e4a75b2a950bbe398f33703cd6..190cc886ab9105ea6be58ad58caf8e940274c4a9 100644 (file)
@@ -1951,6 +1951,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
                   strcmp(token, "*") == 0 ||
                   strcmp(token, "^") == 0 ||
                   strcmp(token, "/") == 0 ||
+                  strcmp(token, "%") == 0 ||
                   strcmp(token, "<") == 0 ||
                   strcmp(token, ">") == 0 ||
                   strcmp(token, "<=") == 0 ||
@@ -2397,6 +2398,12 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
                                break;
                        *val = left + right;
                        break;
+               case '~':
+                       ret = arg_num_eval(arg->op.right, &right);
+                       if (!ret)
+                               break;
+                       *val = ~right;
+                       break;
                default:
                        do_warning("unknown op '%s'", arg->op.op);
                        ret = 0;
@@ -2634,6 +2641,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok)
 
 free_field:
        free_arg(arg->hex.field);
+       arg->hex.field = NULL;
 out:
        *tok = NULL;
        return EVENT_ERROR;
@@ -2658,8 +2666,10 @@ process_int_array(struct event_format *event, struct print_arg *arg, char **tok)
 
 free_size:
        free_arg(arg->int_array.count);
+       arg->int_array.count = NULL;
 free_field:
        free_arg(arg->int_array.field);
+       arg->int_array.field = NULL;
 out:
        *tok = NULL;
        return EVENT_ERROR;
@@ -3689,6 +3699,9 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
                case '/':
                        val = left / right;
                        break;
+               case '%':
+                       val = left % right;
+                       break;
                case '*':
                        val = left * right;
                        break;
@@ -4971,7 +4984,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
                                                break;
                                        }
                                }
-                               if (pevent->long_size == 8 && ls &&
+                               if (pevent->long_size == 8 && ls == 1 &&
                                    sizeof(long) != 8) {
                                        char *p;
 
@@ -5335,41 +5348,45 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
        return false;
 }
 
-void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-                       struct pevent_record *record, bool use_trace_clock)
+/**
+ * pevent_find_event_by_record - return the event from a given record
+ * @pevent: a handle to the pevent
+ * @record: The record to get the event from
+ *
+ * Returns the associated event for a given record, or NULL if none
+ * is found.
+ */
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
 {
-       static const char *spaces = "                    "; /* 20 spaces */
-       struct event_format *event;
-       unsigned long secs;
-       unsigned long usecs;
-       unsigned long nsecs;
-       const char *comm;
-       void *data = record->data;
        int type;
-       int pid;
-       int len;
-       int p;
-       bool use_usec_format;
-
-       use_usec_format = is_timestamp_in_us(pevent->trace_clock,
-                                                       use_trace_clock);
-       if (use_usec_format) {
-               secs = record->ts / NSECS_PER_SEC;
-               nsecs = record->ts - secs * NSECS_PER_SEC;
-       }
 
        if (record->size < 0) {
                do_warning("ug! negative record size %d", record->size);
-               return;
+               return NULL;
        }
 
-       type = trace_parse_common_type(pevent, data);
+       type = trace_parse_common_type(pevent, record->data);
 
-       event = pevent_find_event(pevent, type);
-       if (!event) {
-               do_warning("ug! no event found for type %d", type);
-               return;
-       }
+       return pevent_find_event(pevent, type);
+}
+
+/**
+ * pevent_print_event_task - Write the event task comm, pid and CPU
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the task's comm, pid and CPU to @s.
+ */
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record)
+{
+       void *data = record->data;
+       const char *comm;
+       int pid;
 
        pid = parse_common_pid(pevent, data);
        comm = find_cmdline(pevent, pid);
@@ -5377,9 +5394,43 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
        if (pevent->latency_format) {
                trace_seq_printf(s, "%8.8s-%-5d %3d",
                       comm, pid, record->cpu);
-               pevent_data_lat_fmt(pevent, s, record);
        } else
                trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+}
+
+/**
+ * pevent_print_event_time - Write the event timestamp
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ * @use_trace_clock: Set to parse according to the @pevent->trace_clock
+ *
+ * Writes the timestamp of the record into @s.
+ */
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record,
+                            bool use_trace_clock)
+{
+       unsigned long secs;
+       unsigned long usecs;
+       unsigned long nsecs;
+       int p;
+       bool use_usec_format;
+
+       use_usec_format = is_timestamp_in_us(pevent->trace_clock,
+                                                       use_trace_clock);
+       if (use_usec_format) {
+               secs = record->ts / NSECS_PER_SEC;
+               nsecs = record->ts - secs * NSECS_PER_SEC;
+       }
+
+       if (pevent->latency_format) {
+               trace_seq_printf(s, " %3d", record->cpu);
+               pevent_data_lat_fmt(pevent, s, record);
+       } else
+               trace_seq_printf(s, " [%03d]", record->cpu);
 
        if (use_usec_format) {
                if (pevent->flags & PEVENT_NSEC_OUTPUT) {
@@ -5387,14 +5438,36 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
                        p = 9;
                } else {
                        usecs = (nsecs + 500) / NSECS_PER_USEC;
+                       /* To avoid usecs larger than 1 sec */
+                       if (usecs >= 1000000) {
+                               usecs -= 1000000;
+                               secs++;
+                       }
                        p = 6;
                }
 
-               trace_seq_printf(s, " %5lu.%0*lu: %s: ",
-                                       secs, p, usecs, event->name);
+               trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs);
        } else
-               trace_seq_printf(s, " %12llu: %s: ",
-                                       record->ts, event->name);
+               trace_seq_printf(s, " %12llu:", record->ts);
+}
+
+/**
+ * pevent_print_event_data - Write the event data section
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the parsing of the record's data to @s.
+ */
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record)
+{
+       static const char *spaces = "                    "; /* 20 spaces */
+       int len;
+
+       trace_seq_printf(s, " %s: ", event->name);
 
        /* Space out the event names evenly. */
        len = strlen(event->name);
@@ -5404,6 +5477,23 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
        pevent_event_info(s, event, record);
 }
 
+void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
+                       struct pevent_record *record, bool use_trace_clock)
+{
+       struct event_format *event;
+
+       event = pevent_find_event_by_record(pevent, record);
+       if (!event) {
+               do_warning("ug! no event found for type %d",
+                          trace_parse_common_type(pevent, record->data));
+               return;
+       }
+
+       pevent_print_event_task(pevent, s, event, record);
+       pevent_print_event_time(pevent, s, event, record, use_trace_clock);
+       pevent_print_event_data(pevent, s, event, record);
+}
+
 static int events_id_cmp(const void *a, const void *b)
 {
        struct event_format * const * ea = a;
index 706d9bc24066cf308ba17d718eafd95c4ff5550c..9ffde377e89d912ffb2127fe454872346759738b 100644 (file)
@@ -628,6 +628,16 @@ int pevent_register_print_string(struct pevent *pevent, const char *fmt,
                                 unsigned long long addr);
 int pevent_pid_is_registered(struct pevent *pevent, int pid);
 
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record);
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record,
+                            bool use_trace_clock);
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record);
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
                        struct pevent_record *record, bool use_trace_clock);
 
@@ -694,6 +704,9 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id);
 struct event_format *
 pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
 
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record);
+
 void pevent_data_lat_fmt(struct pevent *pevent,
                         struct trace_seq *s, struct pevent_record *record);
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
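With pevent_print_event() now split into task/time/data helpers, callers can emit only the pieces they need. A minimal sketch under the assumption that 'pevent' and 'record' come from the usual tracing setup, using only the functions declared above plus the library's trace_seq helpers:

	#include "event-parse.h"

	/* Print a record's task info and payload, but skip the timestamp column. */
	static void print_without_time(struct pevent *pevent, struct pevent_record *record)
	{
		struct trace_seq s;
		struct event_format *event;

		event = pevent_find_event_by_record(pevent, record);
		if (!event)
			return;		/* no event known for this record's type */

		trace_seq_init(&s);
		pevent_print_event_task(pevent, &s, event, record);
		pevent_print_event_data(pevent, &s, event, record);
		trace_seq_do_printf(&s);
		trace_seq_destroy(&s);
	}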
index b9ca1e304158da86f36dafad33e7de0a52e01567..15949e2a7805cc6e7d848ddbf2f99b249e332f3b 100644 (file)
@@ -8,7 +8,7 @@ perf-config - Get and set variables in a configuration file.
 SYNOPSIS
 --------
 [verse]
-'perf config' -l | --list
+'perf config' [<file-option>] -l | --list
 
 DESCRIPTION
 -----------
@@ -21,6 +21,14 @@ OPTIONS
 --list::
        Show current config variables, name and value, for all sections.
 
+--user::
+       For writing and reading options: write to the user
+       '$HOME/.perfconfig' file, or read from it.
+
+--system::
+       For writing and reading options: write to the system-wide
+       '$(sysconfdir)/perfconfig' file, or read from it.
+
 CONFIGURATION FILE
 ------------------
 
@@ -30,6 +38,10 @@ The '$HOME/.perfconfig' file is used to store a per-user configuration.
 The file '$(sysconfdir)/perfconfig' can be used to
 store a system-wide default configuration.
 
+When reading or writing, the values are read from the system and user
+configuration files by default, and options '--system' and '--user'
+can be used to tell the command to read from or write to only that location.
+
 Syntax
 ~~~~~~
 
@@ -62,7 +74,7 @@ Given a $HOME/.perfconfig like this:
                medium = green, default
                normal = lightgray, default
                selected = white, lightgray
-               code = blue, default
+               jump_arrows = blue, default
                addr = magenta, default
                root = white, blue
 
@@ -98,6 +110,347 @@ Given a $HOME/.perfconfig like this:
                order = caller
                sort-key = function
 
+Variables
+~~~~~~~~~
+
+colors.*::
+       The variables for customizing the colors used in the output of the
+       'report', 'top' and 'annotate' commands in the TUI. They should specify the
+       foreground and background colors, separated by a comma, for example:
+
+               medium = green, lightgray
+
+       If you want to use the color configured for your terminal, just leave it
+       as 'default', for example:
+
+               medium = default, lightgray
+
+       Available colors:
+       red, yellow, green, cyan, gray, black, blue,
+       white, default, magenta, lightgray
+
+       colors.top::
+               'top' means an overhead percentage of more than 5%,
+               and the values of this variable specify the colors for that range.
+               The basic values are foreground-color 'red' and
+               background-color 'default'.
+       colors.medium::
+               'medium' means an overhead percentage of more than 0.5%.
+               Default values are 'green' and 'default'.
+       colors.normal::
+               'normal' means the remaining overhead percentages,
+               i.e. those not covered by 'top', 'medium' or 'selected'.
+               Default values are 'lightgray' and 'default'.
+       colors.selected::
+               This selects the colors for the current entry in a list of entries
+               from sub-commands (top, report, annotate).
+               Default values are 'black' and 'lightgray'.
+       colors.jump_arrows::
+               Colors for jump arrows on assembly code listings
+               such as 'jns', 'jmp', 'jne', etc.
+               Default values are 'blue', 'default'.
+       colors.addr::
+               This selects colors for addresses from 'annotate'.
+               Default values are 'magenta', 'default'.
+       colors.root::
+               Colors for headers in the output of sub-commands (top, report).
+               Default values are 'white', 'blue'.
+
+tui.*, gtk.*::
+       Subcommands that can be configured here are 'top', 'report' and 'annotate'.
+       These values are booleans, for example:
+
+       [tui]
+               top = true
+
+       will make the TUI the default for the 'top' subcommand. These settings are
+       only available if the required libs were detected at tool build time.
+
+buildid.*::
+       buildid.dir::
+               Each executable and shared library in modern distributions comes with a
+               content-based identifier that, if available, will be inserted in the
+               'perf.data' file header so that, at analysis time, whatever is needed for
+               symbol resolution, code annotation, etc. can be found.
+
+               The recording tools also store a hard link or copy of binaries, shared
+               libraries, /proc/kallsyms and /proc/kcore files in a per-user directory,
+               $HOME/.debug/, to be used at analysis time.
+
+               The buildid.dir variable can be used to either change this directory
+               cache location or to disable it altogether. If you want to disable it,
+               set buildid.dir to /dev/null. The default is $HOME/.debug.
+
+annotate.*::
+       These options work only in the TUI.
+       They control how addresses, jump arrows and source code are shown
+       alongside the assembly of a specific program.
+
+       annotate.hide_src_code::
+               If the analyzed program has source code, this option controls
+               whether 'annotate' interleaves it with the assembly listing.
+               For example, take the following four lines of a program.
+               If this option is 'true', they are printed
+               without the program's source code, as below.
+
+               │        push   %rbp
+               │        mov    %rsp,%rbp
+               │        sub    $0x10,%rsp
+               │        mov    (%rdi),%rdx
+
+               But if this option is 'false', the source code of that part
+               is also printed, as below. Default is 'false'.
+
+               │      struct rb_node *rb_next(const struct rb_node *node)
+               │      {
+               │        push   %rbp
+               │        mov    %rsp,%rbp
+               │        sub    $0x10,%rsp
+               │              struct rb_node *parent;
+               │
+               │              if (RB_EMPTY_NODE(node))
+               │        mov    (%rdi),%rdx
+               │              return n;
+
+        annotate.use_offset::
+               Addresses can be shown as offsets from the first address of the
+               loaded function. Instead of the original absolute addresses of the
+               assembly code, the difference from that base address is printed.
+               As an example,
+               if the base address is 0xffffffff81624d50, as below,
+
+               ffffffff81624d50 <load0>
+
+               an assembly line normally shows its absolute address, as below,
+
+               ffffffff816250b8:│  mov    0x8(%r14),%rdi
+
+               but if use_offset is 'true', the offset from the base address is printed instead.
+               Default is 'true'. This option only applies to the TUI.
+
+                            368:│  mov    0x8(%r14),%rdi
+
+       annotate.jump_arrows::
+               Assembly code may contain jump instructions.
+               Depending on the boolean value of jump_arrows,
+               arrows showing where an instruction jumps to
+               can be printed, as below.
+
+               │     ┌──jmp    1333
+               │     │  xchg   %ax,%ax
+               │1330:│  mov    %r15,%r10
+               │1333:└─→cmp    %r15,%r14
+
+               If jump_arrows is 'false', the arrows aren't printed, as below.
+               Default is 'false'.
+
+               │      ↓ jmp    1333
+               │        xchg   %ax,%ax
+               │1330:   mov    %r15,%r10
+               │1333:   cmp    %r15,%r14
+
+        annotate.show_linenr::
+               When showing source code, if this option is 'true',
+               line numbers are printed, as below.
+
+               │1628         if (type & PERF_SAMPLE_IDENTIFIER) {
+               │     ↓ jne    508
+               │1628                 data->id = *array;
+               │1629                 array++;
+               │1630         }
+
+               However, if this option is 'false', they aren't printed, as below.
+               Default is 'false'.
+
+               │             if (type & PERF_SAMPLE_IDENTIFIER) {
+               │     ↓ jne    508
+               │                     data->id = *array;
+               │                     array++;
+               │             }
+
+        annotate.show_nr_jumps::
+               Consider the following line of assembly code.
+
+               │1382:   movb   $0x1,-0x270(%rbp)
+
+               If this option is 'true', the number of branches jumping to that address is printed before it, as below.
+               Default is 'false'.
+
+               │1 1382:   movb   $0x1,-0x270(%rbp)
+
+        annotate.show_total_period::
+               To compare two records at the instruction level, this option
+               displays the total number of samples that belong to a line of
+               assembly code. If this option is 'true', total periods are printed
+               instead of percent values, as below.
+
+                 302 │      mov    %eax,%eax
+
+               But if this option is 'false', overhead is printed as percent values, as below.
+               Default is 'false'.
+
+               99.93 │      mov    %eax,%eax
+
+hist.*::
+       hist.percentage::
+               This option controls the way the overhead of filtered entries is
+               calculated - that means the value of this option is effective only if
+               there's a filter (by comm, dso or symbol name). Consider the following example:
+
+                      Overhead  Symbols
+                      ........  .......
+                       33.33%     foo
+                       33.33%     bar
+                       33.33%     baz
+
+              This is the original overhead and we'll filter out the first 'foo'
+              entry. The value of 'relative' would increase the overhead of 'bar'
+              and 'baz' to 50.00% for each, while 'absolute' would show their
+              current overhead (33.33%).
+
+ui.*::
+       ui.show-headers::
+               This option controls display of column headers (like 'Overhead' and 'Symbol')
+               in 'report' and 'top'. If this option is false, they are hidden.
+               This option only applies to the TUI.
+
+call-graph.*::
+       When the sub-commands 'top' and 'report' work with -g/--children,
+       these options control the call-graph behavior.
+
+       call-graph.record-mode::
+               The record-mode can be 'fp' (frame pointer), 'dwarf' or 'lbr'.
+               The value 'dwarf' is effective only if perf detects the needed library
+               (libunwind or a recent version of libdw).
+               'lbr' only works on CPUs that support it.
+
+       call-graph.dump-size::
+               The size of stack to dump in order to do post-unwinding. Default is 8192 (bytes).
+               When record-mode is 'dwarf', this default size is used if the size is omitted.
+
+       call-graph.print-type::
+               The print-types can be graph (graph absolute), fractal (graph relative),
+               flat and folded. This option controls the way overhead is shown for each
+               callchain entry. Consider the following example.
+
+                Overhead  Symbols
+                ........  .......
+                  40.00%  foo
+                          |
+                          ---foo
+                             |
+                             |--50.00%--bar
+                             |          main
+                             |
+                              --50.00%--baz
+                                        main
+
+               This output is in the 'fractal' format. The 'foo' overhead came from 'bar'
+               and 'baz' in exactly equal halves, so 'fractal' shows 50.00% for each
+               (meaning that it treats the total overhead of 'foo' as 100%).
+
+               The 'graph' type uses the absolute overhead value of 'foo' as the total, so
+               each of the 'bar' and 'baz' callchains will show 20.00% of overhead.
+               If 'flat' is used, call chains are shown in a single column, linearly.
+               'folded' means call chains are displayed on one line, separated by semicolons.
+
+       call-graph.order::
+               This option controls print order of callchains. The default is
+               'callee' which means callee is printed at top and then followed by its
+               caller and so on. The 'caller' prints it in reverse order.
+
+               If this option is not set and report.children or top.children is
+               set to true (or the equivalent command line option is given),
+               the default value of this option is changed to 'caller' for the
+               execution of 'perf report' or 'perf top'. Other commands will
+               still default to 'callee'.
+
+       call-graph.sort-key::
+               The callchains are merged if they contain the same information.
+               The sort-key option determines how the callchains are compared.
+               The value of 'sort-key' can be 'function' or 'address'.
+               The default is 'function'.
+
+       call-graph.threshold::
+               When there are many callchains, printing all of them produces an excessive
+               number of lines, so perf omits small callchains below a certain overhead
+               (threshold), and this option controls that threshold. Default is 0.5 (%).
+               How the overhead is calculated depends on call-graph.print-type.
+
+       call-graph.print-limit::
+               This is a maximum number of lines of callchain printed for a single
+               histogram entry. Default is 0 which means no limitation.
+
+report.*::
+       report.percent-limit::
+               This one is mostly the same as call-graph.threshold but works for
+               histogram entries. Entries having an overhead lower than this
+               percentage will not be printed. Default is '0'. If percent-limit
+               is '10', only entries which have more than 10% of overhead will be
+               printed.
+
+       report.queue-size::
+               This option sets up the maximum allocation size of the internal
+               event queue for ordering events. Default is 0, meaning no limit.
+
+       report.children::
+               'Children' means functions called from another function.
+               If this option is true, 'perf report' accumulates the callchains of children
+               and shows the (accumulated) total overhead as well as the 'Self' overhead.
+               Please refer to the 'perf report' manual. The default is 'true'.
+
+       report.group::
+               This option shows event group information together.
+               Example output with this turned on; notice that there is one column
+               per event in the group, ref-cycles and cycles:
+
+               # group: {ref-cycles,cycles}
+               # ========
+               #
+               # Samples: 7K of event 'anon group { ref-cycles, cycles }'
+               # Event count (approx.): 6876107743
+               #
+               #         Overhead  Command      Shared Object               Symbol
+               # ................  .......  .................  ...................
+               #
+                   99.84%  99.76%  noploop  noploop            [.] main
+                    0.07%   0.00%  noploop  ld-2.15.so         [.] strcmp
+                    0.03%   0.00%  noploop  [kernel.kallsyms]  [k] timerqueue_del
+
+top.*::
+       top.children::
+               Same as 'report.children'. If it is enabled, the output of the 'top'
+               command will have a 'Children' overhead column as well as a 'Self' overhead
+               column by default.
+               The default is 'true'.
+
+man.*::
+       man.viewer::
+               This option assigns a tool for viewing manual pages when the 'help'
+               subcommand is invoked. Supported tools are 'man', 'woman'
+               (with emacs client) and 'konqueror'. Default is 'man'.
+
+               A new man viewer tool can also be added using 'man.<tool>.cmd',
+               or a different path can be used via the 'man.<tool>.path' config option.
+
+pager.*::
+       pager.<subcommand>::
+               When the subcommand is run on stdio, this value determines whether
+               it uses a pager or not. Default is 'unspecified'.
+
+kmem.*::
+       kmem.default::
+               This option decides which allocator is to be analyzed if neither
+               '--slab' nor '--page' option is used. Default is 'slab'.
+
+record.*::
+       record.build-id::
+               This option can be 'cache', 'no-cache' or 'skip'.
+               'cache' is to post-process data and save/update the binaries into
+               the build-id cache (in ~/.debug). This is the default.
+               But if this option is 'no-cache', it will not update the build-id cache.
+               'skip' skips post-processing and does not update the cache.
+
 SEE ALSO
 --------
 linkperf:perf[1]
index 0b1cedeef895369c9c60aa0a1e6cbb718c65c509..87b2588d1cbdee6060a304be4f79015cb5889171 100644 (file)
@@ -53,6 +53,13 @@ include::itrace.txt[]
 --strip::
        Use with --itrace to strip out non-synthesized events.
 
+-j::
+--jit::
+       Process jitdump files by injecting the mmap records corresponding to jitted
+       functions. This option also generates the ELF images for each jitted function
+       found in the jitdump files captured in the input perf.data file. Use this option
+       if you are monitoring an environment that uses JIT runtimes, such as Java, DART or V8.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
index fbceb631387c31c44002080130b7ff6e282229d3..19aa17532a16709646dc52487c5f686675c2b658 100644 (file)
@@ -341,6 +341,12 @@ Specify vmlinux path which has debuginfo.
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
+--all-kernel::
+Configure all used events to run in kernel space.
+
+--all-user::
+Configure all used events to run in user space.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
index 8a301f6afb37ad855351fcd5fba1644d12dc2529..12113992ac9d0f5ceca0b003cec568717c4208e5 100644 (file)
@@ -117,6 +117,22 @@ OPTIONS
        And default sort keys are changed to comm, dso_from, symbol_from, dso_to
        and symbol_to, see '--branch-stack'.
 
+       If the --mem-mode option is used, the following sort keys are also available
+       (incompatible with --branch-stack):
+       symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+       - symbol_daddr: name of data symbol being executed on at the time of sample
+       - dso_daddr: name of library or module containing the data being executed
+       on at the time of the sample
+       - locked: whether the bus was locked at the time of the sample
+       - tlb: type of tlb access for the data at the time of the sample
+       - mem: type of memory access for the data at the time of the sample
+       - snoop: type of snoop (if any) for the data at the time of the sample
+       - dcacheline: the cacheline the data address is on at the time of the sample
+
+       And the default sort keys are changed to local_weight, mem, sym, dso,
+       symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+
        If the data file has tracepoint event(s), following (dynamic) sort keys
        are also available:
        trace, trace_fields, [<event>.]<field>[/raw]
@@ -151,22 +167,6 @@ OPTIONS
        By default, every sort keys not specified in -F will be appended
        automatically.
 
-       If --mem-mode option is used, following sort keys are also available
-       (incompatible with --branch-stack):
-       symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
-
-       - symbol_daddr: name of data symbol being executed on at the time of sample
-       - dso_daddr: name of library or module containing the data being executed
-       on at the time of sample
-       - locked: whether the bus was locked at the time of sample
-       - tlb: type of tlb access for the data at the time of sample
-       - mem: type of memory access for the data at the time of sample
-       - snoop: type of snoop (if any) for the data at the time of sample
-       - dcacheline: the cacheline the data address is on at the time of sample
-
-       And default sort keys are changed to local_weight, mem, sym, dso,
-       symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
-
 -p::
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
@@ -351,7 +351,10 @@ OPTIONS
 
 --percent-limit::
        Do not show entries which have an overhead under that percent.
-       (Default: 0).
+       (Default: 0).  Note that this option also sets the percent limit (threshold)
+       of callchains.  However, the default value of the callchain threshold is
+       different from the default value for hist entries.  Please see the
+       --call-graph option for details.
 
 --percentage::
        Determine how to display the overhead percentage of filtered entries.
@@ -398,6 +401,9 @@ include::itrace.txt[]
 --raw-trace::
        When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+       Enable hierarchical output.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
index 52ef7a9d50aacbc3d3fbb0366852457d39718cdf..04f23b404bbc5a7bd4b5bcdfb7e833d79ba1234e 100644 (file)
@@ -69,6 +69,14 @@ report::
 --scale::
        scale/normalize counter values
 
+-d::
+--detailed::
+       print more detailed statistics, can be specified up to 3 times
+
+       -d:          detailed events, L1 and LLC data cache
+       -d -d:       more detailed events, dTLB and iTLB events
+       -d -d -d:    very detailed events, adding prefetch events
+
 -r::
 --repeat=<n>::
        repeat command and print average + stddev (max: 100). 0 means forever.
@@ -139,6 +147,10 @@ Print count deltas every N milliseconds (minimum: 10ms)
 The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
        example: 'perf stat -I 1000 -e cycles -a sleep 5'
 
+--metric-only::
+Only print computed metrics. Print them in a single line.
+Don't show any raw values. Not supported with --per-thread.
+
 --per-socket::
 Aggregate counts per processor socket for system-wide mode measurements.  This
 is a useful mode to detect imbalance between sockets.  To enable this mode,
@@ -211,6 +223,29 @@ $ perf stat -- make -j
 
  Wall-clock time elapsed:   719.554352 msecs
 
+CSV FORMAT
+----------
+
+With -x, perf stat is able to output a not-quite-CSV format;
+commas in the output are not quoted with "". To make it easy to parse,
+it is recommended to use a different delimiter character, like -x \;
+
+The fields are in this order:
+
+       - optional usec time stamp in fractions of second (with -I xxx)
+       - optional CPU, core, or socket identifier
+       - optional number of logical CPUs aggregated
+       - counter value
+       - unit of the counter value or empty
+       - event name
+       - run time of counter
+       - percentage of measurement time the counter was running
+       - optional variance if multiple values are collected with -r
+       - optional metric value
+       - optional unit of metric
+
+Additional metrics may be printed with all earlier fields being empty.
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-list[1]
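Since the delimited output described above is not strict CSV, a consumer should simply split on the chosen delimiter and keep empty fields. A small C sketch under that assumption (the sample line is made up for illustration):

	#include <stdio.h>
	#include <string.h>

	/* Split one 'perf stat -x \;' line; strsep(), unlike strtok(), keeps empty fields. */
	static void parse_stat_line(char *line)
	{
		char *field;
		int i = 0;

		while ((field = strsep(&line, ";")) != NULL)
			printf("field %d: '%s'\n", i++, field);
	}

	int main(void)
	{
		char line[] = "1000.50;;cycles;2000000;100.00;;";	/* hypothetical example */
		parse_stat_line(line);
		return 0;
	}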
index b0e60e17db389d89efafa7025cd47fd44fe625c3..19f046f027cd81e42c5696ab3172539baaeb745d 100644 (file)
@@ -233,6 +233,9 @@ Default is to monitor all CPUS.
 --raw-trace::
        When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+       Enable hierarchy output.
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 
index 767ea2436e1cd841a762ee8cdb039ba80a7120b3..1d8d5bc4cd2de6601f926696d5c172b30fa735d4 100644 (file)
@@ -5,7 +5,7 @@
        medium = green, lightgray
        normal = black, lightgray
        selected = lightgray, magenta
-       code = blue, lightgray
+       jump_arrows = blue, lightgray
        addr = magenta, lightgray
 
 [tui]
index e0ce9573b79bd26ddfe8cb4aee6eade061a09a12..5950b5a24efdf6024ca58a2da084d9cd43295c12 100644 (file)
@@ -27,3 +27,4 @@ Skip collecing build-id when recording: perf record -B
 To change sampling frequency to 100 Hz: perf record -F 100
 See assembly instructions with percentage: perf annotate <symbol>
 If you prefer Intel style assembly, try: perf annotate -M intel
+For hierarchical output, try: perf report --hierarchy
index dcd9a70c7193b43b5791058b01a3773da6e0d870..32a64e6190288e5110cfa6de78138aceb955e7af 100644 (file)
@@ -68,6 +68,20 @@ all tags TAGS:
        $(print_msg)
        $(make)
 
+ifdef MAKECMDGOALS
+has_clean := 0
+ifneq ($(filter clean,$(MAKECMDGOALS)),)
+  has_clean := 1
+endif # clean
+
+ifeq ($(has_clean),1)
+  rest := $(filter-out clean,$(MAKECMDGOALS))
+  ifneq ($(rest),)
+$(rest): clean
+  endif # rest
+endif # has_clean
+endif # MAKECMDGOALS
+
 #
 # The clean target is not really parallel, don't print the jobs info:
 #
@@ -75,10 +89,17 @@ clean:
        $(make)
 
 #
-# The build-test target is not really parallel, don't print the jobs info:
+# The build-test target is not really parallel, don't print the jobs info;
+# it also uses only the tests/make targets that don't pollute the source
+# repository, i.e. those that use O= or build the tarpkg outside the source
+# repo directories.
+#
+# For a full test, use:
+#
+# make -C tools/perf -f tests/make
 #
 build-test:
-       @$(MAKE) SHUF=1 -f tests/make --no-print-directory
+       @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out
 
 #
 # All other targets get passed through:
index 5d34815c7ccb8e02ac6d4db0d82d4712af73f343..4a4fad4182f534f23550ca5ac1ed5d42000f9b08 100644 (file)
@@ -58,6 +58,9 @@ include config/utilities.mak
 #
 # Define NO_LIBBIONIC if you do not want bionic support
 #
+# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support
+# used for generating build-ids for ELFs generated by jitdump.
+#
 # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
 # for dwarf backtrace post unwind.
 #
@@ -136,6 +139,8 @@ $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
+LD += $(EXTRA_LDFLAGS)
+
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 
 RM      = rm -f
@@ -165,7 +170,16 @@ ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
 endif
 endif
 
+# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
+# Without this setting the output feature dump file misses some features, for
+# example, liberty. Select all checkers so we won't get an incomplete feature
+# dump file.
 ifeq ($(config),1)
+ifdef MAKECMDGOALS
+ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump)
+FEATURE_TESTS := all
+endif
+endif
 include config/Makefile
 endif
 
@@ -618,7 +632,7 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
        $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
        $(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
                $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
-               $(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c
+               $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c
        $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
        $(python-clean)
 
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..18b13518d8d8701137d489a65f7dd752bdc7f43f 100644 (file)
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..18b13518d8d8701137d489a65f7dd752bdc7f43f 100644 (file)
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..56e05f126ad8793d25bb1a70ce3dac5f100d3b3a 100644 (file)
@@ -1,3 +1,6 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_JITDUMP := 1
index 7b8b0d1a1b626065e0b414f42a7a998723e0e57f..c8fe2074d2177f0709f0e1d54f63cd77f84f2233 100644 (file)
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644 (file)
index 0000000..0dd6b7f
--- /dev/null
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+       {0x4, "H_REMOVE"},                                      \
+       {0x8, "H_ENTER"},                                       \
+       {0xc, "H_READ"},                                        \
+       {0x10, "H_CLEAR_MOD"},                                  \
+       {0x14, "H_CLEAR_REF"},                                  \
+       {0x18, "H_PROTECT"},                                    \
+       {0x1c, "H_GET_TCE"},                                    \
+       {0x20, "H_PUT_TCE"},                                    \
+       {0x24, "H_SET_SPRG0"},                                  \
+       {0x28, "H_SET_DABR"},                                   \
+       {0x2c, "H_PAGE_INIT"},                                  \
+       {0x30, "H_SET_ASR"},                                    \
+       {0x34, "H_ASR_ON"},                                     \
+       {0x38, "H_ASR_OFF"},                                    \
+       {0x3c, "H_LOGICAL_CI_LOAD"},                            \
+       {0x40, "H_LOGICAL_CI_STORE"},                           \
+       {0x44, "H_LOGICAL_CACHE_LOAD"},                         \
+       {0x48, "H_LOGICAL_CACHE_STORE"},                        \
+       {0x4c, "H_LOGICAL_ICBI"},                               \
+       {0x50, "H_LOGICAL_DCBF"},                               \
+       {0x54, "H_GET_TERM_CHAR"},                              \
+       {0x58, "H_PUT_TERM_CHAR"},                              \
+       {0x5c, "H_REAL_TO_LOGICAL"},                            \
+       {0x60, "H_HYPERVISOR_DATA"},                            \
+       {0x64, "H_EOI"},                                        \
+       {0x68, "H_CPPR"},                                       \
+       {0x6c, "H_IPI"},                                        \
+       {0x70, "H_IPOLL"},                                      \
+       {0x74, "H_XIRR"},                                       \
+       {0x78, "H_MIGRATE_DMA"},                                \
+       {0x7c, "H_PERFMON"},                                    \
+       {0xdc, "H_REGISTER_VPA"},                               \
+       {0xe0, "H_CEDE"},                                       \
+       {0xe4, "H_CONFER"},                                     \
+       {0xe8, "H_PROD"},                                       \
+       {0xec, "H_GET_PPP"},                                    \
+       {0xf0, "H_SET_PPP"},                                    \
+       {0xf4, "H_PURR"},                                       \
+       {0xf8, "H_PIC"},                                        \
+       {0xfc, "H_REG_CRQ"},                                    \
+       {0x100, "H_FREE_CRQ"},                                  \
+       {0x104, "H_VIO_SIGNAL"},                                \
+       {0x108, "H_SEND_CRQ"},                                  \
+       {0x110, "H_COPY_RDMA"},                                 \
+       {0x114, "H_REGISTER_LOGICAL_LAN"},                      \
+       {0x118, "H_FREE_LOGICAL_LAN"},                          \
+       {0x11c, "H_ADD_LOGICAL_LAN_BUFFER"},                    \
+       {0x120, "H_SEND_LOGICAL_LAN"},                          \
+       {0x124, "H_BULK_REMOVE"},                               \
+       {0x130, "H_MULTICAST_CTRL"},                            \
+       {0x134, "H_SET_XDABR"},                                 \
+       {0x138, "H_STUFF_TCE"},                                 \
+       {0x13c, "H_PUT_TCE_INDIRECT"},                          \
+       {0x14c, "H_CHANGE_LOGICAL_LAN_MAC"},                    \
+       {0x150, "H_VTERM_PARTNER_INFO"},                        \
+       {0x154, "H_REGISTER_VTERM"},                            \
+       {0x158, "H_FREE_VTERM"},                                \
+       {0x15c, "H_RESET_EVENTS"},                              \
+       {0x160, "H_ALLOC_RESOURCE"},                            \
+       {0x164, "H_FREE_RESOURCE"},                             \
+       {0x168, "H_MODIFY_QP"},                                 \
+       {0x16c, "H_QUERY_QP"},                                  \
+       {0x170, "H_REREGISTER_PMR"},                            \
+       {0x174, "H_REGISTER_SMR"},                              \
+       {0x178, "H_QUERY_MR"},                                  \
+       {0x17c, "H_QUERY_MW"},                                  \
+       {0x180, "H_QUERY_HCA"},                                 \
+       {0x184, "H_QUERY_PORT"},                                \
+       {0x188, "H_MODIFY_PORT"},                               \
+       {0x18c, "H_DEFINE_AQP1"},                               \
+       {0x190, "H_GET_TRACE_BUFFER"},                          \
+       {0x194, "H_DEFINE_AQP0"},                               \
+       {0x198, "H_RESIZE_MR"},                                 \
+       {0x19c, "H_ATTACH_MCQP"},                               \
+       {0x1a0, "H_DETACH_MCQP"},                               \
+       {0x1a4, "H_CREATE_RPT"},                                \
+       {0x1a8, "H_REMOVE_RPT"},                                \
+       {0x1ac, "H_REGISTER_RPAGES"},                           \
+       {0x1b0, "H_DISABLE_AND_GETC"},                          \
+       {0x1b4, "H_ERROR_DATA"},                                \
+       {0x1b8, "H_GET_HCA_INFO"},                              \
+       {0x1bc, "H_GET_PERF_COUNT"},                            \
+       {0x1c0, "H_MANAGE_TRACE"},                              \
+       {0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"},                   \
+       {0x1d8, "H_POLL_PENDING"},                              \
+       {0x1e4, "H_QUERY_INT_STATE"},                           \
+       {0x244, "H_ILLAN_ATTRIBUTES"},                          \
+       {0x250, "H_MODIFY_HEA_QP"},                             \
+       {0x254, "H_QUERY_HEA_QP"},                              \
+       {0x258, "H_QUERY_HEA"},                                 \
+       {0x25c, "H_QUERY_HEA_PORT"},                            \
+       {0x260, "H_MODIFY_HEA_PORT"},                           \
+       {0x264, "H_REG_BCMC"},                                  \
+       {0x268, "H_DEREG_BCMC"},                                \
+       {0x26c, "H_REGISTER_HEA_RPAGES"},                       \
+       {0x270, "H_DISABLE_AND_GET_HEA"},                       \
+       {0x274, "H_GET_HEA_INFO"},                              \
+       {0x278, "H_ALLOC_HEA_RESOURCE"},                        \
+       {0x284, "H_ADD_CONN"},                                  \
+       {0x288, "H_DEL_CONN"},                                  \
+       {0x298, "H_JOIN"},                                      \
+       {0x2a4, "H_VASI_STATE"},                                \
+       {0x2b0, "H_ENABLE_CRQ"},                                \
+       {0x2b8, "H_GET_EM_PARMS"},                              \
+       {0x2d0, "H_SET_MPP"},                                   \
+       {0x2d4, "H_GET_MPP"},                                   \
+       {0x2ec, "H_HOME_NODE_ASSOCIATIVITY"},                   \
+       {0x2f4, "H_BEST_ENERGY"},                               \
+       {0x2fc, "H_XIRR_X"},                                    \
+       {0x300, "H_RANDOM"},                                    \
+       {0x304, "H_COP"},                                       \
+       {0x314, "H_GET_MPP_X"},                                 \
+       {0x31c, "H_SET_MODE"},                                  \
+       {0xf000, "H_RTAS"}                                      \
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644 (file)
index 0000000..e68ba2d
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+       {0x0,   "RETURN_TO_HOST"}, \
+       {0x100, "SYSTEM_RESET"}, \
+       {0x200, "MACHINE_CHECK"}, \
+       {0x300, "DATA_STORAGE"}, \
+       {0x380, "DATA_SEGMENT"}, \
+       {0x400, "INST_STORAGE"}, \
+       {0x480, "INST_SEGMENT"}, \
+       {0x500, "EXTERNAL"}, \
+       {0x501, "EXTERNAL_LEVEL"}, \
+       {0x502, "EXTERNAL_HV"}, \
+       {0x600, "ALIGNMENT"}, \
+       {0x700, "PROGRAM"}, \
+       {0x800, "FP_UNAVAIL"}, \
+       {0x900, "DECREMENTER"}, \
+       {0x980, "HV_DECREMENTER"}, \
+       {0xc00, "SYSCALL"}, \
+       {0xd00, "TRACE"}, \
+       {0xe00, "H_DATA_STORAGE"}, \
+       {0xe20, "H_INST_STORAGE"}, \
+       {0xe40, "H_EMUL_ASSIST"}, \
+       {0xf00, "PERFMON"}, \
+       {0xf20, "ALTIVEC"}, \
+       {0xf40, "VSX"}
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644 (file)
index 0000000..74eee30
--- /dev/null
@@ -0,0 +1,170 @@
+#include "util/kvm-stat.h"
+#include "util/parse-events.h"
+#include "util/debug.h"
+
+#include "book3s_hv_exits.h"
+#include "book3s_hcalls.h"
+
+#define NR_TPS 4
+
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 40;
+const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter";
+const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit";
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
+
+/* Tracepoints specific to ppc_book3s_hv */
+const char *ppc_book3s_hv_kvm_tp[] = {
+       "kvm_hv:kvm_guest_enter",
+       "kvm_hv:kvm_guest_exit",
+       "kvm_hv:kvm_hcall_enter",
+       "kvm_hv:kvm_hcall_exit",
+       NULL,
+};
+
+/* 1 extra placeholder for NULL */
+const char *kvm_events_tp[NR_TPS + 1];
+const char *kvm_exit_reason;
+
+static void hcall_event_get_key(struct perf_evsel *evsel,
+                               struct perf_sample *sample,
+                               struct event_key *key)
+{
+       key->info = 0;
+       key->key = perf_evsel__intval(evsel, sample, "req");
+}
+
+static const char *get_hcall_exit_reason(u64 exit_code)
+{
+       struct exit_reasons_table *tbl = hcall_reasons;
+
+       while (tbl->reason != NULL) {
+               if (tbl->exit_code == exit_code)
+                       return tbl->reason;
+               tbl++;
+       }
+
+       pr_debug("Unknown hcall code: %llu\n",
+              (unsigned long long)exit_code);
+       return "UNKNOWN";
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+                           struct perf_sample *sample __maybe_unused,
+                           struct event_key *key __maybe_unused)
+{
+       return (!strcmp(evsel->name, kvm_events_tp[3]));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+                             struct perf_sample *sample, struct event_key *key)
+{
+       if (!strcmp(evsel->name, kvm_events_tp[2])) {
+               hcall_event_get_key(evsel, sample, key);
+               return true;
+       }
+
+       return false;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+                                  struct event_key *key,
+                                  char *decode)
+{
+       const char *hcall_reason = get_hcall_exit_reason(key->key);
+
+       scnprintf(decode, decode_str_len, "%s", hcall_reason);
+}
+
+static struct kvm_events_ops hcall_events = {
+       .is_begin_event = hcall_event_begin,
+       .is_end_event = hcall_event_end,
+       .decode_key = hcall_event_decode_key,
+       .name = "HCALL-EVENT",
+};
+
+static struct kvm_events_ops exit_events = {
+       .is_begin_event = exit_event_begin,
+       .is_end_event = exit_event_end,
+       .decode_key = exit_event_decode_key,
+       .name = "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+       { .name = "vmexit", .ops = &exit_events },
+       { .name = "hcall", .ops = &hcall_events },
+       { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+       NULL,
+};
+
+
+static int is_tracepoint_available(const char *str, struct perf_evlist *evlist)
+{
+       struct parse_events_error err;
+       int ret;
+
+       err.str = NULL;
+       ret = parse_events(evlist, str, &err);
+       if (err.str)
+               pr_err("%s : %s\n", str, err.str);
+       return ret;
+}
+
+static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm,
+                               struct perf_evlist *evlist)
+{
+       const char **events_ptr;
+       int i, nr_tp = 0, err = -1;
+
+       /* Check for book3s_hv tracepoints */
+       for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) {
+               err = is_tracepoint_available(*events_ptr, evlist);
+               if (err)
+                       return -1;
+               nr_tp++;
+       }
+
+       for (i = 0; i < nr_tp; i++)
+               kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i];
+
+       kvm_events_tp[i] = NULL;
+       kvm_exit_reason = "trap";
+       kvm->exit_reasons = hv_exit_reasons;
+       kvm->exit_reasons_isa = "HV";
+
+       return 0;
+}
+
+/* Wrapper to setup kvm tracepoints */
+static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm)
+{
+       struct perf_evlist *evlist = perf_evlist__new();
+
+       if (evlist == NULL)
+               return -ENOMEM;
+
+       /* Right now, only supported on book3s_hv */
+       return ppc__setup_book3s_hv(kvm, evlist);
+}
+
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm)
+{
+       return ppc__setup_kvm_tp(kvm);
+}
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+       int ret;
+
+       ret = ppc__setup_kvm_tp(kvm);
+       if (ret) {
+               kvm->exit_reasons = NULL;
+               kvm->exit_reasons_isa = NULL;
+       }
+
+       return ret;
+}
index a5dbc07ec9dc70900ed4a593f5979d419061fd35..ed57df2e6d687d89ccf95d9aa03c263411de7fe9 100644 (file)
@@ -10,7 +10,7 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include <asm/kvm_perf.h>
+#include <asm/sie.h>
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes);
 define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
 define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
 
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
 static void event_icpt_insn_get_key(struct perf_evsel *evsel,
                                    struct perf_sample *sample,
                                    struct event_key *key)
@@ -73,7 +79,7 @@ static struct kvm_events_ops exit_events = {
        .name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
        "kvm:kvm_s390_sie_enter",
        "kvm:kvm_s390_sie_exit",
        "kvm:kvm_s390_intercept_instruction",
index 09ba923debe86810f8380f7df54504dee4232ec8..269af21437353b2fb886383ede1e3f9a2f586f25 100644 (file)
@@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
index 7bb0d13c235f70326afc28d726f38f1d95499055..72193f19d6d75d3c32e19b424a0f1910ebbc24a6 100644 (file)
@@ -59,7 +59,7 @@ static u64 mmap_read_self(void *addr)
                u64 quot, rem;
 
                quot = (cyc >> time_shift);
-               rem = cyc & ((1 << time_shift) - 1);
+               rem = cyc & (((u64)1 << time_shift) - 1);
                delta = time_offset + quot * time_mult +
                        ((rem * time_mult) >> time_shift);
 
@@ -103,6 +103,7 @@ static int __test__rdpmc(void)
 
        sigfillset(&sa.sa_mask);
        sa.sa_sigaction = segfault_handler;
+       sa.sa_flags = 0;
        sigaction(SIGSEGV, &sa, NULL);
 
        fd = sys_perf_event_open(&attr, 0, -1, -1,
index 8d8150f1cf9bcf426b4c29a19170221db680a980..d66f9ad4df2ea5da6eca22cae351a842d1143508 100644 (file)
@@ -60,7 +60,9 @@ struct branch {
        u64 misc;
 };
 
-static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+static size_t
+intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+                        struct perf_evlist *evlist __maybe_unused)
 {
        return INTEL_BTS_AUXTRACE_PRIV_SIZE;
 }
index f05daacc9e7810117cfe25415a75990b3aa89f63..a3395179c9eebd5fcd39aa4796a0dc3d495f4b5d 100644 (file)
@@ -89,7 +89,7 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats,
 
        *config = attr.config;
 out_free:
-       parse_events__free_terms(terms);
+       parse_events_terms__delete(terms);
        return err;
 }
 
@@ -273,7 +273,9 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
        return attr;
 }
 
-static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+static size_t
+intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+                       struct perf_evlist *evlist __maybe_unused)
 {
        return INTEL_PT_AUXTRACE_PRIV_SIZE;
 }
index 14e4e668fad733c1802908047ae524318aa586de..b63d4be655a24a678f69d6deea3b4ee87283b938 100644 (file)
@@ -1,5 +1,7 @@
 #include "../../util/kvm-stat.h"
-#include <asm/kvm_perf.h>
+#include <asm/svm.h>
+#include <asm/vmx.h>
+#include <asm/kvm.h>
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = {
        .name = "VM-EXIT"
 };
 
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
 /*
  * For the mmio events, we treat:
  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
@@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
                                  struct event_key *key,
                                  char *decode)
 {
-       scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
+       scnprintf(decode, decode_str_len, "%#lx:%s",
                  (unsigned long)key->key,
                  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
 }
@@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
                                    struct event_key *key,
                                    char *decode)
 {
-       scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
+       scnprintf(decode, decode_str_len, "%#llx:%s",
                  (unsigned long long)key->key,
                  key->info ? "POUT" : "PIN");
 }
@@ -121,7 +129,7 @@ static struct kvm_events_ops ioport_events = {
        .name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
        "kvm:kvm_entry",
        "kvm:kvm_exit",
        "kvm:kvm_mmio",
index e4c2c30143b95133913a2bc3569b2871b79aa75c..5c3cce082cb88c5cab984bdb1ae6944274861ad4 100644 (file)
@@ -1,6 +1,11 @@
+
+/* Various wrappers to make the kernel .S file build in user-space: */
+
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
+#define _ASM_EXTABLE_FAULT(x, y)
+
 #include "../../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want
index cc5c1267c738da038cfb6b824f79854fab41a629..cfe366375c4b89bb3642f5531d04b418a8d76e56 100644 (file)
@@ -245,7 +245,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
                        hists__collapse_resort(hists, NULL);
                        /* Don't sort callchain */
                        perf_evsel__reset_sample_bit(pos, CALLCHAIN);
-                       hists__output_resort(hists, NULL);
+                       perf_evsel__output_resort(pos, NULL);
 
                        if (symbol_conf.event_group &&
                            !perf_evsel__is_group_leader(pos))
index d93bff7fc0e407d6518a6ce0e2ee0dd4b65bf910..632efc6b79a07e20c4b25f9757bcd9c089050270 100644 (file)
@@ -38,19 +38,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
 
 static int build_id_cache__kcore_dir(char *dir, size_t sz)
 {
-       struct timeval tv;
-       struct tm tm;
-       char dt[32];
-
-       if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
-               return -1;
-
-       if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
-               return -1;
-
-       scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
-
-       return 0;
+       return fetch_current_timestamp(dir, sz);
 }
 
 static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
index f04e804a9fadc6807a7bc74810fac5ddaad912b2..c42448ed5dfe20a74af9c671669aa23a25cadb5b 100644 (file)
 #include "util/util.h"
 #include "util/debug.h"
 
+static bool use_system_config, use_user_config;
+
 static const char * const config_usage[] = {
-       "perf config [options]",
+       "perf config [<file-option>] [options]",
        NULL
 };
 
@@ -25,6 +27,8 @@ enum actions {
 static struct option config_options[] = {
        OPT_SET_UINT('l', "list", &actions,
                     "show current config variables", ACTION_LIST),
+       OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"),
+       OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"),
        OPT_END()
 };
 
@@ -42,10 +46,23 @@ static int show_config(const char *key, const char *value,
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
        int ret = 0;
+       char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
        argc = parse_options(argc, argv, config_options, config_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
 
+       if (use_system_config && use_user_config) {
+               pr_err("Error: only one config file at a time\n");
+               parse_options_usage(config_usage, config_options, "user", 0);
+               parse_options_usage(NULL, config_options, "system", 0);
+               return -1;
+       }
+
+       if (use_system_config)
+               config_exclusive_filename = perf_etc_perfconfig();
+       else if (use_user_config)
+               config_exclusive_filename = user_config;
+
        switch (actions) {
        case ACTION_LIST:
                if (argc) {
@@ -53,9 +70,13 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
                        parse_options_usage(config_usage, config_options, "l", 1);
                } else {
                        ret = perf_config(show_config, NULL);
-                       if (ret < 0)
+                       if (ret < 0) {
+                               const char *config_filename = config_exclusive_filename;
+                               if (!config_exclusive_filename)
+                                       config_filename = user_config;
                                pr_err("Nothing configured, "
-                                      "please check your ~/.perfconfig file\n");
+                                      "please check your %s\n", config_filename);
+                       }
                }
                break;
        default:
index 36ccc2b8827fdd239b657e79b396ae013c318462..4d72359fd15ad00ca7397bb033e84b4f899546c2 100644 (file)
@@ -1264,8 +1264,6 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
        if (ret < 0)
                return ret;
 
-       perf_config(perf_default_config, NULL);
-
        argc = parse_options(argc, argv, options, diff_usage, 0);
 
        if (symbol__init(NULL) < 0)
index 96c1a4cfbbbf6b1f639d91d562c55b09da5c7e6f..49d55e21b1b06dbecd9c6935a2e0f33550585514 100644 (file)
@@ -86,8 +86,7 @@ static int check_emacsclient_version(void)
                return -1;
        }
 
-       strbuf_remove(&buffer, 0, strlen("emacsclient"));
-       version = atoi(buffer.buf);
+       version = atoi(buffer.buf + strlen("emacsclient"));
 
        if (version < 22) {
                fprintf(stderr,
@@ -273,7 +272,7 @@ static int perf_help_config(const char *var, const char *value, void *cb)
        if (!prefixcmp(var, "man."))
                return add_man_viewer_info(var, value);
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static struct cmdnames main_cmds, other_cmds;
index 0022e02ed31a7034b9286884ab87c4c131e41fa3..7fa68663ed7287f63adad2342db987941ba0c1b4 100644 (file)
@@ -17,6 +17,7 @@
 #include "util/build-id.h"
 #include "util/data.h"
 #include "util/auxtrace.h"
+#include "util/jit.h"
 
 #include <subcmd/parse-options.h>
 
@@ -29,6 +30,7 @@ struct perf_inject {
        bool                    sched_stat;
        bool                    have_auxtrace;
        bool                    strip;
+       bool                    jit_mode;
        const char              *input_name;
        struct perf_data_file   output;
        u64                     bytes_written;
@@ -71,6 +73,15 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool,
        return perf_event__repipe_synth(tool, event);
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
+                              union perf_event *event __maybe_unused,
+                              struct ordered_events *oe __maybe_unused)
+{
+       return 0;
+}
+#endif
+
 static int perf_event__repipe_op2_synth(struct perf_tool *tool,
                                        union perf_event *event,
                                        struct perf_session *session
@@ -234,6 +245,31 @@ static int perf_event__repipe_mmap(struct perf_tool *tool,
        return err;
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      struct perf_sample *sample,
+                                      struct machine *machine)
+{
+       struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+       u64 n = 0;
+       int ret;
+
+       /*
+        * If this is a JIT marker, inject the JIT mmaps and generate ELF images.
+        */
+       ret = jit_process(inject->session, &inject->output, machine,
+                         event->mmap.filename, sample->pid, &n);
+       if (ret < 0)
+               return ret;
+       if (ret) {
+               inject->bytes_written += n;
+               return 0;
+       }
+       return perf_event__repipe_mmap(tool, event, sample, machine);
+}
+#endif
+
 static int perf_event__repipe_mmap2(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
@@ -247,6 +283,31 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool,
        return err;
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
+                                       union perf_event *event,
+                                       struct perf_sample *sample,
+                                       struct machine *machine)
+{
+       struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+       u64 n = 0;
+       int ret;
+
+       /*
+        * If this is a JIT marker, inject the JIT mmaps and generate ELF images.
+        */
+       ret = jit_process(inject->session, &inject->output, machine,
+                         event->mmap2.filename, sample->pid, &n);
+       if (ret < 0)
+               return ret;
+       if (ret) {
+               inject->bytes_written += n;
+               return 0;
+       }
+       return perf_event__repipe_mmap2(tool, event, sample, machine);
+}
+#endif
+
 static int perf_event__repipe_fork(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
@@ -626,12 +687,16 @@ static int __cmd_inject(struct perf_inject *inject)
        ret = perf_session__process_events(session);
 
        if (!file_out->is_pipe) {
-               if (inject->build_ids) {
+               if (inject->build_ids)
                        perf_header__set_feat(&session->header,
                                              HEADER_BUILD_ID);
-                       if (inject->have_auxtrace)
-                               dsos__hit_all(session);
-               }
+               /*
+                * Keep all buildids when there is unprocessed AUX data because
+                * it is not known which ones the AUX trace hits.
+                */
+               if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
+                   inject->have_auxtrace && !inject->itrace_synth_opts.set)
+                       dsos__hit_all(session);
                /*
                 * The AUX areas have been removed and replaced with
                 * synthesized hardware events, so clear the feature flag and
@@ -703,7 +768,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
        };
        int ret;
 
-       const struct option options[] = {
+       struct option options[] = {
                OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
                            "Inject build-ids into the output stream"),
                OPT_STRING('i', "input", &inject.input_name, "file",
@@ -713,6 +778,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
                OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
                            "Merge sched-stat and sched-switch for getting events "
                            "where and how long tasks slept"),
+#ifdef HAVE_JITDUMP
+               OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
+#endif
                OPT_INCR('v', "verbose", &verbose,
                         "be more verbose (show build ids, etc)"),
                OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
@@ -729,7 +797,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
                "perf inject [<options>]",
                NULL
        };
-
+#ifndef HAVE_JITDUMP
+       set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
+#endif
        argc = parse_options(argc, argv, options, inject_usage, 0);
 
        /*
@@ -755,6 +825,29 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
        if (inject.session == NULL)
                return -1;
 
+       if (inject.build_ids) {
+               /*
+                * Order the mmap records by time so that build-ids are
+                * attributed correctly, especially for jitted code mmaps.
+                * We cannot generate the buildid hit list and inject the
+                * jit mmaps at the same time for now.
+                */
+               inject.tool.ordered_events = true;
+               inject.tool.ordering_requires_timestamps = true;
+       }
+#ifdef HAVE_JITDUMP
+       if (inject.jit_mode) {
+               inject.tool.mmap2          = perf_event__jit_repipe_mmap2;
+               inject.tool.mmap           = perf_event__jit_repipe_mmap;
+               inject.tool.ordered_events = true;
+               inject.tool.ordering_requires_timestamps = true;
+               /*
+                * JIT MMAP injection injects all MMAP events in one go, so it
+                * does not obey finished_round semantics.
+                */
+               inject.tool.finished_round = perf_event__drop_oe;
+       }
+#endif
        ret = symbol__init(&inject.session->header.env);
        if (ret < 0)
                goto out_delete;
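
The jit repipe callbacks above only receive a struct perf_tool pointer and recover the enclosing struct perf_inject with container_of(), because the tool is embedded in it. A minimal, self-contained illustration of that pattern with simplified stand-in structs (not the real perf definitions):

  #include <stdio.h>
  #include <stddef.h>

  #define container_of(ptr, type, member) \
          ((type *)((char *)(ptr) - offsetof(type, member)))

  struct tool { int placeholder; };

  struct inject {
          struct tool tool;       /* embedded, like tool inside struct perf_inject */
          int jit_mode;
  };

  static void mmap_callback(struct tool *t)
  {
          struct inject *inj = container_of(t, struct inject, tool);

          printf("jit_mode = %d\n", inj->jit_mode);
  }

  int main(void)
  {
          struct inject inj = { .jit_mode = 1 };

          mmap_callback(&inj.tool);       /* prints "jit_mode = 1" */
          return 0;
  }
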
index 118010553d0cf0dca9de6f493a5c8e267b0fff48..4d3340cce9a0200b469fd61a563eee4cb7f48830 100644 (file)
@@ -1834,7 +1834,7 @@ static int __cmd_record(int argc, const char **argv)
        return cmd_record(i, rec_argv, NULL);
 }
 
-static int kmem_config(const char *var, const char *value, void *cb)
+static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
 {
        if (!strcmp(var, "kmem.default")) {
                if (!strcmp(value, "slab"))
@@ -1847,7 +1847,7 @@ static int kmem_config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
index 4418d9214872150648a719dd07fc14a29c913e9b..bff666458b28e24dccac682d0f28b6708a1a7c83 100644 (file)
@@ -30,7 +30,6 @@
 #include <math.h>
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include <asm/kvm_perf.h>
 #include "util/kvm-stat.h"
 
 void exit_event_get_key(struct perf_evsel *evsel,
@@ -38,12 +37,12 @@ void exit_event_get_key(struct perf_evsel *evsel,
                        struct event_key *key)
 {
        key->info = 0;
-       key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+       key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
 }
 
 bool kvm_exit_event(struct perf_evsel *evsel)
 {
-       return !strcmp(evsel->name, KVM_EXIT_TRACE);
+       return !strcmp(evsel->name, kvm_exit_trace);
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -59,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel,
 
 bool kvm_entry_event(struct perf_evsel *evsel)
 {
-       return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+       return !strcmp(evsel->name, kvm_entry_trace);
 }
 
 bool exit_event_end(struct perf_evsel *evsel,
@@ -91,7 +90,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm,
        const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
                                                  key->key);
 
-       scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason);
+       scnprintf(decode, decode_str_len, "%s", exit_reason);
 }
 
 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
@@ -357,7 +356,7 @@ static bool handle_end_event(struct perf_kvm_stat *kvm,
        time_diff = sample->time - time_begin;
 
        if (kvm->duration && time_diff > kvm->duration) {
-               char decode[DECODE_STR_LEN];
+               char decode[decode_str_len];
 
                kvm->events_ops->decode_key(kvm, &event->key, decode);
                if (!skip_event(decode)) {
@@ -385,7 +384,8 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
                        return NULL;
                }
 
-               vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID);
+               vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
+                                                         vcpu_id_str);
                thread__set_priv(thread, vcpu_record);
        }
 
@@ -574,7 +574,7 @@ static void show_timeofday(void)
 
 static void print_result(struct perf_kvm_stat *kvm)
 {
-       char decode[DECODE_STR_LEN];
+       char decode[decode_str_len];
        struct kvm_event *event;
        int vcpu = kvm->trace_vcpu;
 
@@ -585,7 +585,7 @@ static void print_result(struct perf_kvm_stat *kvm)
 
        pr_info("\n\n");
        print_vcpu_info(kvm);
-       pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name);
+       pr_info("%*s ", decode_str_len, kvm->events_ops->name);
        pr_info("%10s ", "Samples");
        pr_info("%9s ", "Samples%");
 
@@ -604,7 +604,7 @@ static void print_result(struct perf_kvm_stat *kvm)
                min = get_event_min(event, vcpu);
 
                kvm->events_ops->decode_key(kvm, &event->key, decode);
-               pr_info("%*s ", DECODE_STR_LEN, decode);
+               pr_info("%*s ", decode_str_len, decode);
                pr_info("%10llu ", (unsigned long long)ecount);
                pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
                pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
@@ -1132,6 +1132,11 @@ exit:
                _p;                     \
        })
 
+int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
+{
+       return 0;
+}
+
 static int
 kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 {
@@ -1148,7 +1153,14 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
                NULL
        };
        const char * const *events_tp;
+       int ret;
+
        events_tp_size = 0;
+       ret = setup_kvm_events_tp(kvm);
+       if (ret < 0) {
+               pr_err("Unable to setup the kvm tracepoints\n");
+               return ret;
+       }
 
        for (events_tp = kvm_events_tp; *events_tp; events_tp++)
                events_tp_size++;
@@ -1377,6 +1389,12 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
        /*
         * generate the event list
         */
+       err = setup_kvm_events_tp(kvm);
+       if (err < 0) {
+               pr_err("Unable to setup the kvm tracepoints\n");
+               return err;
+       }
+
        kvm->evlist = kvm_live_event_list();
        if (kvm->evlist == NULL) {
                err = -1;
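
The __weak stub added here lets builtin-kvm.c link on architectures that do not define setup_kvm_events_tp(), while an arch object such as the powerpc kvm-stat.c above supplies the strong symbol and takes precedence at link time. A tiny two-file sketch of that mechanism, using generic names and the GCC/Clang weak attribute:

  /* generic.c */
  #include <stdio.h>

  int __attribute__((weak)) setup_tp(void)
  {
          return 0;               /* default: nothing arch-specific to set up */
  }

  int main(void)
  {
          printf("setup_tp() = %d\n", setup_tp());
          return 0;
  }

  /* arch.c - when linked in, this strong definition replaces the weak one */
  int setup_tp(void)
  {
          return 1;
  }

  /* cc generic.c         -> prints "setup_tp() = 0"
   * cc generic.c arch.c  -> prints "setup_tp() = 1" */
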
index 39017004169665ec4a2ebd59aa0bc43e58412874..88aeac9aa1da12c664ec64ee419d6ce48c5ad8ec 100644 (file)
@@ -6,6 +6,8 @@
 #include "util/tool.h"
 #include "util/session.h"
 #include "util/data.h"
+#include "util/mem-events.h"
+#include "util/debug.h"
 
 #define MEM_OPERATION_LOAD     0x1
 #define MEM_OPERATION_STORE    0x2
@@ -21,11 +23,56 @@ struct perf_mem {
        DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+static int parse_record_events(const struct option *opt,
+                              const char *str, int unset __maybe_unused)
+{
+       struct perf_mem *mem = *(struct perf_mem **)opt->value;
+       int j;
+
+       if (strcmp(str, "list")) {
+               if (!perf_mem_events__parse(str)) {
+                       mem->operation = 0;
+                       return 0;
+               }
+               exit(-1);
+       }
+
+       for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+               struct perf_mem_event *e = &perf_mem_events[j];
+
+               fprintf(stderr, "%-13s%-*s%s\n",
+                       e->tag,
+                       verbose ? 25 : 0,
+                       verbose ? perf_mem_events__name(j) : "",
+                       e->supported ? ": available" : "");
+       }
+       exit(0);
+}
+
+static const char * const __usage[] = {
+       "perf mem record [<options>] [<command>]",
+       "perf mem record [<options>] -- <command> [<options>]",
+       NULL
+};
+
+static const char * const *record_mem_usage = __usage;
+
 static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 {
        int rec_argc, i = 0, j;
        const char **rec_argv;
        int ret;
+       struct option options[] = {
+       OPT_CALLBACK('e', "event", &mem, "event",
+                    "event selector. use 'perf mem record -e list' to list available events",
+                    parse_record_events),
+       OPT_INCR('v', "verbose", &verbose,
+                "be more verbose (show counter open errors, etc)"),
+       OPT_END()
+       };
+
+       argc = parse_options(argc, argv, options, record_mem_usage,
+                            PARSE_OPT_STOP_AT_NON_OPTION);
 
        rec_argc = argc + 7; /* max number of arguments */
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
@@ -35,23 +82,40 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        rec_argv[i++] = "record";
 
        if (mem->operation & MEM_OPERATION_LOAD)
+               perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+
+       if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
                rec_argv[i++] = "-W";
 
        rec_argv[i++] = "-d";
 
-       if (mem->operation & MEM_OPERATION_LOAD) {
-               rec_argv[i++] = "-e";
-               rec_argv[i++] = "cpu/mem-loads/pp";
-       }
+       for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+               if (!perf_mem_events[j].record)
+                       continue;
+
+               if (!perf_mem_events[j].supported) {
+                       pr_err("failed: event '%s' not supported\n",
+                              perf_mem_events__name(j));
+                       return -1;
+               }
 
-       if (mem->operation & MEM_OPERATION_STORE) {
                rec_argv[i++] = "-e";
-               rec_argv[i++] = "cpu/mem-stores/pp";
-       }
+               rec_argv[i++] = perf_mem_events__name(j);
+       }
 
-       for (j = 1; j < argc; j++, i++)
+       for (j = 0; j < argc; j++, i++)
                rec_argv[i] = argv[j];
 
+       if (verbose > 0) {
+               pr_debug("calling: record ");
+
+               while (rec_argv[j]) {
+                       pr_debug("%s ", rec_argv[j]);
+                       j++;
+               }
+               pr_debug("\n");
+       }
+
        ret = cmd_record(i, rec_argv, NULL);
        free(rec_argv);
        return ret;
@@ -298,6 +362,10 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
                NULL
        };
 
+       if (perf_mem_events__init()) {
+               pr_err("failed: memory events not supported\n");
+               return -1;
+       }
 
        argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
                                        mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
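
'perf mem record' is a thin wrapper: it builds a NULL-terminated argv holding its own fixed options plus the selected memory events and the user's remaining arguments, then hands the whole thing to cmd_record(). A simplified sketch of that argv-splicing pattern (the event string below is hypothetical):

  #include <stdio.h>
  #include <stdlib.h>

  int main(int argc, char **argv)
  {
          int rec_argc = argc + 7, i = 0, j;      /* generous upper bound */
          const char **rec_argv = calloc(rec_argc + 1, sizeof(char *));

          if (rec_argv == NULL)
                  return -1;

          rec_argv[i++] = "record";
          rec_argv[i++] = "-W";
          rec_argv[i++] = "-d";
          rec_argv[i++] = "-e";
          rec_argv[i++] = "cpu/mem-loads/P";      /* hypothetical event name */

          for (j = 1; j < argc; j++, i++)         /* append the user's args */
                  rec_argv[i] = argv[j];

          for (j = 0; j < i; j++)                 /* what would be executed */
                  printf("%s ", rec_argv[j]);
          printf("\n");

          free(rec_argv);
          return 0;
  }
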
index 319712a4e02b73de7359fc2ed772bed1d9cd424f..515510ecc76a43391e2ac58f830557b51810b466 100644 (file)
@@ -32,6 +32,8 @@
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
+#include "util/bpf-loader.h"
+#include "asm/bug.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -49,7 +51,9 @@ struct record {
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
+       bool                    no_buildid_set;
        bool                    no_buildid_cache;
+       bool                    no_buildid_cache_set;
        bool                    buildid_all;
        unsigned long long      samples;
 };
@@ -320,7 +324,10 @@ try_again:
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                strerror_r(errno, msg, sizeof(msg)));
-                       rc = -errno;
+                       if (errno)
+                               rc = -errno;
+                       else
+                               rc = -EINVAL;
                }
                goto out;
        }
@@ -464,6 +471,29 @@ static void record__init_features(struct record *rec)
        perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
+static void
+record__finish_output(struct record *rec)
+{
+       struct perf_data_file *file = &rec->file;
+       int fd = perf_data_file__fd(file);
+
+       if (file->is_pipe)
+               return;
+
+       rec->session->header.data_size += rec->bytes_written;
+       file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
+
+       if (!rec->no_buildid) {
+               process_buildids(rec);
+
+               if (rec->buildid_all)
+                       dsos__hit_all(rec->session);
+       }
+       perf_session__write_header(rec->session, rec->evlist, fd, true);
+
+       return;
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -482,6 +512,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 
 static void snapshot_sig_handler(int sig);
 
+static int record__synthesize(struct record *rec)
+{
+       struct perf_session *session = rec->session;
+       struct machine *machine = &session->machines.host;
+       struct perf_data_file *file = &rec->file;
+       struct record_opts *opts = &rec->opts;
+       struct perf_tool *tool = &rec->tool;
+       int fd = perf_data_file__fd(file);
+       int err = 0;
+
+       if (file->is_pipe) {
+               err = perf_event__synthesize_attrs(tool, session,
+                                                  process_synthesized_event);
+               if (err < 0) {
+                       pr_err("Couldn't synthesize attrs.\n");
+                       goto out;
+               }
+
+               if (have_tracepoints(&rec->evlist->entries)) {
+                       /*
+                        * FIXME err <= 0 here actually means that
+                        * there were no tracepoints so it's not really
+                        * an error, just that we don't need to
+                        * synthesize anything.  We really have to
+                        * return this more properly and also
+                        * propagate errors that now are calling die()
+                        */
+                       err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
+                                                                 process_synthesized_event);
+                       if (err <= 0) {
+                               pr_err("Couldn't record tracing data.\n");
+                               goto out;
+                       }
+                       rec->bytes_written += err;
+               }
+       }
+
+       if (rec->opts.full_auxtrace) {
+               err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+                                       session, process_synthesized_event);
+               if (err)
+                       goto out;
+       }
+
+       err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+                                                machine);
+       WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+                          "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+                          "Check /proc/kallsyms permission or run as root.\n");
+
+       err = perf_event__synthesize_modules(tool, process_synthesized_event,
+                                            machine);
+       WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+                          "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+                          "Check /proc/modules permission or run as root.\n");
+
+       if (perf_guest) {
+               machines__process_guests(&session->machines,
+                                        perf_event__synthesize_guest_os, tool);
+       }
+
+       err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+                                           process_synthesized_event, opts->sample_address,
+                                           opts->proc_map_timeout);
+out:
+       return err;
+}
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
        int err;
@@ -534,6 +632,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                goto out_child;
        }
 
+       err = bpf__apply_obj_config();
+       if (err) {
+               char errbuf[BUFSIZ];
+
+               bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+               pr_err("ERROR: Apply config to BPF failed: %s\n",
+                        errbuf);
+               goto out_child;
+       }
+
        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
@@ -566,63 +674,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
        machine = &session->machines.host;
 
-       if (file->is_pipe) {
-               err = perf_event__synthesize_attrs(tool, session,
-                                                  process_synthesized_event);
-               if (err < 0) {
-                       pr_err("Couldn't synthesize attrs.\n");
-                       goto out_child;
-               }
-
-               if (have_tracepoints(&rec->evlist->entries)) {
-                       /*
-                        * FIXME err <= 0 here actually means that
-                        * there were no tracepoints so its not really
-                        * an error, just that we don't need to
-                        * synthesize anything.  We really have to
-                        * return this more properly and also
-                        * propagate errors that now are calling die()
-                        */
-                       err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
-                                                                 process_synthesized_event);
-                       if (err <= 0) {
-                               pr_err("Couldn't record tracing data.\n");
-                               goto out_child;
-                       }
-                       rec->bytes_written += err;
-               }
-       }
-
-       if (rec->opts.full_auxtrace) {
-               err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
-                                       session, process_synthesized_event);
-               if (err)
-                       goto out_delete_session;
-       }
-
-       err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
-                                                machine);
-       if (err < 0)
-               pr_err("Couldn't record kernel reference relocation symbol\n"
-                      "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-                      "Check /proc/kallsyms permission or run as root.\n");
-
-       err = perf_event__synthesize_modules(tool, process_synthesized_event,
-                                            machine);
+       err = record__synthesize(rec);
        if (err < 0)
-               pr_err("Couldn't record kernel module information.\n"
-                      "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-                      "Check /proc/modules permission or run as root.\n");
-
-       if (perf_guest) {
-               machines__process_guests(&session->machines,
-                                        perf_event__synthesize_guest_os, tool);
-       }
-
-       err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-                                           process_synthesized_event, opts->sample_address,
-                                           opts->proc_map_timeout);
-       if (err != 0)
                goto out_child;
 
        if (rec->realtime_prio) {
@@ -758,18 +811,8 @@ out_child:
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;
 
-       if (!err && !file->is_pipe) {
-               rec->session->header.data_size += rec->bytes_written;
-               file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-
-               if (!rec->no_buildid) {
-                       process_buildids(rec);
-
-                       if (rec->buildid_all)
-                               dsos__hit_all(rec->session);
-               }
-               perf_session__write_header(rec->session, rec->evlist, fd, true);
-       }
+       if (!err)
+               record__finish_output(rec);
 
        if (!err && !quiet) {
                char samples[128];
@@ -1097,10 +1140,12 @@ struct option __record_options[] = {
        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
-       OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
-                   "do not update the buildid cache"),
-       OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
-                   "do not collect buildids in perf.data"),
+       OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
+                       &record.no_buildid_cache_set,
+                       "do not update the buildid cache"),
+       OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
+                       &record.no_buildid_set,
+                       "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
@@ -1136,6 +1181,12 @@ struct option __record_options[] = {
                        "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
+       OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
+                        "Configure all used events to run in kernel space.",
+                        PARSE_OPT_EXCLUSIVE),
+       OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
+                        "Configure all used events to run in user space.",
+                        PARSE_OPT_EXCLUSIVE),
        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
                   "clang binary to use for compiling BPF scriptlets"),
        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
index 2bf537f190a026952bd63126e9c37def827e9914..7eea49f9ed46b520bdce56972379b76c14302c4b 100644 (file)
@@ -75,7 +75,10 @@ static int report__config(const char *var, const char *value, void *cb)
                return 0;
        }
        if (!strcmp(var, "report.percent-limit")) {
-               rep->min_percent = strtof(value, NULL);
+               double pcnt = strtof(value, NULL);
+
+               rep->min_percent = pcnt;
+               callchain_param.min_percent = pcnt;
                return 0;
        }
        if (!strcmp(var, "report.children")) {
@@ -87,7 +90,7 @@ static int report__config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static int hist_iter__report_callback(struct hist_entry_iter *iter,
@@ -466,10 +469,11 @@ static int report__browse_hists(struct report *rep)
        return ret;
 }
 
-static void report__collapse_hists(struct report *rep)
+static int report__collapse_hists(struct report *rep)
 {
        struct ui_progress prog;
        struct perf_evsel *pos;
+       int ret = 0;
 
        ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
 
@@ -481,7 +485,9 @@ static void report__collapse_hists(struct report *rep)
 
                hists->socket_filter = rep->socket_filter;
 
-               hists__collapse_resort(hists, &prog);
+               ret = hists__collapse_resort(hists, &prog);
+               if (ret < 0)
+                       break;
 
                /* Non-group events are considered as leader */
                if (symbol_conf.event_group &&
@@ -494,6 +500,7 @@ static void report__collapse_hists(struct report *rep)
        }
 
        ui_progress__finish();
+       return ret;
 }
 
 static void report__output_resort(struct report *rep)
@@ -504,7 +511,7 @@ static void report__output_resort(struct report *rep)
        ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
 
        evlist__for_each(rep->session->evlist, pos)
-               hists__output_resort(evsel__hists(pos), &prog);
+               perf_evsel__output_resort(pos, &prog);
 
        ui_progress__finish();
 }
@@ -561,7 +568,11 @@ static int __cmd_report(struct report *rep)
                }
        }
 
-       report__collapse_hists(rep);
+       ret = report__collapse_hists(rep);
+       if (ret) {
+               ui__error("failed to process hist entry\n");
+               return ret;
+       }
 
        if (session_done())
                return 0;
@@ -633,8 +644,10 @@ parse_percent_limit(const struct option *opt, const char *str,
                    int unset __maybe_unused)
 {
        struct report *rep = opt->value;
+       double pcnt = strtof(str, NULL);
 
-       rep->min_percent = strtof(str, NULL);
+       rep->min_percent = pcnt;
+       callchain_param.min_percent = pcnt;
        return 0;
 }
 
@@ -798,6 +811,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                    "only show processor socket that match with this filter"),
        OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
                    "Show raw trace event output (do not use print fmt or plugins)"),
+       OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+                   "Show entries in a hierarchy"),
        OPT_END()
        };
        struct perf_data_file file = {
@@ -907,13 +922,19 @@ repeat:
                symbol_conf.cumulate_callchain = false;
        }
 
-       if (setup_sorting(session->evlist) < 0) {
-               if (sort_order)
-                       parse_options_usage(report_usage, options, "s", 1);
-               if (field_order)
-                       parse_options_usage(sort_order ? NULL : report_usage,
-                                           options, "F", 1);
-               goto error;
+       if (symbol_conf.report_hierarchy) {
+               /* disable incompatible options */
+               symbol_conf.event_group = false;
+               symbol_conf.cumulate_callchain = false;
+
+               if (field_order) {
+                       pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+                       parse_options_usage(report_usage, options, "F", 1);
+                       parse_options_usage(NULL, options, "hierarchy", 0);
+                       goto error;
+               }
+
+               sort__need_collapse = true;
        }
 
        /* Force tty output for header output and per-thread stat. */
@@ -925,6 +946,15 @@ repeat:
        else
                use_browser = 0;
 
+       if (setup_sorting(session->evlist) < 0) {
+               if (sort_order)
+                       parse_options_usage(report_usage, options, "s", 1);
+               if (field_order)
+                       parse_options_usage(sort_order ? NULL : report_usage,
+                                           options, "F", 1);
+               goto error;
+       }
+
        if (report.header || report.header_only) {
                perf_session__fprintf_info(session, stdout,
                                           report.show_full_info);
index c691214d820f0050c39e9d376d69cd5891132a47..57f9a7e7f7d3e948a29a92018140902c7f5c840f 100644 (file)
@@ -23,6 +23,7 @@
 #include "util/stat.h"
 #include <linux/bitmap.h>
 #include "asm/bug.h"
+#include "util/mem-events.h"
 
 static char const              *script_name;
 static char const              *generate_script_lang;
@@ -58,6 +59,9 @@ enum perf_output_field {
        PERF_OUTPUT_IREGS           = 1U << 14,
        PERF_OUTPUT_BRSTACK         = 1U << 15,
        PERF_OUTPUT_BRSTACKSYM      = 1U << 16,
+       PERF_OUTPUT_DATA_SRC        = 1U << 17,
+       PERF_OUTPUT_WEIGHT          = 1U << 18,
+       PERF_OUTPUT_BPF_OUTPUT      = 1U << 19,
 };
 
 struct output_option {
@@ -81,6 +85,9 @@ struct output_option {
        {.str = "iregs", .field = PERF_OUTPUT_IREGS},
        {.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
        {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
+       {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
+       {.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
+       {.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
 };
 
 /* default set to maintain compatibility with current format */
@@ -101,7 +108,7 @@ static struct {
                              PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
                              PERF_OUTPUT_PERIOD,
 
-               .invalid_fields = PERF_OUTPUT_TRACE,
+               .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
        },
 
        [PERF_TYPE_SOFTWARE] = {
@@ -111,7 +118,7 @@ static struct {
                              PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
                              PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
                              PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-                             PERF_OUTPUT_PERIOD,
+                             PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
 
                .invalid_fields = PERF_OUTPUT_TRACE,
        },
@@ -121,7 +128,7 @@ static struct {
 
                .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
                                  PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
-                                 PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE,
+                                 PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
        },
 
        [PERF_TYPE_RAW] = {
@@ -131,9 +138,10 @@ static struct {
                              PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
                              PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
                              PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-                             PERF_OUTPUT_PERIOD,
+                             PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
+                             PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
 
-               .invalid_fields = PERF_OUTPUT_TRACE,
+               .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
        },
 
        [PERF_TYPE_BREAKPOINT] = {
@@ -145,7 +153,7 @@ static struct {
                              PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
                              PERF_OUTPUT_PERIOD,
 
-               .invalid_fields = PERF_OUTPUT_TRACE,
+               .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
        },
 };
 
@@ -242,6 +250,16 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
                                           PERF_OUTPUT_ADDR, allow_user_set))
                return -EINVAL;
 
+       if (PRINT_FIELD(DATA_SRC) &&
+               perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
+                                       PERF_OUTPUT_DATA_SRC))
+               return -EINVAL;
+
+       if (PRINT_FIELD(WEIGHT) &&
+               perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
+                                       PERF_OUTPUT_WEIGHT))
+               return -EINVAL;
+
        if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
                pr_err("Display of symbols requested but neither sample IP nor "
                           "sample address\nis selected. Hence, no addresses to convert "
@@ -608,6 +626,84 @@ static void print_sample_flags(u32 flags)
        printf("  %-4s ", str);
 }
 
+struct printer_data {
+       int line_no;
+       bool hit_nul;
+       bool is_printable;
+};
+
+static void
+print_sample_bpf_output_printer(enum binary_printer_ops op,
+                               unsigned int val,
+                               void *extra)
+{
+       unsigned char ch = (unsigned char)val;
+       struct printer_data *printer_data = extra;
+
+       switch (op) {
+       case BINARY_PRINT_DATA_BEGIN:
+               printf("\n");
+               break;
+       case BINARY_PRINT_LINE_BEGIN:
+               printf("%17s", !printer_data->line_no ? "BPF output:" :
+                                                       "           ");
+               break;
+       case BINARY_PRINT_ADDR:
+               printf(" %04x:", val);
+               break;
+       case BINARY_PRINT_NUM_DATA:
+               printf(" %02x", val);
+               break;
+       case BINARY_PRINT_NUM_PAD:
+               printf("   ");
+               break;
+       case BINARY_PRINT_SEP:
+               printf("  ");
+               break;
+       case BINARY_PRINT_CHAR_DATA:
+               if (printer_data->hit_nul && ch)
+                       printer_data->is_printable = false;
+
+               if (!isprint(ch)) {
+                       printf("%c", '.');
+
+                       if (!printer_data->is_printable)
+                               break;
+
+                       if (ch == '\0')
+                               printer_data->hit_nul = true;
+                       else
+                               printer_data->is_printable = false;
+               } else {
+                       printf("%c", ch);
+               }
+               break;
+       case BINARY_PRINT_CHAR_PAD:
+               printf(" ");
+               break;
+       case BINARY_PRINT_LINE_END:
+               printf("\n");
+               printer_data->line_no++;
+               break;
+       case BINARY_PRINT_DATA_END:
+       default:
+               break;
+       }
+}
+
+static void print_sample_bpf_output(struct perf_sample *sample)
+{
+       unsigned int nr_bytes = sample->raw_size;
+       struct printer_data printer_data = {0, false, true};
+
+       print_binary(sample->raw_data, nr_bytes, 8,
+                    print_sample_bpf_output_printer, &printer_data);
+
+       if (printer_data.is_printable && printer_data.hit_nul)
+               printf("%17s \"%s\"\n", "BPF string:",
+                      (char *)(sample->raw_data));
+}
+
 struct perf_script {
        struct perf_tool        tool;
        struct perf_session     *session;
@@ -634,6 +730,23 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist)
        return max;
 }
 
+static size_t data_src__printf(u64 data_src)
+{
+       struct mem_info mi = { .data_src.val = data_src };
+       char decode[100];
+       char out[100];
+       static int maxlen;
+       int len;
+
+       perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+       len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode);
+       if (maxlen < len)
+               maxlen = len;
+
+       return printf("%-*s", maxlen, out);
+}
+
 static void process_event(struct perf_script *script, union perf_event *event,
                          struct perf_sample *sample, struct perf_evsel *evsel,
                          struct addr_location *al)
@@ -673,6 +786,12 @@ static void process_event(struct perf_script *script, union perf_event *event,
        if (PRINT_FIELD(ADDR))
                print_sample_addr(event, sample, thread, attr);
 
+       if (PRINT_FIELD(DATA_SRC))
+               data_src__printf(sample->data_src);
+
+       if (PRINT_FIELD(WEIGHT))
+               printf("%16" PRIu64, sample->weight);
+
        if (PRINT_FIELD(IP)) {
                if (!symbol_conf.use_callchain)
                        printf(" ");
@@ -692,6 +811,9 @@ static void process_event(struct perf_script *script, union perf_event *event,
        else if (PRINT_FIELD(BRSTACKSYM))
                print_sample_brstacksym(event, sample, thread, attr);
 
+       if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+               print_sample_bpf_output(sample);
+
        printf("\n");
 }
 
@@ -1090,23 +1212,6 @@ static struct script_spec *script_spec__find(const char *spec)
        return NULL;
 }
 
-static struct script_spec *script_spec__findnew(const char *spec,
-                                               struct scripting_ops *ops)
-{
-       struct script_spec *s = script_spec__find(spec);
-
-       if (s)
-               return s;
-
-       s = script_spec__new(spec, ops);
-       if (!s)
-               return NULL;
-
-       script_spec__add(s);
-
-       return s;
-}
-
 int script_spec_register(const char *spec, struct scripting_ops *ops)
 {
        struct script_spec *s;
@@ -1115,9 +1220,11 @@ int script_spec_register(const char *spec, struct scripting_ops *ops)
        if (s)
                return -1;
 
-       s = script_spec__findnew(spec, ops);
+       s = script_spec__new(spec, ops);
        if (!s)
                return -1;
+       else
+               script_spec__add(s);
 
        return 0;
 }
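
The builtin-script.c hunks above add three sample fields to 'perf script' output: the decoded data source (data_src__printf()), the sample weight, and raw BPF output payloads (print_sample_bpf_output()). A rough usage sketch, assuming the new -F selectors are named data_src, weight and bpf-output to match the fields added above (exact output layout illustrative only):

    # perf mem record -a -- sleep 1
    # perf script -F comm,pid,time,addr,data_src,weight

The data_src column prints the raw PERF_SAMPLE_DATA_SRC value followed by the string decoded via perf_script__meminfo_scnprintf(); bpf-output payloads are hexdumped via print_binary() and, when fully printable, also shown as a "BPF string".
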
index 038e877081b682dd8d9ba052a0be66c8ac2786c6..1f19f2f999c841b9da140e10bcaf5e6e0f41ee6b 100644 (file)
@@ -122,6 +122,7 @@ static bool                 sync_run                        = false;
 static unsigned int            initial_delay                   = 0;
 static unsigned int            unit_width                      = 4; /* strlen("unit") */
 static bool                    forever                         = false;
+static bool                    metric_only                     = false;
 static struct timespec         ref_time;
 static struct cpu_map          *aggr_map;
 static aggr_get_id_t           aggr_get_id;
@@ -735,6 +736,191 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
        }
 }
 
+struct outstate {
+       FILE *fh;
+       bool newline;
+       const char *prefix;
+       int  nfields;
+       int  id, nr;
+       struct perf_evsel *evsel;
+};
+
+#define METRIC_LEN  35
+
+static void new_line_std(void *ctx)
+{
+       struct outstate *os = ctx;
+
+       os->newline = true;
+}
+
+static void do_new_line_std(struct outstate *os)
+{
+       fputc('\n', os->fh);
+       fputs(os->prefix, os->fh);
+       aggr_printout(os->evsel, os->id, os->nr);
+       if (stat_config.aggr_mode == AGGR_NONE)
+               fprintf(os->fh, "        ");
+       fprintf(os->fh, "                                                 ");
+}
+
+static void print_metric_std(void *ctx, const char *color, const char *fmt,
+                            const char *unit, double val)
+{
+       struct outstate *os = ctx;
+       FILE *out = os->fh;
+       int n;
+       bool newline = os->newline;
+
+       os->newline = false;
+
+       if (unit == NULL || fmt == NULL) {
+               fprintf(out, "%-*s", METRIC_LEN, "");
+               return;
+       }
+
+       if (newline)
+               do_new_line_std(os);
+
+       n = fprintf(out, " # ");
+       if (color)
+               n += color_fprintf(out, color, fmt, val);
+       else
+               n += fprintf(out, fmt, val);
+       fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(void *ctx)
+{
+       struct outstate *os = ctx;
+       int i;
+
+       fputc('\n', os->fh);
+       if (os->prefix)
+               fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+       aggr_printout(os->evsel, os->id, os->nr);
+       for (i = 0; i < os->nfields; i++)
+               fputs(csv_sep, os->fh);
+}
+
+static void print_metric_csv(void *ctx,
+                            const char *color __maybe_unused,
+                            const char *fmt, const char *unit, double val)
+{
+       struct outstate *os = ctx;
+       FILE *out = os->fh;
+       char buf[64], *vals, *ends;
+
+       if (unit == NULL || fmt == NULL) {
+               fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+               return;
+       }
+       snprintf(buf, sizeof(buf), fmt, val);
+       vals = buf;
+       while (isspace(*vals))
+               vals++;
+       ends = vals;
+       while (isdigit(*ends) || *ends == '.')
+               ends++;
+       *ends = 0;
+       while (isspace(*unit))
+               unit++;
+       fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
+}
+
+#define METRIC_ONLY_LEN 20
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+       if (!unit)
+               return false;
+       if (strstr(unit, "/sec") ||
+           strstr(unit, "hz") ||
+           strstr(unit, "Hz") ||
+           strstr(unit, "CPUs utilized"))
+               return false;
+       return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+                          const char *unit)
+{
+       if (!strncmp(unit, "of all", 6)) {
+               snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+                        unit);
+               return buf;
+       }
+       return unit;
+}
+
+static void print_metric_only(void *ctx, const char *color, const char *fmt,
+                             const char *unit, double val)
+{
+       struct outstate *os = ctx;
+       FILE *out = os->fh;
+       int n;
+       char buf[1024];
+       unsigned mlen = METRIC_ONLY_LEN;
+
+       if (!valid_only_metric(unit))
+               return;
+       unit = fixunit(buf, os->evsel, unit);
+       if (color)
+               n = color_fprintf(out, color, fmt, val);
+       else
+               n = fprintf(out, fmt, val);
+       if (n > METRIC_ONLY_LEN)
+               n = METRIC_ONLY_LEN;
+       if (mlen < strlen(unit))
+               mlen = strlen(unit) + 1;
+       fprintf(out, "%*s", mlen - n, "");
+}
+
+static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
+                                 const char *fmt,
+                                 const char *unit, double val)
+{
+       struct outstate *os = ctx;
+       FILE *out = os->fh;
+       char buf[64], *vals, *ends;
+       char tbuf[1024];
+
+       if (!valid_only_metric(unit))
+               return;
+       unit = fixunit(tbuf, os->evsel, unit);
+       snprintf(buf, sizeof buf, fmt, val);
+       vals = buf;
+       while (isspace(*vals))
+               vals++;
+       ends = vals;
+       while (isdigit(*ends) || *ends == '.')
+               ends++;
+       *ends = 0;
+       fprintf(out, "%s%s", vals, csv_sep);
+}
+
+static void new_line_metric(void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(void *ctx, const char *color __maybe_unused,
+                               const char *fmt __maybe_unused,
+                               const char *unit, double val __maybe_unused)
+{
+       struct outstate *os = ctx;
+       char tbuf[1024];
+
+       if (!valid_only_metric(unit))
+               return;
+       unit = fixunit(tbuf, os->evsel, unit);
+       if (csv_output)
+               fprintf(os->fh, "%s%s", unit, csv_sep);
+       else
+               fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
+}
+
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
        FILE *output = stat_config.output;
@@ -763,6 +949,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+       int i;
+
+       if (!aggr_get_id)
+               return 0;
+
+       if (stat_config.aggr_mode == AGGR_NONE)
+               return id;
+
+       if (stat_config.aggr_mode == AGGR_GLOBAL)
+               return 0;
+
+       for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+               int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+               if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+                       return cpu2;
+       }
+       return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
        FILE *output = stat_config.output;
@@ -793,22 +1001,124 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
-static void printout(int id, int nr, struct perf_evsel *counter, double uval)
+static void printout(int id, int nr, struct perf_evsel *counter, double uval,
+                    char *prefix, u64 run, u64 ena, double noise)
 {
-       int cpu = cpu_map__id_to_cpu(id);
+       struct perf_stat_output_ctx out;
+       struct outstate os = {
+               .fh = stat_config.output,
+               .prefix = prefix ? prefix : "",
+               .id = id,
+               .nr = nr,
+               .evsel = counter,
+       };
+       print_metric_t pm = print_metric_std;
+       void (*nl)(void *);
 
-       if (stat_config.aggr_mode == AGGR_GLOBAL)
-               cpu = 0;
+       if (metric_only) {
+               nl = new_line_metric;
+               if (csv_output)
+                       pm = print_metric_only_csv;
+               else
+                       pm = print_metric_only;
+       } else
+               nl = new_line_std;
+
+       if (csv_output && !metric_only) {
+               static int aggr_fields[] = {
+                       [AGGR_GLOBAL] = 0,
+                       [AGGR_THREAD] = 1,
+                       [AGGR_NONE] = 1,
+                       [AGGR_SOCKET] = 2,
+                       [AGGR_CORE] = 2,
+               };
+
+               pm = print_metric_csv;
+               nl = new_line_csv;
+               os.nfields = 3;
+               os.nfields += aggr_fields[stat_config.aggr_mode];
+               if (counter->cgrp)
+                       os.nfields++;
+       }
+       if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+               if (metric_only) {
+                       pm(&os, NULL, "", "", 0);
+                       return;
+               }
+               aggr_printout(counter, id, nr);
+
+               fprintf(stat_config.output, "%*s%s",
+                       csv_output ? 0 : 18,
+                       counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+                       csv_sep);
+
+               fprintf(stat_config.output, "%-*s%s",
+                       csv_output ? 0 : unit_width,
+                       counter->unit, csv_sep);
+
+               fprintf(stat_config.output, "%*s",
+                       csv_output ? 0 : -25,
+                       perf_evsel__name(counter));
+
+               if (counter->cgrp)
+                       fprintf(stat_config.output, "%s%s",
+                               csv_sep, counter->cgrp->name);
 
-       if (nsec_counter(counter))
+               if (!csv_output)
+                       pm(&os, NULL, NULL, "", 0);
+               print_noise(counter, noise);
+               print_running(run, ena);
+               if (csv_output)
+                       pm(&os, NULL, NULL, "", 0);
+               return;
+       }
+
+       if (metric_only)
+               /* nothing */;
+       else if (nsec_counter(counter))
                nsec_printout(id, nr, counter, uval);
        else
                abs_printout(id, nr, counter, uval);
 
-       if (!csv_output && !stat_config.interval)
-               perf_stat__print_shadow_stats(stat_config.output, counter,
-                                             uval, cpu,
-                                             stat_config.aggr_mode);
+       out.print_metric = pm;
+       out.new_line = nl;
+       out.ctx = &os;
+
+       if (csv_output && !metric_only) {
+               print_noise(counter, noise);
+               print_running(run, ena);
+       }
+
+       perf_stat__print_shadow_stats(counter, uval,
+                               first_shadow_cpu(counter, id),
+                               &out);
+       if (!csv_output && !metric_only) {
+               print_noise(counter, noise);
+               print_running(run, ena);
+       }
+}
+
+static void aggr_update_shadow(void)
+{
+       int cpu, s2, id, s;
+       u64 val;
+       struct perf_evsel *counter;
+
+       for (s = 0; s < aggr_map->nr; s++) {
+               id = aggr_map->map[s];
+               evlist__for_each(evsel_list, counter) {
+                       val = 0;
+                       for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+                               s2 = aggr_get_id(evsel_list->cpus, cpu);
+                               if (s2 != id)
+                                       continue;
+                               val += perf_counts(counter->counts, cpu, 0)->val;
+                       }
+                       val = val * counter->scale;
+                       perf_stat__update_shadow_stats(counter, &val,
+                                                      first_shadow_cpu(counter, id));
+               }
+       }
 }
 
 static void print_aggr(char *prefix)
@@ -818,12 +1128,23 @@ static void print_aggr(char *prefix)
        int cpu, s, s2, id, nr;
        double uval;
        u64 ena, run, val;
+       bool first;
 
        if (!(aggr_map || aggr_get_id))
                return;
 
+       aggr_update_shadow();
+
+       /*
+        * With metric_only everything is on a single line.
+        * Without it, each counter has its own line.
+        */
        for (s = 0; s < aggr_map->nr; s++) {
+               if (prefix && metric_only)
+                       fprintf(output, "%s", prefix);
+
                id = aggr_map->map[s];
+               first = true;
                evlist__for_each(evsel_list, counter) {
                        val = ena = run = 0;
                        nr = 0;
@@ -836,41 +1157,20 @@ static void print_aggr(char *prefix)
                                run += perf_counts(counter->counts, cpu, 0)->run;
                                nr++;
                        }
-                       if (prefix)
-                               fprintf(output, "%s", prefix);
-
-                       if (run == 0 || ena == 0) {
+                       if (first && metric_only) {
+                               first = false;
                                aggr_printout(counter, id, nr);
-
-                               fprintf(output, "%*s%s",
-                                       csv_output ? 0 : 18,
-                                       counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-                                       csv_sep);
-
-                               fprintf(output, "%-*s%s",
-                                       csv_output ? 0 : unit_width,
-                                       counter->unit, csv_sep);
-
-                               fprintf(output, "%*s",
-                                       csv_output ? 0 : -25,
-                                       perf_evsel__name(counter));
-
-                               if (counter->cgrp)
-                                       fprintf(output, "%s%s",
-                                               csv_sep, counter->cgrp->name);
-
-                               print_running(run, ena);
-                               fputc('\n', output);
-                               continue;
                        }
-                       uval = val * counter->scale;
-                       printout(id, nr, counter, uval);
-                       if (!csv_output)
-                               print_noise(counter, 1.0);
+                       if (prefix && !metric_only)
+                               fprintf(output, "%s", prefix);
 
-                       print_running(run, ena);
-                       fputc('\n', output);
+                       uval = val * counter->scale;
+                       printout(id, nr, counter, uval, prefix, run, ena, 1.0);
+                       if (!metric_only)
+                               fputc('\n', output);
                }
+               if (metric_only)
+                       fputc('\n', output);
        }
 }
 
@@ -895,12 +1195,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
                        fprintf(output, "%s", prefix);
 
                uval = val * counter->scale;
-               printout(thread, 0, counter, uval);
-
-               if (!csv_output)
-                       print_noise(counter, 1.0);
-
-               print_running(run, ena);
+               printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
                fputc('\n', output);
        }
 }
@@ -914,43 +1209,19 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
        FILE *output = stat_config.output;
        struct perf_stat_evsel *ps = counter->priv;
        double avg = avg_stats(&ps->res_stats[0]);
-       int scaled = counter->counts->scaled;
        double uval;
        double avg_enabled, avg_running;
 
        avg_enabled = avg_stats(&ps->res_stats[1]);
        avg_running = avg_stats(&ps->res_stats[2]);
 
-       if (prefix)
+       if (prefix && !metric_only)
                fprintf(output, "%s", prefix);
 
-       if (scaled == -1 || !counter->supported) {
-               fprintf(output, "%*s%s",
-                       csv_output ? 0 : 18,
-                       counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-                       csv_sep);
-               fprintf(output, "%-*s%s",
-                       csv_output ? 0 : unit_width,
-                       counter->unit, csv_sep);
-               fprintf(output, "%*s",
-                       csv_output ? 0 : -25,
-                       perf_evsel__name(counter));
-
-               if (counter->cgrp)
-                       fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
-
-               print_running(avg_running, avg_enabled);
-               fputc('\n', output);
-               return;
-       }
-
        uval = avg * counter->scale;
-       printout(-1, 0, counter, uval);
-
-       print_noise(counter, avg);
-
-       print_running(avg_running, avg_enabled);
-       fprintf(output, "\n");
+       printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
+       if (!metric_only)
+               fprintf(output, "\n");
 }
 
 /*
@@ -972,39 +1243,78 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
                if (prefix)
                        fprintf(output, "%s", prefix);
 
-               if (run == 0 || ena == 0) {
-                       fprintf(output, "CPU%*d%s%*s%s",
-                               csv_output ? 0 : -4,
-                               perf_evsel__cpus(counter)->map[cpu], csv_sep,
-                               csv_output ? 0 : 18,
-                               counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-                               csv_sep);
+               uval = val * counter->scale;
+               printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
 
-                               fprintf(output, "%-*s%s",
-                                       csv_output ? 0 : unit_width,
-                                       counter->unit, csv_sep);
+               fputc('\n', output);
+       }
+}
 
-                               fprintf(output, "%*s",
-                                       csv_output ? 0 : -25,
-                                       perf_evsel__name(counter));
+static void print_no_aggr_metric(char *prefix)
+{
+       int cpu;
+       int nrcpus = 0;
+       struct perf_evsel *counter;
+       u64 ena, run, val;
+       double uval;
 
-                       if (counter->cgrp)
-                               fprintf(output, "%s%s",
-                                       csv_sep, counter->cgrp->name);
+       nrcpus = evsel_list->cpus->nr;
+       for (cpu = 0; cpu < nrcpus; cpu++) {
+               bool first = true;
 
-                       print_running(run, ena);
-                       fputc('\n', output);
-                       continue;
+               if (prefix)
+                       fputs(prefix, stat_config.output);
+               evlist__for_each(evsel_list, counter) {
+                       if (first) {
+                               aggr_printout(counter, cpu, 0);
+                               first = false;
+                       }
+                       val = perf_counts(counter->counts, cpu, 0)->val;
+                       ena = perf_counts(counter->counts, cpu, 0)->ena;
+                       run = perf_counts(counter->counts, cpu, 0)->run;
+
+                       uval = val * counter->scale;
+                       printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
                }
+               fputc('\n', stat_config.output);
+       }
+}
 
-               uval = val * counter->scale;
-               printout(cpu, 0, counter, uval);
-               if (!csv_output)
-                       print_noise(counter, 1.0);
-               print_running(run, ena);
+static int aggr_header_lens[] = {
+       [AGGR_CORE] = 18,
+       [AGGR_SOCKET] = 12,
+       [AGGR_NONE] = 6,
+       [AGGR_THREAD] = 24,
+       [AGGR_GLOBAL] = 0,
+};
 
-               fputc('\n', output);
+static void print_metric_headers(char *prefix)
+{
+       struct perf_stat_output_ctx out;
+       struct perf_evsel *counter;
+       struct outstate os = {
+               .fh = stat_config.output
+       };
+
+       if (prefix)
+               fprintf(stat_config.output, "%s", prefix);
+
+       if (!csv_output)
+               fprintf(stat_config.output, "%*s",
+                       aggr_header_lens[stat_config.aggr_mode], "");
+
+       /* Print metrics headers only */
+       evlist__for_each(evsel_list, counter) {
+               os.evsel = counter;
+               out.ctx = &os;
+               out.print_metric = print_metric_header;
+               out.new_line = new_line_metric;
+               os.evsel = counter;
+               perf_stat__print_shadow_stats(counter, 0,
+                                             0,
+                                             &out);
        }
+       fputc('\n', stat_config.output);
 }
 
 static void print_interval(char *prefix, struct timespec *ts)
@@ -1014,7 +1324,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 
        sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-       if (num_print_interval == 0 && !csv_output) {
+       if (num_print_interval == 0 && !csv_output && !metric_only) {
                switch (stat_config.aggr_mode) {
                case AGGR_SOCKET:
                        fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
@@ -1101,6 +1411,17 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        else
                print_header(argc, argv);
 
+       if (metric_only) {
+               static int num_print_iv;
+
+               if (num_print_iv == 0)
+                       print_metric_headers(prefix);
+               if (num_print_iv++ == 25)
+                       num_print_iv = 0;
+               if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
+                       fprintf(stat_config.output, "%s", prefix);
+       }
+
        switch (stat_config.aggr_mode) {
        case AGGR_CORE:
        case AGGR_SOCKET:
@@ -1113,10 +1434,16 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        case AGGR_GLOBAL:
                evlist__for_each(evsel_list, counter)
                        print_counter_aggr(counter, prefix);
+               if (metric_only)
+                       fputc('\n', stat_config.output);
                break;
        case AGGR_NONE:
-               evlist__for_each(evsel_list, counter)
-                       print_counter(counter, prefix);
+               if (metric_only)
+                       print_no_aggr_metric(prefix);
+               else {
+                       evlist__for_each(evsel_list, counter)
+                               print_counter(counter, prefix);
+               }
                break;
        case AGGR_UNSET:
        default:
@@ -1237,6 +1564,8 @@ static const struct option stat_options[] = {
                     "aggregate counts per thread", AGGR_THREAD),
        OPT_UINTEGER('D', "delay", &initial_delay,
                     "ms to wait before starting measurement after program start"),
+       OPT_BOOLEAN(0, "metric-only", &metric_only,
+                       "Only print computed metrics. No raw values"),
        OPT_END()
 };
 
@@ -1435,7 +1764,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
  */
 static int add_default_attributes(void)
 {
-       struct perf_event_attr default_attrs[] = {
+       struct perf_event_attr default_attrs0[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES       },
@@ -1443,8 +1772,14 @@ static int add_default_attributes(void)
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS            },
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES             },
+};
+       struct perf_event_attr frontend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND        },
+};
+       struct perf_event_attr backend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+};
+       struct perf_event_attr default_attrs1[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS           },
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS    },
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES          },
@@ -1561,7 +1896,19 @@ static int add_default_attributes(void)
        }
 
        if (!evsel_list->nr_entries) {
-               if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
+               if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+                       return -1;
+               if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+                       if (perf_evlist__add_default_attrs(evsel_list,
+                                               frontend_attrs) < 0)
+                               return -1;
+               }
+               if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+                       if (perf_evlist__add_default_attrs(evsel_list,
+                                               backend_attrs) < 0)
+                               return -1;
+               }
+               if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
                        return -1;
        }
 
@@ -1825,9 +2172,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        if (evsel_list == NULL)
                return -ENOMEM;
 
+       parse_events__shrink_config_terms();
        argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
                                        (const char **) stat_usage,
                                        PARSE_OPT_STOP_AT_NON_OPTION);
+       perf_stat__init_shadow_stats();
 
        if (csv_sep) {
                csv_output = true;
@@ -1858,6 +2207,16 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
                goto out;
        }
 
+       if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+               fprintf(stderr, "--metric-only is not supported with --per-thread\n");
+               goto out;
+       }
+
+       if (metric_only && run_count > 1) {
+               fprintf(stderr, "--metric-only is not supported with -r\n");
+               goto out;
+       }
+
        if (output_fd < 0) {
                fprintf(stderr, "argument to --log-fd must be a > 0\n");
                parse_options_usage(stat_usage, stat_options, "log-fd", 0);
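
The builtin-stat.c changes above add a --metric-only mode: only the computed shadow metrics are printed, one line per aggregation unit, with column headers emitted by print_metric_headers(). A minimal invocation sketch (the exact columns depend on which events the CPU supports):

    # perf stat --metric-only -a -- sleep 1
    # perf stat --metric-only -x, -a -- sleep 1

The second form selects the CSV path (print_metric_only_csv). As the option checks above show, --metric-only is rejected in combination with --per-thread aggregation and with repeated runs (-r).
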
index bf01cbb0ef2369f2fc904809b86b70a823bdb604..94af190f6843d2c92e68b2dfef8dc51017c42079 100644 (file)
@@ -252,7 +252,8 @@ static void perf_top__print_sym_table(struct perf_top *top)
        char bf[160];
        int printed = 0;
        const int win_width = top->winsize.ws_col - 1;
-       struct hists *hists = evsel__hists(top->sym_evsel);
+       struct perf_evsel *evsel = top->sym_evsel;
+       struct hists *hists = evsel__hists(evsel);
 
        puts(CONSOLE_CLEAR);
 
@@ -288,7 +289,7 @@ static void perf_top__print_sym_table(struct perf_top *top)
        }
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        hists__output_recalc_col_len(hists, top->print_entries - printed);
        putchar('\n');
@@ -540,6 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 static void perf_top__sort_new_samples(void *arg)
 {
        struct perf_top *t = arg;
+       struct perf_evsel *evsel = t->sym_evsel;
        struct hists *hists;
 
        perf_top__reset_sample_counters(t);
@@ -547,7 +549,7 @@ static void perf_top__sort_new_samples(void *arg)
        if (t->evlist->selected != NULL)
                t->sym_evsel = t->evlist->selected;
 
-       hists = evsel__hists(t->sym_evsel);
+       hists = evsel__hists(evsel);
 
        if (t->evlist->enabled) {
                if (t->zero) {
@@ -559,7 +561,7 @@ static void perf_top__sort_new_samples(void *arg)
        }
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 }
 
 static void *display_thread_tui(void *arg)
@@ -1063,7 +1065,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
        return parse_callchain_top_opt(arg);
 }
 
-static int perf_top_config(const char *var, const char *value, void *cb)
+static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
        if (!strcmp(var, "top.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */
@@ -1072,7 +1074,7 @@ static int perf_top_config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static int
@@ -1212,6 +1214,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                     parse_branch_stack),
        OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
                    "Show raw trace event output (do not use print fmt or plugins)"),
+       OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+                   "Show entries in a hierarchy"),
        OPT_END()
        };
        const char * const top_usage[] = {
@@ -1239,10 +1243,30 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                goto out_delete_evlist;
        }
 
+       if (symbol_conf.report_hierarchy) {
+               /* disable incompatible options */
+               symbol_conf.event_group = false;
+               symbol_conf.cumulate_callchain = false;
+
+               if (field_order) {
+                       pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+                       parse_options_usage(top_usage, options, "fields", 0);
+                       parse_options_usage(NULL, options, "hierarchy", 0);
+                       goto out_delete_evlist;
+               }
+       }
+
        sort__mode = SORT_MODE__TOP;
        /* display thread wants entries to be collapsed in a different tree */
        sort__need_collapse = 1;
 
+       if (top.use_stdio)
+               use_browser = 0;
+       else if (top.use_tui)
+               use_browser = 1;
+
+       setup_browser(false);
+
        if (setup_sorting(top.evlist) < 0) {
                if (sort_order)
                        parse_options_usage(top_usage, options, "s", 1);
@@ -1252,13 +1276,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                goto out_delete_evlist;
        }
 
-       if (top.use_stdio)
-               use_browser = 0;
-       else if (top.use_tui)
-               use_browser = 1;
-
-       setup_browser(false);
-
        status = target__validate(target);
        if (status) {
                target__strerror(target, status, errbuf, BUFSIZ);
index 20916dd77aac24847bffbaa5d1c0fb56580d6e39..8dc98c598b1aed734ab6a6695c7308f3c16e0910 100644 (file)
@@ -33,6 +33,7 @@
 #include "util/stat.h"
 #include "trace-event.h"
 #include "util/parse-events.h"
+#include "util/bpf-loader.h"
 
 #include <libaudit.h>
 #include <stdlib.h>
@@ -1724,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 
        sc->args = sc->tp_format->format.fields;
        sc->nr_args = sc->tp_format->format.nr_fields;
-       /* drop nr field - not relevant here; does not exist on older kernels */
-       if (sc->args && strcmp(sc->args->name, "nr") == 0) {
+       /*
+        * We need to check and discard the first field, '__syscall_nr'
+        * or 'nr', which carries the syscall number and is not needed here.
+        * ('__syscall_nr' is the name on newer kernels, 'nr' on older ones.)
+        */
+       if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
                sc->args = sc->args->next;
                --sc->nr_args;
        }
@@ -2177,6 +2182,37 @@ out_dump:
        return 0;
 }
 
+static void bpf_output__printer(enum binary_printer_ops op,
+                               unsigned int val, void *extra)
+{
+       FILE *output = extra;
+       unsigned char ch = (unsigned char)val;
+
+       switch (op) {
+       case BINARY_PRINT_CHAR_DATA:
+               fprintf(output, "%c", isprint(ch) ? ch : '.');
+               break;
+       case BINARY_PRINT_DATA_BEGIN:
+       case BINARY_PRINT_LINE_BEGIN:
+       case BINARY_PRINT_ADDR:
+       case BINARY_PRINT_NUM_DATA:
+       case BINARY_PRINT_NUM_PAD:
+       case BINARY_PRINT_SEP:
+       case BINARY_PRINT_CHAR_PAD:
+       case BINARY_PRINT_LINE_END:
+       case BINARY_PRINT_DATA_END:
+       default:
+               break;
+       }
+}
+
+static void bpf_output__fprintf(struct trace *trace,
+                               struct perf_sample *sample)
+{
+       print_binary(sample->raw_data, sample->raw_size, 8,
+                    bpf_output__printer, trace->output);
+}
+
 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
                                union perf_event *event __maybe_unused,
                                struct perf_sample *sample)
@@ -2189,7 +2225,9 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 
        fprintf(trace->output, "%s:", evsel->name);
 
-       if (evsel->tp_format) {
+       if (perf_evsel__is_bpf_output(evsel)) {
+               bpf_output__fprintf(trace, sample);
+       } else if (evsel->tp_format) {
                event_format__fprintf(evsel->tp_format, sample->cpu,
                                      sample->raw_data, sample->raw_size,
                                      trace->output);
@@ -2586,6 +2624,16 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (err < 0)
                goto out_error_open;
 
+       err = bpf__apply_obj_config();
+       if (err) {
+               char errbuf[BUFSIZ];
+
+               bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+               pr_err("ERROR: Apply config to BPF failed: %s\n",
+                        errbuf);
+               goto out_error_open;
+       }
+
        /*
         * Better not use !target__has_task() here because we need to cover the
         * case where no threads were specified in the command line, but a
index 511141b102e8464fe3e102007f792220306ad3b9..eca6a912e8c22b0df342a7d955c6bad2db5ddf47 100644 (file)
@@ -61,50 +61,45 @@ endif
 
 ifeq ($(LIBUNWIND_LIBS),)
   NO_LIBUNWIND := 1
-else
-  #
-  # For linking with debug library, run like:
-  #
-  #   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
-  #
-  ifdef LIBUNWIND_DIR
-    LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
-    LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
-  endif
-  LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
-
-  # Set per-feature check compilation flags
-  FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
-  FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
 endif
+#
+# For linking with debug library, run like:
+#
+#   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
+#
+ifdef LIBUNWIND_DIR
+  LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
+  LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+endif
+LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
+
+# Set per-feature check compilation flags
+FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
 
 ifeq ($(NO_PERF_REGS),0)
   CFLAGS += -DHAVE_PERF_REGS_SUPPORT
 endif
 
-ifndef NO_LIBELF
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBDW_DIR=/opt/libdw/
-  ifdef LIBDW_DIR
-    LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
-    LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
-  endif
-  FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
+# for linking with debug library, run like:
+# make DEBUG=1 LIBDW_DIR=/opt/libdw/
+ifdef LIBDW_DIR
+  LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
+  LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
 endif
+FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
 
-ifdef LIBBABELTRACE
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
-  ifdef LIBBABELTRACE_DIR
-    LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
-    LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
-  endif
-  FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+# for linking with debug library, run like:
+# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
+ifdef LIBBABELTRACE_DIR
+  LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
+  LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
 endif
+FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
 
 FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
 # include ARCH specific config
@@ -145,28 +140,26 @@ ifdef PARSER_DEBUG
   $(call detected_var,PARSER_DEBUG_FLEX)
 endif
 
-ifndef NO_LIBPYTHON
-  # Try different combinations to accommodate systems that only have
-  # python[2][-config] in weird combinations but always preferring
-  # python2 and python2-config as per pep-0394. If we catch a
-  # python[-config] in version 3, the version check will kill it.
-  PYTHON2 := $(if $(call get-executable,python2),python2,python)
-  override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
-  PYTHON2_CONFIG := \
-    $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
-  override PYTHON_CONFIG := \
-    $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
+# Try different combinations to accommodate systems that only have
+# python[2][-config] in weird combinations but always preferring
+# python2 and python2-config as per pep-0394. If we catch a
+# python[-config] in version 3, the version check will kill it.
+PYTHON2 := $(if $(call get-executable,python2),python2,python)
+override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
+PYTHON2_CONFIG := \
+  $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
+override PYTHON_CONFIG := \
+  $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
 
-  PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
 
-  PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
-  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
 
-  FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
-  FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
-  FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
-  FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
-endif
+FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
+FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
 
 CFLAGS += -fno-omit-frame-pointer
 CFLAGS += -ggdb3
@@ -335,6 +328,13 @@ ifndef NO_LIBELF
   endif # NO_LIBBPF
 endif # NO_LIBELF
 
+ifdef PERF_HAVE_JITDUMP
+  ifndef NO_DWARF
+    $(call detected,CONFIG_JITDUMP)
+    CFLAGS += -DHAVE_JITDUMP
+  endif
+endif
+
 ifeq ($(ARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
@@ -411,6 +411,17 @@ ifndef NO_LIBAUDIT
   endif
 endif
 
+ifndef NO_LIBCRYPTO
+  ifneq ($(feature-libcrypto), 1)
+    msg := $(warning No libcrypto.h found, disabling jitted code injection, please install libssl-devel or libssl-dev);
+    NO_LIBCRYPTO := 1
+  else
+    CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
+    EXTLIBS += -lcrypto
+    $(call detected,CONFIG_CRYPTO)
+  endif
+endif
+
 ifdef NO_NEWT
   NO_SLANG=1
 endif
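
The Makefile.config changes above gate the new jitdump support on CONFIG_JITDUMP (which requires DWARF support) and add an optional libcrypto dependency used for jitted-code injection. A hedged build sketch, assuming the usual tools/perf build knobs:

    $ make -C tools/perf                  # uses libcrypto if libssl-devel/libssl-dev is installed
    $ make -C tools/perf NO_LIBCRYPTO=1   # explicitly build without jitted code injection
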
diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile
new file mode 100644 (file)
index 0000000..5ce61a1
--- /dev/null
@@ -0,0 +1,89 @@
+ARCH=$(shell uname -m)
+
+ifeq ($(ARCH), x86_64)
+JARCH=amd64
+endif
+ifeq ($(ARCH), armv7l)
+JARCH=armhf
+endif
+ifeq ($(ARCH), armv6l)
+JARCH=armhf
+endif
+ifeq ($(ARCH), aarch64)
+JARCH=aarch64
+endif
+ifeq ($(ARCH), ppc64)
+JARCH=powerpc
+endif
+ifeq ($(ARCH), ppc64le)
+JARCH=powerpc
+endif
+
+DESTDIR=/usr/local
+
+VERSION=1
+REVISION=0
+AGE=0
+
+LN=ln -sf
+RM=rm
+
+SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE)
+VLIBJVMTI=libjvmti.so.$(VERSION)
+SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI)
+SOLIBEXT=so
+
+# The following works at least on fedora 23, you may need the next
+# line for other distros.
+ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
+JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3)
+else
+  ifneq (,$(wildcard /usr/sbin/alternatives))
+    JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+  endif
+endif
+ifndef JDIR
+$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory)
+else
+  ifeq (,$(wildcard $(JDIR)/include/jvmti.h))
  $(error the openjdk development package appears to be missing, install it and try again)
+  endif
+endif
+$(info Using Java from $(JDIR))
+# -lrt required in 32-bit mode for clock_gettime()
+LIBS=-lelf -lrt
+INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux
+
+TARGETS=$(SLIBJVMTI)
+
+SRCS=libjvmti.c jvmti_agent.c
+OBJS=$(SRCS:.c=.o)
+SOBJS=$(OBJS:.o=.lo)
+OPT=-O2 -g -Werror -Wall
+
+CFLAGS=$(INCDIR) $(OPT)
+
+all: $(TARGETS)
+
+.c.o:
+       $(CC) $(CFLAGS) -c $*.c
+.c.lo:
+       $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo
+
+$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h
+
+$(SLIBJVMTI):  $(SOBJS)
+       $(CC) $(CFLAGS) $(SLDFLAGS)  -o $@ $(SOBJS) $(LIBS)
+       $(LN) $@ libjvmti.$(SOLIBEXT)
+
+clean:
+       $(RM) -f *.o *.so.* *.so *.lo
+
+install:
+       -mkdir -p $(DESTDIR)/lib
+       install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/
+       (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI))
+       (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT))
+       ldconfig
+
+.SUFFIXES: .c .S .o .lo
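
The new jvmti/ directory builds a JVMTI agent, libjvmti.so, which writes a jitdump file under $JITDUMPDIR/.debug/jit/ (falling back to $HOME, then the current directory), as implemented in jvmti_agent.c below. A rough end-to-end sketch; the 'perf inject --jit' step and the '-k mono' clockid are assumptions based on the jitdump workflow, and the agent path follows the Makefile's default DESTDIR=/usr/local:

    $ make -C tools/perf/jvmti && sudo make -C tools/perf/jvmti install
    $ perf record -k mono java -agentpath:/usr/local/lib/libjvmti.so MyClass
    $ perf inject --jit -i perf.data -o perf.data.jit
    $ perf report -i perf.data.jit
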
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
new file mode 100644 (file)
index 0000000..6461e02
--- /dev/null
@@ -0,0 +1,465 @@
+/*
+ * jvmti_agent.c: JVMTI agent interface
+ *
+ * Adapted from the Oprofile code in opagent.c:
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#include <sys/types.h>
+#include <sys/stat.h> /* for mkdir() */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <syscall.h> /* for gettid() */
+#include <err.h>
+
+#include "jvmti_agent.h"
+#include "../util/jitdump.h"
+
+#define JIT_LANG "java"
+
+static char jit_path[PATH_MAX];
+static void *marker_addr;
+
+/*
+ * padding buffer
+ */
+static const char pad_bytes[7];
+
+static inline pid_t gettid(void)
+{
+       return (pid_t)syscall(__NR_gettid);
+}
+
+static int get_e_machine(struct jitheader *hdr)
+{
+       ssize_t sret;
+       char id[16];
+       int fd, ret = -1;
+       int m = -1;
+       struct {
+               uint16_t e_type;
+               uint16_t e_machine;
+       } info;
+
+       fd = open("/proc/self/exe", O_RDONLY);
+       if (fd == -1)
+               return -1;
+
+       sret = read(fd, id, sizeof(id));
+       if (sret != sizeof(id))
+               goto error;
+
+       /* check ELF signature */
+       if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F')
+               goto error;
+
+       sret = read(fd, &info, sizeof(info));
+       if (sret != sizeof(info))
+               goto error;
+
+       m = info.e_machine;
+       if (m < 0)
+               m = 0; /* ELF EM_NONE */
+
+       hdr->elf_mach = m;
+       ret = 0;
+error:
+       close(fd);
+       return ret;
+}
+
+#define NSEC_PER_SEC   1000000000
+static int perf_clk_id = CLOCK_MONOTONIC;
+
+static inline uint64_t
+timespec_to_ns(const struct timespec *ts)
+{
+        return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+static inline uint64_t
+perf_get_timestamp(void)
+{
+       struct timespec ts;
+       int ret;
+
+       ret = clock_gettime(perf_clk_id, &ts);
+       if (ret)
+               return 0;
+
+       return timespec_to_ns(&ts);
+}
+
+static int
+debug_cache_init(void)
+{
+       char str[32];
+       char *base, *p;
+       struct tm tm;
+       time_t t;
+       int ret;
+
+       time(&t);
+       localtime_r(&t, &tm);
+
+       base = getenv("JITDUMPDIR");
+       if (!base)
+               base = getenv("HOME");
+       if (!base)
+               base = ".";
+
+       strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm);
+
+       snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base);
+
+       ret = mkdir(jit_path, 0755);
+       if (ret == -1) {
+               if (errno != EEXIST) {
+                       warn("jvmti: cannot create jit cache dir %s", jit_path);
+                       return -1;
+               }
+       }
+
+       snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base);
+       ret = mkdir(jit_path, 0755);
+       if (ret == -1) {
+               if (errno != EEXIST) {
+                       warn("cannot create jit cache dir %s", jit_path);
+                       return -1;
+               }
+       }
+
+       snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str);
+
+       p = mkdtemp(jit_path);
+       if (p != jit_path) {
+               warn("cannot create jit cache dir %s", jit_path);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+perf_open_marker_file(int fd)
+{
+       long pgsz;
+
+       pgsz = sysconf(_SC_PAGESIZE);
+       if (pgsz == -1)
+               return -1;
+
+       /*
+        * we mmap the jitdump to create an MMAP RECORD in perf.data file.
+        * We mmap the jitdump file to create an MMAP RECORD in the perf.data
+        * file. The mmap is captured either live (perf record running while we
+        * mmap) or in deferred mode, via /proc/PID/maps.
+        * The MMAP record marks the jitdump file that carries extra metadata
+        * about the jitted code; perf report/annotate detect this special
+        * filename and process the jitdump file.
+        * mapping must be PROT_EXEC to ensure it is captured by perf record
+        * even when not using -d option
+        */
+       marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0);
+       return (marker_addr == MAP_FAILED) ? -1 : 0;
+}
+
+static void
+perf_close_marker_file(void)
+{
+       long pgsz;
+
+       if (!marker_addr)
+               return;
+
+       pgsz = sysconf(_SC_PAGESIZE);
+       if (pgsz == -1)
+               return;
+
+       munmap(marker_addr, pgsz);
+}
+
+void *jvmti_open(void)
+{
+       int pad_cnt;
+       char dump_path[PATH_MAX];
+       struct jitheader header;
+       int fd;
+       FILE *fp;
+
+       /*
+        * check if clockid is supported
+        */
+       if (!perf_get_timestamp())
+               warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+
+       memset(&header, 0, sizeof(header));
+
+       debug_cache_init();
+
+       /*
+        * jitdump file name
+        */
+       snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+
+       fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
+       if (fd == -1)
+               return NULL;
+
+       /*
+        * create a perf.data marker for the jitdump file
+        */
+       if (perf_open_marker_file(fd)) {
+               warnx("jvmti: failed to create marker file");
+               return NULL;
+       }
+
+       fp = fdopen(fd, "w+");
+       if (!fp) {
+               warn("jvmti: cannot create %s", dump_path);
+               close(fd);
+               /* fd is already closed and fp is NULL, so skip the error path's fclose() */
+               return NULL;
+       }
+
+       warnx("jvmti: jitdump in %s", dump_path);
+
+       if (get_e_machine(&header)) {
+               warn("get_e_machine failed\n");
+               goto error;
+       }
+
+       header.magic      = JITHEADER_MAGIC;
+       header.version    = JITHEADER_VERSION;
+       header.total_size = sizeof(header);
+       header.pid        = getpid();
+
+       /* calculate amount of padding '\0' */
+       pad_cnt = PADDING_8ALIGNED(header.total_size);
+       header.total_size += pad_cnt;
+
+       header.timestamp = perf_get_timestamp();
+
+       if (!fwrite(&header, sizeof(header), 1, fp)) {
+               warn("jvmti: cannot write dumpfile header");
+               goto error;
+       }
+
+       /* write padding '\0' if necessary */
+       if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) {
+               warn("jvmti: cannot write dumpfile header padding");
+               goto error;
+       }
+
+       return fp;
+error:
+       fclose(fp);
+       return NULL;
+}
+
+int
+jvmti_close(void *agent)
+{
+       struct jr_code_close rec;
+       FILE *fp = agent;
+
+       if (!fp) {
+               warnx("jvmti: invalid fd in close_agent");
+               return -1;
+       }
+
+       rec.p.id = JIT_CODE_CLOSE;
+       rec.p.total_size = sizeof(rec);
+
+       rec.p.timestamp = perf_get_timestamp();
+
+       if (!fwrite(&rec, sizeof(rec), 1, fp))
+               return -1;
+
+       fclose(fp);
+
+       fp = NULL;
+
+       perf_close_marker_file();
+
+       return 0;
+}
+
+int
+jvmti_write_code(void *agent, char const *sym,
+       uint64_t vma, void const *code, unsigned int const size)
+{
+       static int code_generation = 1;
+       struct jr_code_load rec;
+       size_t sym_len;
+       size_t padding_count;
+       FILE *fp = agent;
+       int ret = -1;
+
+       /* don't care about 0 length function, no samples */
+       if (size == 0)
+               return 0;
+
+       if (!fp) {
+               warnx("jvmti: invalid fd in write_native_code");
+               return -1;
+       }
+
+       sym_len = strlen(sym) + 1;
+
+       rec.p.id           = JIT_CODE_LOAD;
+       rec.p.total_size   = sizeof(rec) + sym_len;
+       padding_count      = PADDING_8ALIGNED(rec.p.total_size);
+       rec.p.total_size  += padding_count;
+       rec.p.timestamp    = perf_get_timestamp();
+
+       rec.code_size  = size;
+       rec.vma        = vma;
+       rec.code_addr  = vma;
+       rec.pid        = getpid();
+       rec.tid        = gettid();
+
+       if (code)
+               rec.p.total_size += size;
+
+       /*
+        * If the JVM is multi-threaded, multiple concurrent calls to the agent
+        * may be possible, so protect file writes
+        */
+       flockfile(fp);
+
+       /*
+        * get code index inside lock to avoid race condition
+        */
+       rec.code_index = code_generation++;
+
+       ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+       fwrite_unlocked(sym, sym_len, 1, fp);
+
+       if (padding_count)
+               fwrite_unlocked(pad_bytes, padding_count, 1, fp);
+
+       if (code)
+               fwrite_unlocked(code, size, 1, fp);
+
+       funlockfile(fp);
+
+       ret = 0;
+
+       return ret;
+}
+
+int
+jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
+                      jvmti_line_info_t *li, int nr_lines)
+{
+       struct jr_code_debug_info rec;
+       size_t sret, len, size, flen;
+       size_t padding_count;
+       uint64_t addr;
+       const char *fn = file;
+       FILE *fp = agent;
+       int i;
+
+       /*
+        * no entry to write
+        */
+       if (!nr_lines)
+               return 0;
+
+       if (!fp) {
+               warnx("jvmti: invalid fd in write_debug_info");
+               return -1;
+       }
+
+       flen = strlen(file) + 1;
+
+       rec.p.id        = JIT_CODE_DEBUG_INFO;
+       size            = sizeof(rec);
+       rec.p.timestamp = perf_get_timestamp();
+       rec.code_addr   = (uint64_t)(uintptr_t)code;
+       rec.nr_entry    = nr_lines;
+
+       /*
+        * on disk source line info layout:
+        * uint64_t : addr
+        * int      : line number
+        * int      : column discriminator
+        * file[]   : source file name
+        * padding  : pad to multiple of 8 bytes
+        */
+       size += nr_lines * sizeof(struct debug_entry);
+       size += flen * nr_lines;
+       /*
+        * pad to 8 bytes
+        */
+       padding_count = PADDING_8ALIGNED(size);
+
+       rec.p.total_size = size + padding_count;
+
+       /*
+        * If the JVM is multi-threaded, multiple concurrent calls to the agent
+        * may be possible, so protect file writes
+        */
+       flockfile(fp);
+
+       sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+       if (sret != 1)
+               goto error;
+
+       for (i = 0; i < nr_lines; i++) {
+
+               addr = (uint64_t)li[i].pc;
+               len  = sizeof(addr);
+               sret = fwrite_unlocked(&addr, len, 1, fp);
+               if (sret != 1)
+                       goto error;
+
+               len  = sizeof(li[0].line_number);
+               sret = fwrite_unlocked(&li[i].line_number, len, 1, fp);
+               if (sret != 1)
+                       goto error;
+
+               len  = sizeof(li[0].discrim);
+               sret = fwrite_unlocked(&li[i].discrim, len, 1, fp);
+               if (sret != 1)
+                       goto error;
+
+               sret = fwrite_unlocked(fn, flen, 1, fp);
+               if (sret != 1)
+                       goto error;
+       }
+       if (padding_count) {
+               sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp);
+               if (sret != 1)
+                       goto error;
+       }
+
+       funlockfile(fp);
+       return 0;
+error:
+       funlockfile(fp);
+       return -1;
+}
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
new file mode 100644 (file)
index 0000000..bedf5d0
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef __JVMTI_AGENT_H__
+#define __JVMTI_AGENT_H__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <jvmti.h>
+
+#define __unused __attribute__((unused))
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef struct {
+       unsigned long   pc;
+       int             line_number;
+       int             discrim; /* discriminator -- 0 for now */
+} jvmti_line_info_t;
+
+void *jvmti_open(void);
+int   jvmti_close(void *agent);
+int   jvmti_write_code(void *agent, char const *symbol_name,
+                      uint64_t vma, void const *code,
+                      const unsigned int code_size);
+
+int   jvmti_write_debug_info(void *agent,
+                            uint64_t code,
+                            const char *file,
+                            jvmti_line_info_t *li,
+                            int nr_lines);
+
+#if defined(__cplusplus)
+}
+
+#endif
+#endif /* __JVMTI_AGENT_H__ */
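
The declarations above form a small, self-contained writer API. A minimal usage sketch follows; it is hypothetical (the symbol name, load address and code byte are placeholders, not real JIT output) and assumes the JDK's jvmti.h is on the include path, since jvmti_agent.h pulls it in:

    #include <stdint.h>
    #include <stdio.h>
    #include "jvmti_agent.h"

    int main(void)
    {
            /* placeholder "machine code": a single x86 ret instruction */
            static const unsigned char fake_code[] = { 0xc3 };
            void *agent = jvmti_open();

            if (!agent)
                    return 1;

            /* emit one code-load record: symbol, load address, code bytes */
            if (jvmti_write_code(agent, "Lexample/Demo;run()V",
                                 0x7f0000001000ULL, fake_code,
                                 sizeof(fake_code)))
                    fprintf(stderr, "write_code failed\n");

            return jvmti_close(agent);
    }
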
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
new file mode 100644 (file)
index 0000000..ac12e4b
--- /dev/null
@@ -0,0 +1,304 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <err.h>
+#include <jvmti.h>
+#include <jvmticmlr.h>
+#include <limits.h>
+
+#include "jvmti_agent.h"
+
+static int has_line_numbers;
+void *jvmti_agent;
+
+static jvmtiError
+do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+                   jvmti_line_info_t *tab, jint *nr)
+{
+       jint i, lines = 0;
+       jint nr_lines = 0;
+       jvmtiLineNumberEntry *loc_tab = NULL;
+       jvmtiError ret;
+
+       ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
+       if (ret != JVMTI_ERROR_NONE)
+               return ret;
+
+       for (i = 0; i < nr_lines; i++) {
+               if (loc_tab[i].start_location < bci) {
+                       tab[lines].pc = (unsigned long)pc;
+                       tab[lines].line_number = loc_tab[i].line_number;
+                       tab[lines].discrim = 0; /* not yet used */
+                       lines++;
+               } else {
+                       break;
+               }
+       }
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab);
+       *nr = lines;
+       return JVMTI_ERROR_NONE;
+}
+
+static jvmtiError
+get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines)
+{
+       const jvmtiCompiledMethodLoadRecordHeader *hdr;
+       jvmtiCompiledMethodLoadInlineRecord *rec;
+       jvmtiLineNumberEntry *lne = NULL;
+       PCStackInfo *c;
+       jint nr, ret;
+       int nr_total = 0;
+       int i, lines_total = 0;
+
+       if (!(tab && nr_lines))
+               return JVMTI_ERROR_NULL_POINTER;
+
+       /*
+        * Phase 1 -- get the number of lines necessary
+        */
+       for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+               if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+                       rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+                       for (i = 0; i < rec->numpcs; i++) {
+                               c = rec->pcinfo + i;
+                               nr = 0;
+                               /*
+                                * unfortunately, we need the table just to get the number of lines!
+                                */
+                               ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
+                               if (ret == JVMTI_ERROR_NONE) {
+                                       /* free what was allocated for nothing */
+                                       (*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
+                                       nr_total += (int)nr;
+                               }
+                       }
+               }
+       }
+
+       if (nr_total == 0)
+               return JVMTI_ERROR_NOT_FOUND;
+
+       /*
+        * Phase 2 -- allocate big enough line table
+        */
+       *tab = malloc(nr_total * sizeof(**tab));
+       if (!*tab)
+               return JVMTI_ERROR_OUT_OF_MEMORY;
+
+       for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+               if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+                       rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+                       for (i = 0; i < rec->numpcs; i++) {
+                               c = rec->pcinfo + i;
+                               nr = 0;
+                               ret = do_get_line_numbers(jvmti, c->pc,
+                                                         c->methods[0],
+                                                         c->bcis[0],
+                                                         *tab + lines_total,
+                                                         &nr);
+                               if (ret == JVMTI_ERROR_NONE)
+                                       lines_total += nr;
+                       }
+               }
+       }
+       *nr_lines = lines_total;
+       return JVMTI_ERROR_NONE;
+}
+
+static void JNICALL
+compiled_method_load_cb(jvmtiEnv *jvmti,
+                       jmethodID method,
+                       jint code_size,
+                       void const *code_addr,
+                       jint map_length,
+                       jvmtiAddrLocationMap const *map,
+                       const void *compile_info)
+{
+       jvmti_line_info_t *line_tab = NULL;
+       jclass decl_class;
+       char *class_sign = NULL;
+       char *func_name = NULL;
+       char *func_sign = NULL;
+       char *file_name = NULL;
+       char fn[PATH_MAX];
+       uint64_t addr = (uint64_t)(uintptr_t)code_addr;
+       jvmtiError ret;
+       int nr_lines = 0; /* in line_tab[] */
+       size_t len;
+
+       ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
+                                               &decl_class);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: cannot get declaring class");
+               return;
+       }
+
+       if (has_line_numbers && map && map_length) {
+               ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
+               if (ret != JVMTI_ERROR_NONE) {
+                       warnx("jvmti: cannot get line table for method");
+                       nr_lines = 0;
+               }
+       }
+
+       ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: cannot get source filename ret=%d", ret);
+               goto error;
+       }
+
+       ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
+                                         &class_sign, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: getclasssignature failed");
+               goto error;
+       }
+
+       ret = (*jvmti)->GetMethodName(jvmti, method, &func_name,
+                                     &func_sign, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: failed getmethodname");
+               goto error;
+       }
+
+       /*
+        * Assume the source path mirrors the class/package hierarchy, as is
+        * common for Java programs: e.g. class signature "Ljava/util/List;"
+        * plus source file "List.java" yields "java/util/List.java".
+        */
+       if (*class_sign == 'L') {
+               int j, i = 0;
+               char *p = strrchr(class_sign, '/');
+               if (p) {
+                       /* drop the 'L' prefix and copy up to the final '/' */
+                       for (i = 0; i < (p - class_sign); i++)
+                               fn[i] = class_sign[i+1];
+               }
+               /*
+                * append file name, we use loops and not string ops to avoid modifying
+                * class_sign which is used later for the symbol name
+                */
+               for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
+                       fn[i] = file_name[j];
+               fn[i] = '\0';
+       } else {
+               /* fallback case */
+               strcpy(fn, file_name);
+       }
+       /*
+        * write source line info record if we have it
+        */
+       if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines))
+               warnx("jvmti: write_debug_info() failed");
+
+       len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
+       {
+               char str[len];
+               snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign);
+
+               if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size))
+                       warnx("jvmti: write_code() failed");
+       }
+error:
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+       free(line_tab);
+}
+
+static void JNICALL
+code_generated_cb(jvmtiEnv *jvmti,
+                 char const *name,
+                 void const *code_addr,
+                 jint code_size)
+{
+       uint64_t addr = (uint64_t)(unsigned long)code_addr;
+       int ret;
+
+       ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size);
+       if (ret)
+               warnx("jvmti: write_code() failed for code_generated");
+}
+
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
+{
+       jvmtiEventCallbacks cb;
+       jvmtiCapabilities caps1;
+       jvmtiJlocationFormat format;
+       jvmtiEnv *jvmti = NULL;
+       jint ret;
+
+       jvmti_agent = jvmti_open();
+       if (!jvmti_agent) {
+               warnx("jvmti: open_agent failed");
+               return -1;
+       }
+
+       /*
+        * Request a JVMTI interface version 1 environment
+        */
+       ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1);
+       if (ret != JNI_OK) {
+               warnx("jvmti: jvmti version 1 not supported");
+               return -1;
+       }
+
+       /*
+        * acquire method_load capability, we require it
+        * request line numbers (optional)
+        */
+       memset(&caps1, 0, sizeof(caps1));
+       caps1.can_generate_compiled_method_load_events = 1;
+
+       ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: acquire compiled_method capability failed");
+               return -1;
+       }
+       ret = (*jvmti)->GetJLocationFormat(jvmti, &format);
+       if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) {
+               memset(&caps1, 0, sizeof(caps1));
+               caps1.can_get_line_numbers = 1;
+               caps1.can_get_source_file_name = 1;
+               ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+               if (ret == JVMTI_ERROR_NONE)
+                       has_line_numbers = 1;
+       }
+
+       memset(&cb, 0, sizeof(cb));
+
+       cb.CompiledMethodLoad   = compiled_method_load_cb;
+       cb.DynamicCodeGenerated = code_generated_cb;
+
+       ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb));
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: cannot set event callbacks");
+               return -1;
+       }
+
+       ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+                       JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: setnotification failed for method_load");
+               return -1;
+       }
+
+       ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+                       JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: setnotification failed on code_generated");
+               return -1;
+       }
+       return 0;
+}
+
+JNIEXPORT void JNICALL
+Agent_OnUnload(JavaVM *jvm __unused)
+{
+       int ret;
+
+       ret = jvmti_close(jvmti_agent);
+       if (ret)
+               errx(1, "Error: op_close_agent()");
+}
index a929618b8eb616f90c9bb8e26bad5416a0fdbc19..aaee0a7827477810c5c0d2d82753545592d7f22f 100644 (file)
@@ -454,11 +454,12 @@ static void handle_internal_command(int argc, const char **argv)
 
 static void execv_dashed_external(const char **argv)
 {
-       struct strbuf cmd = STRBUF_INIT;
+       char *cmd;
        const char *tmp;
        int status;
 
-       strbuf_addf(&cmd, "perf-%s", argv[0]);
+       if (asprintf(&cmd, "perf-%s", argv[0]) < 0)
+               goto do_die;
 
        /*
         * argv[0] must be the perf command, but the argv array
@@ -467,7 +468,7 @@ static void execv_dashed_external(const char **argv)
         * restore it on error.
         */
        tmp = argv[0];
-       argv[0] = cmd.buf;
+       argv[0] = cmd;
 
        /*
         * if we fail because the command is not found, it is
@@ -475,15 +476,16 @@ static void execv_dashed_external(const char **argv)
         */
        status = run_command_v_opt(argv, 0);
        if (status != -ERR_RUN_COMMAND_EXEC) {
-               if (IS_RUN_COMMAND_ERR(status))
+               if (IS_RUN_COMMAND_ERR(status)) {
+do_die:
                        die("unable to run '%s'", argv[0]);
+               }
                exit(-status);
        }
        errno = ENOENT; /* as if we called execvp */
 
        argv[0] = tmp;
-
-       strbuf_release(&cmd);
+       zfree(&cmd);
 }
 
 static int run_argv(int *argcp, const char ***argv)
@@ -546,6 +548,8 @@ int main(int argc, const char **argv)
 
        srandom(time(NULL));
 
+       perf_config(perf_default_config, NULL);
+
        /* get debugfs/tracefs mount point from /proc/mounts */
        tracing_path_mount();
 
@@ -613,6 +617,8 @@ int main(int argc, const char **argv)
         */
        pthread__block_sigwinch();
 
+       perf_debug_setup();
+
        while (1) {
                static int done_help;
                int was_alias = run_argv(&argc, &argv);
index 90129accffbe824e37d9831ba3b323011295f78a..5381a01c0610c0e61f079140ed5cdc2df3f87b0d 100644 (file)
@@ -58,6 +58,8 @@ struct record_opts {
        bool         full_auxtrace;
        bool         auxtrace_snapshot_mode;
        bool         record_switch_events;
+       bool         all_kernel;
+       bool         all_user;
        unsigned int freq;
        unsigned int mmap_pages;
        unsigned int auxtrace_mmap_pages;
index 15c8400240fd9029ae34fca077304337d9c75ca6..1d95009592eb3a8a8d053e1340a3e42dd412a68c 100644 (file)
@@ -71,7 +71,10 @@ try:
 except:
        if not audit_package_warned:
                audit_package_warned = True
-               print "Install the audit-libs-python package to get syscall names"
+               print "Install the audit-libs-python package to get syscall names.\n" \
+                    "For example:\n  # apt-get install python-audit (Ubuntu)" \
+                    "\n  # yum install audit-libs-python (Fedora)" \
+                    "\n  etc.\n"
 
 def syscall_name(id):
        try:
index bf016c439fbd10a3cada60253c98d60ce441aa98..8cc30e731c739495f3eb61a0da9a76246ad35600 100644 (file)
@@ -1,3 +1,4 @@
 llvm-src-base.c
 llvm-src-kbuild.c
 llvm-src-prologue.c
+llvm-src-relocation.c
index 614899b88b377e07f9615d618ba7045f66051bea..1ba628ed049adbafc27c7b8900ecb838165a2aa7 100644 (file)
@@ -31,7 +31,7 @@ perf-y += sample-parsing.o
 perf-y += parse-no-sample-id-all.o
 perf-y += kmod-path.o
 perf-y += thread-map.o
-perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
 perf-y += bpf.o
 perf-y += topology.o
 perf-y += cpumap.o
@@ -59,6 +59,13 @@ $(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build
        $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
        $(Q)echo ';' >> $@
 
+$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build
+       $(call rule_mkdir)
+       $(Q)echo '#include <tests/llvm.h>' > $@
+       $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@
+       $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+       $(Q)echo ';' >> $@
+
 ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
index fb80c9eb6a95b67947b23adaeca69b5e57bac704..e7664fe3bd33739fd92be2579c30102e481e8f03 100644 (file)
 
 static int fd1;
 static int fd2;
+static int fd3;
 static int overflows;
+static int overflows_2;
+
+volatile long the_var;
+
+
+/*
+ * Use asm so that the watchpoint and the breakpoint can be
+ * triggered by a single instruction.
+ */
+#if defined (__x86_64__)
+extern void __test_function(volatile long *ptr);
+asm (
+       ".globl __test_function\n"
+       "__test_function:\n"
+       "incq (%rdi)\n"
+       "ret\n");
+#elif defined (__aarch64__)
+extern void __test_function(volatile long *ptr);
+asm (
+       ".globl __test_function\n"
+       "__test_function:\n"
+       "str x30, [x0]\n"
+       "ret\n");
+
+#else
+static void __test_function(volatile long *ptr)
+{
+       *ptr = 0x1234;
+}
+#endif
 
 __attribute__ ((noinline))
 static int test_function(void)
 {
+       __test_function(&the_var);
+       the_var++;
        return time(NULL);
 }
 
+static void sig_handler_2(int signum __maybe_unused,
+                         siginfo_t *oh __maybe_unused,
+                         void *uc __maybe_unused)
+{
+       overflows_2++;
+       if (overflows_2 > 10) {
+               ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+               ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+               ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+       }
+}
+
 static void sig_handler(int signum __maybe_unused,
                        siginfo_t *oh __maybe_unused,
                        void *uc __maybe_unused)
@@ -54,10 +99,11 @@ static void sig_handler(int signum __maybe_unused,
                 */
                ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
                ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+               ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
        }
 }
 
-static int bp_event(void *fn, int setup_signal)
+static int __event(bool is_x, void *addr, int sig)
 {
        struct perf_event_attr pe;
        int fd;
@@ -67,8 +113,8 @@ static int bp_event(void *fn, int setup_signal)
        pe.size = sizeof(struct perf_event_attr);
 
        pe.config = 0;
-       pe.bp_type = HW_BREAKPOINT_X;
-       pe.bp_addr = (unsigned long) fn;
+       pe.bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W;
+       pe.bp_addr = (unsigned long) addr;
        pe.bp_len = sizeof(long);
 
        pe.sample_period = 1;
@@ -86,17 +132,25 @@ static int bp_event(void *fn, int setup_signal)
                return TEST_FAIL;
        }
 
-       if (setup_signal) {
-               fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
-               fcntl(fd, F_SETSIG, SIGIO);
-               fcntl(fd, F_SETOWN, getpid());
-       }
+       fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
+       fcntl(fd, F_SETSIG, sig);
+       fcntl(fd, F_SETOWN, getpid());
 
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 
        return fd;
 }
 
+static int bp_event(void *addr, int sig)
+{
+       return __event(true, addr, sig);
+}
+
+static int wp_event(void *addr, int sig)
+{
+       return __event(false, addr, sig);
+}
+
 static long long bp_count(int fd)
 {
        long long count;
@@ -114,7 +168,7 @@ static long long bp_count(int fd)
 int test__bp_signal(int subtest __maybe_unused)
 {
        struct sigaction sa;
-       long long count1, count2;
+       long long count1, count2, count3;
 
        /* setup SIGIO signal handler */
        memset(&sa, 0, sizeof(struct sigaction));
@@ -126,21 +180,52 @@ int test__bp_signal(int subtest __maybe_unused)
                return TEST_FAIL;
        }
 
+       sa.sa_sigaction = (void *) sig_handler_2;
+       if (sigaction(SIGUSR1, &sa, NULL) < 0) {
+               pr_debug("failed setting up signal handler 2\n");
+               return TEST_FAIL;
+       }
+
        /*
         * We create following events:
         *
-        * fd1 - breakpoint event on test_function with SIGIO
+        * fd1 - breakpoint event on __test_function with SIGIO
         *       signal configured. We should get signal
         *       notification each time the breakpoint is hit
         *
-        * fd2 - breakpoint event on sig_handler without SIGIO
+        * fd2 - breakpoint event on sig_handler with SIGUSR1
+        *       configured. We should get SIGUSR1 each time when
+        *       breakpoint is hit
+        *
+        * fd3 - watchpoint event on the_var (written from within
+        *       __test_function) with SIGIO configured.
         *
         * Following processing should happen:
-        *   - execute test_function
-        *   - fd1 event breakpoint hit -> count1 == 1
-        *   - SIGIO is delivered       -> overflows == 1
-        *   - fd2 event breakpoint hit -> count2 == 1
+        *   Exec:               Action:                       Result:
+        *   incq (%rdi)       - fd1 event breakpoint hit   -> count1 == 1
+        *                     - SIGIO is delivered
+        *   sig_handler       - fd2 event breakpoint hit   -> count2 == 1
+        *                     - SIGUSR1 is delivered
+        *   sig_handler_2                                  -> overflows_2 == 1  (nested signal)
+        *   sys_rt_sigreturn  - return from sig_handler_2
+        *   overflows++                                    -> overflows = 1
+        *   sys_rt_sigreturn  - return from sig_handler
+        *   incq (%rdi)       - fd3 event watchpoint hit   -> count3 == 1       (wp and bp in one insn)
+        *                     - SIGIO is delivered
+        *   sig_handler       - fd2 event breakpoint hit   -> count2 == 2
+        *                     - SIGUSR1 is delivered
+        *   sig_handler_2                                  -> overflows_2 == 2  (nested signal)
+        *   sys_rt_sigreturn  - return from sig_handler_2
+        *   overflows++                                    -> overflows = 2
+        *   sys_rt_sigreturn  - return from sig_handler
+        *   the_var++         - fd3 event watchpoint hit   -> count3 == 2       (standalone watchpoint)
+        *                     - SIGIO is delivered
+        *   sig_handler       - fd2 event breakpoint hit   -> count2 == 3
+        *                     - SIGUSR1 is delivered
+        *   sig_handler_2                                  -> overflows_2 == 3  (nested signal)
+        *   sys_rt_sigreturn  - return from sig_handler_2
+        *   overflows++                                    -> overflows == 3
+        *   sys_rt_sigreturn  - return from sig_handler
         *
         * The test case checks the following error conditions:
         * - we get stuck in signal handler because of debug
@@ -152,11 +237,13 @@ int test__bp_signal(int subtest __maybe_unused)
         *
         */
 
-       fd1 = bp_event(test_function, 1);
-       fd2 = bp_event(sig_handler, 0);
+       fd1 = bp_event(__test_function, SIGIO);
+       fd2 = bp_event(sig_handler, SIGUSR1);
+       fd3 = wp_event((void *)&the_var, SIGIO);
 
        ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
        ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
+       ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
 
        /*
         * Kick off the test by triggering 'fd1'
@@ -166,15 +253,18 @@ int test__bp_signal(int subtest __maybe_unused)
 
        ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
        ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+       ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
 
        count1 = bp_count(fd1);
        count2 = bp_count(fd2);
+       count3 = bp_count(fd3);
 
        close(fd1);
        close(fd2);
+       close(fd3);
 
-       pr_debug("count1 %lld, count2 %lld, overflow %d\n",
-                count1, count2, overflows);
+       pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n",
+                count1, count2, count3, overflows, overflows_2);
 
        if (count1 != 1) {
                if (count1 == 11)
@@ -183,12 +273,18 @@ int test__bp_signal(int subtest __maybe_unused)
                        pr_debug("failed: wrong count for bp1%lld\n", count1);
        }
 
-       if (overflows != 1)
+       if (overflows != 3)
                pr_debug("failed: wrong overflow hit\n");
 
-       if (count2 != 1)
+       if (overflows_2 != 3)
+               pr_debug("failed: wrong overflow_2 hit\n");
+
+       if (count2 != 3)
                pr_debug("failed: wrong count for bp2\n");
 
-       return count1 == 1 && overflows == 1 && count2 == 1 ?
+       if (count3 != 2)
+               pr_debug("failed: wrong count for bp3\n");
+
+       return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
                TEST_OK : TEST_FAIL;
 }
diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c
new file mode 100644 (file)
index 0000000..93af774
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * bpf-script-test-relocation.c
+ * Test BPF loader checking relocation
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+       (void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
+       (void *) BPF_FUNC_map_update_elem;
+
+struct bpf_map_def {
+       unsigned int type;
+       unsigned int key_size;
+       unsigned int value_size;
+       unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+struct bpf_map_def SEC("maps") my_table = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1,
+};
+
+int this_is_a_global_val;
+
+SEC("func=sys_write")
+int bpf_func__sys_write(void *ctx)
+{
+       int key = 0;
+       int value = 0;
+
+       /*
+        * Incorrect relocation: the loader should not allow this
+        * program to be loaded into the kernel.
+        */
+       bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0);
+       return 0;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
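
For contrast with the deliberately broken call above, a well-formed update in this same test program would pass the map declared in the "maps" section, so the loader has a valid symbol to relocate against (a sketch reusing the definitions above):

    /* correct form: the first argument is a real map, so the BPF
     * loader can resolve the relocation against 'my_table' */
    bpf_map_update_elem(&my_table, &key, &value, BPF_ANY);
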
index 33689a0cf821e5b9608e54b981e3280ee580890c..199501c71e272491850065910aae5003603ab10e 100644 (file)
@@ -1,7 +1,11 @@
 #include <stdio.h>
 #include <sys/epoll.h>
+#include <util/util.h>
 #include <util/bpf-loader.h>
 #include <util/evlist.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <bpf/bpf.h>
 #include "tests.h"
 #include "llvm.h"
 #include "debug.h"
@@ -71,6 +75,15 @@ static struct {
                (NR_ITERS + 1) / 4,
        },
 #endif
+       {
+               LLVM_TESTCASE_BPF_RELOCATION,
+               "Test BPF relocation checker",
+               "[bpf_relocation_test]",
+               "fix 'perf test LLVM' first",
+               "libbpf error when dealing with relocation",
+               NULL,
+               0,
+       },
 };
 
 static int do_test(struct bpf_object *obj, int (*func)(void),
@@ -99,7 +112,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
        parse_evlist.error = &parse_error;
        INIT_LIST_HEAD(&parse_evlist.list);
 
-       err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
+       err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj, NULL);
        if (err || list_empty(&parse_evlist.list)) {
                pr_debug("Failed to add events selected by BPF\n");
                return TEST_FAIL;
@@ -190,7 +203,7 @@ static int __test__bpf(int idx)
 
        ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
                                       bpf_testcase_table[idx].prog_id,
-                                      true);
+                                      true, NULL);
        if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
                pr_debug("Unable to get BPF object, %s\n",
                         bpf_testcase_table[idx].msg_compile_fail);
@@ -202,14 +215,21 @@ static int __test__bpf(int idx)
 
        obj = prepare_bpf(obj_buf, obj_buf_sz,
                          bpf_testcase_table[idx].name);
-       if (!obj) {
+       if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) {
+               if (!obj)
+                       pr_debug("Failed to load BPF object: %s\n",
+                                bpf_testcase_table[idx].msg_load_fail);
+               else
+                       pr_debug("Loaded BPF object unexpectedly: %s\n",
+                                bpf_testcase_table[idx].msg_load_fail);
                ret = TEST_FAIL;
                goto out;
        }
 
-       ret = do_test(obj,
-                     bpf_testcase_table[idx].target_func,
-                     bpf_testcase_table[idx].expect_result);
+       if (obj)
+               ret = do_test(obj,
+                             bpf_testcase_table[idx].target_func,
+                             bpf_testcase_table[idx].expect_result);
 out:
        bpf__clear();
        return ret;
@@ -227,6 +247,36 @@ const char *test__bpf_subtest_get_desc(int i)
        return bpf_testcase_table[i].desc;
 }
 
+static int check_env(void)
+{
+       int err;
+       unsigned int kver_int;
+       char license[] = "GPL";
+
+       struct bpf_insn insns[] = {
+               BPF_MOV64_IMM(BPF_REG_0, 1),
+               BPF_EXIT_INSN(),
+       };
+
+       err = fetch_kernel_version(&kver_int, NULL, 0);
+       if (err) {
+               pr_debug("Unable to get kernel version\n");
+               return err;
+       }
+
+       err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
+                              sizeof(insns) / sizeof(insns[0]),
+                              license, kver_int, NULL, 0);
+       if (err < 0) {
+               pr_err("Missing basic BPF support, skip this test: %s\n",
+                      strerror(errno));
+               return err;
+       }
+       close(err);
+
+       return 0;
+}
+
 int test__bpf(int i)
 {
        int err;
@@ -239,6 +289,9 @@ int test__bpf(int i)
                return TEST_SKIP;
        }
 
+       if (check_env())
+               return TEST_SKIP;
+
        err = __test__bpf(i);
        return err;
 }
index 313a48c6b2bc8e111e113e79c4a72fdc2d720ac5..afc9ad0a0515c5db77745d0d0c6964d950fa3bfc 100644 (file)
@@ -439,7 +439,7 @@ static int do_test_code_reading(bool try_kcore)
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
-               .freq                = 4000,
+               .freq                = 500,
                .target              = {
                        .uses_mmap   = true,
                },
@@ -559,7 +559,13 @@ static int do_test_code_reading(bool try_kcore)
                                evlist = NULL;
                                continue;
                        }
-                       pr_debug("perf_evlist__open failed\n");
+
+                       if (verbose) {
+                               char errbuf[512];
+                               perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+                               pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
+                       }
+
                        goto out_put;
                }
                break;
index 5e6a86e50fb97aae648c16ce627157e8377ebb02..ecf136c385d5ff2f21111e813a9ce1e30c03365a 100644 (file)
@@ -191,7 +191,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec
         * function since TEST_ASSERT_VAL() returns in case of failure.
         */
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(hists_to_evsel(hists), NULL);
 
        if (verbose > 2) {
                pr_info("use callchain: %d, cumulate callchain: %d\n",
index 351a42463444a3df9f80daea3848499b8d39659a..34b945a55d4d2864cc561f36275191f57f84210a 100644 (file)
@@ -145,7 +145,7 @@ int test__hists_filter(int subtest __maybe_unused)
                struct hists *hists = evsel__hists(evsel);
 
                hists__collapse_resort(hists, NULL);
-               hists__output_resort(hists, NULL);
+               perf_evsel__output_resort(evsel, NULL);
 
                if (verbose > 2) {
                        pr_info("Normal histogram\n");
index b231265148d89a28e39387d423711c643351eb70..23cce67c7e48902b86c00cc6e56df22ef16386e4 100644 (file)
@@ -156,7 +156,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -256,7 +256,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -310,7 +310,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -388,7 +388,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -491,7 +491,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
index 06f45c1d42561df030e35d55078d70d910e79998..cff564fb4b66761f7f7fdcd5ee10727ad3b08726 100644 (file)
@@ -6,12 +6,6 @@
 #include "tests.h"
 #include "debug.h"
 
-static int perf_config_cb(const char *var, const char *val,
-                         void *arg __maybe_unused)
-{
-       return perf_default_config(var, val, arg);
-}
-
 #ifdef HAVE_LIBBPF_SUPPORT
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
 {
@@ -35,6 +29,7 @@ static int test__bpf_parsing(void *obj_buf __maybe_unused,
 static struct {
        const char *source;
        const char *desc;
+       bool should_load_fail;
 } bpf_source_table[__LLVM_TESTCASE_MAX] = {
        [LLVM_TESTCASE_BASE] = {
                .source = test_llvm__bpf_base_prog,
@@ -48,14 +43,19 @@ static struct {
                .source = test_llvm__bpf_test_prologue_prog,
                .desc = "Compile source for BPF prologue generation test",
        },
+       [LLVM_TESTCASE_BPF_RELOCATION] = {
+               .source = test_llvm__bpf_test_relocation,
+               .desc = "Compile source for BPF relocation test",
+               .should_load_fail = true,
+       },
 };
 
-
 int
 test_llvm__fetch_bpf_obj(void **p_obj_buf,
                         size_t *p_obj_buf_sz,
                         enum test_llvm__testcase idx,
-                        bool force)
+                        bool force,
+                        bool *should_load_fail)
 {
        const char *source;
        const char *desc;
@@ -68,8 +68,8 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
 
        source = bpf_source_table[idx].source;
        desc = bpf_source_table[idx].desc;
-
-       perf_config(perf_config_cb, NULL);
+       if (should_load_fail)
+               *should_load_fail = bpf_source_table[idx].should_load_fail;
 
        /*
         * Skip this test if user's .perfconfig doesn't set [llvm] section
@@ -136,14 +136,15 @@ int test__llvm(int subtest)
        int ret;
        void *obj_buf = NULL;
        size_t obj_buf_sz = 0;
+       bool should_load_fail = false;
 
        if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
                return TEST_FAIL;
 
        ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
-                                      subtest, false);
+                                      subtest, false, &should_load_fail);
 
-       if (ret == TEST_OK) {
+       if (ret == TEST_OK && !should_load_fail) {
                ret = test__bpf_parsing(obj_buf, obj_buf_sz);
                if (ret != TEST_OK) {
                        pr_debug("Failed to parse test case '%s'\n",
index 5150b4d6ef50afe357f5f86300f4d39d28bec658..0eaa604be99defec6dcf3b09ee9a75eb348096fb 100644 (file)
@@ -7,14 +7,17 @@
 extern const char test_llvm__bpf_base_prog[];
 extern const char test_llvm__bpf_test_kbuild_prog[];
 extern const char test_llvm__bpf_test_prologue_prog[];
+extern const char test_llvm__bpf_test_relocation[];
 
 enum test_llvm__testcase {
        LLVM_TESTCASE_BASE,
        LLVM_TESTCASE_KBUILD,
        LLVM_TESTCASE_BPF_PROLOGUE,
+       LLVM_TESTCASE_BPF_RELOCATION,
        __LLVM_TESTCASE_MAX,
 };
 
 int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz,
-                            enum test_llvm__testcase index, bool force);
+                            enum test_llvm__testcase index, bool force,
+                            bool *should_load_fail);
 #endif
index f918015512af96d1173891dac2e8f460ff65ca85..cac15d93aea656f96ad449cba1f9529e42afcbec 100644 (file)
@@ -15,6 +15,7 @@ else
 PERF := .
 PERF_O := $(PERF)
 O_OPT :=
+FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O))
 
 ifneq ($(O),)
   FULL_O := $(shell readlink -f $(O) || echo $(O))
@@ -79,6 +80,7 @@ make_no_libaudit    := NO_LIBAUDIT=1
 make_no_libbionic   := NO_LIBBIONIC=1
 make_no_auxtrace    := NO_AUXTRACE=1
 make_no_libbpf     := NO_LIBBPF=1
+make_no_libcrypto   := NO_LIBCRYPTO=1
 make_tags           := tags
 make_cscope         := cscope
 make_help           := help
@@ -102,6 +104,7 @@ make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
+make_minimal        += NO_LIBCRYPTO=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -110,6 +113,9 @@ run := make_pure
 # disable features detection
 ifeq ($(MK),Makefile)
 run += make_clean_all
+MAKE_F := $(MAKE)
+else
+MAKE_F := $(MAKE) -f $(MK)
 endif
 run += make_python_perf_so
 run += make_debug
@@ -260,6 +266,8 @@ run := $(shell shuf -e $(run))
 run_O := $(shell shuf -e $(run_O))
 endif
 
+max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L)
+
 ifdef DEBUG
 d := $(info run   $(run))
 d := $(info run_O $(run_O))
@@ -267,13 +275,13 @@ endif
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
+clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null)
 
 $(run):
        $(call clean)
        @TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
-       echo "- $@: $$cmd" && echo $$cmd > $@ && \
+       cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \
+       printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1; \
        echo "  test: $(call test,$@)" >> $@ 2>&1; \
        $(call test,$@) && \
@@ -283,8 +291,8 @@ $(run_O):
        $(call clean)
        @TMP_O=$$(mktemp -d); \
        TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
-       echo "- $@: $$cmd" && echo $$cmd > $@ && \
+       cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \
+       printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1 && \
        echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
        $(call test_O,$@) && \
@@ -313,11 +321,43 @@ make_kernelsrc_tools:
        (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
        test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
+FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP
+FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC
+
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
        @echo OK
+       @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
 
 out: $(run_O)
        @echo OK
+       @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
+
+ifeq ($(REUSE_FEATURES_DUMP),1)
+$(FEATURES_DUMP_FILE):
+       $(call clean)
+       @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \
+       echo "- $@: $$cmd" && echo $$cmd && \
+       ( eval $$cmd ) > /dev/null 2>&1
+
+$(FEATURES_DUMP_FILE_STATIC):
+       $(call clean)
+       @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \
+       echo "- $@: $$cmd" && echo $$cmd && \
+       ( eval $$cmd ) > /dev/null 2>&1
+
+# Add feature dump dependency for run/run_O targets
+$(foreach t,$(run) $(run_O),$(eval \
+       $(t): $(if $(findstring make_static,$(t)),\
+               $(FEATURES_DUMP_FILE_STATIC),\
+               $(FEATURES_DUMP_FILE))))
+
+# Append 'FEATURES_DUMP=' option to all test cases. For example:
+# make_no_libbpf: NO_LIBBPF=1  --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP
+# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC
+$(foreach t,$(run),$(if $(findstring make_static,$(t)),\
+                       $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\
+                       $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE))))
+endif
 
 .PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools
 endif # ifndef MK
index abe8849d1d7030bda2ae65f770424e3c9ad84330..7865f68dc0d82bea12c960c61792e9fd28857305 100644 (file)
@@ -1271,6 +1271,38 @@ static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist)
        return 0;
 }
 
+static int test__checkevent_config_symbol(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+       TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0);
+       return 0;
+}
+
+static int test__checkevent_config_raw(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+       TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0);
+       return 0;
+}
+
+static int test__checkevent_config_num(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+       TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0);
+       return 0;
+}
+
+static int test__checkevent_config_cache(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+       TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0);
+       return 0;
+}
+
 static int count_tracepoints(void)
 {
        struct dirent *events_ent;
@@ -1579,6 +1611,26 @@ static struct evlist_test test__events[] = {
                .check = test__checkevent_precise_max_modifier,
                .id    = 47,
        },
+       {
+               .name  = "instructions/name=insn/",
+               .check = test__checkevent_config_symbol,
+               .id    = 48,
+       },
+       {
+               .name  = "r1234/name=rawpmu/",
+               .check = test__checkevent_config_raw,
+               .id    = 49,
+       },
+       {
+               .name  = "4:0x6530160/name=numpmu/",
+               .check = test__checkevent_config_num,
+               .id    = 50,
+       },
+       {
+               .name  = "L1-dcache-misses/name=cachepmu/",
+               .check = test__checkevent_config_cache,
+               .id    = 51,
+       },
 };
 
 static struct evlist_test test__events_pmu[] = {
@@ -1666,7 +1718,7 @@ static int test_term(struct terms_test *t)
        }
 
        ret = t->check(&terms);
-       parse_events__free_terms(&terms);
+       parse_events_terms__purge(&terms);
 
        return ret;
 }
index f0bfc9e8fd9f617d69c0b3cb36fe6c210ebbd6d5..630b0b409b973f87ba00312e5e4e3d8fdf829f32 100644 (file)
@@ -110,7 +110,6 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
         */
        for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
                struct symbol *pair, *first_pair;
-               bool backwards = true;
 
                sym  = rb_entry(nd, struct symbol, rb_node);
 
@@ -151,27 +150,14 @@ next_pair:
                                continue;
 
                        } else {
-                               struct rb_node *nnd;
-detour:
-                               nnd = backwards ? rb_prev(&pair->rb_node) :
-                                                 rb_next(&pair->rb_node);
-                               if (nnd) {
-                                       struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
-
-                                       if (UM(next->start) == mem_start) {
-                                               pair = next;
+                               pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
+                               if (pair) {
+                                       if (UM(pair->start) == mem_start)
                                                goto next_pair;
-                                       }
-                               }
 
-                               if (backwards) {
-                                       backwards = false;
-                                       pair = first_pair;
-                                       goto detour;
+                                       pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+                                                mem_start, sym->name, pair->name);
                                }
-
-                               pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
-                                        mem_start, sym->name, pair->name);
                        }
                } else
                        pr_debug("%#" PRIx64 ": %s not on kallsyms\n",
index d37202121689a017b0790d552e088d695716c527..af68a9d488bfce964c84e67cf5394e7e13daab29 100644 (file)
@@ -531,8 +531,8 @@ static struct ui_browser_colorset {
                .bg       = "yellow",
        },
        {
-               .colorset = HE_COLORSET_CODE,
-               .name     = "code",
+               .colorset = HE_COLORSET_JUMP_ARROWS,
+               .name     = "jump_arrows",
                .fg       = "blue",
                .bg       = "default",
        },
index 01781de59532ce9c9fd1ff7f8c7fdf97d45fa105..be3b70eb5fca6e402830570c4111f10258702c04 100644 (file)
@@ -7,7 +7,7 @@
 #define HE_COLORSET_MEDIUM     51
 #define HE_COLORSET_NORMAL     52
 #define HE_COLORSET_SELECTED   53
-#define HE_COLORSET_CODE       54
+#define HE_COLORSET_JUMP_ARROWS        54
 #define HE_COLORSET_ADDR       55
 #define HE_COLORSET_ROOT       56
 
index 718bd46d47fa7bc88674a192dd1a8e8ab1fb7ca9..4fc208e82c6fc7b28d99af147d1c75738bc338e6 100644 (file)
@@ -284,7 +284,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
                to = (u64)btarget->idx;
        }
 
-       ui_browser__set_color(browser, HE_COLORSET_CODE);
+       ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
        __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
                                 from, to);
 }
index 08c09ad755d2d2f8dd55be06b258e16fb2c8335f..4b98165559462025ded4db4a41fe8b5e66a9f151 100644 (file)
@@ -32,6 +32,7 @@ struct hist_browser {
        bool                 show_headers;
        float                min_pcnt;
        u64                  nr_non_filtered_entries;
+       u64                  nr_hierarchy_entries;
        u64                  nr_callchain_rows;
 };
 
@@ -58,11 +59,11 @@ static int hist_browser__get_folding(struct hist_browser *browser)
 
        for (nd = rb_first(&hists->entries);
             (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-            nd = rb_next(nd)) {
+            nd = rb_hierarchy_next(nd)) {
                struct hist_entry *he =
                        rb_entry(nd, struct hist_entry, rb_node);
 
-               if (he->unfolded)
+               if (he->leaf && he->unfolded)
                        unfolded_rows += he->nr_rows;
        }
        return unfolded_rows;
@@ -72,7 +73,9 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb)
 {
        u32 nr_entries;
 
-       if (hist_browser__has_filter(hb))
+       if (symbol_conf.report_hierarchy)
+               nr_entries = hb->nr_hierarchy_entries;
+       else if (hist_browser__has_filter(hb))
                nr_entries = hb->nr_non_filtered_entries;
        else
                nr_entries = hb->hists->nr_entries;
@@ -247,6 +250,38 @@ static int callchain__count_rows(struct rb_root *chain)
        return n;
 }
 
+static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he,
+                               bool include_children)
+{
+       int count = 0;
+       struct rb_node *node;
+       struct hist_entry *child;
+
+       if (he->leaf)
+               return callchain__count_rows(&he->sorted_chain);
+
+       if (he->has_no_entry)
+               return 1;
+
+       node = rb_first(&he->hroot_out);
+       while (node) {
+               float percent;
+
+               child = rb_entry(node, struct hist_entry, rb_node);
+               percent = hist_entry__get_percent_limit(child);
+
+               if (!child->filtered && percent >= hb->min_pcnt) {
+                       count++;
+
+                       if (include_children && child->unfolded)
+                               count += hierarchy_count_rows(hb, child, true);
+               }
+
+               node = rb_next(node);
+       }
+       return count;
+}
+
 static bool hist_entry__toggle_fold(struct hist_entry *he)
 {
        if (!he)
@@ -326,11 +361,17 @@ static void callchain__init_have_children(struct rb_root *root)
 
 static void hist_entry__init_have_children(struct hist_entry *he)
 {
-       if (!he->init_have_children) {
+       if (he->init_have_children)
+               return;
+
+       if (he->leaf) {
                he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
                callchain__init_have_children(&he->sorted_chain);
-               he->init_have_children = true;
+       } else {
+               he->has_children = !RB_EMPTY_ROOT(&he->hroot_out);
        }
+
+       he->init_have_children = true;
 }
 
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
@@ -349,17 +390,49 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser)
                has_children = callchain_list__toggle_fold(cl);
 
        if (has_children) {
+               int child_rows = 0;
+
                hist_entry__init_have_children(he);
                browser->b.nr_entries -= he->nr_rows;
-               browser->nr_callchain_rows -= he->nr_rows;
 
-               if (he->unfolded)
-                       he->nr_rows = callchain__count_rows(&he->sorted_chain);
+               if (he->leaf)
+                       browser->nr_callchain_rows -= he->nr_rows;
                else
+                       browser->nr_hierarchy_entries -= he->nr_rows;
+
+               if (symbol_conf.report_hierarchy)
+                       child_rows = hierarchy_count_rows(browser, he, true);
+
+               if (he->unfolded) {
+                       if (he->leaf)
+                               he->nr_rows = callchain__count_rows(&he->sorted_chain);
+                       else
+                               he->nr_rows = hierarchy_count_rows(browser, he, false);
+
+                       /* account grand children */
+                       if (symbol_conf.report_hierarchy)
+                               browser->b.nr_entries += child_rows - he->nr_rows;
+
+                       if (!he->leaf && he->nr_rows == 0) {
+                               he->has_no_entry = true;
+                               he->nr_rows = 1;
+                       }
+               } else {
+                       if (symbol_conf.report_hierarchy)
+                               browser->b.nr_entries -= child_rows - he->nr_rows;
+
+                       if (he->has_no_entry)
+                               he->has_no_entry = false;
+
                        he->nr_rows = 0;
+               }
 
                browser->b.nr_entries += he->nr_rows;
-               browser->nr_callchain_rows += he->nr_rows;
+
+               if (he->leaf)
+                       browser->nr_callchain_rows += he->nr_rows;
+               else
+                       browser->nr_hierarchy_entries += he->nr_rows;
 
                return true;
        }
@@ -422,13 +495,38 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold)
        return n;
 }
 
-static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
+static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
+                                bool unfold __maybe_unused)
+{
+       float percent;
+       struct rb_node *nd;
+       struct hist_entry *child;
+       int n = 0;
+
+       for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) {
+               child = rb_entry(nd, struct hist_entry, rb_node);
+               percent = hist_entry__get_percent_limit(child);
+               if (!child->filtered && percent >= hb->min_pcnt)
+                       n++;
+       }
+
+       return n;
+}
+
+static void hist_entry__set_folding(struct hist_entry *he,
+                                   struct hist_browser *hb, bool unfold)
 {
        hist_entry__init_have_children(he);
        he->unfolded = unfold ? he->has_children : false;
 
        if (he->has_children) {
-               int n = callchain__set_folding(&he->sorted_chain, unfold);
+               int n;
+
+               if (he->leaf)
+                       n = callchain__set_folding(&he->sorted_chain, unfold);
+               else
+                       n = hierarchy_set_folding(hb, he, unfold);
+
                he->nr_rows = unfold ? n : 0;
        } else
                he->nr_rows = 0;
@@ -438,19 +536,38 @@ static void
 __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
        struct rb_node *nd;
-       struct hists *hists = browser->hists;
+       struct hist_entry *he;
+       double percent;
 
-       for (nd = rb_first(&hists->entries);
-            (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-            nd = rb_next(nd)) {
-               struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
-               hist_entry__set_folding(he, unfold);
-               browser->nr_callchain_rows += he->nr_rows;
+       nd = rb_first(&browser->hists->entries);
+       while (nd) {
+               he = rb_entry(nd, struct hist_entry, rb_node);
+
+               /* set folding state even if it's currently folded */
+               nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+               hist_entry__set_folding(he, browser, unfold);
+
+               percent = hist_entry__get_percent_limit(he);
+               if (he->filtered || percent < browser->min_pcnt)
+                       continue;
+
+               if (!he->depth || unfold)
+                       browser->nr_hierarchy_entries++;
+               if (he->leaf)
+                       browser->nr_callchain_rows += he->nr_rows;
+               else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+                       browser->nr_hierarchy_entries++;
+                       he->has_no_entry = true;
+                       he->nr_rows = 1;
+               } else
+                       he->has_no_entry = false;
        }
 }
 
 static void hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
+       browser->nr_hierarchy_entries = 0;
        browser->nr_callchain_rows = 0;
        __hist_browser__set_folding(browser, unfold);
 
@@ -657,9 +774,24 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
        return 1;
 }
 
+static bool check_percent_display(struct rb_node *node, u64 parent_total)
+{
+       struct callchain_node *child;
+
+       if (node == NULL)
+               return false;
+
+       if (rb_next(node))
+               return true;
+
+       child = rb_entry(node, struct callchain_node, rb_node);
+       return callchain_cumul_hits(child) != parent_total;
+}
+
 static int hist_browser__show_callchain_flat(struct hist_browser *browser,
                                             struct rb_root *root,
                                             unsigned short row, u64 total,
+                                            u64 parent_total,
                                             print_callchain_entry_fn print,
                                             struct callchain_print_arg *arg,
                                             check_output_full_fn is_output_full)
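
check_percent_display() suppresses redundant percentages: if a node has more
than one child, percentages are always printed; if there is a single child
whose cumulative hits equal the parent total (say 100 of 100), the resulting
"100.00%" would carry no information and is skipped; if that single child
covers only part of the parent (say 60 of 100), the percentage is still shown.
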
@@ -669,7 +801,7 @@ static int hist_browser__show_callchain_flat(struct hist_browser *browser,
        bool need_percent;
 
        node = rb_first(root);
-       need_percent = node && rb_next(node);
+       need_percent = check_percent_display(node, parent_total);
 
        while (node) {
                struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -763,6 +895,7 @@ static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
 static int hist_browser__show_callchain_folded(struct hist_browser *browser,
                                               struct rb_root *root,
                                               unsigned short row, u64 total,
+                                              u64 parent_total,
                                               print_callchain_entry_fn print,
                                               struct callchain_print_arg *arg,
                                               check_output_full_fn is_output_full)
@@ -772,7 +905,7 @@ static int hist_browser__show_callchain_folded(struct hist_browser *browser,
        bool need_percent;
 
        node = rb_first(root);
-       need_percent = node && rb_next(node);
+       need_percent = check_percent_display(node, parent_total);
 
        while (node) {
                struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -844,20 +977,24 @@ next:
        return row - first_row;
 }
 
-static int hist_browser__show_callchain(struct hist_browser *browser,
+static int hist_browser__show_callchain_graph(struct hist_browser *browser,
                                        struct rb_root *root, int level,
                                        unsigned short row, u64 total,
+                                       u64 parent_total,
                                        print_callchain_entry_fn print,
                                        struct callchain_print_arg *arg,
                                        check_output_full_fn is_output_full)
 {
        struct rb_node *node;
        int first_row = row, offset = level * LEVEL_OFFSET_STEP;
-       u64 new_total;
        bool need_percent;
+       u64 percent_total = total;
+
+       if (callchain_param.mode == CHAIN_GRAPH_REL)
+               percent_total = parent_total;
 
        node = rb_first(root);
-       need_percent = node && rb_next(node);
+       need_percent = check_percent_display(node, parent_total);
 
        while (node) {
                struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -878,7 +1015,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
                        folded_sign = callchain_list__folded(chain);
 
                        row += hist_browser__show_callchain_list(browser, child,
-                                                       chain, row, total,
+                                                       chain, row, percent_total,
                                                        was_first && need_percent,
                                                        offset + extra_offset,
                                                        print, arg);
@@ -893,13 +1030,9 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
                if (folded_sign == '-') {
                        const int new_level = level + (extra_offset ? 2 : 1);
 
-                       if (callchain_param.mode == CHAIN_GRAPH_REL)
-                               new_total = child->children_hit;
-                       else
-                               new_total = total;
-
-                       row += hist_browser__show_callchain(browser, &child->rb_root,
-                                                           new_level, row, new_total,
+                       row += hist_browser__show_callchain_graph(browser, &child->rb_root,
+                                                           new_level, row, total,
+                                                           child->children_hit,
                                                            print, arg, is_output_full);
                }
                if (is_output_full(browser, row))
@@ -910,6 +1043,45 @@ out:
        return row - first_row;
 }
 
+static int hist_browser__show_callchain(struct hist_browser *browser,
+                                       struct hist_entry *entry, int level,
+                                       unsigned short row,
+                                       print_callchain_entry_fn print,
+                                       struct callchain_print_arg *arg,
+                                       check_output_full_fn is_output_full)
+{
+       u64 total = hists__total_period(entry->hists);
+       u64 parent_total;
+       int printed;
+
+       if (symbol_conf.cumulate_callchain)
+               parent_total = entry->stat_acc->period;
+       else
+               parent_total = entry->stat.period;
+
+       if (callchain_param.mode == CHAIN_FLAT) {
+               printed = hist_browser__show_callchain_flat(browser,
+                                               &entry->sorted_chain, row,
+                                               total, parent_total, print, arg,
+                                               is_output_full);
+       } else if (callchain_param.mode == CHAIN_FOLDED) {
+               printed = hist_browser__show_callchain_folded(browser,
+                                               &entry->sorted_chain, row,
+                                               total, parent_total, print, arg,
+                                               is_output_full);
+       } else {
+               printed = hist_browser__show_callchain_graph(browser,
+                                               &entry->sorted_chain, level, row,
+                                               total, parent_total, print, arg,
+                                               is_output_full);
+       }
+
+       if (arg->is_current_entry)
+               browser->he_selection = entry;
+
+       return printed;
+}
+
 struct hpp_arg {
        struct ui_browser *b;
        char folded_sign;
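
Note how the new hist_browser__show_callchain() dispatcher picks parent_total:
with --children (symbol_conf.cumulate_callchain) it uses the accumulated
period, otherwise the entry's own period. In CHAIN_GRAPH_REL mode that value
becomes the denominator for the callchain percentages, so an entry with, say,
a self period of 40 and an accumulated period of 100 has its callchain
fractions computed against 100 when --children is enabled.
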
@@ -1006,7 +1178,6 @@ static int hist_browser__show_entry(struct hist_browser *browser,
                                    struct hist_entry *entry,
                                    unsigned short row)
 {
-       char s[256];
        int printed = 0;
        int width = browser->b.width;
        char folded_sign = ' ';
@@ -1031,16 +1202,18 @@ static int hist_browser__show_entry(struct hist_browser *browser,
                        .folded_sign    = folded_sign,
                        .current_entry  = current_entry,
                };
-               struct perf_hpp hpp = {
-                       .buf            = s,
-                       .size           = sizeof(s),
-                       .ptr            = &arg,
-               };
                int column = 0;
 
                hist_browser__gotorc(browser, row, 0);
 
-               perf_hpp__for_each_format(fmt) {
+               hists__for_each_format(browser->hists, fmt) {
+                       char s[2048];
+                       struct perf_hpp hpp = {
+                               .buf    = s,
+                               .size   = sizeof(s),
+                               .ptr    = &arg,
+                       };
+
                        if (perf_hpp__should_skip(fmt, entry->hists) ||
                            column++ < browser->b.horiz_scroll)
                                continue;
@@ -1065,11 +1238,18 @@ static int hist_browser__show_entry(struct hist_browser *browser,
                        }
 
                        if (fmt->color) {
-                               width -= fmt->color(fmt, &hpp, entry);
+                               int ret = fmt->color(fmt, &hpp, entry);
+                               hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+                               /*
+                                * fmt->color() already used ui_browser to
+                                * print the non alignment bits, skip it (+ret):
+                                */
+                               ui_browser__printf(&browser->b, "%s", s + ret);
                        } else {
-                               width -= fmt->entry(fmt, &hpp, entry);
+                               hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry));
                                ui_browser__printf(&browser->b, "%s", s);
                        }
+                       width -= hpp.buf - s;
                }
 
                /* The scroll bar isn't being used */
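
A detail worth spelling out in the hunk above: the consumed column width is
now derived from how far hpp.buf has advanced from the start of the scratch
buffer (width -= hpp.buf - s) rather than from the raw return value of
fmt->entry()/fmt->color(), so the width bookkeeping stays in step with
whatever hist_entry__snprintf_alignment() wrote into the buffer.
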
@@ -1084,43 +1264,246 @@ static int hist_browser__show_entry(struct hist_browser *browser,
                --row_offset;
 
        if (folded_sign == '-' && row != browser->b.rows) {
-               u64 total = hists__total_period(entry->hists);
                struct callchain_print_arg arg = {
                        .row_offset = row_offset,
                        .is_current_entry = current_entry,
                };
 
-               if (callchain_param.mode == CHAIN_GRAPH_REL) {
-                       if (symbol_conf.cumulate_callchain)
-                               total = entry->stat_acc->period;
-                       else
-                               total = entry->stat.period;
-               }
-
-               if (callchain_param.mode == CHAIN_FLAT) {
-                       printed += hist_browser__show_callchain_flat(browser,
-                                       &entry->sorted_chain, row, total,
-                                       hist_browser__show_callchain_entry, &arg,
-                                       hist_browser__check_output_full);
-               } else if (callchain_param.mode == CHAIN_FOLDED) {
-                       printed += hist_browser__show_callchain_folded(browser,
-                                       &entry->sorted_chain, row, total,
+               printed += hist_browser__show_callchain(browser, entry, 1, row,
                                        hist_browser__show_callchain_entry, &arg,
                                        hist_browser__check_output_full);
+       }
+
+       return printed;
+}
+
+static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
+                                             struct hist_entry *entry,
+                                             unsigned short row,
+                                             int level)
+{
+       int printed = 0;
+       int width = browser->b.width;
+       char folded_sign = ' ';
+       bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+       off_t row_offset = entry->row_offset;
+       bool first = true;
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       struct hpp_arg arg = {
+               .b              = &browser->b,
+               .current_entry  = current_entry,
+       };
+       int column = 0;
+       int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+       if (current_entry) {
+               browser->he_selection = entry;
+               browser->selection = &entry->ms;
+       }
+
+       hist_entry__init_have_children(entry);
+       folded_sign = hist_entry__folded(entry);
+       arg.folded_sign = folded_sign;
+
+       if (entry->leaf && row_offset) {
+               row_offset--;
+               goto show_callchain;
+       }
+
+       hist_browser__gotorc(browser, row, 0);
+
+       if (current_entry && browser->b.navkeypressed)
+               ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+       else
+               ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+       ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+       width -= level * HIERARCHY_INDENT;
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&entry->hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               char s[2048];
+               struct perf_hpp hpp = {
+                       .buf            = s,
+                       .size           = sizeof(s),
+                       .ptr            = &arg,
+               };
+
+               if (perf_hpp__should_skip(fmt, entry->hists) ||
+                   column++ < browser->b.horiz_scroll)
+                       continue;
+
+               if (current_entry && browser->b.navkeypressed) {
+                       ui_browser__set_color(&browser->b,
+                                             HE_COLORSET_SELECTED);
                } else {
-                       printed += hist_browser__show_callchain(browser,
-                                       &entry->sorted_chain, 1, row, total,
-                                       hist_browser__show_callchain_entry, &arg,
-                                       hist_browser__check_output_full);
+                       ui_browser__set_color(&browser->b,
+                                             HE_COLORSET_NORMAL);
+               }
+
+               if (first) {
+                       ui_browser__printf(&browser->b, "%c", folded_sign);
+                       width--;
+                       first = false;
+               } else {
+                       ui_browser__printf(&browser->b, "  ");
+                       width -= 2;
+               }
+
+               if (fmt->color) {
+                       int ret = fmt->color(fmt, &hpp, entry);
+                       hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+                       /*
+                        * fmt->color() already used ui_browser to
+                        * print the non alignment bits, skip it (+ret):
+                        */
+                       ui_browser__printf(&browser->b, "%s", s + ret);
+               } else {
+                       int ret = fmt->entry(fmt, &hpp, entry);
+                       hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+                       ui_browser__printf(&browser->b, "%s", s);
+               }
+               width -= hpp.buf - s;
+       }
+
+       ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+       width -= hierarchy_indent;
+
+       if (column >= browser->b.horiz_scroll) {
+               char s[2048];
+               struct perf_hpp hpp = {
+                       .buf            = s,
+                       .size           = sizeof(s),
+                       .ptr            = &arg,
+               };
+
+               if (current_entry && browser->b.navkeypressed) {
+                       ui_browser__set_color(&browser->b,
+                                             HE_COLORSET_SELECTED);
+               } else {
+                       ui_browser__set_color(&browser->b,
+                                             HE_COLORSET_NORMAL);
                }
 
-               if (arg.is_current_entry)
-                       browser->he_selection = entry;
+               perf_hpp_list__for_each_format(entry->hpp_list, fmt) {
+                       ui_browser__write_nstring(&browser->b, "", 2);
+                       width -= 2;
+
+                       /*
+                        * No need to call hist_entry__snprintf_alignment()
+                        * since this fmt is always the last column in the
+                        * hierarchy mode.
+                        */
+                       if (fmt->color) {
+                               width -= fmt->color(fmt, &hpp, entry);
+                       } else {
+                               int i = 0;
+
+                               width -= fmt->entry(fmt, &hpp, entry);
+                               ui_browser__printf(&browser->b, "%s", ltrim(s));
+
+                               while (isspace(s[i++]))
+                                       width++;
+                       }
+               }
+       }
+
+       /* The scroll bar isn't being used */
+       if (!browser->b.navkeypressed)
+               width += 1;
+
+       ui_browser__write_nstring(&browser->b, "", width);
+
+       ++row;
+       ++printed;
+
+show_callchain:
+       if (entry->leaf && folded_sign == '-' && row != browser->b.rows) {
+               struct callchain_print_arg carg = {
+                       .row_offset = row_offset,
+               };
+
+               printed += hist_browser__show_callchain(browser, entry,
+                                       level + 1, row,
+                                       hist_browser__show_callchain_entry, &carg,
+                                       hist_browser__check_output_full);
        }
 
        return printed;
 }
 
+static int hist_browser__show_no_entry(struct hist_browser *browser,
+                                      unsigned short row, int level)
+{
+       int width = browser->b.width;
+       bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+       bool first = true;
+       int column = 0;
+       int ret;
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       int indent = browser->hists->nr_hpp_node - 2;
+
+       if (current_entry) {
+               browser->he_selection = NULL;
+               browser->selection = NULL;
+       }
+
+       hist_browser__gotorc(browser, row, 0);
+
+       if (current_entry && browser->b.navkeypressed)
+               ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+       else
+               ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+       ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+       width -= level * HIERARCHY_INDENT;
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&browser->hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               if (perf_hpp__should_skip(fmt, browser->hists) ||
+                   column++ < browser->b.horiz_scroll)
+                       continue;
+
+               ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists));
+
+               if (first) {
+                       /* for folded sign */
+                       first = false;
+                       ret++;
+               } else {
+                       /* space between columns */
+                       ret += 2;
+               }
+
+               ui_browser__write_nstring(&browser->b, "", ret);
+               width -= ret;
+       }
+
+       ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT);
+       width -= indent * HIERARCHY_INDENT;
+
+       if (column >= browser->b.horiz_scroll) {
+               char buf[32];
+
+               ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt);
+               ui_browser__printf(&browser->b, "  %s", buf);
+               width -= ret + 2;
+       }
+
+       /* The scroll bar isn't being used */
+       if (!browser->b.navkeypressed)
+               width += 1;
+
+       ui_browser__write_nstring(&browser->b, "", width);
+       return 1;
+}
+
 static int advance_hpp_check(struct perf_hpp *hpp, int inc)
 {
        advance_hpp(hpp, inc);
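
hist_browser__show_no_entry() backs the has_no_entry handling from the folding
hunk earlier: when an unfolded, non-leaf hierarchy entry has no children above
the current limit, a single placeholder row such as "no entry >= 5.00%" is
drawn one level deeper instead of leaving the expansion silently empty.
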
@@ -1144,7 +1527,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *
                        return ret;
        }
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(browser->hists, fmt) {
                if (perf_hpp__should_skip(fmt, hists)  || column++ < browser->b.horiz_scroll)
                        continue;
 
@@ -1160,11 +1543,96 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *
        return ret;
 }
 
+static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size)
+{
+       struct hists *hists = browser->hists;
+       struct perf_hpp dummy_hpp = {
+               .buf    = buf,
+               .size   = size,
+       };
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       size_t ret = 0;
+       int column = 0;
+       int indent = hists->nr_hpp_node - 2;
+       bool first_node, first_col;
+
+       ret = scnprintf(buf, size, " ");
+       if (advance_hpp_check(&dummy_hpp, ret))
+               return ret;
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               if (column++ < browser->b.horiz_scroll)
+                       continue;
+
+               ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+               if (advance_hpp_check(&dummy_hpp, ret))
+                       break;
+
+               ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
+               if (advance_hpp_check(&dummy_hpp, ret))
+                       break;
+       }
+
+       ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+                       indent * HIERARCHY_INDENT, "");
+       if (advance_hpp_check(&dummy_hpp, ret))
+               return ret;
+
+       first_node = true;
+       list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+               if (!first_node) {
+                       ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / ");
+                       if (advance_hpp_check(&dummy_hpp, ret))
+                               break;
+               }
+               first_node = false;
+
+               first_col = true;
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       char *start;
+
+                       if (perf_hpp__should_skip(fmt, hists))
+                               continue;
+
+                       if (!first_col) {
+                               ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+");
+                               if (advance_hpp_check(&dummy_hpp, ret))
+                                       break;
+                       }
+                       first_col = false;
+
+                       ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+                       dummy_hpp.buf[ret] = '\0';
+                       rtrim(dummy_hpp.buf);
+
+                       start = ltrim(dummy_hpp.buf);
+                       ret = strlen(start);
+
+                       if (start != dummy_hpp.buf)
+                               memmove(dummy_hpp.buf, start, ret + 1);
+
+                       if (advance_hpp_check(&dummy_hpp, ret))
+                               break;
+               }
+       }
+
+       return ret;
+}
+
 static void hist_browser__show_headers(struct hist_browser *browser)
 {
        char headers[1024];
 
-       hists_browser__scnprintf_headers(browser, headers, sizeof(headers));
+       if (symbol_conf.report_hierarchy)
+               hists_browser__scnprintf_hierarchy_headers(browser, headers,
+                                                          sizeof(headers));
+       else
+               hists_browser__scnprintf_headers(browser, headers,
+                                                sizeof(headers));
        ui_browser__gotorc(&browser->b, 0, 0);
        ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
        ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
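
Put together, the hierarchy header keeps the overhead columns first and then
joins the sort keys into one combined column, separated by " / " across
hierarchy levels and "+" within a level. With something like
'perf report --hierarchy -s comm,dso' the header line would read roughly
"Overhead  Command / Shared Object" (exact spacing depends on column widths).
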
@@ -1196,18 +1664,34 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
        hb->he_selection = NULL;
        hb->selection = NULL;
 
-       for (nd = browser->top; nd; nd = rb_next(nd)) {
+       for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
                float percent;
 
-               if (h->filtered)
+               if (h->filtered) {
+                       /* let it move to sibling */
+                       h->unfolded = false;
                        continue;
+               }
 
                percent = hist_entry__get_percent_limit(h);
                if (percent < hb->min_pcnt)
                        continue;
 
-               row += hist_browser__show_entry(hb, h, row);
+               if (symbol_conf.report_hierarchy) {
+                       row += hist_browser__show_hierarchy_entry(hb, h, row,
+                                                                 h->depth);
+                       if (row == browser->rows)
+                               break;
+
+                       if (h->has_no_entry) {
+                               hist_browser__show_no_entry(hb, row, h->depth + 1);
+                               row++;
+                       }
+               } else {
+                       row += hist_browser__show_entry(hb, h, row);
+               }
+
                if (row == browser->rows)
                        break;
        }
@@ -1225,7 +1709,14 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
                if (!h->filtered && percent >= min_pcnt)
                        return nd;
 
-               nd = rb_next(nd);
+               /*
+                * If it's filtered, all of its children were filtered too,
+                * so move on to the sibling node.
+                */
+               if (rb_next(nd))
+                       nd = rb_next(nd);
+               else
+                       nd = rb_hierarchy_next(nd);
        }
 
        return NULL;
@@ -1241,7 +1732,7 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
                if (!h->filtered && percent >= min_pcnt)
                        return nd;
 
-               nd = rb_prev(nd);
+               nd = rb_hierarchy_prev(nd);
        }
 
        return NULL;
@@ -1271,8 +1762,8 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
                nd = browser->top;
                goto do_offset;
        case SEEK_END:
-               nd = hists__filter_prev_entries(rb_last(browser->entries),
-                                               hb->min_pcnt);
+               nd = rb_hierarchy_last(rb_last(browser->entries));
+               nd = hists__filter_prev_entries(nd, hb->min_pcnt);
                first = false;
                break;
        default:
@@ -1306,7 +1797,7 @@ do_offset:
        if (offset > 0) {
                do {
                        h = rb_entry(nd, struct hist_entry, rb_node);
-                       if (h->unfolded) {
+                       if (h->unfolded && h->leaf) {
                                u16 remaining = h->nr_rows - h->row_offset;
                                if (offset > remaining) {
                                        offset -= remaining;
@@ -1318,7 +1809,8 @@ do_offset:
                                        break;
                                }
                        }
-                       nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
+                       nd = hists__filter_entries(rb_hierarchy_next(nd),
+                                                  hb->min_pcnt);
                        if (nd == NULL)
                                break;
                        --offset;
@@ -1327,7 +1819,7 @@ do_offset:
        } else if (offset < 0) {
                while (1) {
                        h = rb_entry(nd, struct hist_entry, rb_node);
-                       if (h->unfolded) {
+                       if (h->unfolded && h->leaf) {
                                if (first) {
                                        if (-offset > h->row_offset) {
                                                offset += h->row_offset;
@@ -1351,7 +1843,7 @@ do_offset:
                                }
                        }
 
-                       nd = hists__filter_prev_entries(rb_prev(nd),
+                       nd = hists__filter_prev_entries(rb_hierarchy_prev(nd),
                                                        hb->min_pcnt);
                        if (nd == NULL)
                                break;
@@ -1364,7 +1856,7 @@ do_offset:
                                 * row_offset at its last entry.
                                 */
                                h = rb_entry(nd, struct hist_entry, rb_node);
-                               if (h->unfolded)
+                               if (h->unfolded && h->leaf)
                                        h->row_offset = h->nr_rows;
                                break;
                        }
@@ -1378,17 +1870,14 @@ do_offset:
 }
 
 static int hist_browser__fprintf_callchain(struct hist_browser *browser,
-                                          struct hist_entry *he, FILE *fp)
+                                          struct hist_entry *he, FILE *fp,
+                                          int level)
 {
-       u64 total = hists__total_period(he->hists);
        struct callchain_print_arg arg  = {
                .fp = fp,
        };
 
-       if (symbol_conf.cumulate_callchain)
-               total = he->stat_acc->period;
-
-       hist_browser__show_callchain(browser, &he->sorted_chain, 1, 0, total,
+       hist_browser__show_callchain(browser, he, level, 0,
                                     hist_browser__fprintf_callchain_entry, &arg,
                                     hist_browser__check_dump_full);
        return arg.printed;
@@ -1414,7 +1903,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
        if (symbol_conf.use_callchain)
                printed += fprintf(fp, "%c ", folded_sign);
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(browser->hists, fmt) {
                if (perf_hpp__should_skip(fmt, he->hists))
                        continue;
 
@@ -1425,12 +1914,71 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
                        first = false;
 
                ret = fmt->entry(fmt, &hpp, he);
+               ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret);
                advance_hpp(&hpp, ret);
        }
-       printed += fprintf(fp, "%s\n", rtrim(s));
+       printed += fprintf(fp, "%s\n", s);
 
        if (folded_sign == '-')
-               printed += hist_browser__fprintf_callchain(browser, he, fp);
+               printed += hist_browser__fprintf_callchain(browser, he, fp, 1);
+
+       return printed;
+}
+
+
+static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
+                                                struct hist_entry *he,
+                                                FILE *fp, int level)
+{
+       char s[8192];
+       int printed = 0;
+       char folded_sign = ' ';
+       struct perf_hpp hpp = {
+               .buf = s,
+               .size = sizeof(s),
+       };
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       bool first = true;
+       int ret;
+       int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+       printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
+
+       folded_sign = hist_entry__folded(he);
+       printed += fprintf(fp, "%c", folded_sign);
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&he->hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               if (!first) {
+                       ret = scnprintf(hpp.buf, hpp.size, "  ");
+                       advance_hpp(&hpp, ret);
+               } else
+                       first = false;
+
+               ret = fmt->entry(fmt, &hpp, he);
+               advance_hpp(&hpp, ret);
+       }
+
+       ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, "");
+       advance_hpp(&hpp, ret);
+
+       perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+               ret = scnprintf(hpp.buf, hpp.size, "  ");
+               advance_hpp(&hpp, ret);
+
+               ret = fmt->entry(fmt, &hpp, he);
+               advance_hpp(&hpp, ret);
+       }
+
+       printed += fprintf(fp, "%s\n", rtrim(s));
+
+       if (he->leaf && folded_sign == '-') {
+               printed += hist_browser__fprintf_callchain(browser, he, fp,
+                                                          he->depth + 1);
+       }
 
        return printed;
 }
@@ -1444,8 +1992,16 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
        while (nd) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-               printed += hist_browser__fprintf_entry(browser, h, fp);
-               nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
+               if (symbol_conf.report_hierarchy) {
+                       printed += hist_browser__fprintf_hierarchy_entry(browser,
+                                                                        h, fp,
+                                                                        h->depth);
+               } else {
+                       printed += hist_browser__fprintf_entry(browser, h, fp);
+               }
+
+               nd = hists__filter_entries(rb_hierarchy_next(nd),
+                                          browser->min_pcnt);
        }
 
        return printed;
@@ -1580,11 +2136,18 @@ static int hists__browser_title(struct hists *hists,
        if (hists->uid_filter_str)
                printed += snprintf(bf + printed, size - printed,
                                    ", UID: %s", hists->uid_filter_str);
-       if (thread)
-               printed += scnprintf(bf + printed, size - printed,
+       if (thread) {
+               if (sort__has_thread) {
+                       printed += scnprintf(bf + printed, size - printed,
                                    ", Thread: %s(%d)",
                                     (thread->comm_set ? thread__comm_str(thread) : ""),
                                    thread->tid);
+               } else {
+                       printed += scnprintf(bf + printed, size - printed,
+                                   ", Thread: %s",
+                                    (thread->comm_set ? thread__comm_str(thread) : ""));
+               }
+       }
        if (dso)
                printed += scnprintf(bf + printed, size - printed,
                                    ", DSO: %s", dso->short_name);
@@ -1759,15 +2322,24 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
 {
        struct thread *thread = act->thread;
 
+       if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+               return 0;
+
        if (browser->hists->thread_filter) {
                pstack__remove(browser->pstack, &browser->hists->thread_filter);
                perf_hpp__set_elide(HISTC_THREAD, false);
                thread__zput(browser->hists->thread_filter);
                ui_helpline__pop();
        } else {
-               ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
-                                  thread->comm_set ? thread__comm_str(thread) : "",
-                                  thread->tid);
+               if (sort__has_thread) {
+                       ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
+                                          thread->comm_set ? thread__comm_str(thread) : "",
+                                          thread->tid);
+               } else {
+                       ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
+                                          thread->comm_set ? thread__comm_str(thread) : "");
+               }
+
                browser->hists->thread_filter = thread__get(thread);
                perf_hpp__set_elide(HISTC_THREAD, false);
                pstack__push(browser->pstack, &browser->hists->thread_filter);
@@ -1782,13 +2354,22 @@ static int
 add_thread_opt(struct hist_browser *browser, struct popup_action *act,
               char **optstr, struct thread *thread)
 {
-       if (thread == NULL)
+       int ret;
+
+       if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
                return 0;
 
-       if (asprintf(optstr, "Zoom %s %s(%d) thread",
-                    browser->hists->thread_filter ? "out of" : "into",
-                    thread->comm_set ? thread__comm_str(thread) : "",
-                    thread->tid) < 0)
+       if (sort__has_thread) {
+               ret = asprintf(optstr, "Zoom %s %s(%d) thread",
+                              browser->hists->thread_filter ? "out of" : "into",
+                              thread->comm_set ? thread__comm_str(thread) : "",
+                              thread->tid);
+       } else {
+               ret = asprintf(optstr, "Zoom %s %s thread",
+                              browser->hists->thread_filter ? "out of" : "into",
+                              thread->comm_set ? thread__comm_str(thread) : "");
+       }
+       if (ret < 0)
                return 0;
 
        act->thread = thread;
@@ -1801,6 +2382,9 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
 {
        struct map *map = act->ms.map;
 
+       if (!sort__has_dso || map == NULL)
+               return 0;
+
        if (browser->hists->dso_filter) {
                pstack__remove(browser->pstack, &browser->hists->dso_filter);
                perf_hpp__set_elide(HISTC_DSO, false);
@@ -1825,7 +2409,7 @@ static int
 add_dso_opt(struct hist_browser *browser, struct popup_action *act,
            char **optstr, struct map *map)
 {
-       if (map == NULL)
+       if (!sort__has_dso || map == NULL)
                return 0;
 
        if (asprintf(optstr, "Zoom %s %s DSO",
@@ -1850,7 +2434,7 @@ static int
 add_map_opt(struct hist_browser *browser __maybe_unused,
            struct popup_action *act, char **optstr, struct map *map)
 {
-       if (map == NULL)
+       if (!sort__has_dso || map == NULL)
                return 0;
 
        if (asprintf(optstr, "Browse map details") < 0)
@@ -1952,6 +2536,9 @@ add_exit_opt(struct hist_browser *browser __maybe_unused,
 static int
 do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
 {
+       if (!sort__has_socket || act->socket < 0)
+               return 0;
+
        if (browser->hists->socket_filter > -1) {
                pstack__remove(browser->pstack, &browser->hists->socket_filter);
                browser->hists->socket_filter = -1;
@@ -1971,7 +2558,7 @@ static int
 add_socket_opt(struct hist_browser *browser, struct popup_action *act,
               char **optstr, int socket_id)
 {
-       if (socket_id < 0)
+       if (!sort__has_socket || socket_id < 0)
                return 0;
 
        if (asprintf(optstr, "Zoom %s Processor Socket %d",
@@ -1989,17 +2576,60 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
        u64 nr_entries = 0;
        struct rb_node *nd = rb_first(&hb->hists->entries);
 
-       if (hb->min_pcnt == 0) {
+       if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) {
                hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries;
                return;
        }
 
        while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
                nr_entries++;
-               nd = rb_next(nd);
+               nd = rb_hierarchy_next(nd);
        }
 
        hb->nr_non_filtered_entries = nr_entries;
+       hb->nr_hierarchy_entries = nr_entries;
+}
+
+static void hist_browser__update_percent_limit(struct hist_browser *hb,
+                                              double percent)
+{
+       struct hist_entry *he;
+       struct rb_node *nd = rb_first(&hb->hists->entries);
+       u64 total = hists__total_period(hb->hists);
+       u64 min_callchain_hits = total * (percent / 100);
+
+       hb->min_pcnt = callchain_param.min_percent = percent;
+
+       while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
+               he = rb_entry(nd, struct hist_entry, rb_node);
+
+               if (he->has_no_entry) {
+                       he->has_no_entry = false;
+                       he->nr_rows = 0;
+               }
+
+               if (!he->leaf || !symbol_conf.use_callchain)
+                       goto next;
+
+               if (callchain_param.mode == CHAIN_GRAPH_REL) {
+                       total = he->stat.period;
+
+                       if (symbol_conf.cumulate_callchain)
+                               total = he->stat_acc->period;
+
+                       min_callchain_hits = total * (percent / 100);
+               }
+
+               callchain_param.sort(&he->sorted_chain, he->callchain,
+                                    min_callchain_hits, &callchain_param);
+
+next:
+               nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+               /* force to re-evaluate folding state of callchains */
+               he->init_have_children = false;
+               hist_entry__set_folding(he, hb, false);
+       }
 }
 
 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
@@ -2037,6 +2667,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
        "E             Expand all callchains\n"                         \
        "F             Toggle percentage of filtered entries\n"         \
        "H             Display column headers\n"                        \
+       "L             Change percent limit\n"                          \
        "m             Display context menu\n"                          \
        "S             Zoom into current Processor Socket\n"            \
 
@@ -2077,7 +2708,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
        memset(options, 0, sizeof(options));
        memset(actions, 0, sizeof(actions));
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(browser->hists, fmt) {
                perf_hpp__reset_width(fmt, hists);
                /*
                 * This is done just once, and activates the horizontal scrolling
@@ -2192,6 +2823,24 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                                top->zero = !top->zero;
                        }
                        continue;
+               case 'L':
+                       if (ui_browser__input_window("Percent Limit",
+                                       "Please enter the percent value below which entries will be hidden.",
+                                       buf, "ENTER: OK, ESC: Cancel",
+                                       delay_secs * 2) == K_ENTER) {
+                               char *end;
+                               double new_percent = strtod(buf, &end);
+
+                               if (new_percent < 0 || new_percent > 100) {
+                                       ui_browser__warning(&browser->b, delay_secs * 2,
+                                               "Invalid percent: %.2f", new_percent);
+                                       continue;
+                               }
+
+                               hist_browser__update_percent_limit(browser, new_percent);
+                               hist_browser__reset(browser);
+                       }
+                       continue;
                case K_F1:
                case 'h':
                case '?':
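
The new 'L' binding ties the percent-limit pieces together: the input is
parsed with strtod(), values outside 0..100 are rejected, and the result is
handed to hist_browser__update_percent_limit(), which also updates
callchain_param.min_percent and re-sorts each entry's callchains against
min_callchain_hits = total * (percent / 100) (using the per-entry period in
CHAIN_GRAPH_REL mode). For example, entering 5 with a total period of
2,000,000 re-filters callchains below 100,000 hits.
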
@@ -2263,10 +2912,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                        continue;
                }
 
-               if (!sort__has_sym)
-                       goto add_exit_option;
-
-               if (browser->selection == NULL)
+               if (!sort__has_sym || browser->selection == NULL)
                        goto skip_annotation;
 
                if (sort__mode == SORT_MODE__BRANCH) {
@@ -2306,11 +2952,16 @@ skip_annotation:
                                             &options[nr_options],
                                             socked_id);
                /* perf script support */
+               if (!is_report_browser(hbt))
+                       goto skip_scripting;
+
                if (browser->he_selection) {
-                       nr_options += add_script_opt(browser,
-                                                    &actions[nr_options],
-                                                    &options[nr_options],
-                                                    thread, NULL);
+                       if (sort__has_thread && thread) {
+                               nr_options += add_script_opt(browser,
+                                                            &actions[nr_options],
+                                                            &options[nr_options],
+                                                            thread, NULL);
+                       }
                        /*
                         * Note that browser->selection != NULL
                         * when browser->he_selection is not NULL,
@@ -2320,16 +2971,18 @@ skip_annotation:
                         *
                         * See hist_browser__show_entry.
                         */
-                       nr_options += add_script_opt(browser,
-                                                    &actions[nr_options],
-                                                    &options[nr_options],
-                                                    NULL, browser->selection->sym);
+                       if (sort__has_sym && browser->selection->sym) {
+                               nr_options += add_script_opt(browser,
+                                                            &actions[nr_options],
+                                                            &options[nr_options],
+                                                            NULL, browser->selection->sym);
+                       }
                }
                nr_options += add_script_opt(browser, &actions[nr_options],
                                             &options[nr_options], NULL, NULL);
                nr_options += add_switch_opt(browser, &actions[nr_options],
                                             &options[nr_options]);
-add_exit_option:
+skip_scripting:
                nr_options += add_exit_opt(browser, &actions[nr_options],
                                           &options[nr_options]);
 
index 0f8dcfdfb10f3856abefc7f252631e8937bacf6b..bd9bf7e343b1e310d20587d0b0f7b75c6b10a574 100644 (file)
@@ -306,7 +306,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
        nr_cols = 0;
 
-       perf_hpp__for_each_format(fmt)
+       hists__for_each_format(hists, fmt)
                col_types[nr_cols++] = G_TYPE_STRING;
 
        store = gtk_tree_store_newv(nr_cols, col_types);
@@ -317,7 +317,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
        col_idx = 0;
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(hists, fmt) {
                if (perf_hpp__should_skip(fmt, hists))
                        continue;
 
@@ -367,7 +367,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
                col_idx = 0;
 
-               perf_hpp__for_each_format(fmt) {
+               hists__for_each_format(hists, fmt) {
                        if (perf_hpp__should_skip(fmt, h->hists))
                                continue;
 
@@ -396,6 +396,194 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
        gtk_container_add(GTK_CONTAINER(window), view);
 }
 
+static void perf_gtk__add_hierarchy_entries(struct hists *hists,
+                                           struct rb_root *root,
+                                           GtkTreeStore *store,
+                                           GtkTreeIter *parent,
+                                           struct perf_hpp *hpp,
+                                           float min_pcnt)
+{
+       int col_idx = 0;
+       struct rb_node *node;
+       struct hist_entry *he;
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       u64 total = hists__total_period(hists);
+       int size;
+
+       for (node = rb_first(root); node; node = rb_next(node)) {
+               GtkTreeIter iter;
+               float percent;
+               char *bf;
+
+               he = rb_entry(node, struct hist_entry, rb_node);
+               if (he->filtered)
+                       continue;
+
+               percent = hist_entry__get_percent_limit(he);
+               if (percent < min_pcnt)
+                       continue;
+
+               gtk_tree_store_append(store, &iter, parent);
+
+               col_idx = 0;
+
+               /* the first hpp_list_node is for overhead columns */
+               fmt_node = list_first_entry(&hists->hpp_formats,
+                                           struct perf_hpp_list_node, list);
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       if (fmt->color)
+                               fmt->color(fmt, hpp, he);
+                       else
+                               fmt->entry(fmt, hpp, he);
+
+                       gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1);
+               }
+
+               bf = hpp->buf;
+               size = hpp->size;
+               perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+                       int ret;
+
+                       if (fmt->color)
+                               ret = fmt->color(fmt, hpp, he);
+                       else
+                               ret = fmt->entry(fmt, hpp, he);
+
+                       snprintf(hpp->buf + ret, hpp->size - ret, "  ");
+                       advance_hpp(hpp, ret + 2);
+               }
+
+               gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1);
+
+               if (!he->leaf) {
+                       hpp->buf = bf;
+                       hpp->size = size;
+
+                       perf_gtk__add_hierarchy_entries(hists, &he->hroot_out,
+                                                       store, &iter, hpp,
+                                                       min_pcnt);
+
+                       if (!hist_entry__has_hierarchy_children(he, min_pcnt)) {
+                               char buf[32];
+                               GtkTreeIter child;
+
+                               snprintf(buf, sizeof(buf), "no entry >= %.2f%%",
+                                        min_pcnt);
+
+                               gtk_tree_store_append(store, &child, &iter);
+                               gtk_tree_store_set(store, &child, col_idx, buf, -1);
+                       }
+               }
+
+               if (symbol_conf.use_callchain && he->leaf) {
+                       if (callchain_param.mode == CHAIN_GRAPH_REL)
+                               total = symbol_conf.cumulate_callchain ?
+                                       he->stat_acc->period : he->stat.period;
+
+                       perf_gtk__add_callchain(&he->sorted_chain, store, &iter,
+                                               col_idx, total);
+               }
+       }
+
+}
+
+static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
+                                    float min_pcnt)
+{
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       GType col_types[MAX_COLUMNS];
+       GtkCellRenderer *renderer;
+       GtkTreeStore *store;
+       GtkWidget *view;
+       int col_idx;
+       int nr_cols = 0;
+       char s[512];
+       char buf[512];
+       bool first_node, first_col;
+       struct perf_hpp hpp = {
+               .buf            = s,
+               .size           = sizeof(s),
+       };
+
+       hists__for_each_format(hists, fmt) {
+               if (perf_hpp__is_sort_entry(fmt) ||
+                   perf_hpp__is_dynamic_entry(fmt))
+                       break;
+
+               col_types[nr_cols++] = G_TYPE_STRING;
+       }
+       col_types[nr_cols++] = G_TYPE_STRING;
+
+       store = gtk_tree_store_newv(nr_cols, col_types);
+       view = gtk_tree_view_new();
+       renderer = gtk_cell_renderer_text_new();
+
+       col_idx = 0;
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+                                                           -1, fmt->name,
+                                                           renderer, "markup",
+                                                           col_idx++, NULL);
+       }
+
+       /* construct merged column header since sort keys share a single column */
+       buf[0] = '\0';
+       first_node = true;
+       list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+               if (!first_node)
+                       strcat(buf, " / ");
+               first_node = false;
+
+               first_col = true;
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       if (perf_hpp__should_skip(fmt, hists))
+                               continue;
+
+                       if (!first_col)
+                               strcat(buf, "+");
+                       first_col = false;
+
+                       fmt->header(fmt, &hpp, hists_to_evsel(hists));
+                       strcat(buf, ltrim(rtrim(hpp.buf)));
+               }
+       }
+
+       gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+                                                   -1, buf,
+                                                   renderer, "markup",
+                                                   col_idx++, NULL);
+
+       for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+               GtkTreeViewColumn *column;
+
+               column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+               gtk_tree_view_column_set_resizable(column, TRUE);
+
+               if (col_idx == 0) {
+                       gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+                                                         column);
+               }
+       }
+
+       gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+       g_object_unref(GTK_TREE_MODEL(store));
+
+       perf_gtk__add_hierarchy_entries(hists, &hists->entries, store,
+                                       NULL, &hpp, min_pcnt);
+
+       gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+       g_signal_connect(view, "row-activated",
+                        G_CALLBACK(on_row_activated), NULL);
+       gtk_container_add(GTK_CONTAINER(window), view);
+}
+
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
                                  const char *help,
                                  struct hist_browser_timer *hbt __maybe_unused,
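
The GTK browser mirrors the TUI layout: each overhead column gets its own
tree-view column, all hierarchy sort keys share the final column (its header
is the merged "key1 / key2" string built above, e.g. "Command / Shared Object"
for -s comm,dso), and child entries are appended as nested GtkTreeStore rows
so the hierarchy can be expanded in place.
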
@@ -463,7 +651,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
                                                        GTK_POLICY_AUTOMATIC,
                                                        GTK_POLICY_AUTOMATIC);
 
-               perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
+               if (symbol_conf.report_hierarchy)
+                       perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt);
+               else
+                       perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
 
                tab_label = gtk_label_new(evname);
 
index bf2a66e254eac35c63a0dd44e83628f34e4dc2f9..3baeaa6e71b5a51e113b8b485df97b7c8ae003a2 100644 (file)
@@ -5,6 +5,7 @@
 #include "../util/util.h"
 #include "../util/sort.h"
 #include "../util/evsel.h"
+#include "../util/evlist.h"
 
 /* hist period print (hpp) functions */
 
@@ -371,7 +372,20 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
        return 0;
 }
 
-#define HPP__COLOR_PRINT_FNS(_name, _fn)               \
+static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a)
+{
+       return a->header == hpp__header_fn;
+}
+
+static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+       if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b))
+               return false;
+
+       return a->idx == b->idx;
+}
+
+#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx)         \
        {                                               \
                .name   = _name,                        \
                .header = hpp__header_fn,               \
@@ -381,9 +395,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
                .cmp    = hpp__nop_cmp,                 \
                .collapse = hpp__nop_cmp,               \
                .sort   = hpp__sort_ ## _fn,            \
+               .idx    = PERF_HPP__ ## _idx,           \
+               .equal  = hpp__equal,                   \
        }
 
-#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn)           \
+#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx)     \
        {                                               \
                .name   = _name,                        \
                .header = hpp__header_fn,               \
@@ -393,9 +409,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
                .cmp    = hpp__nop_cmp,                 \
                .collapse = hpp__nop_cmp,               \
                .sort   = hpp__sort_ ## _fn,            \
+               .idx    = PERF_HPP__ ## _idx,           \
+               .equal  = hpp__equal,                   \
        }
 
-#define HPP__PRINT_FNS(_name, _fn)                     \
+#define HPP__PRINT_FNS(_name, _fn, _idx)               \
        {                                               \
                .name   = _name,                        \
                .header = hpp__header_fn,               \
@@ -404,22 +422,25 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
                .cmp    = hpp__nop_cmp,                 \
                .collapse = hpp__nop_cmp,               \
                .sort   = hpp__sort_ ## _fn,            \
+               .idx    = PERF_HPP__ ## _idx,           \
+               .equal  = hpp__equal,                   \
        }
 
 struct perf_hpp_fmt perf_hpp__format[] = {
-       HPP__COLOR_PRINT_FNS("Overhead", overhead),
-       HPP__COLOR_PRINT_FNS("sys", overhead_sys),
-       HPP__COLOR_PRINT_FNS("usr", overhead_us),
-       HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys),
-       HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us),
-       HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc),
-       HPP__PRINT_FNS("Samples", samples),
-       HPP__PRINT_FNS("Period", period)
+       HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD),
+       HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS),
+       HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US),
+       HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS),
+       HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US),
+       HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC),
+       HPP__PRINT_FNS("Samples", samples, SAMPLES),
+       HPP__PRINT_FNS("Period", period, PERIOD)
 };
 
-LIST_HEAD(perf_hpp__list);
-LIST_HEAD(perf_hpp__sort_list);
-
+struct perf_hpp_list perf_hpp_list = {
+       .fields = LIST_HEAD_INIT(perf_hpp_list.fields),
+       .sorts  = LIST_HEAD_INIT(perf_hpp_list.sorts),
+};
 
 #undef HPP__COLOR_PRINT_FNS
 #undef HPP__COLOR_ACC_PRINT_FNS
@@ -485,63 +506,60 @@ void perf_hpp__init(void)
                hpp_dimension__add_output(PERF_HPP__PERIOD);
 }
 
-void perf_hpp__column_register(struct perf_hpp_fmt *format)
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+                                   struct perf_hpp_fmt *format)
 {
-       list_add_tail(&format->list, &perf_hpp__list);
+       list_add_tail(&format->list, &list->fields);
 }
 
-void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+                                       struct perf_hpp_fmt *format)
 {
-       list_del(&format->list);
+       list_add_tail(&format->sort_list, &list->sorts);
 }
 
-void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
-{
-       list_add_tail(&format->sort_list, &perf_hpp__sort_list);
-}
-
-void perf_hpp__column_enable(unsigned col)
-{
-       BUG_ON(col >= PERF_HPP__MAX_INDEX);
-       perf_hpp__column_register(&perf_hpp__format[col]);
-}
-
-void perf_hpp__column_disable(unsigned col)
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
 {
-       BUG_ON(col >= PERF_HPP__MAX_INDEX);
-       perf_hpp__column_unregister(&perf_hpp__format[col]);
+       list_del(&format->list);
 }
 
 void perf_hpp__cancel_cumulate(void)
 {
+       struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp;
+
        if (is_strict_order(field_order))
                return;
 
-       perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
-       perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead";
+       ovh = &perf_hpp__format[PERF_HPP__OVERHEAD];
+       acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC];
+
+       perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) {
+               if (acc->equal(acc, fmt)) {
+                       perf_hpp__column_unregister(fmt);
+                       continue;
+               }
+
+               if (ovh->equal(ovh, fmt))
+                       fmt->name = "Overhead";
+       }
 }
 
-void perf_hpp__setup_output_field(void)
+static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+       return a->equal && a->equal(a, b);
+}
+
+void perf_hpp__setup_output_field(struct perf_hpp_list *list)
 {
        struct perf_hpp_fmt *fmt;
 
        /* append sort keys to output field */
-       perf_hpp__for_each_sort_list(fmt) {
-               if (!list_empty(&fmt->list))
-                       continue;
-
-               /*
-                * sort entry fields are dynamically created,
-                * so they can share a same sort key even though
-                * the list is empty.
-                */
-               if (perf_hpp__is_sort_entry(fmt)) {
-                       struct perf_hpp_fmt *pos;
+       perf_hpp_list__for_each_sort_list(list, fmt) {
+               struct perf_hpp_fmt *pos;
 
-                       perf_hpp__for_each_format(pos) {
-                               if (perf_hpp__same_sort_entry(pos, fmt))
-                                       goto next;
-                       }
+               perf_hpp_list__for_each_format(list, pos) {
+                       if (fmt_equal(fmt, pos))
+                               goto next;
                }
 
                perf_hpp__column_register(fmt);
@@ -550,27 +568,17 @@ next:
        }
 }
 
-void perf_hpp__append_sort_keys(void)
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list)
 {
        struct perf_hpp_fmt *fmt;
 
        /* append output fields to sort keys */
-       perf_hpp__for_each_format(fmt) {
-               if (!list_empty(&fmt->sort_list))
-                       continue;
-
-               /*
-                * sort entry fields are dynamically created,
-                * so they can share a same sort key even though
-                * the list is empty.
-                */
-               if (perf_hpp__is_sort_entry(fmt)) {
-                       struct perf_hpp_fmt *pos;
+       perf_hpp_list__for_each_format(list, fmt) {
+               struct perf_hpp_fmt *pos;
 
-                       perf_hpp__for_each_sort_list(pos) {
-                               if (perf_hpp__same_sort_entry(pos, fmt))
-                                       goto next;
-                       }
+               perf_hpp_list__for_each_sort_list(list, pos) {
+                       if (fmt_equal(fmt, pos))
+                               goto next;
                }
 
                perf_hpp__register_sort_field(fmt);
@@ -579,20 +587,29 @@ next:
        }
 }
 
-void perf_hpp__reset_output_field(void)
+
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+       if (fmt->free)
+               fmt->free(fmt);
+}
+
+void perf_hpp__reset_output_field(struct perf_hpp_list *list)
 {
        struct perf_hpp_fmt *fmt, *tmp;
 
        /* reset output fields */
-       perf_hpp__for_each_format_safe(fmt, tmp) {
+       perf_hpp_list__for_each_format_safe(list, fmt, tmp) {
                list_del_init(&fmt->list);
                list_del_init(&fmt->sort_list);
+               fmt_free(fmt);
        }
 
        /* reset sort keys */
-       perf_hpp__for_each_sort_list_safe(fmt, tmp) {
+       perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) {
                list_del_init(&fmt->list);
                list_del_init(&fmt->sort_list);
+               fmt_free(fmt);
        }
 }
 
@@ -606,7 +623,7 @@ unsigned int hists__sort_list_width(struct hists *hists)
        bool first = true;
        struct perf_hpp dummy_hpp;
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(hists, fmt) {
                if (perf_hpp__should_skip(fmt, hists))
                        continue;
 
@@ -624,22 +641,39 @@ unsigned int hists__sort_list_width(struct hists *hists)
        return ret;
 }
 
-void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+unsigned int hists__overhead_width(struct hists *hists)
 {
-       int idx;
-
-       if (perf_hpp__is_sort_entry(fmt))
-               return perf_hpp__reset_sort_width(fmt, hists);
+       struct perf_hpp_fmt *fmt;
+       int ret = 0;
+       bool first = true;
+       struct perf_hpp dummy_hpp;
 
-       for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
-               if (fmt == &perf_hpp__format[idx])
+       hists__for_each_format(hists, fmt) {
+               if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
                        break;
+
+               if (first)
+                       first = false;
+               else
+                       ret += 2;
+
+               ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
        }
 
-       if (idx == PERF_HPP__MAX_INDEX)
+       return ret;
+}
+
+void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+       if (perf_hpp__is_sort_entry(fmt))
+               return perf_hpp__reset_sort_width(fmt, hists);
+
+       if (perf_hpp__is_dynamic_entry(fmt))
                return;
 
-       switch (idx) {
+       BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX);
+
+       switch (fmt->idx) {
        case PERF_HPP__OVERHEAD:
        case PERF_HPP__OVERHEAD_SYS:
        case PERF_HPP__OVERHEAD_US:
@@ -667,7 +701,7 @@ void perf_hpp__set_user_width(const char *width_list_str)
        struct perf_hpp_fmt *fmt;
        const char *ptr = width_list_str;
 
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                char *p;
 
                int len = strtol(ptr, &p, 10);
@@ -679,3 +713,71 @@ void perf_hpp__set_user_width(const char *width_list_str)
                        break;
        }
 }
+
+static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt)
+{
+       struct perf_hpp_list_node *node = NULL;
+       struct perf_hpp_fmt *fmt_copy;
+       bool found = false;
+       bool skip = perf_hpp__should_skip(fmt, hists);
+
+       list_for_each_entry(node, &hists->hpp_formats, list) {
+               if (node->level == fmt->level) {
+                       found = true;
+                       break;
+               }
+       }
+
+       if (!found) {
+               node = malloc(sizeof(*node));
+               if (node == NULL)
+                       return -1;
+
+               node->skip = skip;
+               node->level = fmt->level;
+               perf_hpp_list__init(&node->hpp);
+
+               hists->nr_hpp_node++;
+               list_add_tail(&node->list, &hists->hpp_formats);
+       }
+
+       fmt_copy = perf_hpp_fmt__dup(fmt);
+       if (fmt_copy == NULL)
+               return -1;
+
+       if (!skip)
+               node->skip = false;
+
+       list_add_tail(&fmt_copy->list, &node->hpp.fields);
+       list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts);
+
+       return 0;
+}
+
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+                                 struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel;
+       struct perf_hpp_fmt *fmt;
+       struct hists *hists;
+       int ret;
+
+       if (!symbol_conf.report_hierarchy)
+               return 0;
+
+       evlist__for_each(evlist, evsel) {
+               hists = evsel__hists(evsel);
+
+               perf_hpp_list__for_each_sort_list(list, fmt) {
+                       if (perf_hpp__is_dynamic_entry(fmt) &&
+                           !perf_hpp__defined_dynamic_entry(fmt, hists))
+                               continue;
+
+                       ret = add_hierarchy_fmt(hists, fmt);
+                       if (ret < 0)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
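
For orientation, add_hierarchy_fmt() above groups the sort formats by level into one
perf_hpp_list_node per level on hists->hpp_formats. The shape of that node, as implied
by its use here (a sketch only; the real definition lives in util/hist.h and may differ
in detail):

    struct perf_hpp_list_node {
            struct list_head        list;   /* linked into hists->hpp_formats */
            struct perf_hpp_list    hpp;    /* formats that belong to this sort level */
            int                     level;  /* matches fmt->level of its members */
            bool                    skip;   /* true only if every member is skipped */
    };

The first node on the list always carries the overhead columns; the stdio and GTK
browsers rely on that ordering when they build the merged header.
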
index 387110d50b002557d99e723605407c3db990d71a..7aff5acf3265782e03254de2d8bc1dfafb56e03a 100644 (file)
@@ -165,8 +165,28 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
        return ret;
 }
 
+/*
+ * If there is a single callchain root, don't bother printing
+ * its percentage (100% in fractal mode, or the same percentage
+ * as the hist entry in graph mode). This also avoids one level of column.
+ *
+ * However, when a percent limit is applied, it's possible that the single
+ * callchain node has a different (non-100% in fractal mode) percentage.
+ */
+static bool need_percent_display(struct rb_node *node, u64 parent_samples)
+{
+       struct callchain_node *cnode;
+
+       if (rb_next(node))
+               return true;
+
+       cnode = rb_entry(node, struct callchain_node, rb_node);
+       return callchain_cumul_hits(cnode) != parent_samples;
+}
+
 static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
-                                      u64 total_samples, int left_margin)
+                                      u64 total_samples, u64 parent_samples,
+                                      int left_margin)
 {
        struct callchain_node *cnode;
        struct callchain_list *chain;
@@ -177,13 +197,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
        int ret = 0;
        char bf[1024];
 
-       /*
-        * If have one single callchain root, don't bother printing
-        * its percentage (100 % in fractal mode and the same percentage
-        * than the hist in graph mode). This also avoid one level of column.
-        */
        node = rb_first(root);
-       if (node && !rb_next(node)) {
+       if (node && !need_percent_display(node, parent_samples)) {
                cnode = rb_entry(node, struct callchain_node, rb_node);
                list_for_each_entry(chain, &cnode->val, list) {
                        /*
@@ -213,9 +228,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
                root = &cnode->rb_root;
        }
 
+       if (callchain_param.mode == CHAIN_GRAPH_REL)
+               total_samples = parent_samples;
+
        ret += __callchain__fprintf_graph(fp, root, total_samples,
                                          1, 1, left_margin);
-       ret += fprintf(fp, "\n");
+       if (ret) {
+               /* do not add a blank line if it printed nothing */
+               ret += fprintf(fp, "\n");
+       }
 
        return ret;
 }
@@ -323,16 +344,19 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
                                            u64 total_samples, int left_margin,
                                            FILE *fp)
 {
+       u64 parent_samples = he->stat.period;
+
+       if (symbol_conf.cumulate_callchain)
+               parent_samples = he->stat_acc->period;
+
        switch (callchain_param.mode) {
        case CHAIN_GRAPH_REL:
-               return callchain__fprintf_graph(fp, &he->sorted_chain,
-                                               symbol_conf.cumulate_callchain ?
-                                               he->stat_acc->period : he->stat.period,
-                                               left_margin);
+               return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
+                                               parent_samples, left_margin);
                break;
        case CHAIN_GRAPH_ABS:
                return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
-                                               left_margin);
+                                               parent_samples, left_margin);
                break;
        case CHAIN_FLAT:
                return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
@@ -349,45 +373,66 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
        return 0;
 }
 
-static size_t hist_entry__callchain_fprintf(struct hist_entry *he,
-                                           struct hists *hists,
-                                           FILE *fp)
+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
 {
-       int left_margin = 0;
-       u64 total_period = hists->stats.total_period;
+       const char *sep = symbol_conf.field_sep;
+       struct perf_hpp_fmt *fmt;
+       char *start = hpp->buf;
+       int ret;
+       bool first = true;
 
-       if (field_order == NULL && (sort_order == NULL ||
-                                   !prefixcmp(sort_order, "comm"))) {
-               struct perf_hpp_fmt *fmt;
+       if (symbol_conf.exclude_other && !he->parent)
+               return 0;
 
-               perf_hpp__for_each_format(fmt) {
-                       if (!perf_hpp__is_sort_entry(fmt))
-                               continue;
+       hists__for_each_format(he->hists, fmt) {
+               if (perf_hpp__should_skip(fmt, he->hists))
+                       continue;
 
-                       /* must be 'comm' sort entry */
-                       left_margin = fmt->width(fmt, NULL, hists_to_evsel(hists));
-                       left_margin -= thread__comm_len(he->thread);
-                       break;
-               }
+               /*
+                * If there's no field_sep, we still need
+                * to display initial '  '.
+                */
+               if (!sep || !first) {
+                       ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+                       advance_hpp(hpp, ret);
+               } else
+                       first = false;
+
+               if (perf_hpp__use_color() && fmt->color)
+                       ret = fmt->color(fmt, hpp, he);
+               else
+                       ret = fmt->entry(fmt, hpp, he);
+
+               ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+               advance_hpp(hpp, ret);
        }
-       return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
+
+       return hpp->buf - start;
 }
 
-static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
+                                        struct perf_hpp *hpp,
+                                        struct hists *hists,
+                                        FILE *fp)
 {
        const char *sep = symbol_conf.field_sep;
        struct perf_hpp_fmt *fmt;
-       char *start = hpp->buf;
-       int ret;
+       struct perf_hpp_list_node *fmt_node;
+       char *buf = hpp->buf;
+       size_t size = hpp->size;
+       int ret, printed = 0;
        bool first = true;
 
        if (symbol_conf.exclude_other && !he->parent)
                return 0;
 
-       perf_hpp__for_each_format(fmt) {
-               if (perf_hpp__should_skip(fmt, he->hists))
-                       continue;
+       ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, "");
+       advance_hpp(hpp, ret);
 
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
                /*
                 * If there's no field_sep, we still need
                 * to display initial '  '.
@@ -403,10 +448,47 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
                else
                        ret = fmt->entry(fmt, hpp, he);
 
+               ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
                advance_hpp(hpp, ret);
        }
 
-       return hpp->buf - start;
+       if (!sep)
+               ret = scnprintf(hpp->buf, hpp->size, "%*s",
+                               (hists->nr_hpp_node - 2) * HIERARCHY_INDENT, "");
+       advance_hpp(hpp, ret);
+
+       printed += fprintf(fp, "%s", buf);
+
+       perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+               hpp->buf  = buf;
+               hpp->size = size;
+
+               /*
+                * No need to call hist_entry__snprintf_alignment() since this
+                * fmt is always the last column in the hierarchy mode.
+                */
+               if (perf_hpp__use_color() && fmt->color)
+                       fmt->color(fmt, hpp, he);
+               else
+                       fmt->entry(fmt, hpp, he);
+
+               /*
+                * dynamic entries are right-aligned but we want left-aligned
+                * in the hierarchy mode
+                */
+               printed += fprintf(fp, "%s%s", sep ?: "  ", ltrim(buf));
+       }
+       printed += putc('\n', fp);
+
+       if (symbol_conf.use_callchain && he->leaf) {
+               u64 total = hists__total_period(hists);
+
+               printed += hist_entry_callchain__fprintf(he, total, 0, fp);
+               goto out;
+       }
+
+out:
+       return printed;
 }
 
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
@@ -418,24 +500,134 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
                .buf            = bf,
                .size           = size,
        };
+       u64 total_period = hists->stats.total_period;
 
        if (size == 0 || size > bfsz)
                size = hpp.size = bfsz;
 
+       if (symbol_conf.report_hierarchy)
+               return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp);
+
        hist_entry__snprintf(he, &hpp);
 
        ret = fprintf(fp, "%s\n", bf);
 
        if (symbol_conf.use_callchain)
-               ret += hist_entry__callchain_fprintf(he, hists, fp);
+               ret += hist_entry_callchain__fprintf(he, total_period, 0, fp);
 
        return ret;
 }
 
+static int print_hierarchy_indent(const char *sep, int indent,
+                                 const char *line, FILE *fp)
+{
+       if (sep != NULL || indent < 2)
+               return 0;
+
+       return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line);
+}
+
+static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp,
+                                 const char *sep, FILE *fp)
+{
+       bool first_node, first_col;
+       int indent;
+       int depth;
+       unsigned width = 0;
+       unsigned header_width = 0;
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+
+       indent = hists->nr_hpp_node;
+
+       /* preserve max indent depth for column headers */
+       print_hierarchy_indent(sep, indent, spaces, fp);
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               fmt->header(fmt, hpp, hists_to_evsel(hists));
+               fprintf(fp, "%s%s", hpp->buf, sep ?: "  ");
+       }
+
+       /* combine sort headers with ' / ' */
+       first_node = true;
+       list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+               if (!first_node)
+                       header_width += fprintf(fp, " / ");
+               first_node = false;
+
+               first_col = true;
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       if (perf_hpp__should_skip(fmt, hists))
+                               continue;
+
+                       if (!first_col)
+                               header_width += fprintf(fp, "+");
+                       first_col = false;
+
+                       fmt->header(fmt, hpp, hists_to_evsel(hists));
+                       rtrim(hpp->buf);
+
+                       header_width += fprintf(fp, "%s", ltrim(hpp->buf));
+               }
+       }
+
+       fprintf(fp, "\n# ");
+
+       /* preserve max indent depth for initial dots */
+       print_hierarchy_indent(sep, indent, dots, fp);
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+
+       first_col = true;
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               if (!first_col)
+                       fprintf(fp, "%s", sep ?: "..");
+               first_col = false;
+
+               width = fmt->width(fmt, hpp, hists_to_evsel(hists));
+               fprintf(fp, "%.*s", width, dots);
+       }
+
+       depth = 0;
+       list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+               first_col = true;
+               width = depth * HIERARCHY_INDENT;
+
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       if (perf_hpp__should_skip(fmt, hists))
+                               continue;
+
+                       if (!first_col)
+                               width++;  /* for '+' sign between column headers */
+                       first_col = false;
+
+                       width += fmt->width(fmt, hpp, hists_to_evsel(hists));
+               }
+
+               if (width > header_width)
+                       header_width = width;
+
+               depth++;
+       }
+
+       fprintf(fp, "%s%-.*s", sep ?: "  ", header_width, dots);
+
+       fprintf(fp, "\n#\n");
+
+       return 2;
+}
+
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
                      int max_cols, float min_pcnt, FILE *fp)
 {
        struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
        struct rb_node *nd;
        size_t ret = 0;
        unsigned int width;
@@ -449,10 +641,11 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
        bool first = true;
        size_t linesz;
        char *line = NULL;
+       unsigned indent;
 
        init_rem_hits();
 
-       perf_hpp__for_each_format(fmt)
+       hists__for_each_format(hists, fmt)
                perf_hpp__reset_width(fmt, hists);
 
        if (symbol_conf.col_width_list_str)
@@ -463,7 +656,16 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 
        fprintf(fp, "# ");
 
-       perf_hpp__for_each_format(fmt) {
+       if (symbol_conf.report_hierarchy) {
+               list_for_each_entry(fmt_node, &hists->hpp_formats, list) {
+                       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+                               perf_hpp__reset_width(fmt, hists);
+               }
+               nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp);
+               goto print_entries;
+       }
+
+       hists__for_each_format(hists, fmt) {
                if (perf_hpp__should_skip(fmt, hists))
                        continue;
 
@@ -487,7 +689,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 
        fprintf(fp, "# ");
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(hists, fmt) {
                unsigned int i;
 
                if (perf_hpp__should_skip(fmt, hists))
@@ -520,7 +722,9 @@ print_entries:
                goto out;
        }
 
-       for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+       indent = hists__overhead_width(hists) + 4;
+
+       for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
                float percent;
 
@@ -536,6 +740,20 @@ print_entries:
                if (max_rows && ++nr_rows >= max_rows)
                        break;
 
+               /*
+                * If all children are filtered out or percent-limited,
+                * display "no entry >= x.xx%" message.
+                */
+               if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) {
+                       int depth = hists->nr_hpp_node + h->depth + 1;
+
+                       print_hierarchy_indent(sep, depth, spaces, fp);
+                       fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt);
+
+                       if (max_rows && ++nr_rows >= max_rows)
+                               break;
+               }
+
                if (h->ms.map == NULL && verbose > 1) {
                        __map_groups__fprintf_maps(h->thread->mg,
                                                   MAP__FUNCTION, fp);
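
print_hierarchy_header() above joins the per-level sort headers with " / " and same-level
keys with "+", after the overhead columns. Roughly, for a two-level hierarchy such as
-s comm,dso the resulting stdio header looks like this (illustrative spacing only, not
taken from the patch):

    #       Overhead  Command / Shared Object
    # ..............  ..........................
    #
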
index 5eec53a3f4ac7dbedb54776f746705acfa4fa2b4..eea25e2424e99172715c681a05ffc68b3c58e78a 100644 (file)
@@ -82,6 +82,7 @@ libperf-y += parse-branch-options.o
 libperf-y += parse-regs-options.o
 libperf-y += term.o
 libperf-y += help-unknown-cmd.o
+libperf-y += mem-events.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
@@ -105,8 +106,17 @@ libperf-y += scripting-engines/
 
 libperf-$(CONFIG_ZLIB) += zlib.o
 libperf-$(CONFIG_LZMA) += lzma.o
+libperf-y += demangle-java.o
+
+ifdef CONFIG_JITDUMP
+libperf-$(CONFIG_LIBELF) += jitdump.o
+libperf-$(CONFIG_LIBELF) += genelf.o
+libperf-$(CONFIG_LIBELF) += genelf_debug.o
+endif
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+# avoid compiler warnings in 32-bit mode
+CFLAGS_genelf_debug.o  += -Wno-packed
 
 $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
        $(call rule_mkdir)
index 360fda01f3b0d17369254d03fe16daf24f30c698..ec164fe70718df1480b02733d8701c7ab2b74297 100644 (file)
@@ -478,10 +478,11 @@ void auxtrace_heap__pop(struct auxtrace_heap *heap)
                         heap_array[last].ordinal);
 }
 
-size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+                                      struct perf_evlist *evlist)
 {
        if (itr)
-               return itr->info_priv_size(itr);
+               return itr->info_priv_size(itr, evlist);
        return 0;
 }
 
@@ -852,7 +853,7 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
        int err;
 
        pr_debug2("Synthesizing auxtrace information\n");
-       priv_size = auxtrace_record__info_priv_size(itr);
+       priv_size = auxtrace_record__info_priv_size(itr, session->evlist);
        ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
        if (!ev)
                return -ENOMEM;
index b86f90db1352a6c8635e3ea5d02aa3c21bccc323..e5a8e2d4f2af4e9b717be7ce27c587f569983027 100644 (file)
@@ -293,7 +293,8 @@ struct auxtrace_record {
        int (*recording_options)(struct auxtrace_record *itr,
                                 struct perf_evlist *evlist,
                                 struct record_opts *opts);
-       size_t (*info_priv_size)(struct auxtrace_record *itr);
+       size_t (*info_priv_size)(struct auxtrace_record *itr,
+                                struct perf_evlist *evlist);
        int (*info_fill)(struct auxtrace_record *itr,
                         struct perf_session *session,
                         struct auxtrace_info_event *auxtrace_info,
@@ -429,7 +430,8 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
 int auxtrace_record__options(struct auxtrace_record *itr,
                             struct perf_evlist *evlist,
                             struct record_opts *opts);
-size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+                                      struct perf_evlist *evlist);
 int auxtrace_record__info_fill(struct auxtrace_record *itr,
                               struct perf_session *session,
                               struct auxtrace_info_event *auxtrace_info,
index 540a7efa657ea8668eeddf1b476bbcb3c0490bbc..0967ce601931685ed294827e8aef7c30c47736c6 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/bpf.h>
 #include <bpf/libbpf.h>
+#include <bpf/bpf.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include "perf.h"
@@ -16,6 +17,7 @@
 #include "llvm-utils.h"
 #include "probe-event.h"
 #include "probe-finder.h" // for MAX_PROBES
+#include "parse-events.h"
 #include "llvm-utils.h"
 
 #define DEFINE_PRINT_FN(name, level) \
@@ -108,8 +110,8 @@ void bpf__clear(void)
 }
 
 static void
-bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
-                    void *_priv)
+clear_prog_priv(struct bpf_program *prog __maybe_unused,
+               void *_priv)
 {
        struct bpf_prog_priv *priv = _priv;
 
@@ -337,7 +339,7 @@ config_bpf_program(struct bpf_program *prog)
        }
        pr_debug("bpf: config '%s' is ok\n", config_str);
 
-       err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear);
+       err = bpf_program__set_private(prog, priv, clear_prog_priv);
        if (err) {
                pr_debug("Failed to set priv for program '%s'\n", config_str);
                goto errout;
@@ -739,6 +741,682 @@ int bpf__foreach_tev(struct bpf_object *obj,
        return 0;
 }
 
+enum bpf_map_op_type {
+       BPF_MAP_OP_SET_VALUE,
+       BPF_MAP_OP_SET_EVSEL,
+};
+
+enum bpf_map_key_type {
+       BPF_MAP_KEY_ALL,
+       BPF_MAP_KEY_RANGES,
+};
+
+struct bpf_map_op {
+       struct list_head list;
+       enum bpf_map_op_type op_type;
+       enum bpf_map_key_type key_type;
+       union {
+               struct parse_events_array array;
+       } k;
+       union {
+               u64 value;
+               struct perf_evsel *evsel;
+       } v;
+};
+
+struct bpf_map_priv {
+       struct list_head ops_list;
+};
+
+static void
+bpf_map_op__delete(struct bpf_map_op *op)
+{
+       if (!list_empty(&op->list))
+               list_del(&op->list);
+       if (op->key_type == BPF_MAP_KEY_RANGES)
+               parse_events__clear_array(&op->k.array);
+       free(op);
+}
+
+static void
+bpf_map_priv__purge(struct bpf_map_priv *priv)
+{
+       struct bpf_map_op *pos, *n;
+
+       list_for_each_entry_safe(pos, n, &priv->ops_list, list) {
+               list_del_init(&pos->list);
+               bpf_map_op__delete(pos);
+       }
+}
+
+static void
+bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+                   void *_priv)
+{
+       struct bpf_map_priv *priv = _priv;
+
+       bpf_map_priv__purge(priv);
+       free(priv);
+}
+
+static int
+bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
+{
+       op->key_type = BPF_MAP_KEY_ALL;
+       if (!term)
+               return 0;
+
+       if (term->array.nr_ranges) {
+               size_t memsz = term->array.nr_ranges *
+                               sizeof(op->k.array.ranges[0]);
+
+               op->k.array.ranges = memdup(term->array.ranges, memsz);
+               if (!op->k.array.ranges) {
+                       pr_debug("No enough memory to alloc indices for map\n");
+                       return -ENOMEM;
+               }
+               op->key_type = BPF_MAP_KEY_RANGES;
+               op->k.array.nr_ranges = term->array.nr_ranges;
+       }
+       return 0;
+}
+
+static struct bpf_map_op *
+bpf_map_op__new(struct parse_events_term *term)
+{
+       struct bpf_map_op *op;
+       int err;
+
+       op = zalloc(sizeof(*op));
+       if (!op) {
+               pr_debug("Failed to alloc bpf_map_op\n");
+               return ERR_PTR(-ENOMEM);
+       }
+       INIT_LIST_HEAD(&op->list);
+
+       err = bpf_map_op_setkey(op, term);
+       if (err) {
+               free(op);
+               return ERR_PTR(err);
+       }
+       return op;
+}
+
+static int
+bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
+{
+       struct bpf_map_priv *priv;
+       const char *map_name;
+       int err;
+
+       map_name = bpf_map__get_name(map);
+       err = bpf_map__get_private(map, (void **)&priv);
+       if (err) {
+               pr_debug("Failed to get private from map %s\n", map_name);
+               return err;
+       }
+
+       if (!priv) {
+               priv = zalloc(sizeof(*priv));
+               if (!priv) {
+                       pr_debug("No enough memory to alloc map private\n");
+                       return -ENOMEM;
+               }
+               INIT_LIST_HEAD(&priv->ops_list);
+
+               if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
+                       free(priv);
+                       return -BPF_LOADER_ERRNO__INTERNAL;
+               }
+       }
+
+       list_add_tail(&op->list, &priv->ops_list);
+       return 0;
+}
+
+static struct bpf_map_op *
+bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
+{
+       struct bpf_map_op *op;
+       int err;
+
+       op = bpf_map_op__new(term);
+       if (IS_ERR(op))
+               return op;
+
+       err = bpf_map__add_op(map, op);
+       if (err) {
+               bpf_map_op__delete(op);
+               return ERR_PTR(err);
+       }
+       return op;
+}
+
+static int
+__bpf_map__config_value(struct bpf_map *map,
+                       struct parse_events_term *term)
+{
+       struct bpf_map_def def;
+       struct bpf_map_op *op;
+       const char *map_name;
+       int err;
+
+       map_name = bpf_map__get_name(map);
+
+       err = bpf_map__get_def(map, &def);
+       if (err) {
+               pr_debug("Unable to get map definition from '%s'\n",
+                        map_name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       if (def.type != BPF_MAP_TYPE_ARRAY) {
+               pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
+                        map_name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+       }
+       if (def.key_size < sizeof(unsigned int)) {
+               pr_debug("Map %s has incorrect key size\n", map_name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
+       }
+       switch (def.value_size) {
+       case 1:
+       case 2:
+       case 4:
+       case 8:
+               break;
+       default:
+               pr_debug("Map %s has incorrect value size\n", map_name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+       }
+
+       op = bpf_map__add_newop(map, term);
+       if (IS_ERR(op))
+               return PTR_ERR(op);
+       op->op_type = BPF_MAP_OP_SET_VALUE;
+       op->v.value = term->val.num;
+       return 0;
+}
+
+static int
+bpf_map__config_value(struct bpf_map *map,
+                     struct parse_events_term *term,
+                     struct perf_evlist *evlist __maybe_unused)
+{
+       if (!term->err_val) {
+               pr_debug("Config value not set\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+       }
+
+       if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
+               pr_debug("ERROR: wrong value type for 'value'\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+       }
+
+       return __bpf_map__config_value(map, term);
+}
+
+static int
+__bpf_map__config_event(struct bpf_map *map,
+                       struct parse_events_term *term,
+                       struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel;
+       struct bpf_map_def def;
+       struct bpf_map_op *op;
+       const char *map_name;
+       int err;
+
+       map_name = bpf_map__get_name(map);
+       evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
+       if (!evsel) {
+               pr_debug("Event (for '%s') '%s' doesn't exist\n",
+                        map_name, term->val.str);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+       }
+
+       err = bpf_map__get_def(map, &def);
+       if (err) {
+               pr_debug("Unable to get map definition from '%s'\n",
+                        map_name);
+               return err;
+       }
+
+       /*
+        * No need to check key_size and value_size:
+        * kernel has already checked them.
+        */
+       if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+               pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+                        map_name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+       }
+
+       op = bpf_map__add_newop(map, term);
+       if (IS_ERR(op))
+               return PTR_ERR(op);
+       op->op_type = BPF_MAP_OP_SET_EVSEL;
+       op->v.evsel = evsel;
+       return 0;
+}
+
+static int
+bpf_map__config_event(struct bpf_map *map,
+                     struct parse_events_term *term,
+                     struct perf_evlist *evlist)
+{
+       if (!term->err_val) {
+               pr_debug("Config value not set\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+       }
+
+       if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) {
+               pr_debug("ERROR: wrong value type for 'event'\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+       }
+
+       return __bpf_map__config_event(map, term, evlist);
+}
+
+struct bpf_obj_config__map_func {
+       const char *config_opt;
+       int (*config_func)(struct bpf_map *, struct parse_events_term *,
+                          struct perf_evlist *);
+};
+
+struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
+       {"value", bpf_map__config_value},
+       {"event", bpf_map__config_event},
+};
+
+static int
+config_map_indices_range_check(struct parse_events_term *term,
+                              struct bpf_map *map,
+                              const char *map_name)
+{
+       struct parse_events_array *array = &term->array;
+       struct bpf_map_def def;
+       unsigned int i;
+       int err;
+
+       if (!array->nr_ranges)
+               return 0;
+       if (!array->ranges) {
+               pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
+                        map_name, (int)array->nr_ranges);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       err = bpf_map__get_def(map, &def);
+       if (err) {
+               pr_debug("ERROR: Unable to get map definition from '%s'\n",
+                        map_name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       for (i = 0; i < array->nr_ranges; i++) {
+               unsigned int start = array->ranges[i].start;
+               size_t length = array->ranges[i].length;
+               unsigned int idx = start + length - 1;
+
+               if (idx >= def.max_entries) {
+                       pr_debug("ERROR: index %d too large\n", idx);
+                       return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
+               }
+       }
+       return 0;
+}
+
+static int
+bpf__obj_config_map(struct bpf_object *obj,
+                   struct parse_events_term *term,
+                   struct perf_evlist *evlist,
+                   int *key_scan_pos)
+{
+       /* key is "map:<mapname>.<config opt>" */
+       char *map_name = strdup(term->config + sizeof("map:") - 1);
+       struct bpf_map *map;
+       int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+       char *map_opt;
+       size_t i;
+
+       if (!map_name)
+               return -ENOMEM;
+
+       map_opt = strchr(map_name, '.');
+       if (!map_opt) {
+               pr_debug("ERROR: Invalid map config: %s\n", map_name);
+               goto out;
+       }
+
+       *map_opt++ = '\0';
+       if (*map_opt == '\0') {
+               pr_debug("ERROR: Invalid map option: %s\n", term->config);
+               goto out;
+       }
+
+       map = bpf_object__get_map_by_name(obj, map_name);
+       if (!map) {
+               pr_debug("ERROR: Map %s doesn't exist\n", map_name);
+               err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
+               goto out;
+       }
+
+       *key_scan_pos += strlen(map_opt);
+       err = config_map_indices_range_check(term, map, map_name);
+       if (err)
+               goto out;
+       *key_scan_pos -= strlen(map_opt);
+
+       for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
+               struct bpf_obj_config__map_func *func =
+                               &bpf_obj_config__map_funcs[i];
+
+               if (strcmp(map_opt, func->config_opt) == 0) {
+                       err = func->config_func(map, term, evlist);
+                       goto out;
+               }
+       }
+
+       pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
+       err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+out:
+       free(map_name);
+       if (!err)
+               *key_scan_pos += strlen(map_opt);
+       return err;
+}
+
+int bpf__config_obj(struct bpf_object *obj,
+                   struct parse_events_term *term,
+                   struct perf_evlist *evlist,
+                   int *error_pos)
+{
+       int key_scan_pos = 0;
+       int err;
+
+       if (!obj || !term || !term->config)
+               return -EINVAL;
+
+       if (!prefixcmp(term->config, "map:")) {
+               key_scan_pos = sizeof("map:") - 1;
+               err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
+               goto out;
+       }
+       err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+out:
+       if (error_pos)
+               *error_pos = key_scan_pos;
+       return err;
+
+}
+
+typedef int (*map_config_func_t)(const char *name, int map_fd,
+                                struct bpf_map_def *pdef,
+                                struct bpf_map_op *op,
+                                void *pkey, void *arg);
+
+static int
+foreach_key_array_all(map_config_func_t func,
+                     void *arg, const char *name,
+                     int map_fd, struct bpf_map_def *pdef,
+                     struct bpf_map_op *op)
+{
+       unsigned int i;
+       int err;
+
+       for (i = 0; i < pdef->max_entries; i++) {
+               err = func(name, map_fd, pdef, op, &i, arg);
+               if (err) {
+                       pr_debug("ERROR: failed to insert value to %s[%u]\n",
+                                name, i);
+                       return err;
+               }
+       }
+       return 0;
+}
+
+static int
+foreach_key_array_ranges(map_config_func_t func, void *arg,
+                        const char *name, int map_fd,
+                        struct bpf_map_def *pdef,
+                        struct bpf_map_op *op)
+{
+       unsigned int i, j;
+       int err;
+
+       for (i = 0; i < op->k.array.nr_ranges; i++) {
+               unsigned int start = op->k.array.ranges[i].start;
+               size_t length = op->k.array.ranges[i].length;
+
+               for (j = 0; j < length; j++) {
+                       unsigned int idx = start + j;
+
+                       err = func(name, map_fd, pdef, op, &idx, arg);
+                       if (err) {
+                               pr_debug("ERROR: failed to insert value to %s[%u]\n",
+                                        name, idx);
+                               return err;
+                       }
+               }
+       }
+       return 0;
+}
+
+static int
+bpf_map_config_foreach_key(struct bpf_map *map,
+                          map_config_func_t func,
+                          void *arg)
+{
+       int err, map_fd;
+       const char *name;
+       struct bpf_map_op *op;
+       struct bpf_map_def def;
+       struct bpf_map_priv *priv;
+
+       name = bpf_map__get_name(map);
+
+       err = bpf_map__get_private(map, (void **)&priv);
+       if (err) {
+               pr_debug("ERROR: failed to get private from map %s\n", name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+       if (!priv || list_empty(&priv->ops_list)) {
+               pr_debug("INFO: nothing to config for map %s\n", name);
+               return 0;
+       }
+
+       err = bpf_map__get_def(map, &def);
+       if (err) {
+               pr_debug("ERROR: failed to get definition from map %s\n", name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+       map_fd = bpf_map__get_fd(map);
+       if (map_fd < 0) {
+               pr_debug("ERROR: failed to get fd from map %s\n", name);
+               return map_fd;
+       }
+
+       list_for_each_entry(op, &priv->ops_list, list) {
+               switch (def.type) {
+               case BPF_MAP_TYPE_ARRAY:
+               case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+                       switch (op->key_type) {
+                       case BPF_MAP_KEY_ALL:
+                               err = foreach_key_array_all(func, arg, name,
+                                                           map_fd, &def, op);
+                               break;
+                       case BPF_MAP_KEY_RANGES:
+                               err = foreach_key_array_ranges(func, arg, name,
+                                                              map_fd, &def,
+                                                              op);
+                               break;
+                       default:
+                               pr_debug("ERROR: keytype for map '%s' invalid\n",
+                                        name);
+                               return -BPF_LOADER_ERRNO__INTERNAL;
+                       }
+                       if (err)
+                               return err;
+                       break;
+               default:
+                       pr_debug("ERROR: type of '%s' incorrect\n", name);
+                       return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+               }
+       }
+
+       return 0;
+}
+
+static int
+apply_config_value_for_key(int map_fd, void *pkey,
+                          size_t val_size, u64 val)
+{
+       int err = 0;
+
+       switch (val_size) {
+       case 1: {
+               u8 _val = (u8)(val);
+               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+               break;
+       }
+       case 2: {
+               u16 _val = (u16)(val);
+               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+               break;
+       }
+       case 4: {
+               u32 _val = (u32)(val);
+               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+               break;
+       }
+       case 8: {
+               err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
+               break;
+       }
+       default:
+               pr_debug("ERROR: invalid value size\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+       }
+       if (err && errno)
+               err = -errno;
+       return err;
+}
+
+static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+                          struct perf_evsel *evsel)
+{
+       struct xyarray *xy = evsel->fd;
+       struct perf_event_attr *attr;
+       unsigned int key, events;
+       bool check_pass = false;
+       int *evt_fd;
+       int err;
+
+       if (!xy) {
+               pr_debug("ERROR: evsel not ready for map %s\n", name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       if (xy->row_size / xy->entry_size != 1) {
+               pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+                        name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+       }
+
+       attr = &evsel->attr;
+       if (attr->inherit) {
+               pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+       }
+
+       if (perf_evsel__is_bpf_output(evsel))
+               check_pass = true;
+       if (attr->type == PERF_TYPE_RAW)
+               check_pass = true;
+       if (attr->type == PERF_TYPE_HARDWARE)
+               check_pass = true;
+       if (!check_pass) {
+               pr_debug("ERROR: Event type is wrong for map %s\n", name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+       }
+
+       events = xy->entries / (xy->row_size / xy->entry_size);
+       key = *((unsigned int *)pkey);
+       if (key >= events) {
+               pr_debug("ERROR: there is no event %d for map %s\n",
+                        key, name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+       }
+       evt_fd = xyarray__entry(xy, key, 0);
+       err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
+       if (err && errno)
+               err = -errno;
+       return err;
+}
+
+static int
+apply_obj_config_map_for_key(const char *name, int map_fd,
+                            struct bpf_map_def *pdef __maybe_unused,
+                            struct bpf_map_op *op,
+                            void *pkey, void *arg __maybe_unused)
+{
+       int err;
+
+       switch (op->op_type) {
+       case BPF_MAP_OP_SET_VALUE:
+               err = apply_config_value_for_key(map_fd, pkey,
+                                                pdef->value_size,
+                                                op->v.value);
+               break;
+       case BPF_MAP_OP_SET_EVSEL:
+               err = apply_config_evsel_for_key(name, map_fd, pkey,
+                                                op->v.evsel);
+               break;
+       default:
+               pr_debug("ERROR: unknown value type for '%s'\n", name);
+               err = -BPF_LOADER_ERRNO__INTERNAL;
+       }
+       return err;
+}
+
+static int
+apply_obj_config_map(struct bpf_map *map)
+{
+       return bpf_map_config_foreach_key(map,
+                                         apply_obj_config_map_for_key,
+                                         NULL);
+}
+
+static int
+apply_obj_config_object(struct bpf_object *obj)
+{
+       struct bpf_map *map;
+       int err;
+
+       bpf_map__for_each(map, obj) {
+               err = apply_obj_config_map(map);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+int bpf__apply_obj_config(void)
+{
+       struct bpf_object *obj, *tmp;
+       int err;
+
+       bpf_object__for_each_safe(obj, tmp) {
+               err = apply_obj_config_object(obj);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
 #define ERRNO_OFFSET(e)                ((e) - __BPF_LOADER_ERRNO__START)
 #define ERRCODE_OFFSET(c)      ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
 #define NR_ERRNO       (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -753,6 +1431,20 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
        [ERRCODE_OFFSET(PROLOGUE)]      = "Failed to generate prologue",
        [ERRCODE_OFFSET(PROLOGUE2BIG)]  = "Prologue too big for program",
        [ERRCODE_OFFSET(PROLOGUEOOB)]   = "Offset out of bound for prologue",
+       [ERRCODE_OFFSET(OBJCONF_OPT)]   = "Invalid object config option",
+       [ERRCODE_OFFSET(OBJCONF_CONF)]  = "Config value not set (missing '=')",
+       [ERRCODE_OFFSET(OBJCONF_MAP_OPT)]       = "Invalid object map config option",
+       [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)]  = "Target map doesn't exist",
+       [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)]     = "Incorrect value type for map",
+       [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)]      = "Incorrect map type",
+       [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)]   = "Incorrect map key size",
+       [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
+       [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)]     = "Event not found for map setting",
+       [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)]   = "Invalid map size for event setting",
+       [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)]    = "Event dimension too large",
+       [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)]    = "Doesn't support inherit event",
+       [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)]   = "Wrong event type for map",
+       [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)]   = "Index too large",
 };
 
 static int
@@ -872,3 +1564,29 @@ int bpf__strerror_load(struct bpf_object *obj,
        bpf__strerror_end(buf, size);
        return 0;
 }
+
+int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+                            struct parse_events_term *term __maybe_unused,
+                            struct perf_evlist *evlist __maybe_unused,
+                            int *error_pos __maybe_unused, int err,
+                            char *buf, size_t size)
+{
+       bpf__strerror_head(err, buf, size);
+       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
+                           "Can't use this config term with this map type");
+       bpf__strerror_end(buf, size);
+       return 0;
+}
+
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
+{
+       bpf__strerror_head(err, buf, size);
+       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+                           "Cannot set event to BPF map in multi-thread tracing");
+       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+                           "%s (Hint: use -i to turn off inherit)", emsg);
+       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+                           "Can only put raw, hardware and BPF output event into a BPF map");
+       bpf__strerror_end(buf, size);
+       return 0;
+}
index 6fdc0457e2b66ea3fedd8e26c4a0e8f6a4bcc6e2..be4311944e3daa2abc87cdd54be72ed7bce70682 100644 (file)
@@ -10,6 +10,7 @@
 #include <string.h>
 #include <bpf/libbpf.h>
 #include "probe-event.h"
+#include "evlist.h"
 #include "debug.h"
 
 enum bpf_loader_errno {
@@ -24,10 +25,25 @@ enum bpf_loader_errno {
        BPF_LOADER_ERRNO__PROLOGUE,     /* Failed to generate prologue */
        BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
        BPF_LOADER_ERRNO__PROLOGUEOOB,  /* Offset out of bound for prologue */
+       BPF_LOADER_ERRNO__OBJCONF_OPT,  /* Invalid object config option */
+       BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (missing '=') */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_OPT,      /* Invalid object map config option */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map doesn't exist */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE,    /* Incorrect value type for map */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,     /* Incorrect map type */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE,  /* Incorrect map key size */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT,    /* Event not found for map setting */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE,  /* Invalid map size for event setting */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,   /* Event dimension too large */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,   /* Doesn't support inherit event */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,  /* Wrong event type for map */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,  /* Index too large */
        __BPF_LOADER_ERRNO__END,
 };
 
 struct bpf_object;
+struct parse_events_term;
 #define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
 
 typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev,
@@ -53,6 +69,16 @@ int bpf__strerror_load(struct bpf_object *obj, int err,
                       char *buf, size_t size);
 int bpf__foreach_tev(struct bpf_object *obj,
                     bpf_prog_iter_callback_t func, void *arg);
+
+int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
+                   struct perf_evlist *evlist, int *error_pos);
+int bpf__strerror_config_obj(struct bpf_object *obj,
+                            struct parse_events_term *term,
+                            struct perf_evlist *evlist,
+                            int *error_pos, int err, char *buf,
+                            size_t size);
+int bpf__apply_obj_config(void);
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused,
@@ -83,6 +109,21 @@ bpf__foreach_tev(struct bpf_object *obj __maybe_unused,
        return 0;
 }
 
+static inline int
+bpf__config_obj(struct bpf_object *obj __maybe_unused,
+               struct parse_events_term *term __maybe_unused,
+               struct perf_evlist *evlist __maybe_unused,
+               int *error_pos __maybe_unused)
+{
+       return 0;
+}
+
+static inline int
+bpf__apply_obj_config(void)
+{
+       return 0;
+}
+
 static inline int
 __bpf_strerror(char *buf, size_t size)
 {
@@ -118,5 +159,23 @@ static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused,
 {
        return __bpf_strerror(buf, size);
 }
+
+static inline int
+bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+                        struct parse_events_term *term __maybe_unused,
+                        struct perf_evlist *evlist __maybe_unused,
+                        int *error_pos __maybe_unused,
+                        int err __maybe_unused,
+                        char *buf, size_t size)
+{
+       return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_apply_obj_config(int err __maybe_unused,
+                              char *buf, size_t size)
+{
+       return __bpf_strerror(buf, size);
+}
 #endif
 #endif
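
A hedged sketch, not part of the patch: one plausible way a perf tool could drive the object-config API declared above. The wrapper name apply_bpf_map_config() is hypothetical; the bpf__*() calls and their signatures are the ones from bpf-loader.h, and the code assumes it lives inside perf where these headers and pr_err() are available. Both steps are compressed into one function here for brevity; in practice the second step would run later, after the evsels have been created and opened.

	static int apply_bpf_map_config(struct bpf_object *obj,
					struct parse_events_term *term,
					struct perf_evlist *evlist)
	{
		char errbuf[BUFSIZ];
		int error_pos, err;

		/* remember one object config term (e.g. a map value/event setting) */
		err = bpf__config_obj(obj, term, evlist, &error_pos);
		if (err) {
			bpf__strerror_config_obj(obj, term, evlist, &error_pos,
						 err, errbuf, sizeof(errbuf));
			pr_err("%s\n", errbuf);
			return err;
		}

		/* later, once the evsels exist and are opened, push the stored ops */
		err = bpf__apply_obj_config();
		if (err) {
			bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
			pr_err("%s\n", errbuf);
		}
		return err;
	}
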
index 6a7e273a514a642b30a477c3119696dc7fa09975..f1479eeef7daf9ae2c386abcef7d079a0370c836 100644 (file)
@@ -166,6 +166,50 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
        return build_id__filename(build_id_hex, bf, size);
 }
 
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
+{
+       char *id_name, *ch;
+       struct stat sb;
+
+       id_name = dso__build_id_filename(dso, bf, size);
+       if (!id_name)
+               goto err;
+       if (access(id_name, F_OK))
+               goto err;
+       if (lstat(id_name, &sb) == -1)
+               goto err;
+       if ((size_t)sb.st_size > size - 1)
+               goto err;
+       if (readlink(id_name, bf, size - 1) < 0)
+               goto err;
+
+       bf[sb.st_size] = '\0';
+
+       /*
+        * link should be:
+        * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92
+        */
+       ch = strrchr(bf, '/');
+       if (!ch)
+               goto err;
+       if (ch - 3 < bf)
+               goto err;
+
+       return strncmp(".ko", ch - 3, 3) == 0;
+err:
+       /*
+        * If dso__build_id_filename() worked, get id_name again,
+        * because id_name points into bf, which has since been overwritten.
+        */
+       if (id_name)
+               id_name = dso__build_id_filename(dso, bf, size);
+       pr_err("Invalid build id: %s\n", id_name ? :
+                                        dso->long_name ? :
+                                        dso->short_name ? :
+                                        "[unknown]");
+       return false;
+}
+
 #define dsos__for_each_with_build_id(pos, head)        \
        list_for_each_entry(pos, head, node)    \
                if (!pos->has_build_id)         \
@@ -211,6 +255,7 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
        dsos__for_each_with_build_id(pos, &machine->dsos.head) {
                const char *name;
                size_t name_len;
+               bool in_kernel = false;
 
                if (!pos->hit)
                        continue;
@@ -227,8 +272,11 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
                        name_len = pos->long_name_len + 1;
                }
 
+               in_kernel = pos->kernel ||
+                               is_kernel_module(name,
+                                       PERF_RECORD_MISC_CPUMODE_UNKNOWN);
                err = write_buildid(name, name_len, pos->build_id, machine->pid,
-                                   pos->kernel ? kmisc : umisc, fd);
+                                   in_kernel ? kmisc : umisc, fd);
                if (err)
                        break;
        }
index 27a14a8a945beb8eec9cc389ca7d6545b44a2ac4..64af3e20610d7718ecacfad3810fc4bff4ee882d 100644 (file)
@@ -16,6 +16,7 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
 int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
                           struct perf_sample *sample, struct perf_evsel *evsel,
index 07b5d63947b11ec5f70029c6e41266e56b2bd8aa..3ca453f0c51f6f5b269f84270e78132e45f9768c 100644 (file)
@@ -23,6 +23,8 @@
 #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
 #define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
 
+extern const char *config_exclusive_filename;
+
 typedef int (*config_fn_t)(const char *, const char *, void *);
 extern int perf_default_config(const char *, const char *, void *);
 extern int perf_config(config_fn_t fn, void *);
@@ -31,6 +33,7 @@ extern u64 perf_config_u64(const char *, const char *);
 extern int perf_config_bool(const char *, const char *);
 extern int config_error_nonbool(const char *);
 extern const char *perf_config_dirname(const char *, const char *);
+extern const char *perf_etc_perfconfig(void);
 
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
index 53c43eb9489e4ba4a4e56a795bc05c2b85fb4cbc..24b4bd0d77545e7bb9f95e83222eb103c92f5151 100644 (file)
@@ -416,7 +416,7 @@ create_child(struct callchain_node *parent, bool inherit_children)
 /*
  * Fill the node with callchain values
  */
-static void
+static int
 fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 {
        struct callchain_cursor_node *cursor_node;
@@ -433,7 +433,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
                call = zalloc(sizeof(*call));
                if (!call) {
                        perror("not enough memory for the code path tree");
-                       return;
+                       return -1;
                }
                call->ip = cursor_node->ip;
                call->ms.sym = cursor_node->sym;
@@ -443,6 +443,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
                callchain_cursor_advance(cursor);
                cursor_node = callchain_cursor_current(cursor);
        }
+       return 0;
 }
 
 static struct callchain_node *
@@ -453,7 +454,19 @@ add_child(struct callchain_node *parent,
        struct callchain_node *new;
 
        new = create_child(parent, false);
-       fill_node(new, cursor);
+       if (new == NULL)
+               return NULL;
+
+       if (fill_node(new, cursor) < 0) {
+               struct callchain_list *call, *tmp;
+
+               list_for_each_entry_safe(call, tmp, &new->val, list) {
+                       list_del(&call->list);
+                       free(call);
+               }
+               free(new);
+               return NULL;
+       }
 
        new->children_hit = 0;
        new->hit = period;
@@ -462,16 +475,32 @@ add_child(struct callchain_node *parent,
        return new;
 }
 
-static s64 match_chain(struct callchain_cursor_node *node,
-                     struct callchain_list *cnode)
+enum match_result {
+       MATCH_ERROR  = -1,
+       MATCH_EQ,
+       MATCH_LT,
+       MATCH_GT,
+};
+
+static enum match_result match_chain(struct callchain_cursor_node *node,
+                                    struct callchain_list *cnode)
 {
        struct symbol *sym = node->sym;
+       u64 left, right;
 
        if (cnode->ms.sym && sym &&
-           callchain_param.key == CCKEY_FUNCTION)
-               return cnode->ms.sym->start - sym->start;
-       else
-               return cnode->ip - node->ip;
+           callchain_param.key == CCKEY_FUNCTION) {
+               left = cnode->ms.sym->start;
+               right = sym->start;
+       } else {
+               left = cnode->ip;
+               right = node->ip;
+       }
+
+       if (left == right)
+               return MATCH_EQ;
+
+       return left > right ? MATCH_GT : MATCH_LT;
 }
 
 /*
@@ -479,7 +508,7 @@ static s64 match_chain(struct callchain_cursor_node *node,
  * give a part of its callchain to the created child.
  * Then create another child to host the given callchain of new branch
  */
-static void
+static int
 split_add_child(struct callchain_node *parent,
                struct callchain_cursor *cursor,
                struct callchain_list *to_split,
@@ -491,6 +520,8 @@ split_add_child(struct callchain_node *parent,
 
        /* split */
        new = create_child(parent, true);
+       if (new == NULL)
+               return -1;
 
        /* split the callchain and move a part to the new child */
        old_tail = parent->val.prev;
@@ -524,6 +555,8 @@ split_add_child(struct callchain_node *parent,
 
                node = callchain_cursor_current(cursor);
                new = add_child(parent, cursor, period);
+               if (new == NULL)
+                       return -1;
 
                /*
                 * This is second child since we moved parent's children
@@ -534,7 +567,7 @@ split_add_child(struct callchain_node *parent,
                cnode = list_first_entry(&first->val, struct callchain_list,
                                         list);
 
-               if (match_chain(node, cnode) < 0)
+               if (match_chain(node, cnode) == MATCH_LT)
                        pp = &p->rb_left;
                else
                        pp = &p->rb_right;
@@ -545,14 +578,15 @@ split_add_child(struct callchain_node *parent,
                parent->hit = period;
                parent->count = 1;
        }
+       return 0;
 }
 
-static int
+static enum match_result
 append_chain(struct callchain_node *root,
             struct callchain_cursor *cursor,
             u64 period);
 
-static void
+static int
 append_chain_children(struct callchain_node *root,
                      struct callchain_cursor *cursor,
                      u64 period)
@@ -564,36 +598,42 @@ append_chain_children(struct callchain_node *root,
 
        node = callchain_cursor_current(cursor);
        if (!node)
-               return;
+               return -1;
 
        /* lookup in childrens */
        while (*p) {
-               s64 ret;
+               enum match_result ret;
 
                parent = *p;
                rnode = rb_entry(parent, struct callchain_node, rb_node_in);
 
                /* If at least first entry matches, rely to children */
                ret = append_chain(rnode, cursor, period);
-               if (ret == 0)
+               if (ret == MATCH_EQ)
                        goto inc_children_hit;
+               if (ret == MATCH_ERROR)
+                       return -1;
 
-               if (ret < 0)
+               if (ret == MATCH_LT)
                        p = &parent->rb_left;
                else
                        p = &parent->rb_right;
        }
        /* nothing in children, add to the current node */
        rnode = add_child(root, cursor, period);
+       if (rnode == NULL)
+               return -1;
+
        rb_link_node(&rnode->rb_node_in, parent, p);
        rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
        root->children_hit += period;
        root->children_count++;
+       return 0;
 }
 
-static int
+static enum match_result
 append_chain(struct callchain_node *root,
             struct callchain_cursor *cursor,
             u64 period)
@@ -602,7 +642,7 @@ append_chain(struct callchain_node *root,
        u64 start = cursor->pos;
        bool found = false;
        u64 matches;
-       int cmp = 0;
+       enum match_result cmp = MATCH_ERROR;
 
        /*
         * Lookup in the current node
@@ -618,7 +658,7 @@ append_chain(struct callchain_node *root,
                        break;
 
                cmp = match_chain(node, cnode);
-               if (cmp)
+               if (cmp != MATCH_EQ)
                        break;
 
                found = true;
@@ -628,7 +668,7 @@ append_chain(struct callchain_node *root,
 
        /* matches not, relay no the parent */
        if (!found) {
-               WARN_ONCE(!cmp, "Chain comparison error\n");
+               WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
                return cmp;
        }
 
@@ -636,21 +676,25 @@ append_chain(struct callchain_node *root,
 
        /* we match only a part of the node. Split it and add the new chain */
        if (matches < root->val_nr) {
-               split_add_child(root, cursor, cnode, start, matches, period);
-               return 0;
+               if (split_add_child(root, cursor, cnode, start, matches,
+                                   period) < 0)
+                       return MATCH_ERROR;
+
+               return MATCH_EQ;
        }
 
        /* we match 100% of the path, increment the hit */
        if (matches == root->val_nr && cursor->pos == cursor->nr) {
                root->hit += period;
                root->count++;
-               return 0;
+               return MATCH_EQ;
        }
 
        /* We match the node and still have a part remaining */
-       append_chain_children(root, cursor, period);
+       if (append_chain_children(root, cursor, period) < 0)
+               return MATCH_ERROR;
 
-       return 0;
+       return MATCH_EQ;
 }
 
 int callchain_append(struct callchain_root *root,
@@ -662,7 +706,8 @@ int callchain_append(struct callchain_root *root,
 
        callchain_cursor_commit(cursor);
 
-       append_chain_children(&root->node, cursor, period);
+       if (append_chain_children(&root->node, cursor, period) < 0)
+               return -1;
 
        if (cursor->nr > root->max_depth)
                root->max_depth = cursor->nr;
@@ -690,7 +735,8 @@ merge_chain_branch(struct callchain_cursor *cursor,
 
        if (src->hit) {
                callchain_cursor_commit(cursor);
-               append_chain_children(dst, cursor, src->hit);
+               if (append_chain_children(dst, cursor, src->hit) < 0)
+                       return -1;
        }
 
        n = rb_first(&src->rb_root_in);
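
Two things motivate the match_chain()/append_chain() rework visible above: a plain u64 subtraction truncated to s64 can report the wrong ordering for 64-bit addresses, and the MATCH_ERROR value gives the callers a way to propagate allocation failures. A minimal standalone illustration of the ordering pitfall (the addresses are made up):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t a = 0x1000;			/* hypothetical symbol start */
		uint64_t b = 0xffffffff90000000ULL;	/* hypothetical kernel address */
		int64_t old_cmp = (int64_t)(a - b);	/* u64 subtraction wraps to +0x70001000 */

		printf("old: a %s b\n", old_cmp < 0 ? "<" : ">=");	/* prints ">=", wrong */
		printf("new: %s\n", a < b ? "MATCH_LT" : "MATCH_GT");	/* prints "MATCH_LT" */
		return 0;
	}
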
index e5fb88bab9e1c416a3c53fe5036f8eb57f64a55d..43e84aa27e4a6489eafdebc3636d8bfb33334e3f 100644 (file)
@@ -32,14 +32,15 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
        return 0;
 }
 
-int perf_color_default_config(const char *var, const char *value, void *cb)
+int perf_color_default_config(const char *var, const char *value,
+                             void *cb __maybe_unused)
 {
        if (!strcmp(var, "color.ui")) {
                perf_use_color_default = perf_config_colorbool(var, value, -1);
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static int __color_vsnprintf(char *bf, size_t size, const char *color,
index d3e12e30e1d520f8073d1f01d17e1eaf4ac30254..4e727635476eadf5b105a7be4620f86a4bf46499 100644 (file)
@@ -26,7 +26,7 @@ static const char *config_file_name;
 static int config_linenr;
 static int config_file_eof;
 
-static const char *config_exclusive_filename;
+const char *config_exclusive_filename;
 
 static int get_next_char(void)
 {
@@ -434,7 +434,7 @@ static int perf_config_from_file(config_fn_t fn, const char *filename, void *dat
        return ret;
 }
 
-static const char *perf_etc_perfconfig(void)
+const char *perf_etc_perfconfig(void)
 {
        static const char *system_wide;
        if (!system_wide)
index fa935093a599429011fa214c24645fd0230e3b3a..9bcf2bed3a6d1b7369ee4deee7f38e9c4abab06d 100644 (file)
@@ -8,6 +8,10 @@
 #include <linux/bitmap.h>
 #include "asm/bug.h"
 
+static int max_cpu_num;
+static int max_node_num;
+static int *cpunode_map;
+
 static struct cpu_map *cpu_map__default_new(void)
 {
        struct cpu_map *cpus;
@@ -486,6 +490,32 @@ out:
                pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
 }
 
+int cpu__max_node(void)
+{
+       if (unlikely(!max_node_num))
+               set_max_node_num();
+
+       return max_node_num;
+}
+
+int cpu__max_cpu(void)
+{
+       if (unlikely(!max_cpu_num))
+               set_max_cpu_num();
+
+       return max_cpu_num;
+}
+
+int cpu__get_node(int cpu)
+{
+       if (unlikely(cpunode_map == NULL)) {
+               pr_debug("cpu_map not initialized\n");
+               return -1;
+       }
+
+       return cpunode_map[cpu];
+}
+
 static int init_cpunode_map(void)
 {
        int i;
index 71c41b9efabb3b38dd17a7374413985dfd944f77..81a2562aaa2b02261b88c960997238dc9e0925ab 100644 (file)
@@ -57,37 +57,11 @@ static inline bool cpu_map__empty(const struct cpu_map *map)
        return map ? map->map[0] == -1 : true;
 }
 
-int max_cpu_num;
-int max_node_num;
-int *cpunode_map;
-
 int cpu__setup_cpunode_map(void);
 
-static inline int cpu__max_node(void)
-{
-       if (unlikely(!max_node_num))
-               pr_debug("cpu_map not initialized\n");
-
-       return max_node_num;
-}
-
-static inline int cpu__max_cpu(void)
-{
-       if (unlikely(!max_cpu_num))
-               pr_debug("cpu_map not initialized\n");
-
-       return max_cpu_num;
-}
-
-static inline int cpu__get_node(int cpu)
-{
-       if (unlikely(cpunode_map == NULL)) {
-               pr_debug("cpu_map not initialized\n");
-               return -1;
-       }
-
-       return cpunode_map[cpu];
-}
+int cpu__max_node(void);
+int cpu__max_cpu(void);
+int cpu__get_node(int cpu);
 
 int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
                       int (*f)(struct cpu_map *map, int cpu, void *data),
index aada3ac5e891f85be1f4d25791d4ee17dd859077..d4a5a21c2a7e2e47596444665b5f5828874da337 100644 (file)
@@ -31,9 +31,18 @@ unsigned char sane_ctype[256] = {
 };
 
 const char *graph_line =
+       "_____________________________________________________________________"
        "_____________________________________________________________________"
        "_____________________________________________________________________";
 const char *graph_dotted_line =
        "---------------------------------------------------------------------"
        "---------------------------------------------------------------------"
        "---------------------------------------------------------------------";
+const char *spaces =
+       "                                                                     "
+       "                                                                     "
+       "                                                                     ";
+const char *dots =
+       "....................................................................."
+       "....................................................................."
+       ".....................................................................";
index 34cd1e4039d35e05be0460ff0a259e7d8b2bb80a..811af89ce0bb8f37b99898dc8b2a2d5f6e183d26 100644 (file)
@@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw,
        return ret;
 }
 
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+                     struct bt_ctf_event *event,
+                     struct perf_sample *sample)
+{
+       struct bt_ctf_field_type *len_type, *seq_type;
+       struct bt_ctf_field *len_field, *seq_field;
+       unsigned int raw_size = sample->raw_size;
+       unsigned int nr_elements = raw_size / sizeof(u32);
+       unsigned int i;
+       int ret;
+
+       if (nr_elements * sizeof(u32) != raw_size)
+               pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+                          raw_size, nr_elements * sizeof(u32) - raw_size);
+
+       len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+       len_field = bt_ctf_field_create(len_type);
+       if (!len_field) {
+               pr_err("failed to create 'raw_len' for bpf output event\n");
+               ret = -1;
+               goto put_len_type;
+       }
+
+       ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+       if (ret) {
+               pr_err("failed to set field value for raw_len\n");
+               goto put_len_field;
+       }
+       ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+       if (ret) {
+               pr_err("failed to set payload to raw_len\n");
+               goto put_len_field;
+       }
+
+       seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+       seq_field = bt_ctf_field_create(seq_type);
+       if (!seq_field) {
+               pr_err("failed to create 'raw_data' for bpf output event\n");
+               ret = -1;
+               goto put_seq_type;
+       }
+
+       ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+       if (ret) {
+               pr_err("failed to set length of 'raw_data'\n");
+               goto put_seq_field;
+       }
+
+       for (i = 0; i < nr_elements; i++) {
+               struct bt_ctf_field *elem_field =
+                       bt_ctf_field_sequence_get_field(seq_field, i);
+
+               ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+                               ((u32 *)(sample->raw_data))[i]);
+
+               bt_ctf_field_put(elem_field);
+               if (ret) {
+                       pr_err("failed to set raw_data[%d]\n", i);
+                       goto put_seq_field;
+               }
+       }
+
+       ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+       if (ret)
+               pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+       bt_ctf_field_put(seq_field);
+put_seq_type:
+       bt_ctf_field_type_put(seq_type);
+put_len_field:
+       bt_ctf_field_put(len_field);
+put_len_type:
+       bt_ctf_field_type_put(len_type);
+       return ret;
+}
+
 static int add_generic_values(struct ctf_writer *cw,
                              struct bt_ctf_event *event,
                              struct perf_evsel *evsel,
@@ -597,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool,
                        return -1;
        }
 
+       if (perf_evsel__is_bpf_output(evsel)) {
+               ret = add_bpf_output_values(event_class, event, sample);
+               if (ret)
+                       return -1;
+       }
+
        cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
        if (cs) {
                if (is_flush_needed(cs))
@@ -744,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw,
        return ret;
 }
 
+static int add_bpf_output_types(struct ctf_writer *cw,
+                               struct bt_ctf_event_class *class)
+{
+       struct bt_ctf_field_type *len_type = cw->data.u32;
+       struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+       struct bt_ctf_field_type *seq_type;
+       int ret;
+
+       ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+       if (ret)
+               return ret;
+
+       seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+       if (!seq_type)
+               return -1;
+
+       return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
 static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
                             struct bt_ctf_event_class *event_class)
 {
@@ -755,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
         *                              ctf event header
         *   PERF_SAMPLE_READ         - TODO
         *   PERF_SAMPLE_CALLCHAIN    - TODO
-        *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+        *   PERF_SAMPLE_RAW          - tracepoint fields and BPF output
+        *                              are handled separately
         *   PERF_SAMPLE_BRANCH_STACK - TODO
         *   PERF_SAMPLE_REGS_USER    - TODO
         *   PERF_SAMPLE_STACK_USER   - TODO
@@ -824,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel)
                        goto err;
        }
 
+       if (perf_evsel__is_bpf_output(evsel)) {
+               ret = add_bpf_output_types(cw, event_class);
+               if (ret)
+                       goto err;
+       }
+
        ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
        if (ret) {
                pr("Failed to add event class into stream.\n");
@@ -858,6 +968,23 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session)
        return 0;
 }
 
+static void cleanup_events(struct perf_session *session)
+{
+       struct perf_evlist *evlist = session->evlist;
+       struct perf_evsel *evsel;
+
+       evlist__for_each(evlist, evsel) {
+               struct evsel_priv *priv;
+
+               priv = evsel->priv;
+               bt_ctf_event_class_put(priv->event_class);
+               zfree(&evsel->priv);
+       }
+
+       perf_evlist__delete(evlist);
+       session->evlist = NULL;
+}
+
 static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
 {
        struct ctf_stream **stream;
@@ -953,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
            bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
                goto err;
 
+#if __BYTE_ORDER == __BIG_ENDIAN
+       bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+       bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
        pr2("Created type: INTEGER %d-bit %ssigned %s\n",
            size, sign ? "un" : "", hex ? "hex" : "");
        return type;
@@ -1100,7 +1233,7 @@ static int convert__config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 int bt_convert__perf2ctf(const char *input, const char *path, bool force)
@@ -1171,6 +1304,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
                (double) c.events_size / 1024.0 / 1024.0,
                c.events_count);
 
+       cleanup_events(session);
        perf_session__delete(session);
        ctf_writer__cleanup(cw);
 
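
As the hunks above show, a BPF output sample is converted to CTF as a u32 "raw_len" field plus a "raw_data" sequence of hex u32 words: the raw payload is sliced into 32-bit words and a warning is printed when a trailing partial word has to be dropped. A minimal standalone sketch of that arithmetic (the sizes are illustrative):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int raw_size = 13;	/* illustrative sample->raw_size in bytes */
		unsigned int nr_elements = raw_size / sizeof(uint32_t);

		/* raw_len becomes 3; the 13th byte cannot fill a u32 and is dropped */
		printf("raw_len = %u words, %u trailing byte(s) dropped\n",
		       nr_elements,
		       raw_size - nr_elements * (unsigned int)sizeof(uint32_t));
		return 0;
	}
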
index 86d9c73025983d0132a4dddc8607161d713bfbf2..8c4212abd19b48b9e84ca1f9978f06561c05576a 100644 (file)
@@ -5,6 +5,7 @@
 #include <string.h>
 #include <stdarg.h>
 #include <stdio.h>
+#include <api/debug.h>
 
 #include "cache.h"
 #include "color.h"
@@ -22,7 +23,7 @@ int debug_ordered_events;
 static int redirect_to_stderr;
 int debug_data_convert;
 
-static int _eprintf(int level, int var, const char *fmt, va_list args)
+int veprintf(int level, int var, const char *fmt, va_list args)
 {
        int ret = 0;
 
@@ -36,24 +37,19 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
        return ret;
 }
 
-int veprintf(int level, int var, const char *fmt, va_list args)
-{
-       return _eprintf(level, var, fmt, args);
-}
-
 int eprintf(int level, int var, const char *fmt, ...)
 {
        va_list args;
        int ret;
 
        va_start(args, fmt);
-       ret = _eprintf(level, var, fmt, args);
+       ret = veprintf(level, var, fmt, args);
        va_end(args);
 
        return ret;
 }
 
-static int __eprintf_time(u64 t, const char *fmt, va_list args)
+static int veprintf_time(u64 t, const char *fmt, va_list args)
 {
        int ret = 0;
        u64 secs, usecs, nsecs = t;
@@ -75,7 +71,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...)
 
        if (var >= level) {
                va_start(args, fmt);
-               ret = __eprintf_time(t, fmt, args);
+               ret = veprintf_time(t, fmt, args);
                va_end(args);
        }
 
@@ -91,7 +87,7 @@ void pr_stat(const char *fmt, ...)
        va_list args;
 
        va_start(args, fmt);
-       _eprintf(1, verbose, fmt, args);
+       veprintf(1, verbose, fmt, args);
        va_end(args);
        eprintf(1, verbose, "\n");
 }
@@ -110,40 +106,61 @@ int dump_printf(const char *fmt, ...)
        return ret;
 }
 
+static void trace_event_printer(enum binary_printer_ops op,
+                               unsigned int val, void *extra)
+{
+       const char *color = PERF_COLOR_BLUE;
+       union perf_event *event = (union perf_event *)extra;
+       unsigned char ch = (unsigned char)val;
+
+       switch (op) {
+       case BINARY_PRINT_DATA_BEGIN:
+               printf(".");
+               color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+                               event->header.size);
+               break;
+       case BINARY_PRINT_LINE_BEGIN:
+               printf(".");
+               break;
+       case BINARY_PRINT_ADDR:
+               color_fprintf(stdout, color, "  %04x: ", val);
+               break;
+       case BINARY_PRINT_NUM_DATA:
+               color_fprintf(stdout, color, " %02x", val);
+               break;
+       case BINARY_PRINT_NUM_PAD:
+               color_fprintf(stdout, color, "   ");
+               break;
+       case BINARY_PRINT_SEP:
+               color_fprintf(stdout, color, "  ");
+               break;
+       case BINARY_PRINT_CHAR_DATA:
+               color_fprintf(stdout, color, "%c",
+                             isprint(ch) ? ch : '.');
+               break;
+       case BINARY_PRINT_CHAR_PAD:
+               color_fprintf(stdout, color, " ");
+               break;
+       case BINARY_PRINT_LINE_END:
+               color_fprintf(stdout, color, "\n");
+               break;
+       case BINARY_PRINT_DATA_END:
+               printf("\n");
+               break;
+       default:
+               break;
+       }
+}
+
 void trace_event(union perf_event *event)
 {
        unsigned char *raw_event = (void *)event;
-       const char *color = PERF_COLOR_BLUE;
-       int i, j;
 
        if (!dump_trace)
                return;
 
-       printf(".");
-       color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
-                     event->header.size);
-
-       for (i = 0; i < event->header.size; i++) {
-               if ((i & 15) == 0) {
-                       printf(".");
-                       color_fprintf(stdout, color, "  %04x: ", i);
-               }
-
-               color_fprintf(stdout, color, " %02x", raw_event[i]);
-
-               if (((i & 15) == 15) || i == event->header.size-1) {
-                       color_fprintf(stdout, color, "  ");
-                       for (j = 0; j < 15-(i & 15); j++)
-                               color_fprintf(stdout, color, "   ");
-                       for (j = i & ~15; j <= i; j++) {
-                               color_fprintf(stdout, color, "%c",
-                                             isprint(raw_event[j]) ?
-                                             raw_event[j] : '.');
-                       }
-                       color_fprintf(stdout, color, "\n");
-               }
-       }
-       printf(".\n");
+       print_binary(raw_event, event->header.size, 16,
+                    trace_event_printer, event);
 }
 
 static struct debug_variable {
@@ -192,3 +209,23 @@ int perf_debug_option(const char *str)
        free(s);
        return 0;
 }
+
+#define DEBUG_WRAPPER(__n, __l)                                \
+static int pr_ ## __n ## _wrapper(const char *fmt, ...)        \
+{                                                      \
+       va_list args;                                   \
+       int ret;                                        \
+                                                       \
+       va_start(args, fmt);                            \
+       ret = veprintf(__l, verbose, fmt, args);        \
+       va_end(args);                                   \
+       return ret;                                     \
+}
+
+DEBUG_WRAPPER(warning, 0);
+DEBUG_WRAPPER(debug, 1);
+
+void perf_debug_setup(void)
+{
+       libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
+}
index 8b9a088c32ab4e330ece47ae91397bdd87bdd0ee..14bafda79edaeba1bbd3e007b5faa8607d0570bf 100644 (file)
@@ -53,5 +53,6 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__(
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
+void perf_debug_setup(void);
 
 #endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
new file mode 100644 (file)
index 0000000..3e6062a
--- /dev/null
@@ -0,0 +1,199 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+#include "symbol.h"
+
+#include "demangle-java.h"
+
+enum {
+       MODE_PREFIX = 0,
+       MODE_CLASS  = 1,
+       MODE_FUNC   = 2,
+       MODE_TYPE   = 3,
+       MODE_CTYPE  = 3, /* class arg */
+};
+
+#define BASE_ENT(c, n) [c - 'A']=n
+static const char *base_types['Z' - 'A' + 1] = {
+       BASE_ENT('B', "byte" ),
+       BASE_ENT('C', "char" ),
+       BASE_ENT('D', "double" ),
+       BASE_ENT('F', "float" ),
+       BASE_ENT('I', "int" ),
+       BASE_ENT('J', "long" ),
+       BASE_ENT('S', "short" ),
+       BASE_ENT('Z', "bool" ),
+};
+
+/*
+ * Demangle a Java symbol between the str and end positions, storing
+ * up to maxlen characters into buf. The parser starts in mode.
+ *
+ * Use MODE_PREFIX to process entire prototype till end position
+ * Use MODE_TYPE to process return type if str starts on return type char
+ *
+ *  Return:
+ *     success: buf
+ *     error  : NULL
+ */
+static char *
+__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode)
+{
+       int rlen = 0;
+       int array = 0;
+       int narg = 0;
+       const char *q;
+
+       if (!end)
+               end = str + strlen(str);
+
+       for (q = str; q != end; q++) {
+
+               if (rlen == (maxlen - 1))
+                       break;
+
+               switch (*q) {
+               case 'L':
+                       if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
+                               if (mode == MODE_CTYPE) {
+                                       if (narg)
+                                               rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+                                       narg++;
+                               }
+                               rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
+                               if (mode == MODE_PREFIX)
+                                       mode = MODE_CLASS;
+                       } else
+                               buf[rlen++] = *q;
+                       break;
+               case 'B':
+               case 'C':
+               case 'D':
+               case 'F':
+               case 'I':
+               case 'J':
+               case 'S':
+               case 'Z':
+                       if (mode == MODE_TYPE) {
+                               if (narg)
+                                       rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+                               rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']);
+                               while (array--)
+                                       rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+                               array = 0;
+                               narg++;
+                       } else
+                               buf[rlen++] = *q;
+                       break;
+               case 'V':
+                       if (mode == MODE_TYPE) {
+                               rlen += scnprintf(buf + rlen, maxlen - rlen, "void");
+                               while (array--)
+                                       rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+                               array = 0;
+                       } else
+                               buf[rlen++] = *q;
+                       break;
+               case '[':
+                       if (mode != MODE_TYPE)
+                               goto error;
+                       array++;
+                       break;
+               case '(':
+                       if (mode != MODE_FUNC)
+                               goto error;
+                       buf[rlen++] = *q;
+                       mode = MODE_TYPE;
+                       break;
+               case ')':
+                       if (mode != MODE_TYPE)
+                               goto error;
+                       buf[rlen++] = *q;
+                       narg = 0;
+                       break;
+               case ';':
+                       if (mode != MODE_CLASS && mode != MODE_CTYPE)
+                               goto error;
+                       /* safe because at least one other char to process */
+                       if (isalpha(*(q + 1)))
+                               rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+                       if (mode == MODE_CLASS)
+                               mode = MODE_FUNC;
+                       else if (mode == MODE_CTYPE)
+                               mode = MODE_TYPE;
+                       break;
+               case '/':
+                       if (mode != MODE_CLASS && mode != MODE_CTYPE)
+                               goto error;
+                       rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+                       break;
+               default :
+                       buf[rlen++] = *q;
+               }
+       }
+       buf[rlen] = '\0';
+       return buf;
+error:
+       return NULL;
+}
+
+/*
+ * Demangle Java function signature (openJDK, not GCJ)
+ * input:
+ *     str: string to parse. String is not modified
+ *    flags: combination of JAVA_DEMANGLE_* flags to modify demangling
+ * return:
+ *     if input can be demangled, then a newly allocated string is returned.
+ *     if input cannot be demangled, then NULL is returned
+ *
+ * Note: caller is responsible for freeing demangled string
+ */
+char *
+java_demangle_sym(const char *str, int flags)
+{
+       char *buf, *ptr;
+       char *p;
+       size_t len, l1 = 0;
+
+       if (!str)
+               return NULL;
+
+       /* find start of return type */
+       p = strrchr(str, ')');
+       if (!p)
+               return NULL;
+
+       /*
+        * expansion factor estimated at 3x
+        */
+       len = strlen(str) * 3 + 1;
+       buf = malloc(len);
+       if (!buf)
+               return NULL;
+
+       buf[0] = '\0';
+       if (!(flags & JAVA_DEMANGLE_NORET)) {
+               /*
+                * get return type first
+                */
+               ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE);
+               if (!ptr)
+                       goto error;
+
+               /* add space between return type and function prototype */
+               l1 = strlen(buf);
+               buf[l1++] = ' ';
+       }
+
+       /* process function up to return type */
+       ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX);
+       if (!ptr)
+               goto error;
+
+       return buf;
+error:
+       free(buf);
+       return NULL;
+}
diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h
new file mode 100644 (file)
index 0000000..a981c1f
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __PERF_DEMANGLE_JAVA
+#define __PERF_DEMANGLE_JAVA 1
+/*
+ * demangle function flags
+ */
+#define JAVA_DEMANGLE_NORET    0x1 /* do not process return type */
+
+char * java_demangle_sym(const char *str, int flags);
+
+#endif /* __PERF_DEMANGLE_JAVA */
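
A hedged usage sketch for the demangler added above, assuming it is linked together with perf's util objects (java_demangle_sym() relies on helpers such as scnprintf()); the mangled string is only illustrative of the openJDK-style signatures the parser expects:

	#include <stdio.h>
	#include <stdlib.h>
	#include "demangle-java.h"

	int main(void)
	{
		const char *mangled = "Ljava/lang/String;equals(Ljava/lang/Object;)Z";
		char *pretty = java_demangle_sym(mangled, JAVA_DEMANGLE_NORET);

		if (pretty) {
			/* expected along the lines of
			 * "class java.lang.String.equals(class java.lang.Object)" */
			printf("%s\n", pretty);
			free(pretty);	/* the caller owns the demangled string */
		}
		return 0;
	}
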
index e8e9a9dbf5e395a20d589c80b253c2d869b927c1..8e6395439ca0830cefaaa5b6dbe905ae2af93011 100644 (file)
@@ -52,6 +52,11 @@ int dso__read_binary_type_filename(const struct dso *dso,
                        debuglink--;
                if (*debuglink == '/')
                        debuglink++;
+
+               ret = -1;
+               if (!is_regular_file(filename))
+                       break;
+
                ret = filename__read_debuglink(filename, debuglink,
                                               size - (debuglink - filename));
                }
index 7dd5939dea2e58385bd374a3e59aecf766e799c1..49a11d9d8b8f050efa48715cfba9e54731f63eef 100644 (file)
@@ -6,6 +6,8 @@ struct perf_env perf_env;
 
 void perf_env__exit(struct perf_env *env)
 {
+       int i;
+
        zfree(&env->hostname);
        zfree(&env->os_release);
        zfree(&env->version);
@@ -19,6 +21,10 @@ void perf_env__exit(struct perf_env *env)
        zfree(&env->numa_nodes);
        zfree(&env->pmu_mappings);
        zfree(&env->cpu);
+
+       for (i = 0; i < env->caches_cnt; i++)
+               cpu_cache_level__free(&env->caches[i]);
+       zfree(&env->caches);
 }
 
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
@@ -75,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
        env->nr_cpus_avail = nr_cpus;
        return 0;
 }
+
+void cpu_cache_level__free(struct cpu_cache_level *cache)
+{
+       free(cache->type);
+       free(cache->map);
+       free(cache->size);
+}
index 0132b9557c02b56f7f31e3e51981d55b0d1027bd..56cffb60a0b42e456a11fb2bd489d74a69893b0c 100644 (file)
@@ -1,11 +1,23 @@
 #ifndef __PERF_ENV_H
 #define __PERF_ENV_H
 
+#include <linux/types.h>
+
 struct cpu_topology_map {
        int     socket_id;
        int     core_id;
 };
 
+struct cpu_cache_level {
+       u32     level;
+       u32     line_size;
+       u32     sets;
+       u32     ways;
+       char    *type;
+       char    *size;
+       char    *map;
+};
+
 struct perf_env {
        char                    *hostname;
        char                    *os_release;
@@ -31,6 +43,8 @@ struct perf_env {
        char                    *numa_nodes;
        char                    *pmu_mappings;
        struct cpu_topology_map *cpu;
+       struct cpu_cache_level  *caches;
+       int                      caches_cnt;
 };
 
 extern struct perf_env perf_env;
@@ -41,4 +55,5 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
 
 int perf_env__read_cpu_topology_map(struct perf_env *env);
 
+void cpu_cache_level__free(struct cpu_cache_level *cache);
 #endif /* __PERF_ENV_H */
index 85155e91b61ba9b70feaf867b109270df498b9b8..7bad5c3fa7b7175862f5e3bdf38ffca0e1b14ee1 100644 (file)
@@ -282,7 +282,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                strcpy(execname, "");
 
                /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+               n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n",
                       &event->mmap2.start, &event->mmap2.len, prot,
                       &event->mmap2.pgoff, &event->mmap2.maj,
                       &event->mmap2.min,
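
The format string change above swaps the final %s conversion for %[^\n] so that mmap filenames containing spaces are kept whole instead of being cut at the first blank. A small standalone illustration with a made-up /proc/<pid>/maps line:

	#include <stdio.h>

	int main(void)
	{
		const char *line =
			"00400000-0040c000 r-xp 00000000 fd:01 41038  /opt/app/name with space";
		char with_s[64], with_seq[64];

		/* old conversion: %s stops at the first blank inside the path */
		sscanf(line, "%*x-%*x %*s %*x %*x:%*x %*u %63s", with_s);
		/* new conversion: %[^\n] keeps the rest of the line */
		sscanf(line, "%*x-%*x %*s %*x %*x:%*x %*u %63[^\n]", with_seq);

		printf("old: '%s'\nnew: '%s'\n", with_s, with_seq);
		return 0;
	}
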
index d81f13de24769963f6c32c3f809a714e6868dc5e..86a03836a83fc3f8ee8648d83317b8d91e3f48d8 100644 (file)
@@ -1181,12 +1181,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
         */
        if (cpus != evlist->cpus) {
                cpu_map__put(evlist->cpus);
-               evlist->cpus = cpus;
+               evlist->cpus = cpu_map__get(cpus);
        }
 
        if (threads != evlist->threads) {
                thread_map__put(evlist->threads);
-               evlist->threads = threads;
+               evlist->threads = thread_map__get(threads);
        }
 
        perf_evlist__propagate_maps(evlist);
@@ -1223,6 +1223,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
        int err = 0;
 
        evlist__for_each(evlist, evsel) {
+               if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+                       continue;
+
                err = perf_evsel__set_filter(evsel, filter);
                if (err)
                        break;
@@ -1624,7 +1627,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
        return printed + fprintf(fp, "\n");
 }
 
-int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
+int perf_evlist__strerror_open(struct perf_evlist *evlist,
                               int err, char *buf, size_t size)
 {
        int printed, value;
@@ -1652,7 +1655,25 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
                                    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
                                    "Hint:\tThe current value is %d.", value);
                break;
+       case EINVAL: {
+               struct perf_evsel *first = perf_evlist__first(evlist);
+               int max_freq;
+
+               if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
+                       goto out_default;
+
+               if (first->attr.sample_freq < (u64)max_freq)
+                       goto out_default;
+
+               printed = scnprintf(buf, size,
+                                   "Error:\t%s.\n"
+                                   "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
+                                   "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
+                                   emsg, max_freq, first->attr.sample_freq);
+               break;
+       }
        default:
+out_default:
                scnprintf(buf, size, "%s", emsg);
                break;
        }
@@ -1723,3 +1744,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
 
        tracking_evsel->tracking = true;
 }
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
+                              const char *str)
+{
+       struct perf_evsel *evsel;
+
+       evlist__for_each(evlist, evsel) {
+               if (!evsel->name)
+                       continue;
+               if (strcmp(str, evsel->name) == 0)
+                       return evsel;
+       }
+
+       return NULL;
+}
index 7c4d9a2067769b0e3de2d96a11e6f7dd7d62106a..a0d15221db6e878412126f1ec5de08cb030f132f 100644 (file)
@@ -294,4 +294,7 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
                                     struct perf_evsel *tracking_evsel);
 
 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
 #endif /* __PERF_EVLIST_H */
index cdbaf9b51e428ad4537a38ded24ee07bec54e90c..0902fe418754ec0c3149d3daeafc122fee65e243 100644 (file)
@@ -225,6 +225,11 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
        if (evsel != NULL)
                perf_evsel__init(evsel, attr, idx);
 
+       if (perf_evsel__is_bpf_output(evsel)) {
+               evsel->attr.sample_type |= PERF_SAMPLE_RAW;
+               evsel->attr.sample_period = 1;
+       }
+
        return evsel;
 }
 
@@ -898,6 +903,16 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
        if (evsel->precise_max)
                perf_event_attr__set_max_precise_ip(attr);
 
+       if (opts->all_user) {
+               attr->exclude_kernel = 1;
+               attr->exclude_user   = 0;
+       }
+
+       if (opts->all_kernel) {
+               attr->exclude_kernel = 0;
+               attr->exclude_user   = 1;
+       }
+
        /*
         * Apply event specific term settings,
         * it overloads any global configuration.
@@ -2362,12 +2377,15 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
        case EPERM:
        case EACCES:
                return scnprintf(msg, size,
-                "You may not have permission to collect %sstats.\n"
-                "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
-                " -1 - Not paranoid at all\n"
-                "  0 - Disallow raw tracepoint access for unpriv\n"
-                "  1 - Disallow cpu events for unpriv\n"
-                "  2 - Disallow kernel profiling for unpriv",
+                "You may not have permission to collect %sstats.\n\n"
+                "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
+                "which controls use of the performance events system by\n"
+                "unprivileged users (without CAP_SYS_ADMIN).\n\n"
+                "The default value is 1:\n\n"
+                "  -1: Allow use of (almost) all events by all users\n"
+                ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n"
+                ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
+                ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN",
                                 target->system_wide ? "system-wide " : "");
        case ENOENT:
                return scnprintf(msg, size, "The %s event is not supported.",
index 8e75434bd01c671a8ed2e0c0b03a139212d7001c..501ea6e565f13a4a4817947957c79f15d805d130 100644 (file)
@@ -93,10 +93,8 @@ struct perf_evsel {
        const char              *unit;
        struct event_format     *tp_format;
        off_t                   id_offset;
-       union {
-               void            *priv;
-               u64             db_id;
-       };
+       void                    *priv;
+       u64                     db_id;
        struct cgroup_sel       *cgrp;
        void                    *handler;
        struct cpu_map          *cpus;
@@ -364,6 +362,14 @@ static inline bool perf_evsel__is_function_event(struct perf_evsel *evsel)
 #undef FUNCTION_EVENT
 }
 
+static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel)
+{
+       struct perf_event_attr *attr = &evsel->attr;
+
+       return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
+               (attr->type == PERF_TYPE_SOFTWARE);
+}
+
 struct perf_attr_details {
        bool freq;
        bool verbose;
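
perf_evsel__is_bpf_output() above simply matches software events whose config is PERF_COUNT_SW_BPF_OUTPUT; perf_evsel__new_idx() (earlier in this patch) then forces PERF_SAMPLE_RAW and a sample period of 1 for them. A minimal standalone check of the same condition, assuming a uapi header recent enough to define PERF_COUNT_SW_BPF_OUTPUT:

	#include <linux/perf_event.h>
	#include <stdio.h>

	int main(void)
	{
		struct perf_event_attr attr = {
			.type   = PERF_TYPE_SOFTWARE,
			.config = PERF_COUNT_SW_BPF_OUTPUT,
		};

		/* the same test perf_evsel__is_bpf_output() applies to evsel->attr */
		int is_bpf_output = attr.type == PERF_TYPE_SOFTWARE &&
				    attr.config == PERF_COUNT_SW_BPF_OUTPUT;

		printf("bpf output event: %s\n", is_bpf_output ? "yes" : "no");
		return 0;
	}
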
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
new file mode 100644 (file)
index 0000000..c1ef805
--- /dev/null
@@ -0,0 +1,449 @@
+/*
+ * genelf.c
+ * Copyright (C) 2014, Google, Inc
+ *
+ * Contributed by:
+ *     Stephane Eranian <eranian@gmail.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define JVMTI
+
+#define BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef HAVE_LIBCRYPTO
+
+#define BUILD_ID_MD5
+#undef BUILD_ID_SHA    /* does not seem to work well when linked with Java */
+#undef BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef BUILD_ID_SHA
+#include <openssl/sha.h>
+#endif
+
+#ifdef BUILD_ID_MD5
+#include <openssl/md5.h>
+#endif
+#endif
+
+
+typedef struct {
+  unsigned int namesz;  /* Size of entry's owner string */
+  unsigned int descsz;  /* Size of the note descriptor */
+  unsigned int type;    /* Interpretation of the descriptor */
+  char         name[0]; /* Start of the name+desc data */
+} Elf_Note;
+
+struct options {
+       char *output;
+       int fd;
+};
+
+static char shd_string_table[] = {
+       0,
+       '.', 't', 'e', 'x', 't', 0,                     /*  1 */
+       '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /*  7 */
+       '.', 's', 'y', 'm', 't', 'a', 'b', 0,           /* 17 */
+       '.', 's', 't', 'r', 't', 'a', 'b', 0,           /* 25 */
+       '.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */
+       '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
+       '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
+       '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+};
+
+static struct buildid_note {
+       Elf_Note desc;          /* descsz: size of build-id, must be multiple of 4 */
+       char     name[4];       /* GNU\0 */
+       char     build_id[20];
+} bnote;
+
+static Elf_Sym symtab[]={
+       /* symbol 0 MUST be the undefined symbol */
+       { .st_name  = 0, /* index in sym_string table */
+         .st_info  = ELF_ST_TYPE(STT_NOTYPE),
+         .st_shndx = 0, /* for now */
+         .st_value = 0x0,
+         .st_other = ELF_ST_VIS(STV_DEFAULT),
+         .st_size  = 0,
+       },
+       { .st_name  = 1, /* index in sym_string table */
+         .st_info  = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC),
+         .st_shndx = 1,
+         .st_value = 0, /* for now */
+         .st_other = ELF_ST_VIS(STV_DEFAULT),
+         .st_size  = 0, /* for now */
+       }
+};
+
+#ifdef BUILD_ID_URANDOM
+static void
+gen_build_id(struct buildid_note *note,
+            unsigned long load_addr __maybe_unused,
+            const void *code __maybe_unused,
+            size_t csize __maybe_unused)
+{
+       int fd;
+       size_t sz = sizeof(note->build_id);
+       ssize_t sret;
+
+       fd = open("/dev/urandom", O_RDONLY);
+       if (fd == -1)
+               err(1, "cannot access /dev/urandom for buildid");
+
+       sret = read(fd, note->build_id, sz);
+
+       close(fd);
+
+       if (sret != (ssize_t)sz)
+               memset(note->build_id, 0, sz);
+}
+#endif
+
+#ifdef BUILD_ID_SHA
+static void
+gen_build_id(struct buildid_note *note,
+            unsigned long load_addr __maybe_unused,
+            const void *code,
+            size_t csize)
+{
+       if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
+               errx(1, "build_id too small for SHA1");
+
+       SHA1(code, csize, (unsigned char *)note->build_id);
+}
+#endif
+
+#ifdef BUILD_ID_MD5
+static void
+gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
+{
+       MD5_CTX context;
+
+       if (sizeof(note->build_id) < 16)
+               errx(1, "build_id too small for MD5");
+
+       MD5_Init(&context);
+       MD5_Update(&context, &load_addr, sizeof(load_addr));
+       MD5_Update(&context, code, csize);
+       MD5_Final((unsigned char *)note->build_id, &context);
+}
+#endif
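+
+/*
+ * Exactly one gen_build_id() implementation above is compiled in: random
+ * bytes from /dev/urandom by default, or an MD5/SHA1 digest of the code
+ * when built with libcrypto (see the BUILD_ID_* selection at the top of
+ * this file).
+ */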
+
+/*
+ * fd: file descriptor open for writing for the output file
+ * load_addr: code load address (could be zero, just used for buildid)
+ * sym: function name (for native code - used as the symbol)
+ * code: the native code
+ * csize: the code size in bytes
+ */
+int
+jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+             const void *code, int csize,
+             void *debug, int nr_debug_entries)
+{
+       Elf *e;
+       Elf_Data *d;
+       Elf_Scn *scn;
+       Elf_Ehdr *ehdr;
+       Elf_Shdr *shdr;
+       char *strsym = NULL;
+       int symlen;
+       int retval = -1;
+
+       if (elf_version(EV_CURRENT) == EV_NONE) {
+               warnx("ELF initialization failed");
+               return -1;
+       }
+
+       e = elf_begin(fd, ELF_C_WRITE, NULL);
+       if (!e) {
+               warnx("elf_begin failed");
+               goto error;
+       }
+
+       /*
+        * setup ELF header
+        */
+       ehdr = elf_newehdr(e);
+       if (!ehdr) {
+               warnx("cannot get ehdr");
+               goto error;
+       }
+
+       ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN;
+       ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS;
+       ehdr->e_machine = GEN_ELF_ARCH;
+       ehdr->e_type = ET_DYN;
+       ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
+       ehdr->e_version = EV_CURRENT;
+       ehdr->e_shstrndx = 2; /* shdr index for section name */
+
+       /*
+        * setup text section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       d->d_align = 16;
+       d->d_off = 0LL;
+       d->d_buf = (void *)code;
+       d->d_type = ELF_T_BYTE;
+       d->d_size = csize;
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 1;
+       shdr->sh_type = SHT_PROGBITS;
+       shdr->sh_addr = GEN_ELF_TEXT_OFFSET;
+       shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup section headers string table
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = shd_string_table;
+       d->d_type = ELF_T_BYTE;
+       d->d_size = sizeof(shd_string_table);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */
+       shdr->sh_type = SHT_STRTAB;
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup symtab section
+        */
+       symtab[1].st_size  = csize;
+       symtab[1].st_value = GEN_ELF_TEXT_OFFSET;
+
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       d->d_align = 8;
+       d->d_off = 0LL;
+       d->d_buf = symtab;
+       d->d_type = ELF_T_SYM;
+       d->d_size = sizeof(symtab);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */
+       shdr->sh_type = SHT_SYMTAB;
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = sizeof(Elf_Sym);
+       shdr->sh_link = 4; /* index of .strtab section */
+
+       /*
+        * setup symbols string table
+        * 2 = one NUL for the empty first entry + the NUL terminating the symbol name
+        */
+       symlen = 2 + strlen(sym);
+       strsym = calloc(1, symlen);
+       if (!strsym) {
+               warnx("cannot allocate strsym");
+               goto error;
+       }
+       strcpy(strsym + 1, sym);
+
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = strsym;
+       d->d_type = ELF_T_BYTE;
+       d->d_size = symlen;
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 25; /* offset in shd_string_table */
+       shdr->sh_type = SHT_STRTAB;
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup build-id section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       /*
+        * build-id generation
+        */
+       gen_build_id(&bnote, load_addr, code, csize);
+       bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
+       bnote.desc.descsz = sizeof(bnote.build_id);
+       bnote.desc.type   = NT_GNU_BUILD_ID;
+       strcpy(bnote.name, "GNU");
+
+       d->d_align = 4;
+       d->d_off = 0LL;
+       d->d_buf = &bnote;
+       d->d_type = ELF_T_BYTE;
+       d->d_size = sizeof(bnote);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 33; /* offset in shd_string_table */
+       shdr->sh_type = SHT_NOTE;
+       shdr->sh_addr = 0x0;
+       shdr->sh_flags = SHF_ALLOC;
+       shdr->sh_size = sizeof(bnote);
+       shdr->sh_entsize = 0;
+
+       if (debug && nr_debug_entries) {
+               retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
+               if (retval)
+                       goto error;
+       } else {
+               if (elf_update(e, ELF_C_WRITE) < 0) {
+                       warnx("elf_update 4 failed");
+                       goto error;
+               }
+       }
+
+       retval = 0;
+error:
+       (void)elf_end(e);
+
+       free(strsym);
+
+
+       return retval;
+}
+
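+/*
+ * Example caller (hypothetical values), writing a tiny image for one
+ * jitted function with no line information:
+ *
+ *   int fd = open("jitted-1234-1.so", O_CREAT|O_TRUNC|O_RDWR, 0644);
+ *
+ *   jit_write_elf(fd, 0x7f0000001000, "my_jitted_func",
+ *                 code, code_size, NULL, 0);
+ *   close(fd);
+ */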
+#ifndef JVMTI
+
+static unsigned char x86_code[] = {
+    0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
+    0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
+    0xCD, 0x80            /* int $0x80 */
+};
+
+static struct options options;
+
+int main(int argc, char **argv)
+{
+       int c, fd, ret;
+
+       while ((c = getopt(argc, argv, "o:h")) != -1) {
+               switch (c) {
+               case 'o':
+                       options.output = optarg;
+                       break;
+               case 'h':
+                       printf("Usage: genelf -o output_file [-h]\n");
+                       return 0;
+               default:
+                       errx(1, "unknown option");
+               }
+       }
+
+       fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666);
+       if (fd == -1)
+               err(1, "cannot create file %s", options.output);
+
+       ret = jit_write_elf(fd, 0, "main", x86_code, sizeof(x86_code), NULL, 0);
+       close(fd);
+
+       if (ret != 0)
+               unlink(options.output);
+
+       return ret;
+}
+#endif
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
new file mode 100644 (file)
index 0000000..45bf9c6
--- /dev/null
@@ -0,0 +1,67 @@
+#ifndef __GENELF_H__
+#define __GENELF_H__
+
+/* genelf.c */
+extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+                        const void *code, int csize,
+                        void *debug, int nr_debug_entries);
+/* genelf_debug.c */
+extern int jit_add_debug_info(Elf *e, uint64_t code_addr,
+                             void *debug, int nr_debug_entries);
+
+#if   defined(__arm__)
+#define GEN_ELF_ARCH   EM_ARM
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS32
+#elif defined(__aarch64__)
+#define GEN_ELF_ARCH   EM_AARCH64
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__x86_64__)
+#define GEN_ELF_ARCH   EM_X86_64
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__i386__)
+#define GEN_ELF_ARCH   EM_386
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS32
+#elif defined(__ppcle__)
+#define GEN_ELF_ARCH   EM_PPC
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH   EM_PPC64
+#define GEN_ELF_ENDIAN ELFDATA2MSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__powerpcle__)
+#define GEN_ELF_ARCH   EM_PPC64
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#else
+#error "unsupported architecture"
+#endif
+
+#if GEN_ELF_CLASS == ELFCLASS64
+#define elf_newehdr    elf64_newehdr
+#define elf_getshdr    elf64_getshdr
+#define Elf_Ehdr       Elf64_Ehdr
+#define Elf_Shdr       Elf64_Shdr
+#define Elf_Sym                Elf64_Sym
+#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a)
+#define ELF_ST_BIND(a) ELF64_ST_BIND(a)
+#define ELF_ST_VIS(a)  ELF64_ST_VISIBILITY(a)
+#else
+#define elf_newehdr    elf32_newehdr
+#define elf_getshdr    elf32_getshdr
+#define Elf_Ehdr       Elf32_Ehdr
+#define Elf_Shdr       Elf32_Shdr
+#define Elf_Sym                Elf32_Sym
+#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a)
+#define ELF_ST_BIND(a) ELF32_ST_BIND(a)
+#define ELF_ST_VIS(a)  ELF32_ST_VISIBILITY(a)
+#endif
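+
+/*
+ * Illustration only: on a 64-bit target the wrappers above resolve to the
+ * 64-bit libelf/ELF types and helpers, e.g.:
+ *
+ *   Elf_Ehdr    -> Elf64_Ehdr
+ *   elf_newehdr -> elf64_newehdr
+ *   ELF_ST_TYPE -> ELF64_ST_TYPE
+ */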
+
+/* The .text section is directly after the ELF header */
+#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr)
+
+#endif
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
new file mode 100644 (file)
index 0000000..5980f7d
--- /dev/null
@@ -0,0 +1,610 @@
+/*
+ * genelf_debug.c
+ * Copyright (C) 2015, Google, Inc
+ *
+ * Contributed by:
+ *     Stephane Eranian <eranian@google.com>
+ *
+ * Released under the GPL v2.
+ *
+ * based on GPLv2 source code from Oprofile
+ * @remark Copyright 2007 OProfile authors
+ * @author Philippe Elie
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define BUFFER_EXT_DFL_SIZE    (4 * 1024)
+
+typedef uint32_t uword;
+typedef uint16_t uhalf;
+typedef int32_t  sword;
+typedef int16_t  shalf;
+typedef uint8_t  ubyte;
+typedef int8_t   sbyte;
+
+struct buffer_ext {
+       size_t cur_pos;
+       size_t max_sz;
+       void *data;
+};
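+
+/*
+ * buffer_ext is a small growable byte buffer: buffer_ext_add() doubles
+ * max_sz (starting from BUFFER_EXT_DFL_SIZE) until the new data fits.
+ * One buffer is used per generated section payload (.debug_line,
+ * .debug_info, .debug_abbrev).
+ */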
+
+static void
+buffer_ext_dump(struct buffer_ext *be, const char *msg)
+{
+       size_t i;
+       warnx("DUMP for %s", msg);
+       for (i = 0 ; i < be->cur_pos; i++)
+               warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff);
+}
+
+static inline int
+buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz)
+{
+       void *tmp;
+       size_t be_sz = be->max_sz;
+
+retry:
+       if ((be->cur_pos + sz) < be_sz) {
+               memcpy(be->data + be->cur_pos, addr, sz);
+               be->cur_pos += sz;
+               return 0;
+       }
+
+       if (!be_sz)
+               be_sz = BUFFER_EXT_DFL_SIZE;
+       else
+               be_sz <<= 1;
+
+       tmp = realloc(be->data, be_sz);
+       if (!tmp)
+               return -1;
+
+       be->data   = tmp;
+       be->max_sz = be_sz;
+
+       goto retry;
+}
+
+static void
+buffer_ext_init(struct buffer_ext *be)
+{
+       be->data = NULL;
+       be->cur_pos = 0;
+       be->max_sz = 0;
+}
+
+static inline size_t
+buffer_ext_size(struct buffer_ext *be)
+{
+       return be->cur_pos;
+}
+
+static inline void *
+buffer_ext_addr(struct buffer_ext *be)
+{
+       return be->data;
+}
+
+struct debug_line_header {
+       // Not counting this field
+       uword total_length;
+       // version number (2 currently)
+       uhalf version;
+       // relative offset from next field to
+       // program statement
+       uword prolog_length;
+       ubyte minimum_instruction_length;
+       ubyte default_is_stmt;
+       // line_base - see DWARF 2 specs
+       sbyte line_base;
+       // line_range - see DWARF 2 specs
+       ubyte line_range;
+       // number of opcode + 1
+       ubyte opcode_base;
+       /* follow the array of opcode args nr: ubytes [nr_opcode_base] */
+       /* follow the search directories index, zero terminated string
+        * terminated by an empty string.
+        */
+       /* follow an array of { filename, LEB128, LEB128, LEB128 }, first is
+        * the directory index entry, 0 means current directory, then mtime
+        * and filesize, the last entry is followed by an empty string.
+        */
+       /* follow the first program statement */
+} __attribute__((packed));
+
+/* The DWARF 2 spec talks only about one possible compilation unit header,
+ * while binutils can handle two flavours of DWARF 2, 32 and 64 bits; this is
+ * not related to the target arch: a 32-bit ELF can hold more than 4 GB of
+ * debug information. For now we handle only the DWARF 2 32-bit comp unit
+ * header. It'll only become a problem if we generate more than 4 GB of
+ * debug information.
+ */
+struct compilation_unit_header {
+       uword total_length;
+       uhalf version;
+       uword debug_abbrev_offset;
+       ubyte pointer_size;
+} __attribute__((packed));
+
+#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
+
+/* field filled at run time are marked with -1 */
+static struct debug_line_header const default_debug_line_header = {
+       .total_length = -1,
+       .version = 2,
+       .prolog_length = -1,
+       .minimum_instruction_length = 1,        /* could be better when min instruction size != 1 */
+       .default_is_stmt = 1,   /* we don't take care about basic block */
+       .line_base = -5,        /* sensible value for line base ... */
+       .line_range = -14,     /* ... and line range are guessed statically */
+       .opcode_base = DW_LNS_num_opcode
+};
+
+static ubyte standard_opcode_length[] =
+{
+       0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1
+};
+#if 0
+{
+       [DW_LNS_advance_pc]   = 1,
+       [DW_LNS_advance_line] = 1,
+       [DW_LNS_set_file] =  1,
+       [DW_LNS_set_column] = 1,
+       [DW_LNS_fixed_advance_pc] = 1,
+       [DW_LNS_set_isa] = 1,
+};
+#endif
+
+/* field filled at run time are marked with -1 */
+static struct compilation_unit_header default_comp_unit_header = {
+       .total_length = -1,
+       .version = 2,
+       .debug_abbrev_offset = 0,     /* we reuse the same abbrev entries for all comp unit */
+       .pointer_size = sizeof(void *)
+};
+
+static void emit_uword(struct buffer_ext *be, uword data)
+{
+       buffer_ext_add(be, &data, sizeof(uword));
+}
+
+static void emit_string(struct buffer_ext *be, const char *s)
+{
+       buffer_ext_add(be, (void *)s, strlen(s) + 1);
+}
+
+static void emit_unsigned_LEB128(struct buffer_ext *be,
+                                unsigned long data)
+{
+       do {
+               ubyte cur = data & 0x7F;
+               data >>= 7;
+               if (data)
+                       cur |= 0x80;
+               buffer_ext_add(be, &cur, 1);
+       } while (data);
+}
+
+static void emit_signed_LEB128(struct buffer_ext *be, long data)
+{
+       int more = 1;
+       int negative = data < 0;
+       int size = sizeof(long) * CHAR_BIT;
+       while (more) {
+               ubyte cur = data & 0x7F;
+               data >>= 7;
+               if (negative)
+                       data |= - (1 << (size - 7));
+               if ((data == 0 && !(cur & 0x40)) ||
+                   (data == -1l && (cur & 0x40)))
+                       more = 0;
+               else
+                       cur |= 0x80;
+               buffer_ext_add(be, &cur, 1);
+       }
+}
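+
+/*
+ * Examples of the LEB128 encodings produced above (standard DWARF):
+ *   unsigned 300 -> 0xac 0x02
+ *   signed   129 -> 0x81 0x01
+ *   signed    -2 -> 0x7e
+ */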
+
+static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode,
+                                void *data, size_t data_len)
+{
+       buffer_ext_add(be, (char *)"", 1);
+
+       emit_unsigned_LEB128(be, data_len + 1);
+
+       buffer_ext_add(be, &opcode, 1);
+       buffer_ext_add(be, data, data_len);
+}
+
+static void emit_opcode(struct buffer_ext *be, ubyte opcode)
+{
+       buffer_ext_add(be, &opcode, 1);
+}
+
+static void emit_opcode_signed(struct buffer_ext  *be,
+                              ubyte opcode, long data)
+{
+       buffer_ext_add(be, &opcode, 1);
+       emit_signed_LEB128(be, data);
+}
+
+static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode,
+                                unsigned long data)
+{
+       buffer_ext_add(be, &opcode, 1);
+       emit_unsigned_LEB128(be, data);
+}
+
+static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc)
+{
+       emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc);
+}
+
+static void emit_advance_lineno(struct buffer_ext  *be, long delta_lineno)
+{
+       emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno);
+}
+
+static void emit_lne_end_of_sequence(struct buffer_ext *be)
+{
+       emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0);
+}
+
+static void emit_set_file(struct buffer_ext *be, unsigned long idx)
+{
+       emit_opcode_unsigned(be, DW_LNS_set_file, idx);
+}
+
+static void emit_lne_define_filename(struct buffer_ext *be,
+                                    const char *filename)
+{
+       buffer_ext_add(be, (void *)"", 1);
+
+       /* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */
+       emit_unsigned_LEB128(be, strlen(filename) + 5);
+       emit_opcode(be, DW_LNE_define_file);
+       emit_string(be, filename);
+       /* directory index 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+       /* last modification date on file 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+       /* filesize 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void emit_lne_set_address(struct buffer_ext *be,
+                                void *address)
+{
+       emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long));
+}
+
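+/*
+ * DWARF line program "special opcodes" encode a (line delta, address
+ * delta) pair in a single byte:
+ *
+ *   opcode = (line_delta - line_base) + (line_range * addr_delta) + opcode_base
+ *
+ * The helper below returns 0 when the deltas do not fit in one byte, in
+ * which case the caller falls back to the standard advance_line and
+ * advance_pc opcodes.
+ */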
+static ubyte get_special_opcode(struct debug_entry *ent,
+                               unsigned int last_line,
+                               unsigned long last_vma)
+{
+       unsigned int temp;
+       unsigned long delta_addr;
+
+       /*
+        * delta from line_base
+        */
+       temp = (ent->lineno - last_line) - default_debug_line_header.line_base;
+
+       if (temp >= default_debug_line_header.line_range)
+               return 0;
+
+       /*
+        * delta of addresses
+        */
+       delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length;
+
+       /* This is not sufficient to ensure the opcode will be in [0-255] but
+        * sufficient to ensure when summing with the delta lineno we will
+        * not overflow the unsigned long opcode */
+
+       if (delta_addr <= 256 / default_debug_line_header.line_range) {
+               unsigned long opcode = temp +
+                       (delta_addr * default_debug_line_header.line_range) +
+                       default_debug_line_header.opcode_base;
+
+               return opcode <= 255 ? opcode : 0;
+       }
+       return 0;
+}
+
+static void emit_lineno_info(struct buffer_ext *be,
+                            struct debug_entry *ent, size_t nr_entry,
+                            unsigned long code_addr)
+{
+       size_t i;
+
+       /*
+        * Machine state at start of a statement program
+        * address = 0
+        * file    = 1
+        * line    = 1
+        * column  = 0
+        * is_stmt = default_is_stmt as given in the debug_line_header
+        * basic block = 0
+        * end sequence = 0
+        */
+
+       /* start state of the state machine we take care of */
+       unsigned long last_vma = code_addr;
+       char const  *cur_filename = NULL;
+       unsigned long cur_file_idx = 0;
+       int last_line = 1;
+
+       emit_lne_set_address(be, (void *)code_addr);
+
+       for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) {
+               int need_copy = 0;
+               ubyte special_opcode;
+
+               /*
+                * check if filename changed, if so add it
+                */
+               if (!cur_filename || strcmp(cur_filename, ent->name)) {
+                       emit_lne_define_filename(be, ent->name);
+                       cur_filename = ent->name;
+                       emit_set_file(be, ++cur_file_idx);
+                       need_copy = 1;
+               }
+
+               special_opcode = get_special_opcode(ent, last_line, last_vma);
+               if (special_opcode != 0) {
+                       last_line = ent->lineno;
+                       last_vma  = ent->addr;
+                       emit_opcode(be, special_opcode);
+               } else {
+                       /*
+                        * lines differ, emit line delta
+                        */
+                       if (last_line != ent->lineno) {
+                               emit_advance_lineno(be, ent->lineno - last_line);
+                               last_line = ent->lineno;
+                               need_copy = 1;
+                       }
+                       /*
+                        * addresses differ, emit address delta
+                        */
+                       if (last_vma != ent->addr) {
+                               emit_advance_pc(be, ent->addr - last_vma);
+                               last_vma = ent->addr;
+                               need_copy = 1;
+                       }
+                       /*
+                        * add new row to matrix
+                        */
+                       if (need_copy)
+                               emit_opcode(be, DW_LNS_copy);
+               }
+       }
+}
+
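+/*
+ * Emit a complete .debug_line contribution: line program header, standard
+ * opcode length table, empty include-directory and file tables (files are
+ * added on the fly via DW_LNE_define_file), the line number program itself
+ * and a final end-of-sequence marker.  total_length and prolog_length are
+ * patched in once the final sizes are known.
+ */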
+static void add_debug_line(struct buffer_ext *be,
+       struct debug_entry *ent, size_t nr_entry,
+       unsigned long code_addr)
+{
+       struct debug_line_header * dbg_header;
+       size_t old_size;
+
+       old_size = buffer_ext_size(be);
+
+       buffer_ext_add(be, (void *)&default_debug_line_header,
+                sizeof(default_debug_line_header));
+
+       buffer_ext_add(be, &standard_opcode_length,  sizeof(standard_opcode_length));
+
+       // empty directory entry
+       buffer_ext_add(be, (void *)"", 1);
+
+       // empty filename directory
+       buffer_ext_add(be, (void *)"", 1);
+
+       dbg_header = buffer_ext_addr(be) + old_size;
+       dbg_header->prolog_length = (buffer_ext_size(be) - old_size) -
+               offsetof(struct debug_line_header, minimum_instruction_length);
+
+       emit_lineno_info(be, ent, nr_entry, code_addr);
+
+       emit_lne_end_of_sequence(be);
+
+       dbg_header = buffer_ext_addr(be) + old_size;
+       dbg_header->total_length = (buffer_ext_size(be) - old_size) -
+               offsetof(struct debug_line_header, version);
+}
+
+static void
+add_debug_abbrev(struct buffer_ext *be)
+{
+        emit_unsigned_LEB128(be, 1);
+        emit_unsigned_LEB128(be, DW_TAG_compile_unit);
+        emit_unsigned_LEB128(be, DW_CHILDREN_yes);
+        emit_unsigned_LEB128(be, DW_AT_stmt_list);
+        emit_unsigned_LEB128(be, DW_FORM_data4);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void
+add_compilation_unit(struct buffer_ext *be,
+                    size_t offset_debug_line)
+{
+       struct compilation_unit_header *comp_unit_header;
+       size_t old_size = buffer_ext_size(be);
+
+       buffer_ext_add(be, &default_comp_unit_header,
+                      sizeof(default_comp_unit_header));
+
+       emit_unsigned_LEB128(be, 1);
+       emit_uword(be, offset_debug_line);
+
+       comp_unit_header = buffer_ext_addr(be) + old_size;
+       comp_unit_header->total_length = (buffer_ext_size(be) - old_size) -
+               offsetof(struct compilation_unit_header, version);
+}
+
+static int
+jit_process_debug_info(uint64_t code_addr,
+                      void *debug, int nr_debug_entries,
+                      struct buffer_ext *dl,
+                      struct buffer_ext *da,
+                      struct buffer_ext *di)
+{
+       struct debug_entry *ent = debug;
+       int i;
+
+       for (i = 0; i < nr_debug_entries; i++) {
+               ent->addr = ent->addr - code_addr;
+               ent = debug_entry_next(ent);
+       }
+       add_compilation_unit(di, buffer_ext_size(dl));
+       add_debug_line(dl, debug, nr_debug_entries, 0);
+       add_debug_abbrev(da);
+       if (0) buffer_ext_dump(da, "abbrev");
+
+       return 0;
+}
+
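+/*
+ * Build the three DWARF sections for the jitted function and append them
+ * to the ELF image: .debug_line (sh_name 52), .debug_info (sh_name 64) and
+ * .debug_abbrev (sh_name 76), then let libelf write everything out via
+ * elf_update().
+ */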
+int
+jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries)
+{
+       Elf_Data *d;
+       Elf_Scn *scn;
+       Elf_Shdr *shdr;
+       struct buffer_ext dl, di, da;
+       int ret;
+
+       buffer_ext_init(&dl);
+       buffer_ext_init(&di);
+       buffer_ext_init(&da);
+
+       ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di);
+       if (ret)
+               return -1;
+       /*
+        * setup .debug_line section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               return -1;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               return -1;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = buffer_ext_addr(&dl);
+       d->d_type = ELF_T_BYTE;
+       d->d_size = buffer_ext_size(&dl);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               return -1;
+       }
+
+       shdr->sh_name = 52; /* .debug_line */
+       shdr->sh_type = SHT_PROGBITS;
+       shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup .debug_info section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               return -1;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               return -1;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = buffer_ext_addr(&di);
+       d->d_type = ELF_T_BYTE;
+       d->d_size = buffer_ext_size(&di);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               return -1;
+       }
+
+       shdr->sh_name = 64; /* .debug_info */
+       shdr->sh_type = SHT_PROGBITS;
+       shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup .debug_abbrev section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               return -1;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               return -1;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = buffer_ext_addr(&da);
+       d->d_type = ELF_T_BYTE;
+       d->d_size = buffer_ext_size(&da);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               return -1;
+       }
+
+       shdr->sh_name = 76; /* .debug_abbrev */
+       shdr->sh_type = SHT_PROGBITS;
+       shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * now we update the ELF image with all the sections
+        */
+       if (elf_update(e, ELF_C_WRITE) < 0) {
+               warnx("elf_update debug failed");
+               return -1;
+       }
+       return 0;
+}
index f50b7235ecb6558d167a475bf4199e8b05f52143..73e38e472ecd7771695b7ac2d91829a35529f419 100644 (file)
@@ -23,6 +23,8 @@
 #include "strbuf.h"
 #include "build-id.h"
 #include "data.h"
+#include <api/fs/fs.h>
+#include "asm/bug.h"
 
 /*
  * magic2 = "PERFILE2"
@@ -868,6 +870,199 @@ static int write_auxtrace(int fd, struct perf_header *h,
        return err;
 }
 
+static int cpu_cache_level__sort(const void *a, const void *b)
+{
+       struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
+       struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
+
+       return cache_a->level - cache_b->level;
+}
+
+static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
+{
+       if (a->level != b->level)
+               return false;
+
+       if (a->line_size != b->line_size)
+               return false;
+
+       if (a->sets != b->sets)
+               return false;
+
+       if (a->ways != b->ways)
+               return false;
+
+       if (strcmp(a->type, b->type))
+               return false;
+
+       if (strcmp(a->size, b->size))
+               return false;
+
+       if (strcmp(a->map, b->map))
+               return false;
+
+       return true;
+}
+
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
+{
+       char path[PATH_MAX], file[PATH_MAX];
+       struct stat st;
+       size_t len;
+
+       scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
+       scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);
+
+       if (stat(file, &st))
+               return 1;
+
+       scnprintf(file, PATH_MAX, "%s/level", path);
+       if (sysfs__read_int(file, (int *) &cache->level))
+               return -1;
+
+       scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
+       if (sysfs__read_int(file, (int *) &cache->line_size))
+               return -1;
+
+       scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
+       if (sysfs__read_int(file, (int *) &cache->sets))
+               return -1;
+
+       scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
+       if (sysfs__read_int(file, (int *) &cache->ways))
+               return -1;
+
+       scnprintf(file, PATH_MAX, "%s/type", path);
+       if (sysfs__read_str(file, &cache->type, &len))
+               return -1;
+
+       cache->type[len] = 0;
+       cache->type = rtrim(cache->type);
+
+       scnprintf(file, PATH_MAX, "%s/size", path);
+       if (sysfs__read_str(file, &cache->size, &len)) {
+               free(cache->type);
+               return -1;
+       }
+
+       cache->size[len] = 0;
+       cache->size = rtrim(cache->size);
+
+       scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+       if (sysfs__read_str(file, &cache->map, &len)) {
+               free(cache->size);
+               free(cache->type);
+               return -1;
+       }
+
+       cache->map[len] = 0;
+       cache->map = rtrim(cache->map);
+       return 0;
+}
+
+static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
+{
+       fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
+}
+
+static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
+{
+       u32 i, cnt = 0;
+       long ncpus;
+       u32 nr, cpu;
+       u16 level;
+
+       ncpus = sysconf(_SC_NPROCESSORS_CONF);
+       if (ncpus < 0)
+               return -1;
+
+       nr = (u32)(ncpus & UINT_MAX);
+
+       for (cpu = 0; cpu < nr; cpu++) {
+               for (level = 0; level < 10; level++) {
+                       struct cpu_cache_level c;
+                       int err;
+
+                       err = cpu_cache_level__read(&c, cpu, level);
+                       if (err < 0)
+                               return err;
+
+                       if (err == 1)
+                               break;
+
+                       for (i = 0; i < cnt; i++) {
+                               if (cpu_cache_level__cmp(&c, &caches[i]))
+                                       break;
+                       }
+
+                       if (i == cnt)
+                               caches[cnt++] = c;
+                       else
+                               cpu_cache_level__free(&c);
+
+                       if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
+                               goto out;
+               }
+       }
+ out:
+       *cntp = cnt;
+       return 0;
+}
+
+#define MAX_CACHES 2000
+
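+/*
+ * On-disk layout of the HEADER_CACHE feature section:
+ *
+ *   u32 version (currently 1)
+ *   u32 number of unique cache levels
+ *   for each level: u32 level, line_size, sets, ways,
+ *                   followed by the type, size and map strings
+ *
+ * process_cache() reads this back when parsing a perf.data header.
+ */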
+static int write_cache(int fd, struct perf_header *h __maybe_unused,
+                         struct perf_evlist *evlist __maybe_unused)
+{
+       struct cpu_cache_level caches[MAX_CACHES];
+       u32 cnt = 0, i, version = 1;
+       int ret;
+
+       ret = build_caches(caches, MAX_CACHES, &cnt);
+       if (ret)
+               goto out;
+
+       qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
+
+       ret = do_write(fd, &version, sizeof(u32));
+       if (ret < 0)
+               goto out;
+
+       ret = do_write(fd, &cnt, sizeof(u32));
+       if (ret < 0)
+               goto out;
+
+       for (i = 0; i < cnt; i++) {
+               struct cpu_cache_level *c = &caches[i];
+
+               #define _W(v)                                   \
+                       ret = do_write(fd, &c->v, sizeof(u32)); \
+                       if (ret < 0)                            \
+                               goto out;
+
+               _W(level)
+               _W(line_size)
+               _W(sets)
+               _W(ways)
+               #undef _W
+
+               #define _W(v)                                           \
+                       ret = do_write_string(fd, (const char *) c->v); \
+                       if (ret < 0)                                    \
+                               goto out;
+
+               _W(type)
+               _W(size)
+               _W(map)
+               #undef _W
+       }
+
+out:
+       for (i = 0; i < cnt; i++)
+               cpu_cache_level__free(&caches[i]);
+       return ret;
+}
+
 static int write_stat(int fd __maybe_unused,
                      struct perf_header *h __maybe_unused,
                      struct perf_evlist *evlist __maybe_unused)
@@ -1172,6 +1367,18 @@ static void print_stat(struct perf_header *ph __maybe_unused,
        fprintf(fp, "# contains stat data\n");
 }
 
+static void print_cache(struct perf_header *ph __maybe_unused,
+                       int fd __maybe_unused, FILE *fp __maybe_unused)
+{
+       int i;
+
+       fprintf(fp, "# CPU cache info:\n");
+       for (i = 0; i < ph->env.caches_cnt; i++) {
+               fprintf(fp, "#  ");
+               cpu_cache_level__fprintf(fp, &ph->env.caches[i]);
+       }
+}
+
 static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
                               FILE *fp)
 {
@@ -1920,6 +2127,68 @@ static int process_auxtrace(struct perf_file_section *section,
        return err;
 }
 
+static int process_cache(struct perf_file_section *section __maybe_unused,
+                        struct perf_header *ph __maybe_unused, int fd __maybe_unused,
+                        void *data __maybe_unused)
+{
+       struct cpu_cache_level *caches;
+       u32 cnt, i, version;
+
+       if (readn(fd, &version, sizeof(version)) != sizeof(version))
+               return -1;
+
+       if (ph->needs_swap)
+               version = bswap_32(version);
+
+       if (version != 1)
+               return -1;
+
+       if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt))
+               return -1;
+
+       if (ph->needs_swap)
+               cnt = bswap_32(cnt);
+
+       caches = zalloc(sizeof(*caches) * cnt);
+       if (!caches)
+               return -1;
+
+       for (i = 0; i < cnt; i++) {
+               struct cpu_cache_level c;
+
+               #define _R(v)                                           \
+                       if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\
+                               goto out_free_caches;                   \
+                       if (ph->needs_swap)                             \
+                               c.v = bswap_32(c.v);                    \
+
+               _R(level)
+               _R(line_size)
+               _R(sets)
+               _R(ways)
+               #undef _R
+
+               #define _R(v)                           \
+                       c.v = do_read_string(fd, ph);   \
+                       if (!c.v)                       \
+                               goto out_free_caches;
+
+               _R(type)
+               _R(size)
+               _R(map)
+               #undef _R
+
+               caches[i] = c;
+       }
+
+       ph->env.caches = caches;
+       ph->env.caches_cnt = cnt;
+       return 0;
+out_free_caches:
+       free(caches);
+       return -1;
+}
+
 struct feature_ops {
        int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
        void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1962,6 +2231,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPP(HEADER_GROUP_DESC,     group_desc),
        FEAT_OPP(HEADER_AUXTRACE,       auxtrace),
        FEAT_OPA(HEADER_STAT,           stat),
+       FEAT_OPF(HEADER_CACHE,          cache),
 };
 
 struct header_print_data {
index cff9892452ee393764726a12895ad95d36f766fe..3d87ca823c0ae55591e753f6f928802e831c0e3c 100644 (file)
@@ -32,6 +32,7 @@ enum {
        HEADER_GROUP_DESC,
        HEADER_AUXTRACE,
        HEADER_STAT,
+       HEADER_CACHE,
        HEADER_LAST_FEATURE,
        HEADER_FEAT_BITS        = 256,
 };
index dc1e41c9b054b186c0d9349d97e9d0f467099ac0..43a98a4dc1e1e90c7079fba26929022ec0959a31 100644 (file)
@@ -6,7 +6,8 @@
 static int autocorrect;
 static struct cmdnames aliases;
 
-static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+static int perf_unknown_cmd_config(const char *var, const char *value,
+                                  void *cb __maybe_unused)
 {
        if (!strcmp(var, "help.autocorrect"))
                autocorrect = perf_config_int(var,value);
@@ -14,7 +15,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
        if (!prefixcmp(var, "alias."))
                add_cmdname(&aliases, var + 6, strlen(var + 6));
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static int levenshtein_compare(const void *p1, const void *p2)
index 68a7612019dc3c3be315604693b68170183044d6..290b3cbf68772de883bff4fee5c2294234dbefd3 100644 (file)
@@ -179,6 +179,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
        if (h->transaction)
                hists__new_col_len(hists, HISTC_TRANSACTION,
                                   hist_entry__transaction_len());
+
+       if (h->trace_output)
+               hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
 }
 
 void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -245,6 +248,8 @@ static void he_stat__decay(struct he_stat *he_stat)
        /* XXX need decay for weight too? */
 }
 
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
 static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 {
        u64 prev_period = he->stat.period;
@@ -260,21 +265,45 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 
        diff = prev_period - he->stat.period;
 
-       hists->stats.total_period -= diff;
-       if (!he->filtered)
-               hists->stats.total_non_filtered_period -= diff;
+       if (!he->depth) {
+               hists->stats.total_period -= diff;
+               if (!he->filtered)
+                       hists->stats.total_non_filtered_period -= diff;
+       }
+
+       if (!he->leaf) {
+               struct hist_entry *child;
+               struct rb_node *node = rb_first(&he->hroot_out);
+               while (node) {
+                       child = rb_entry(node, struct hist_entry, rb_node);
+                       node = rb_next(node);
+
+                       if (hists__decay_entry(hists, child))
+                               hists__delete_entry(hists, child);
+               }
+       }
 
        return he->stat.period == 0;
 }
 
 static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
 {
-       rb_erase(&he->rb_node, &hists->entries);
+       struct rb_root *root_in;
+       struct rb_root *root_out;
 
-       if (sort__need_collapse)
-               rb_erase(&he->rb_node_in, &hists->entries_collapsed);
-       else
-               rb_erase(&he->rb_node_in, hists->entries_in);
+       if (he->parent_he) {
+               root_in  = &he->parent_he->hroot_in;
+               root_out = &he->parent_he->hroot_out;
+       } else {
+               if (sort__need_collapse)
+                       root_in = &hists->entries_collapsed;
+               else
+                       root_in = hists->entries_in;
+               root_out = &hists->entries;
+       }
+
+       rb_erase(&he->rb_node_in, root_in);
+       rb_erase(&he->rb_node, root_out);
 
        --hists->nr_entries;
        if (!he->filtered)
@@ -393,6 +422,9 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
                }
                INIT_LIST_HEAD(&he->pairs.node);
                thread__get(he->thread);
+
+               if (!symbol_conf.report_hierarchy)
+                       he->leaf = true;
        }
 
        return he;
@@ -405,6 +437,16 @@ static u8 symbol__parent_filter(const struct symbol *parent)
        return 0;
 }
 
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+{
+       if (!symbol_conf.use_callchain)
+               return;
+
+       he->hists->callchain_period += period;
+       if (!he->filtered)
+               he->hists->callchain_non_filtered_period += period;
+}
+
 static struct hist_entry *hists__findnew_entry(struct hists *hists,
                                               struct hist_entry *entry,
                                               struct addr_location *al,
@@ -432,8 +474,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
                cmp = hist_entry__cmp(he, entry);
 
                if (!cmp) {
-                       if (sample_self)
+                       if (sample_self) {
                                he_stat__add_period(&he->stat, period, weight);
+                               hist_entry__add_callchain_period(he, period);
+                       }
                        if (symbol_conf.cumulate_callchain)
                                he_stat__add_period(he->stat_acc, period, weight);
 
@@ -466,6 +510,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
        if (!he)
                return NULL;
 
+       if (sample_self)
+               hist_entry__add_callchain_period(he, period);
        hists->nr_entries++;
 
        rb_link_node(&he->rb_node_in, parent, p);
@@ -951,10 +997,15 @@ out:
 int64_t
 hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
+       struct hists *hists = left->hists;
        struct perf_hpp_fmt *fmt;
        int64_t cmp = 0;
 
-       perf_hpp__for_each_sort_list(fmt) {
+       hists__for_each_sort_list(hists, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt) &&
+                   !perf_hpp__defined_dynamic_entry(fmt, hists))
+                       continue;
+
                cmp = fmt->cmp(fmt, left, right);
                if (cmp)
                        break;
@@ -966,10 +1017,15 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 int64_t
 hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 {
+       struct hists *hists = left->hists;
        struct perf_hpp_fmt *fmt;
        int64_t cmp = 0;
 
-       perf_hpp__for_each_sort_list(fmt) {
+       hists__for_each_sort_list(hists, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt) &&
+                   !perf_hpp__defined_dynamic_entry(fmt, hists))
+                       continue;
+
                cmp = fmt->collapse(fmt, left, right);
                if (cmp)
                        break;
@@ -1005,18 +1061,251 @@ void hist_entry__delete(struct hist_entry *he)
        free(he);
 }
 
+/*
+ * If this is not the last column, then we need to pad it according to the
+ * pre-calculated max length for this column, otherwise don't bother adding
+ * spaces because that would break viewing this with, for instance, 'less',
+ * which would show tons of trailing spaces when a long C++ demangled method
+ * name is sampled.
+ */
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+                                  struct perf_hpp_fmt *fmt, int printed)
+{
+       if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) {
+               const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists));
+               if (printed < width) {
+                       advance_hpp(hpp, printed);
+                       printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
+               }
+       }
+
+       return printed;
+}
+
 /*
  * collapse the histogram
  */
 
-bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
-                                 struct rb_root *root, struct hist_entry *he)
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+                                      enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+       return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+                                               enum hist_filter type,
+                                               fmt_chk_fn check)
+{
+       struct perf_hpp_fmt *fmt;
+       bool type_match = false;
+       struct hist_entry *parent = he->parent_he;
+
+       switch (type) {
+       case HIST_FILTER__THREAD:
+               if (symbol_conf.comm_list == NULL &&
+                   symbol_conf.pid_list == NULL &&
+                   symbol_conf.tid_list == NULL)
+                       return;
+               break;
+       case HIST_FILTER__DSO:
+               if (symbol_conf.dso_list == NULL)
+                       return;
+               break;
+       case HIST_FILTER__SYMBOL:
+               if (symbol_conf.sym_list == NULL)
+                       return;
+               break;
+       case HIST_FILTER__PARENT:
+       case HIST_FILTER__GUEST:
+       case HIST_FILTER__HOST:
+       case HIST_FILTER__SOCKET:
+       default:
+               return;
+       }
+
+       /* if it's filtered by own fmt, it has to have filter bits */
+       perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+               if (check(fmt)) {
+                       type_match = true;
+                       break;
+               }
+       }
+
+       if (type_match) {
+               /*
+                * If the filter is for current level entry, propagate
+                * filter marker to parents.  The marker bit was
+                * already set by default so it only needs to clear
+                * non-filtered entries.
+                */
+               if (!(he->filtered & (1 << type))) {
+                       while (parent) {
+                               parent->filtered &= ~(1 << type);
+                               parent = parent->parent_he;
+                       }
+               }
+       } else {
+               /*
+                * If current entry doesn't have matching formats, set
+                * If the current entry doesn't have matching formats, set
+                * the filter marker for upper level entries.  It will be
+                * cleared if one of its lower level entries is not filtered.
+                * For lower-level entries, it inherits parent's
+                * filter bit so that lower level entries of a
+                * non-filtered entry won't set the filter marker.
+                */
+               if (parent == NULL)
+                       he->filtered |= (1 << type);
+               else
+                       he->filtered |= (parent->filtered & (1 << type));
+       }
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+       hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+                                           check_thread_entry);
+
+       hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+                                           perf_hpp__is_dso_entry);
+
+       hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+                                           perf_hpp__is_sym_entry);
+
+       hists__apply_filters(he->hists, he);
+}
+
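+/*
+ * Hierarchy mode: each --sort level gets its own copy of the entry,
+ * inserted into the rbtree of its parent level.  Copies are linked via
+ * parent_he and keep a pointer to their level's format list in hpp_list;
+ * only the deepest copy is marked as a leaf.
+ */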
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+                                                struct rb_root *root,
+                                                struct hist_entry *he,
+                                                struct hist_entry *parent_he,
+                                                struct perf_hpp_list *hpp_list)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct hist_entry *iter, *new;
+       struct perf_hpp_fmt *fmt;
+       int64_t cmp;
+
+       while (*p != NULL) {
+               parent = *p;
+               iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+               cmp = 0;
+               perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+                       cmp = fmt->collapse(fmt, iter, he);
+                       if (cmp)
+                               break;
+               }
+
+               if (!cmp) {
+                       he_stat__add_stat(&iter->stat, &he->stat);
+                       return iter;
+               }
+
+               if (cmp < 0)
+                       p = &parent->rb_left;
+               else
+                       p = &parent->rb_right;
+       }
+
+       new = hist_entry__new(he, true);
+       if (new == NULL)
+               return NULL;
+
+       hists->nr_entries++;
+
+       /* save related format list for output */
+       new->hpp_list = hpp_list;
+       new->parent_he = parent_he;
+
+       hist_entry__apply_hierarchy_filters(new);
+
+       /* some fields are now passed to 'new' */
+       perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+               if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+                       he->trace_output = NULL;
+               else
+                       new->trace_output = NULL;
+
+               if (perf_hpp__is_srcline_entry(fmt))
+                       he->srcline = NULL;
+               else
+                       new->srcline = NULL;
+
+               if (perf_hpp__is_srcfile_entry(fmt))
+                       he->srcfile = NULL;
+               else
+                       new->srcfile = NULL;
+       }
+
+       rb_link_node(&new->rb_node_in, parent, p);
+       rb_insert_color(&new->rb_node_in, root);
+       return new;
+}
+
+static int hists__hierarchy_insert_entry(struct hists *hists,
+                                        struct rb_root *root,
+                                        struct hist_entry *he)
+{
+       struct perf_hpp_list_node *node;
+       struct hist_entry *new_he = NULL;
+       struct hist_entry *parent = NULL;
+       int depth = 0;
+       int ret = 0;
+
+       list_for_each_entry(node, &hists->hpp_formats, list) {
+               /* skip period (overhead) and elided columns */
+               if (node->level == 0 || node->skip)
+                       continue;
+
+               /* insert copy of 'he' for each fmt into the hierarchy */
+               new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
+               if (new_he == NULL) {
+                       ret = -1;
+                       break;
+               }
+
+               root = &new_he->hroot_in;
+               new_he->depth = depth++;
+               parent = new_he;
+       }
+
+       if (new_he) {
+               new_he->leaf = true;
+
+               if (symbol_conf.use_callchain) {
+                       callchain_cursor_reset(&callchain_cursor);
+                       if (callchain_merge(&callchain_cursor,
+                                           new_he->callchain,
+                                           he->callchain) < 0)
+                               ret = -1;
+               }
+       }
+
+       /* 'he' is no longer used */
+       hist_entry__delete(he);
+
+       /* return 0 (or -1) since it already applied filters */
+       return ret;
+}
+
+int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
+                                struct hist_entry *he)
 {
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct hist_entry *iter;
        int64_t cmp;
 
+       if (symbol_conf.report_hierarchy)
+               return hists__hierarchy_insert_entry(hists, root, he);
+
        while (*p != NULL) {
                parent = *p;
                iter = rb_entry(parent, struct hist_entry, rb_node_in);
@@ -1024,18 +1313,21 @@ bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
                cmp = hist_entry__collapse(iter, he);
 
                if (!cmp) {
+                       int ret = 0;
+
                        he_stat__add_stat(&iter->stat, &he->stat);
                        if (symbol_conf.cumulate_callchain)
                                he_stat__add_stat(iter->stat_acc, he->stat_acc);
 
                        if (symbol_conf.use_callchain) {
                                callchain_cursor_reset(&callchain_cursor);
-                               callchain_merge(&callchain_cursor,
-                                               iter->callchain,
-                                               he->callchain);
+                               if (callchain_merge(&callchain_cursor,
+                                                   iter->callchain,
+                                                   he->callchain) < 0)
+                                       ret = -1;
                        }
                        hist_entry__delete(he);
-                       return false;
+                       return ret;
                }
 
                if (cmp < 0)
@@ -1047,7 +1339,7 @@ bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
 
        rb_link_node(&he->rb_node_in, parent, p);
        rb_insert_color(&he->rb_node_in, root);
-       return true;
+       return 1;
 }
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
@@ -1073,14 +1365,15 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
        hists__filter_entry_by_socket(hists, he);
 }
 
-void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 {
        struct rb_root *root;
        struct rb_node *next;
        struct hist_entry *n;
+       int ret;
 
        if (!sort__need_collapse)
-               return;
+               return 0;
 
        hists->nr_entries = 0;
 
@@ -1095,7 +1388,11 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
                next = rb_next(&n->rb_node_in);
 
                rb_erase(&n->rb_node_in, root);
-               if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) {
+               ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
+               if (ret < 0)
+                       return -1;
+
+               if (ret) {
                        /*
                         * If it wasn't combined with one of the entries already
                         * collapsed, we need to apply the filters that may have
@@ -1106,14 +1403,16 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
                if (prog)
                        ui_progress__update(prog, 1);
        }
+       return 0;
 }
 
 static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
 {
+       struct hists *hists = a->hists;
        struct perf_hpp_fmt *fmt;
        int64_t cmp = 0;
 
-       perf_hpp__for_each_sort_list(fmt) {
+       hists__for_each_sort_list(hists, fmt) {
                if (perf_hpp__should_skip(fmt, a->hists))
                        continue;
 
@@ -1154,6 +1453,113 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
        hists->stats.total_period += h->stat.period;
 }
 
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+       struct rb_node *node;
+       struct hist_entry *he;
+
+       node = rb_first(&hists->entries);
+
+       hists->stats.total_period = 0;
+       hists->stats.total_non_filtered_period = 0;
+
+       /*
+        * recalculate the total period using top-level entries only,
+        * since lower-level entries only see non-filtered entries
+        * while upper-level entries hold the sum of both.
+        */
+       while (node) {
+               he = rb_entry(node, struct hist_entry, rb_node);
+               node = rb_next(node);
+
+               hists->stats.total_period += he->stat.period;
+               if (!he->filtered)
+                       hists->stats.total_non_filtered_period += he->stat.period;
+       }
+}
+
+static void hierarchy_insert_output_entry(struct rb_root *root,
+                                         struct hist_entry *he)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct hist_entry *iter;
+       struct perf_hpp_fmt *fmt;
+
+       while (*p != NULL) {
+               parent = *p;
+               iter = rb_entry(parent, struct hist_entry, rb_node);
+
+               if (hist_entry__sort(he, iter) > 0)
+                       p = &parent->rb_left;
+               else
+                       p = &parent->rb_right;
+       }
+
+       rb_link_node(&he->rb_node, parent, p);
+       rb_insert_color(&he->rb_node, root);
+
+       /* update column width of dynamic entry */
+       perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt))
+                       fmt->sort(fmt, he, NULL);
+       }
+}
+
+static void hists__hierarchy_output_resort(struct hists *hists,
+                                          struct ui_progress *prog,
+                                          struct rb_root *root_in,
+                                          struct rb_root *root_out,
+                                          u64 min_callchain_hits,
+                                          bool use_callchain)
+{
+       struct rb_node *node;
+       struct hist_entry *he;
+
+       *root_out = RB_ROOT;
+       node = rb_first(root_in);
+
+       while (node) {
+               he = rb_entry(node, struct hist_entry, rb_node_in);
+               node = rb_next(node);
+
+               hierarchy_insert_output_entry(root_out, he);
+
+               if (prog)
+                       ui_progress__update(prog, 1);
+
+               if (!he->leaf) {
+                       hists__hierarchy_output_resort(hists, prog,
+                                                      &he->hroot_in,
+                                                      &he->hroot_out,
+                                                      min_callchain_hits,
+                                                      use_callchain);
+                       hists->nr_entries++;
+                       if (!he->filtered) {
+                               hists->nr_non_filtered_entries++;
+                               hists__calc_col_len(hists, he);
+                       }
+
+                       continue;
+               }
+
+               if (!use_callchain)
+                       continue;
+
+               if (callchain_param.mode == CHAIN_GRAPH_REL) {
+                       u64 total = he->stat.period;
+
+                       if (symbol_conf.cumulate_callchain)
+                               total = he->stat_acc->period;
+
+                       min_callchain_hits = total * (callchain_param.min_percent / 100);
+               }
+
+               callchain_param.sort(&he->sorted_chain, he->callchain,
+                                    min_callchain_hits, &callchain_param);
+       }
+}
+
 static void __hists__insert_output_entry(struct rb_root *entries,
                                         struct hist_entry *he,
                                         u64 min_callchain_hits,
@@ -1162,10 +1568,20 @@ static void __hists__insert_output_entry(struct rb_root *entries,
        struct rb_node **p = &entries->rb_node;
        struct rb_node *parent = NULL;
        struct hist_entry *iter;
+       struct perf_hpp_fmt *fmt;
+
+       if (use_callchain) {
+               if (callchain_param.mode == CHAIN_GRAPH_REL) {
+                       u64 total = he->stat.period;
+
+                       if (symbol_conf.cumulate_callchain)
+                               total = he->stat_acc->period;
 
-       if (use_callchain)
+                       min_callchain_hits = total * (callchain_param.min_percent / 100);
+               }
                callchain_param.sort(&he->sorted_chain, he->callchain,
                                      min_callchain_hits, &callchain_param);
+       }
 
        while (*p != NULL) {
                parent = *p;
@@ -1179,23 +1595,41 @@ static void __hists__insert_output_entry(struct rb_root *entries,
 
        rb_link_node(&he->rb_node, parent, p);
        rb_insert_color(&he->rb_node, entries);
+
+       perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt) &&
+                   perf_hpp__defined_dynamic_entry(fmt, he->hists))
+                       fmt->sort(fmt, he, NULL);  /* update column width */
+       }
 }
 
-void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+static void output_resort(struct hists *hists, struct ui_progress *prog,
+                         bool use_callchain)
 {
        struct rb_root *root;
        struct rb_node *next;
        struct hist_entry *n;
+       u64 callchain_total;
        u64 min_callchain_hits;
-       struct perf_evsel *evsel = hists_to_evsel(hists);
-       bool use_callchain;
 
-       if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
-               use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
-       else
-               use_callchain = symbol_conf.use_callchain;
+       callchain_total = hists->callchain_period;
+       if (symbol_conf.filter_relative)
+               callchain_total = hists->callchain_non_filtered_period;
 
-       min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
+       min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
+
+       hists__reset_stats(hists);
+       hists__reset_col_len(hists);
+
+       if (symbol_conf.report_hierarchy) {
+               hists__hierarchy_output_resort(hists, prog,
+                                              &hists->entries_collapsed,
+                                              &hists->entries,
+                                              min_callchain_hits,
+                                              use_callchain);
+               hierarchy_recalc_total_periods(hists);
+               return;
+       }
 
        if (sort__need_collapse)
                root = &hists->entries_collapsed;
@@ -1205,9 +1639,6 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
        next = rb_first(root);
        hists->entries = RB_ROOT;
 
-       hists__reset_stats(hists);
-       hists__reset_col_len(hists);
-
        while (next) {
                n = rb_entry(next, struct hist_entry, rb_node_in);
                next = rb_next(&n->rb_node_in);
@@ -1223,15 +1654,136 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
        }
 }
 
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
+{
+       bool use_callchain;
+
+       if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
+               use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+       else
+               use_callchain = symbol_conf.use_callchain;
+
+       output_resort(evsel__hists(evsel), prog, use_callchain);
+}
+
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+{
+       output_resort(hists, prog, symbol_conf.use_callchain);
+}
+
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+       if (he->leaf || hmd == HMD_FORCE_SIBLING)
+               return false;
+
+       if (he->unfolded || hmd == HMD_FORCE_CHILD)
+               return true;
+
+       return false;
+}
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+       struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+       while (can_goto_child(he, HMD_NORMAL)) {
+               node = rb_last(&he->hroot_out);
+               he = rb_entry(node, struct hist_entry, rb_node);
+       }
+       return node;
+}
+
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+       struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+       if (can_goto_child(he, hmd))
+               node = rb_first(&he->hroot_out);
+       else
+               node = rb_next(node);
+
+       while (node == NULL) {
+               he = he->parent_he;
+               if (he == NULL)
+                       break;
+
+               node = rb_next(&he->rb_node);
+       }
+       return node;
+}
+
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+       struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+       node = rb_prev(node);
+       if (node)
+               return rb_hierarchy_last(node);
+
+       he = he->parent_he;
+       if (he == NULL)
+               return NULL;
+
+       return &he->rb_node;
+}
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
+{
+       struct rb_node *node;
+       struct hist_entry *child;
+       float percent;
+
+       if (he->leaf)
+               return false;
+
+       node = rb_first(&he->hroot_out);
+       child = rb_entry(node, struct hist_entry, rb_node);
+
+       while (node && child->filtered) {
+               node = rb_next(node);
+               child = rb_entry(node, struct hist_entry, rb_node);
+       }
+
+       if (node)
+               percent = hist_entry__get_percent_limit(child);
+       else
+               percent = 0;
+
+       return node && percent >= limit;
+}
+
 static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
                                       enum hist_filter filter)
 {
        h->filtered &= ~(1 << filter);
+
+       if (symbol_conf.report_hierarchy) {
+               struct hist_entry *parent = h->parent_he;
+
+               while (parent) {
+                       he_stat__add_stat(&parent->stat, &h->stat);
+
+                       parent->filtered &= ~(1 << filter);
+
+                       if (parent->filtered)
+                               goto next;
+
+                       /* force fold unfiltered entry for simplicity */
+                       parent->unfolded = false;
+                       parent->has_no_entry = false;
+                       parent->row_offset = 0;
+                       parent->nr_rows = 0;
+next:
+                       parent = parent->parent_he;
+               }
+       }
+
        if (h->filtered)
                return;
 
        /* force fold unfiltered entry for simplicity */
        h->unfolded = false;
+       h->has_no_entry = false;
        h->row_offset = 0;
        h->nr_rows = 0;
 
@@ -1254,28 +1806,6 @@ static bool hists__filter_entry_by_dso(struct hists *hists,
        return false;
 }
 
-void hists__filter_by_dso(struct hists *hists)
-{
-       struct rb_node *nd;
-
-       hists->stats.nr_non_filtered_samples = 0;
-
-       hists__reset_filter_stats(hists);
-       hists__reset_col_len(hists);
-
-       for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-               struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-               if (symbol_conf.exclude_other && !h->parent)
-                       continue;
-
-               if (hists__filter_entry_by_dso(hists, h))
-                       continue;
-
-               hists__remove_entry_filter(hists, h, HIST_FILTER__DSO);
-       }
-}
-
 static bool hists__filter_entry_by_thread(struct hists *hists,
                                          struct hist_entry *he)
 {
@@ -1288,25 +1818,6 @@ static bool hists__filter_entry_by_thread(struct hists *hists,
        return false;
 }
 
-void hists__filter_by_thread(struct hists *hists)
-{
-       struct rb_node *nd;
-
-       hists->stats.nr_non_filtered_samples = 0;
-
-       hists__reset_filter_stats(hists);
-       hists__reset_col_len(hists);
-
-       for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-               struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-               if (hists__filter_entry_by_thread(hists, h))
-                       continue;
-
-               hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD);
-       }
-}
-
 static bool hists__filter_entry_by_symbol(struct hists *hists,
                                          struct hist_entry *he)
 {
@@ -1320,7 +1831,21 @@ static bool hists__filter_entry_by_symbol(struct hists *hists,
        return false;
 }
 
-void hists__filter_by_symbol(struct hists *hists)
+static bool hists__filter_entry_by_socket(struct hists *hists,
+                                         struct hist_entry *he)
+{
+       if ((hists->socket_filter > -1) &&
+           (he->socket != hists->socket_filter)) {
+               he->filtered |= (1 << HIST_FILTER__SOCKET);
+               return true;
+       }
+
+       return false;
+}
+
+typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
+
+static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
 {
        struct rb_node *nd;
 
@@ -1332,42 +1857,155 @@ void hists__filter_by_symbol(struct hists *hists)
        for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-               if (hists__filter_entry_by_symbol(hists, h))
+               if (filter(hists, h))
                        continue;
 
-               hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL);
+               hists__remove_entry_filter(hists, h, type);
        }
 }
 
-static bool hists__filter_entry_by_socket(struct hists *hists,
-                                         struct hist_entry *he)
+static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
 {
-       if ((hists->socket_filter > -1) &&
-           (he->socket != hists->socket_filter)) {
-               he->filtered |= (1 << HIST_FILTER__SOCKET);
-               return true;
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct hist_entry *iter;
+       struct rb_root new_root = RB_ROOT;
+       struct rb_node *nd;
+
+       while (*p != NULL) {
+               parent = *p;
+               iter = rb_entry(parent, struct hist_entry, rb_node);
+
+               if (hist_entry__sort(he, iter) > 0)
+                       p = &(*p)->rb_left;
+               else
+                       p = &(*p)->rb_right;
        }
 
-       return false;
+       rb_link_node(&he->rb_node, parent, p);
+       rb_insert_color(&he->rb_node, root);
+
+       if (he->leaf || he->filtered)
+               return;
+
+       nd = rb_first(&he->hroot_out);
+       while (nd) {
+               struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+               nd = rb_next(nd);
+               rb_erase(&h->rb_node, &he->hroot_out);
+
+               resort_filtered_entry(&new_root, h);
+       }
+
+       he->hroot_out = new_root;
 }
 
-void hists__filter_by_socket(struct hists *hists)
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
 {
        struct rb_node *nd;
+       struct rb_root new_root = RB_ROOT;
 
        hists->stats.nr_non_filtered_samples = 0;
 
        hists__reset_filter_stats(hists);
        hists__reset_col_len(hists);
 
-       for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+       nd = rb_first(&hists->entries);
+       while (nd) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+               int ret;
 
-               if (hists__filter_entry_by_socket(hists, h))
-                       continue;
+               ret = hist_entry__filter(h, type, arg);
 
-               hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET);
+               /*
+                * case 1. non-matching type
+                * zero out the period, set filter marker and move to child
+                */
+               if (ret < 0) {
+                       memset(&h->stat, 0, sizeof(h->stat));
+                       h->filtered |= (1 << type);
+
+                       nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+               }
+               /*
+                * case 2. matched type (filter out)
+                * set filter marker and move to next
+                */
+               else if (ret == 1) {
+                       h->filtered |= (1 << type);
+
+                       nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+               }
+               /*
+                * case 3. ok (not filtered)
+                * add period to hists and parents, erase the filter marker
+                * and move to next sibling
+                */
+               else {
+                       hists__remove_entry_filter(hists, h, type);
+
+                       nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+               }
+       }
+
+       hierarchy_recalc_total_periods(hists);
+
+       /*
+        * resort the output after applying a new filter, since a filter in a
+        * lower hierarchy level can change periods in an upper level.
+        */
+       nd = rb_first(&hists->entries);
+       while (nd) {
+               struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+               nd = rb_next(nd);
+               rb_erase(&h->rb_node, &hists->entries);
+
+               resort_filtered_entry(&new_root, h);
        }
+
+       hists->entries = new_root;
+}
+
+void hists__filter_by_thread(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__THREAD,
+                                       hists->thread_filter);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__THREAD,
+                                     hists__filter_entry_by_thread);
+}
+
+void hists__filter_by_dso(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__DSO,
+                                       hists->dso_filter);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__DSO,
+                                     hists__filter_entry_by_dso);
+}
+
+void hists__filter_by_symbol(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL,
+                                       hists->symbol_filter_str);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+                                     hists__filter_entry_by_symbol);
+}
+
+void hists__filter_by_socket(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__SOCKET,
+                                       &hists->socket_filter);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+                                     hists__filter_entry_by_socket);
 }
 
 void events_stats__inc(struct events_stats *stats, u32 type)
@@ -1585,7 +2223,7 @@ int perf_hist_config(const char *var, const char *value)
        return 0;
 }
 
-int __hists__init(struct hists *hists)
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
 {
        memset(hists, 0, sizeof(*hists));
        hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
@@ -1594,6 +2232,8 @@ int __hists__init(struct hists *hists)
        hists->entries = RB_ROOT;
        pthread_mutex_init(&hists->lock, NULL);
        hists->socket_filter = -1;
+       hists->hpp_list = hpp_list;
+       INIT_LIST_HEAD(&hists->hpp_formats);
        return 0;
 }
 
@@ -1622,15 +2262,26 @@ static void hists__delete_all_entries(struct hists *hists)
 static void hists_evsel__exit(struct perf_evsel *evsel)
 {
        struct hists *hists = evsel__hists(evsel);
+       struct perf_hpp_fmt *fmt, *pos;
+       struct perf_hpp_list_node *node, *tmp;
 
        hists__delete_all_entries(hists);
+
+       list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
+               perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
+                       list_del(&fmt->list);
+                       free(fmt);
+               }
+               list_del(&node->list);
+               free(node);
+       }
 }
 
 static int hists_evsel__init(struct perf_evsel *evsel)
 {
        struct hists *hists = evsel__hists(evsel);
 
-       __hists__init(hists);
+       __hists__init(hists, &perf_hpp_list);
        return 0;
 }
 
@@ -1649,3 +2300,9 @@ int hists__init(void)
 
        return err;
 }
+
+void perf_hpp_list__init(struct perf_hpp_list *list)
+{
+       INIT_LIST_HEAD(&list->fields);
+       INIT_LIST_HEAD(&list->sorts);
+}
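
A note on the new perf_hpp_list API above: a struct perf_hpp_list now owns the output fields and sort keys, and every struct hists points at one via __hists__init(). The following is only a hedged sketch of wiring a private list to a hists instance; the helper name and error handling are illustrative and not part of this patch, and it assumes the declarations from tools/perf/util/hist.h shown further below.

#include <errno.h>
#include <stdlib.h>
#include "util/hist.h"

/* Illustrative only: give one hists instance its own hpp list. */
static int hists__init_private_list(struct hists *hists,
                                    struct perf_hpp_fmt *fmt)
{
        struct perf_hpp_list *list = malloc(sizeof(*list));

        if (list == NULL)
                return -ENOMEM;

        perf_hpp_list__init(list);

        /* the same fmt may sit on both lists; it has separate list nodes */
        perf_hpp_list__column_register(list, fmt);
        perf_hpp_list__register_sort_field(list, fmt);

        return __hists__init(hists, list);
}
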
index d4ec4822a1038611a7269aa0df2eb37171a647a6..ead18c82294faf881f1d4bc89be653e27ff5c708 100644 (file)
@@ -66,6 +66,8 @@ struct hists {
        struct rb_root          entries_collapsed;
        u64                     nr_entries;
        u64                     nr_non_filtered_entries;
+       u64                     callchain_period;
+       u64                     callchain_non_filtered_period;
        struct thread           *thread_filter;
        const struct dso        *dso_filter;
        const char              *uid_filter_str;
@@ -75,6 +77,9 @@ struct hists {
        u64                     event_stream;
        u16                     col_len[HISTC_NR_COLS];
        int                     socket_filter;
+       struct perf_hpp_list    *hpp_list;
+       struct list_head        hpp_formats;
+       int                     nr_hpp_node;
 };
 
 struct hist_entry_iter;
@@ -121,15 +126,21 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
                         int max_stack_depth, void *arg);
 
+struct perf_hpp;
+struct perf_hpp_fmt;
+
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
 int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
                              struct hists *hists);
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+                                  struct perf_hpp_fmt *fmt, int printed);
 void hist_entry__delete(struct hist_entry *he);
 
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
 void hists__output_resort(struct hists *hists, struct ui_progress *prog);
-void hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
 void hists__delete_entries(struct hists *hists);
@@ -185,10 +196,10 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel)
 }
 
 int hists__init(void);
-int __hists__init(struct hists *hists);
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
-bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
+int hists__collapse_insert_entry(struct hists *hists,
                                  struct rb_root *root, struct hist_entry *he);
 
 struct perf_hpp {
@@ -214,28 +225,64 @@ struct perf_hpp_fmt {
                            struct hist_entry *a, struct hist_entry *b);
        int64_t (*sort)(struct perf_hpp_fmt *fmt,
                        struct hist_entry *a, struct hist_entry *b);
+       bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+       void (*free)(struct perf_hpp_fmt *fmt);
 
        struct list_head list;
        struct list_head sort_list;
        bool elide;
        int len;
        int user_len;
+       int idx;
+       int level;
+};
+
+struct perf_hpp_list {
+       struct list_head fields;
+       struct list_head sorts;
 };
 
-extern struct list_head perf_hpp__list;
-extern struct list_head perf_hpp__sort_list;
+extern struct perf_hpp_list perf_hpp_list;
+
+struct perf_hpp_list_node {
+       struct list_head        list;
+       struct perf_hpp_list    hpp;
+       int                     level;
+       bool                    skip;
+};
+
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+                                   struct perf_hpp_fmt *format);
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+                                       struct perf_hpp_fmt *format);
+
+static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+       perf_hpp_list__column_register(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
+{
+       perf_hpp_list__register_sort_field(&perf_hpp_list, format);
+}
+
+#define perf_hpp_list__for_each_format(_list, format) \
+       list_for_each_entry(format, &(_list)->fields, list)
 
-#define perf_hpp__for_each_format(format) \
-       list_for_each_entry(format, &perf_hpp__list, list)
+#define perf_hpp_list__for_each_format_safe(_list, format, tmp)        \
+       list_for_each_entry_safe(format, tmp, &(_list)->fields, list)
 
-#define perf_hpp__for_each_format_safe(format, tmp)    \
-       list_for_each_entry_safe(format, tmp, &perf_hpp__list, list)
+#define perf_hpp_list__for_each_sort_list(_list, format) \
+       list_for_each_entry(format, &(_list)->sorts, sort_list)
 
-#define perf_hpp__for_each_sort_list(format) \
-       list_for_each_entry(format, &perf_hpp__sort_list, sort_list)
+#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp)     \
+       list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
 
-#define perf_hpp__for_each_sort_list_safe(format, tmp) \
-       list_for_each_entry_safe(format, tmp, &perf_hpp__sort_list, sort_list)
+#define hists__for_each_format(hists, format) \
+       perf_hpp_list__for_each_format((hists)->hpp_list, format)
+
+#define hists__for_each_sort_list(hists, format) \
+       perf_hpp_list__for_each_sort_list((hists)->hpp_list, format)
 
 extern struct perf_hpp_fmt perf_hpp__format[];
 
@@ -254,21 +301,29 @@ enum {
 };
 
 void perf_hpp__init(void);
-void perf_hpp__column_register(struct perf_hpp_fmt *format);
 void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
-void perf_hpp__column_enable(unsigned col);
-void perf_hpp__column_disable(unsigned col);
 void perf_hpp__cancel_cumulate(void);
+void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+                                 struct perf_evlist *evlist);
 
-void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
-void perf_hpp__setup_output_field(void);
-void perf_hpp__reset_output_field(void);
-void perf_hpp__append_sort_keys(void);
 
 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
-bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
 bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
 
 static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
                                         struct hists *hists)
@@ -372,6 +427,7 @@ static inline int script_browse(const char *script_opt __maybe_unused)
 #endif
 
 unsigned int hists__sort_list_width(struct hists *hists);
+unsigned int hists__overhead_width(struct hists *hists);
 
 void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
                          struct perf_sample *sample, bool nonany_branch_mode);
@@ -381,4 +437,26 @@ int parse_filter_percentage(const struct option *opt __maybe_unused,
                            const char *arg, int unset __maybe_unused);
 int perf_hist_config(const char *var, const char *value);
 
+void perf_hpp_list__init(struct perf_hpp_list *list);
+
+enum hierarchy_move_dir {
+       HMD_NORMAL,
+       HMD_FORCE_SIBLING,
+       HMD_FORCE_CHILD,
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+                                   enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+       return __rb_hierarchy_next(node, HMD_NORMAL);
+}
+
+#define HIERARCHY_INDENT  3
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+
 #endif /* __PERF_HIST_H */
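
The hierarchy helpers declared just above (__rb_hierarchy_next() and friends, together with the depth and leaf fields used in hists.c) flatten the per-level hroot_out trees into a single walk. Below is a small sketch of how a consumer inside the perf tree might dump every hierarchy entry; the function and the printf formatting are illustrative only, not code from this patch.

#include <inttypes.h>
#include <stdio.h>
#include "util/hist.h"
#include "util/sort.h"

/* Illustrative only: print every hierarchy entry in output order. */
static void dump_hierarchy(struct hists *hists)
{
        struct rb_node *nd = rb_first(&hists->entries);

        while (nd) {
                struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);

                /* depth was assigned in hists__hierarchy_insert_entry() */
                printf("%*s%" PRIu64 "\n",
                       he->depth * HIERARCHY_INDENT, "", he->stat.period);

                /* HMD_FORCE_CHILD descends even into folded (un-expanded) entries */
                nd = __rb_hierarchy_next(&he->rb_node, HMD_FORCE_CHILD);
        }
}
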
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
new file mode 100644 (file)
index 0000000..a1e99da
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef __JIT_H__
+#define __JIT_H__
+
+#include <data.h>
+
+extern int jit_process(struct perf_session *session,
+                      struct perf_data_file *output,
+                      struct machine *machine,
+                      char *filename,
+                      pid_t pid,
+                      u64 *nbytes);
+
+extern int jit_inject_record(const char *filename);
+
+#endif /* __JIT_H__ */
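
jit_process() is meant to be called when a tool sees an MMAP record whose filename may be a jitdump marker: it returns 0 when the file is not a marker, a negative value on error, and a positive value once records were injected. The real caller is perf inject, whose changes are outside this section, so the wrapper below is only a hedged sketch with made-up naming.

#include <inttypes.h>
#include "util/debug.h"
#include "util/event.h"
#include "util/jit.h"
#include "util/machine.h"
#include "util/session.h"

/* Hypothetical call site: try to inject jitted code for one MMAP record. */
static int maybe_inject_jit(struct perf_session *session,
                            struct perf_data_file *output,
                            struct machine *machine,
                            union perf_event *event)
{
        pid_t pid = event->mmap.pid;
        u64 nbytes = 0;
        int ret;

        /* 0: not a jit-<pid>.dump marker, <0: error, >0: records injected */
        ret = jit_process(session, output, machine,
                          event->mmap.filename, pid, &nbytes);
        if (ret <= 0)
                return ret;

        pr_debug("injected %" PRIu64 " bytes of jitted code for pid %d\n",
                 nbytes, pid);
        return 0;
}
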
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
new file mode 100644 (file)
index 0000000..cd272cc
--- /dev/null
@@ -0,0 +1,697 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "util.h"
+#include "event.h"
+#include "debug.h"
+#include "evlist.h"
+#include "symbol.h"
+#include "strlist.h"
+#include <elf.h>
+
+#include "session.h"
+#include "jit.h"
+#include "jitdump.h"
+#include "genelf.h"
+#include "../builtin.h"
+
+struct jit_buf_desc {
+       struct perf_data_file *output;
+       struct perf_session *session;
+       struct machine *machine;
+       union jr_entry   *entry;
+       void             *buf;
+       uint64_t         sample_type;
+       size_t           bufsize;
+       FILE             *in;
+       bool             needs_bswap; /* handles cross-endianness */
+       void             *debug_data;
+       size_t           nr_debug_entries;
+       uint32_t         code_load_count;
+       u64              bytes_written;
+       struct rb_root   code_root;
+       char             dir[PATH_MAX];
+};
+
+struct debug_line_info {
+       unsigned long vma;
+       unsigned int lineno;
+       /* The filename format is unspecified: absolute path, relative path, etc. */
+       char const filename[0];
+};
+
+struct jit_tool {
+       struct perf_tool tool;
+       struct perf_data_file   output;
+       struct perf_data_file   input;
+       u64 bytes_written;
+};
+
+#define hmax(a, b) ((a) > (b) ? (a) : (b))
+#define get_jit_tool(t) (container_of((t), struct jit_tool, tool))
+
+static int
+jit_emit_elf(char *filename,
+            const char *sym,
+            uint64_t code_addr,
+            const void *code,
+            int csize,
+            void *debug,
+            int nr_debug_entries)
+{
+       int ret, fd;
+
+       if (verbose > 0)
+               fprintf(stderr, "write ELF image %s\n", filename);
+
+       fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+       if (fd == -1) {
+               pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno));
+               return -1;
+       }
+
+        ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries);
+
+        close(fd);
+
+        if (ret)
+                unlink(filename);
+
+       return ret;
+}
+
+static void
+jit_close(struct jit_buf_desc *jd)
+{
+       if (!(jd && jd->in))
+               return;
+       funlockfile(jd->in);
+       fclose(jd->in);
+       jd->in = NULL;
+}
+
+static int
+jit_validate_events(struct perf_session *session)
+{
+       struct perf_evsel *evsel;
+
+       /*
+        * check that all events use CLOCK_MONOTONIC
+        */
+       evlist__for_each(session->evlist, evsel) {
+               if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC)
+                       return -1;
+       }
+       return 0;
+}
+
+static int
+jit_open(struct jit_buf_desc *jd, const char *name)
+{
+       struct jitheader header;
+       struct jr_prefix *prefix;
+       ssize_t bs, bsz = 0;
+       void *n, *buf = NULL;
+       int ret, retval = -1;
+
+       jd->in = fopen(name, "r");
+       if (!jd->in)
+               return -1;
+
+       bsz = hmax(sizeof(header), sizeof(*prefix));
+
+       buf = malloc(bsz);
+       if (!buf)
+               goto error;
+
+       /*
+        * protect from writer modifying the file while we are reading it
+        */
+       flockfile(jd->in);
+
+       ret = fread(buf, sizeof(header), 1, jd->in);
+       if (ret != 1)
+               goto error;
+
+       memcpy(&header, buf, sizeof(header));
+
+       if (header.magic != JITHEADER_MAGIC) {
+               if (header.magic != JITHEADER_MAGIC_SW)
+                       goto error;
+               jd->needs_bswap = true;
+       }
+
+       if (jd->needs_bswap) {
+               header.version    = bswap_32(header.version);
+               header.total_size = bswap_32(header.total_size);
+               header.pid        = bswap_32(header.pid);
+               header.elf_mach   = bswap_32(header.elf_mach);
+               header.timestamp  = bswap_64(header.timestamp);
+               header.flags      = bswap_64(header.flags);
+       }
+
+       if (verbose > 2)
+               pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n",
+                       header.version,
+                       header.total_size,
+                       (unsigned long long)header.timestamp,
+                       header.pid,
+                       header.elf_mach);
+
+       if (header.flags & JITDUMP_FLAGS_RESERVED) {
+               pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
+                      (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
+               goto error;
+       }
+
+       /*
+        * validate event is using the correct clockid
+        */
+       if (jit_validate_events(jd->session)) {
+               pr_err("error, jitted code must be sampled with perf record -k 1\n");
+               goto error;
+       }
+
+       bs = header.total_size - sizeof(header);
+
+       if (bs > bsz) {
+               n = realloc(buf, bs);
+               if (!n)
+                       goto error;
+               bsz = bs;
+               buf = n;
+               /* read the extra header data we do not know about */
+               ret = fread(buf, bs - bsz, 1, jd->in);
+               if (ret != 1)
+                       goto error;
+       }
+       /*
+        * keep dirname for generating files and mmap records
+        */
+       strcpy(jd->dir, name);
+       dirname(jd->dir);
+
+       return 0;
+error:
+       funlockfile(jd->in);
+       fclose(jd->in);
+       return retval;
+}
+
+static union jr_entry *
+jit_get_next_entry(struct jit_buf_desc *jd)
+{
+       struct jr_prefix *prefix;
+       union jr_entry *jr;
+       void *addr;
+       size_t bs, size;
+       int id, ret;
+
+       if (!(jd && jd->in))
+               return NULL;
+
+       if (jd->buf == NULL) {
+               size_t sz = getpagesize();
+               if (sz < sizeof(*prefix))
+                       sz = sizeof(*prefix);
+
+               jd->buf = malloc(sz);
+               if (jd->buf == NULL)
+                       return NULL;
+
+               jd->bufsize = sz;
+       }
+
+       prefix = jd->buf;
+
+       /*
+        * file is still locked at this point
+        */
+       ret = fread(prefix, sizeof(*prefix), 1, jd->in);
+       if (ret  != 1)
+               return NULL;
+
+       if (jd->needs_bswap) {
+               prefix->id         = bswap_32(prefix->id);
+               prefix->total_size = bswap_32(prefix->total_size);
+               prefix->timestamp  = bswap_64(prefix->timestamp);
+       }
+       id   = prefix->id;
+       size = prefix->total_size;
+
+       bs = (size_t)size;
+       if (bs < sizeof(*prefix))
+               return NULL;
+
+       if (id >= JIT_CODE_MAX) {
+               pr_warning("next_entry: unknown prefix %d, skipping\n", id);
+               return NULL;
+       }
+       if (bs > jd->bufsize) {
+               void *n;
+               n = realloc(jd->buf, bs);
+               if (!n)
+                       return NULL;
+               jd->buf = n;
+               jd->bufsize = bs;
+       }
+
+       addr = ((void *)jd->buf) + sizeof(*prefix);
+
+       ret = fread(addr, bs - sizeof(*prefix), 1, jd->in);
+       if (ret != 1)
+               return NULL;
+
+       jr = (union jr_entry *)jd->buf;
+
+       switch(id) {
+       case JIT_CODE_DEBUG_INFO:
+               if (jd->needs_bswap) {
+                       uint64_t n;
+                       jr->info.code_addr = bswap_64(jr->info.code_addr);
+                       jr->info.nr_entry  = bswap_64(jr->info.nr_entry);
+                       for (n = 0 ; n < jr->info.nr_entry; n++) {
+                               jr->info.entries[n].addr    = bswap_64(jr->info.entries[n].addr);
+                               jr->info.entries[n].lineno  = bswap_32(jr->info.entries[n].lineno);
+                               jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim);
+                       }
+               }
+               break;
+       case JIT_CODE_CLOSE:
+               break;
+       case JIT_CODE_LOAD:
+               if (jd->needs_bswap) {
+                       jr->load.pid       = bswap_32(jr->load.pid);
+                       jr->load.tid       = bswap_32(jr->load.tid);
+                       jr->load.vma       = bswap_64(jr->load.vma);
+                       jr->load.code_addr = bswap_64(jr->load.code_addr);
+                       jr->load.code_size = bswap_64(jr->load.code_size);
+                       jr->load.code_index= bswap_64(jr->load.code_index);
+               }
+               jd->code_load_count++;
+               break;
+       case JIT_CODE_MOVE:
+               if (jd->needs_bswap) {
+                       jr->move.pid           = bswap_32(jr->move.pid);
+                       jr->move.tid           = bswap_32(jr->move.tid);
+                       jr->move.vma           = bswap_64(jr->move.vma);
+                       jr->move.old_code_addr = bswap_64(jr->move.old_code_addr);
+                       jr->move.new_code_addr = bswap_64(jr->move.new_code_addr);
+                       jr->move.code_size     = bswap_64(jr->move.code_size);
+                       jr->move.code_index    = bswap_64(jr->move.code_index);
+               }
+               break;
+       case JIT_CODE_MAX:
+       default:
+               return NULL;
+       }
+       return jr;
+}
+
+static int
+jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
+{
+       ssize_t size;
+
+       size = perf_data_file__write(jd->output, event, event->header.size);
+       if (size < 0)
+               return -1;
+
+       jd->bytes_written += size;
+       return 0;
+}
+
+static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+       struct perf_sample sample;
+       union perf_event *event;
+       struct perf_tool *tool = jd->session->tool;
+       uint64_t code, addr;
+       uintptr_t uaddr;
+       char *filename;
+       struct stat st;
+       size_t size;
+       u16 idr_size;
+       const char *sym;
+       uint32_t count;
+       int ret, csize;
+       pid_t pid, tid;
+       struct {
+               u32 pid, tid;
+               u64 time;
+       } *id;
+
+       pid   = jr->load.pid;
+       tid   = jr->load.tid;
+       csize = jr->load.code_size;
+       addr  = jr->load.code_addr;
+       sym   = (void *)((unsigned long)jr + sizeof(jr->load));
+       code  = (unsigned long)jr + jr->load.p.total_size - csize;
+       count = jr->load.code_index;
+       idr_size = jd->machine->id_hdr_size;
+
+       event = calloc(1, sizeof(*event) + idr_size);
+       if (!event)
+               return -1;
+
+       filename = event->mmap2.filename;
+       size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so",
+                       jd->dir,
+                       pid,
+                       count);
+
+       size++; /* for \0 */
+
+       size = PERF_ALIGN(size, sizeof(u64));
+       uaddr = (uintptr_t)code;
+       ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries);
+
+       if (jd->debug_data && jd->nr_debug_entries) {
+               free(jd->debug_data);
+               jd->debug_data = NULL;
+               jd->nr_debug_entries = 0;
+       }
+
+       if (ret) {
+               free(event);
+               return -1;
+       }
+       if (stat(filename, &st))
+               memset(&st, 0, sizeof(st));
+
+       event->mmap2.header.type = PERF_RECORD_MMAP2;
+       event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+       event->mmap2.header.size = (sizeof(event->mmap2) -
+                       (sizeof(event->mmap2.filename) - size) + idr_size);
+
+       event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+       event->mmap2.start = addr;
+       event->mmap2.len   = csize;
+       event->mmap2.pid   = pid;
+       event->mmap2.tid   = tid;
+       event->mmap2.ino   = st.st_ino;
+       event->mmap2.maj   = major(st.st_dev);
+       event->mmap2.min   = minor(st.st_dev);
+       event->mmap2.prot  = st.st_mode;
+       event->mmap2.flags = MAP_SHARED;
+       event->mmap2.ino_generation = 1;
+
+       id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+       if (jd->sample_type & PERF_SAMPLE_TID) {
+               id->pid  = pid;
+               id->tid  = tid;
+       }
+       if (jd->sample_type & PERF_SAMPLE_TIME)
+               id->time = jr->load.p.timestamp;
+
+       /*
+        * create pseudo sample to induce dso hit increment
+        * use first address as sample address
+        */
+       memset(&sample, 0, sizeof(sample));
+       sample.pid  = pid;
+       sample.tid  = tid;
+       sample.time = id->time;
+       sample.ip   = addr;
+
+       ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+       if (ret)
+               return ret;
+
+       ret = jit_inject_event(jd, event);
+       /*
+        * mark dso as used to generate a build-id in the header
+        */
+       if (!ret)
+               build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+       return ret;
+}
+
+static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+       struct perf_sample sample;
+       union perf_event *event;
+       struct perf_tool *tool = jd->session->tool;
+       char *filename;
+       size_t size;
+       struct stat st;
+       u16 idr_size;
+       int ret;
+       pid_t pid, tid;
+       struct {
+               u32 pid, tid;
+               u64 time;
+       } *id;
+
+       pid = jr->move.pid;
+       tid =  jr->move.tid;
+       idr_size = jd->machine->id_hdr_size;
+
+       /*
+        * +16 to account for sample_id_all (hack)
+        */
+       event = calloc(1, sizeof(*event) + 16);
+       if (!event)
+               return -1;
+
+       filename = event->mmap2.filename;
+       size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64,
+                jd->dir,
+                pid,
+                jr->move.code_index);
+
+       size++; /* for \0 */
+
+       if (stat(filename, &st))
+               memset(&st, 0, sizeof(st));
+
+       size = PERF_ALIGN(size, sizeof(u64));
+
+       event->mmap2.header.type = PERF_RECORD_MMAP2;
+       event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+       event->mmap2.header.size = (sizeof(event->mmap2) -
+                       (sizeof(event->mmap2.filename) - size) + idr_size);
+       event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+       event->mmap2.start = jr->move.new_code_addr;
+       event->mmap2.len   = jr->move.code_size;
+       event->mmap2.pid   = pid;
+       event->mmap2.tid   = tid;
+       event->mmap2.ino   = st.st_ino;
+       event->mmap2.maj   = major(st.st_dev);
+       event->mmap2.min   = minor(st.st_dev);
+       event->mmap2.prot  = st.st_mode;
+       event->mmap2.flags = MAP_SHARED;
+       event->mmap2.ino_generation = 1;
+
+       id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+       if (jd->sample_type & PERF_SAMPLE_TID) {
+               id->pid  = pid;
+               id->tid  = tid;
+       }
+       if (jd->sample_type & PERF_SAMPLE_TIME)
+               id->time = jr->load.p.timestamp;
+
+       /*
+        * create pseudo sample to induce dso hit increment
+        * use first address as sample address
+        */
+       memset(&sample, 0, sizeof(sample));
+       sample.pid  = pid;
+       sample.tid  = tid;
+       sample.time = id->time;
+       sample.ip   = jr->move.new_code_addr;
+
+       ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+       if (ret)
+               return ret;
+
+       ret = jit_inject_event(jd, event);
+       if (!ret)
+               build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+       return ret;
+}
+
+static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+       void *data;
+       size_t sz;
+
+       if (!(jd && jr))
+               return -1;
+
+       sz  = jr->prefix.total_size - sizeof(jr->info);
+       data = malloc(sz);
+       if (!data)
+               return -1;
+
+       memcpy(data, &jr->info.entries, sz);
+
+       jd->debug_data       = data;
+
+       /*
+        * we must use nr_entry instead of size here because
+        * we cannot distinguish an actual entry from padding otherwise
+        */
+       jd->nr_debug_entries = jr->info.nr_entry;
+
+       return 0;
+}
+
+static int
+jit_process_dump(struct jit_buf_desc *jd)
+{
+       union jr_entry *jr;
+       int ret = 0;
+
+       while ((jr = jit_get_next_entry(jd))) {
+               switch(jr->prefix.id) {
+               case JIT_CODE_LOAD:
+                       ret = jit_repipe_code_load(jd, jr);
+                       break;
+               case JIT_CODE_MOVE:
+                       ret = jit_repipe_code_move(jd, jr);
+                       break;
+               case JIT_CODE_DEBUG_INFO:
+                       ret = jit_repipe_debug_info(jd, jr);
+                       break;
+               default:
+                       ret = 0;
+                       continue;
+               }
+       }
+       return ret;
+}
+
+static int
+jit_inject(struct jit_buf_desc *jd, char *path)
+{
+       int ret;
+
+       if (verbose > 0)
+               fprintf(stderr, "injecting: %s\n", path);
+
+       ret = jit_open(jd, path);
+       if (ret)
+               return -1;
+
+       ret = jit_process_dump(jd);
+
+       jit_close(jd);
+
+       if (verbose > 0)
+               fprintf(stderr, "injected: %s (%d)\n", path, ret);
+
+       return 0;
+}
+
+/*
+ * The file name must match the pattern .../jit-XXXX.dump
+ * where XXXX is the PID of the process which did the mmap()
+ * as captured in the RECORD_MMAP record
+ */
+static int
+jit_detect(char *mmap_name, pid_t pid)
+{
+       char *p;
+       char *end = NULL;
+       pid_t pid2;
+
+       if (verbose > 2)
+               fprintf(stderr, "jit marker trying : %s\n", mmap_name);
+       /*
+        * get file name
+        */
+       p = strrchr(mmap_name, '/');
+       if (!p)
+               return -1;
+
+       /*
+        * match prefix
+        */
+       if (strncmp(p, "/jit-", 5))
+               return -1;
+
+       /*
+        * skip prefix
+        */
+       p += 5;
+
+       /*
+        * must be followed by a pid
+        */
+       if (!isdigit(*p))
+               return -1;
+
+       pid2 = (int)strtol(p, &end, 10);
+       if (!end)
+               return -1;
+
+       /*
+        * pid does not match mmap pid
+        * pid==0 in system-wide mode (synthesized)
+        */
+       if (pid && pid2 != pid)
+               return -1;
+       /*
+        * validate suffix
+        */
+       if (strcmp(end, ".dump"))
+               return -1;
+
+       if (verbose > 0)
+               fprintf(stderr, "jit marker found: %s\n", mmap_name);
+
+       return 0;
+}
+
+int
+jit_process(struct perf_session *session,
+           struct perf_data_file *output,
+           struct machine *machine,
+           char *filename,
+           pid_t pid,
+           u64 *nbytes)
+{
+       struct perf_evsel *first;
+       struct jit_buf_desc jd;
+       int ret;
+
+       /*
+        * first, detect marker mmap (i.e., the jitdump mmap)
+        */
+       if (jit_detect(filename, pid))
+               return 0;
+
+       memset(&jd, 0, sizeof(jd));
+
+       jd.session = session;
+       jd.output  = output;
+       jd.machine = machine;
+
+       /*
+        * track sample_type to compute id_all layout
+        * perf sets the same sample type to all events as of now
+        */
+       first = perf_evlist__first(session->evlist);
+       jd.sample_type = first->attr.sample_type;
+
+       *nbytes = 0;
+
+       ret = jit_inject(&jd, filename);
+       if (!ret) {
+               *nbytes = jd.bytes_written;
+               ret = 1;
+       }
+
+       return ret;
+}
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
new file mode 100644 (file)
index 0000000..b66c1f5
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * jitdump.h: jitted code info encapsulation file format
+ *
+ * Adapted from OProfile GPLv2 support jidump.h:
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdint.h>
+#include <string.h>
+
+/* JiTD */
+#define JITHEADER_MAGIC                0x4A695444
+#define JITHEADER_MAGIC_SW     0x4454694A
+
+#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+
+#define JITHEADER_VERSION 1
+
+enum jitdump_flags_bits {
+       JITDUMP_FLAGS_MAX_BIT,
+};
+
+#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
+                               (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
+
+struct jitheader {
+       uint32_t magic;         /* characters "jItD" */
+       uint32_t version;       /* header version */
+       uint32_t total_size;    /* total size of header */
+       uint32_t elf_mach;      /* elf mach target */
+       uint32_t pad1;          /* reserved */
+       uint32_t pid;           /* JIT process id */
+       uint64_t timestamp;     /* timestamp */
+       uint64_t flags;         /* flags */
+};
+
+enum jit_record_type {
+       JIT_CODE_LOAD           = 0,
+        JIT_CODE_MOVE           = 1,
+       JIT_CODE_DEBUG_INFO     = 2,
+       JIT_CODE_CLOSE          = 3,
+
+       JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+       uint32_t id;
+       uint32_t total_size;
+       uint64_t timestamp;
+};
+
+struct jr_code_load {
+       struct jr_prefix p;
+
+       uint32_t pid;
+       uint32_t tid;
+       uint64_t vma;
+       uint64_t code_addr;
+       uint64_t code_size;
+       uint64_t code_index;
+};
+
+struct jr_code_close {
+       struct jr_prefix p;
+};
+
+struct jr_code_move {
+       struct jr_prefix p;
+
+       uint32_t pid;
+       uint32_t tid;
+       uint64_t vma;
+       uint64_t old_code_addr;
+       uint64_t new_code_addr;
+       uint64_t code_size;
+       uint64_t code_index;
+};
+
+struct debug_entry {
+       uint64_t addr;
+       int lineno;         /* source line number starting at 1 */
+       int discrim;        /* column discriminator, 0 is default */
+       const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+};
+
+struct jr_code_debug_info {
+       struct jr_prefix p;
+
+       uint64_t code_addr;
+       uint64_t nr_entry;
+       struct debug_entry entries[0];
+};
+
+union jr_entry {
+        struct jr_code_debug_info info;
+        struct jr_code_close close;
+        struct jr_code_load load;
+        struct jr_code_move move;
+        struct jr_prefix prefix;
+};
+
+static inline struct debug_entry *
+debug_entry_next(struct debug_entry *ent)
+{
+       void *a = ent + 1;
+       size_t l = strlen(ent->name) + 1;
+       return a + l;
+}
+
+static inline char *
+debug_entry_file(struct debug_entry *ent)
+{
+       void *a = ent + 1;
+       return a;
+}
+
+#endif /* !JITDUMP_H */
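
The structures above define the on-disk format that a JIT runtime writes and that jit_open()/jit_get_next_entry() parse. The following is a rough, hedged sketch of the producer side, i.e. what a JIT could emit for a single compiled function. The helper name, the EM_X86_64 machine value and the error handling are illustrative; timestamps should come from CLOCK_MONOTONIC so that jit_validate_events() accepts the matching perf.data (cf. perf record -k 1).

#include <elf.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "jitdump.h"

static uint64_t monotonic_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Illustrative producer: emit one JIT_CODE_LOAD record for 'code'. */
static int emit_code_load(FILE *fp, const char *name, uint64_t addr,
                          const void *code, uint64_t code_size,
                          uint64_t code_index)
{
        static int header_written;
        struct jr_code_load rec;

        if (!header_written) {
                struct jitheader hdr = {
                        .magic      = JITHEADER_MAGIC,
                        .version    = JITHEADER_VERSION,
                        .total_size = sizeof(hdr),
                        .elf_mach   = EM_X86_64,       /* assumption: x86-64 JIT */
                        .pid        = getpid(),
                        .timestamp  = monotonic_ns(),
                };

                if (fwrite(&hdr, sizeof(hdr), 1, fp) != 1)
                        return -1;
                header_written = 1;
        }

        memset(&rec, 0, sizeof(rec));
        rec.p.id         = JIT_CODE_LOAD;
        rec.p.total_size = sizeof(rec) + strlen(name) + 1 + code_size;
        rec.p.timestamp  = monotonic_ns();
        rec.pid          = getpid();
        rec.tid          = getpid();   /* single-threaded example */
        rec.vma          = addr;
        rec.code_addr    = addr;
        rec.code_size    = code_size;
        rec.code_index   = code_index;

        /* record, then the NUL-terminated symbol name, then the native code */
        if (fwrite(&rec, sizeof(rec), 1, fp) != 1 ||
            fwrite(name, strlen(name) + 1, 1, fp) != 1 ||
            fwrite(code, code_size, 1, fp) != 1)
                return -1;

        return 0;
}
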
index ae825d4ec110fcfe6a06ef5e4daf50b03a214a87..d01e73592f6e34347f0c26531b58aa6ce1d57d52 100644 (file)
@@ -122,6 +122,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm,
 
 bool kvm_exit_event(struct perf_evsel *evsel);
 bool kvm_entry_event(struct perf_evsel *evsel);
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm);
 
 #define define_exit_reasons_table(name, symbols)       \
        static struct exit_reasons_table name[] = {     \
@@ -133,8 +134,13 @@ bool kvm_entry_event(struct perf_evsel *evsel);
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
+extern const char *vcpu_id_str;
+extern const int decode_str_len;
+extern const char *kvm_exit_reason;
+extern const char *kvm_entry_trace;
+extern const char *kvm_exit_trace;
 
 #endif /* __PERF_KVM_STAT_H */
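
The tracepoint list and the decode strings are now plain externs so that each architecture can provide its own definitions, and setup_kvm_events_tp() gives the arch a hook to fill them in at runtime. A rough, hypothetical arch-side sketch (the literal strings and the length value follow common x86 conventions and are assumptions, not taken from this diff):

    #include "kvm-stat.h"

    const char *vcpu_id_str     = "vcpu_id";
    const int decode_str_len    = 20;
    const char *kvm_exit_reason = "exit_reason";
    const char *kvm_entry_trace = "kvm:kvm_entry";
    const char *kvm_exit_trace  = "kvm:kvm_exit";

    const char *kvm_events_tp[] = {
            "kvm:kvm_entry",
            "kvm:kvm_exit",
            NULL,
    };

    int setup_kvm_events_tp(struct perf_kvm_stat *kvm)
    {
            (void)kvm;      /* nothing dynamic to prepare on this hypothetical arch */
            return 0;
    }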
index 2c2b443df5ba796c43ee12c6c0f2061b5d7c7b80..1a3e45baf97fb575c02afe49707727aff0f628ec 100644 (file)
@@ -179,6 +179,16 @@ struct symbol *machine__find_kernel_symbol(struct machine *machine,
                                       mapp, filter);
 }
 
+static inline
+struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
+                                                  enum map_type type, const char *name,
+                                                  struct map **mapp,
+                                                  symbol_filter_t filter)
+{
+       return map_groups__find_symbol_by_name(&machine->kmaps, type, name,
+                                              mapp, filter);
+}
+
 static inline
 struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
                                             struct map **mapp,
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
new file mode 100644 (file)
index 0000000..75465f8
--- /dev/null
@@ -0,0 +1,255 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
+#include "mem-events.h"
+#include "debug.h"
+#include "symbol.h"
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+       E("ldlat-loads",        "cpu/mem-loads,ldlat=30/P",     "mem-loads"),
+       E("ldlat-stores",       "cpu/mem-stores/P",             "mem-stores"),
+};
+#undef E
+
+char *perf_mem_events__name(int i)
+{
+       return (char *)perf_mem_events[i].name;
+}
+
+int perf_mem_events__parse(const char *str)
+{
+       char *tok, *saveptr = NULL;
+       bool found = false;
+       char *buf;
+       int j;
+
+       /* We need a buffer that we know we can write to. */
+       buf = malloc(strlen(str) + 1);
+       if (!buf)
+               return -ENOMEM;
+
+       strcpy(buf, str);
+
+       tok = strtok_r((char *)buf, ",", &saveptr);
+
+       while (tok) {
+               for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+                       struct perf_mem_event *e = &perf_mem_events[j];
+
+                       if (strstr(e->tag, tok))
+                               e->record = found = true;
+               }
+
+               tok = strtok_r(NULL, ",", &saveptr);
+       }
+
+       free(buf);
+
+       if (found)
+               return 0;
+
+       pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
+       return -1;
+}
+
+int perf_mem_events__init(void)
+{
+       const char *mnt = sysfs__mount();
+       bool found = false;
+       int j;
+
+       if (!mnt)
+               return -ENOENT;
+
+       for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+               char path[PATH_MAX];
+               struct perf_mem_event *e = &perf_mem_events[j];
+               struct stat st;
+
+               scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s",
+                         mnt, e->sysfs_name);
+
+               if (!stat(path, &st))
+                       e->supported = found = true;
+       }
+
+       return found ? 0 : -ENOENT;
+}
+
+static const char * const tlb_access[] = {
+       "N/A",
+       "HIT",
+       "MISS",
+       "L1",
+       "L2",
+       "Walker",
+       "Fault",
+};
+
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       size_t l = 0, i;
+       u64 m = PERF_MEM_TLB_NA;
+       u64 hit, miss;
+
+       sz -= 1; /* -1 for null termination */
+       out[0] = '\0';
+
+       if (mem_info)
+               m = mem_info->data_src.mem_dtlb;
+
+       hit = m & PERF_MEM_TLB_HIT;
+       miss = m & PERF_MEM_TLB_MISS;
+
+       /* already taken care of */
+       m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+       for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+               if (!(m & 0x1))
+                       continue;
+               if (l) {
+                       strcat(out, " or ");
+                       l += 4;
+               }
+               l += scnprintf(out + l, sz - l, tlb_access[i]);
+       }
+       if (*out == '\0')
+               l += scnprintf(out, sz - l, "N/A");
+       if (hit)
+               l += scnprintf(out + l, sz - l, " hit");
+       if (miss)
+               l += scnprintf(out + l, sz - l, " miss");
+
+       return l;
+}
+
+static const char * const mem_lvl[] = {
+       "N/A",
+       "HIT",
+       "MISS",
+       "L1",
+       "LFB",
+       "L2",
+       "L3",
+       "Local RAM",
+       "Remote RAM (1 hop)",
+       "Remote RAM (2 hops)",
+       "Remote Cache (1 hop)",
+       "Remote Cache (2 hops)",
+       "I/O",
+       "Uncached",
+};
+
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       size_t i, l = 0;
+       u64 m =  PERF_MEM_LVL_NA;
+       u64 hit, miss;
+
+       if (mem_info)
+               m  = mem_info->data_src.mem_lvl;
+
+       sz -= 1; /* -1 for null termination */
+       out[0] = '\0';
+
+       hit = m & PERF_MEM_LVL_HIT;
+       miss = m & PERF_MEM_LVL_MISS;
+
+       /* already taken care of */
+       m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
+
+       for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+               if (!(m & 0x1))
+                       continue;
+               if (l) {
+                       strcat(out, " or ");
+                       l += 4;
+               }
+               l += scnprintf(out + l, sz - l, mem_lvl[i]);
+       }
+       if (*out == '\0')
+               l += scnprintf(out, sz - l, "N/A");
+       if (hit)
+               l += scnprintf(out + l, sz - l, " hit");
+       if (miss)
+               l += scnprintf(out + l, sz - l, " miss");
+
+       return l;
+}
+
+static const char * const snoop_access[] = {
+       "N/A",
+       "None",
+       "Miss",
+       "Hit",
+       "HitM",
+};
+
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       size_t i, l = 0;
+       u64 m = PERF_MEM_SNOOP_NA;
+
+       sz -= 1; /* -1 for null termination */
+       out[0] = '\0';
+
+       if (mem_info)
+               m = mem_info->data_src.mem_snoop;
+
+       for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+               if (!(m & 0x1))
+                       continue;
+               if (l) {
+                       strcat(out, " or ");
+                       l += 4;
+               }
+               l += scnprintf(out + l, sz - l, snoop_access[i]);
+       }
+
+       if (*out == '\0')
+               l += scnprintf(out, sz - l, "N/A");
+
+       return l;
+}
+
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       u64 mask = PERF_MEM_LOCK_NA;
+       int l;
+
+       if (mem_info)
+               mask = mem_info->data_src.mem_lock;
+
+       if (mask & PERF_MEM_LOCK_NA)
+               l = scnprintf(out, sz, "N/A");
+       else if (mask & PERF_MEM_LOCK_LOCKED)
+               l = scnprintf(out, sz, "Yes");
+       else
+               l = scnprintf(out, sz, "No");
+
+       return l;
+}
+
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       int i = 0;
+
+       i += perf_mem__lvl_scnprintf(out, sz, mem_info);
+       i += scnprintf(out + i, sz - i, "|SNP ");
+       i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
+       i += scnprintf(out + i, sz - i, "|TLB ");
+       i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
+       i += scnprintf(out + i, sz - i, "|LCK ");
+       i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+
+       return i;
+}
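
A short usage sketch for the new helpers (illustrative only, not part of the patch): probe sysfs for the mem-loads/mem-stores events, enable the load-latency event by tag, and pretty-print a decoded data source. The mem_info is assumed to have been filled from a PERF_SAMPLE_DATA_SRC sample elsewhere.

    #include <stdio.h>
    #include "mem-events.h"

    static void dump_mem_sample(struct mem_info *mi)
    {
            char buf[256];

            if (perf_mem_events__init())            /* checks sysfs for mem-loads/mem-stores */
                    return;
            if (perf_mem_events__parse("ldlat-loads"))
                    return;

            perf_script__meminfo_scnprintf(buf, sizeof(buf), mi);
            printf("%s: %s\n", perf_mem_events__name(PERF_MEM_EVENTS__LOAD), buf);
    }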
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
new file mode 100644 (file)
index 0000000..5d6d930
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef __PERF_MEM_EVENTS_H
+#define __PERF_MEM_EVENTS_H
+
+#include <stdbool.h>
+
+struct perf_mem_event {
+       bool            record;
+       bool            supported;
+       const char      *tag;
+       const char      *name;
+       const char      *sysfs_name;
+};
+
+enum {
+       PERF_MEM_EVENTS__LOAD,
+       PERF_MEM_EVENTS__STORE,
+       PERF_MEM_EVENTS__MAX,
+};
+
+extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+
+int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+char *perf_mem_events__name(int i);
+
+struct mem_info;
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
+
+#endif /* __PERF_MEM_EVENTS_H */
index 813d9b272c813b0dd749470f6209aa3a0a0607b4..4c19d5e79d8c4d626eb3fa91486cc1d83447aeeb 100644 (file)
@@ -279,7 +279,24 @@ const char *event_type(int type)
        return "unknown";
 }
 
+static int parse_events__is_name_term(struct parse_events_term *term)
+{
+       return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
+}
 
+static char *get_config_name(struct list_head *head_terms)
+{
+       struct parse_events_term *term;
+
+       if (!head_terms)
+               return NULL;
+
+       list_for_each_entry(term, head_terms, list)
+               if (parse_events__is_name_term(term))
+                       return term->val.str;
+
+       return NULL;
+}
 
 static struct perf_evsel *
 __add_event(struct list_head *list, int *idx,
@@ -333,11 +350,25 @@ static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES]
        return -1;
 }
 
+typedef int config_term_func_t(struct perf_event_attr *attr,
+                              struct parse_events_term *term,
+                              struct parse_events_error *err);
+static int config_term_common(struct perf_event_attr *attr,
+                             struct parse_events_term *term,
+                             struct parse_events_error *err);
+static int config_attr(struct perf_event_attr *attr,
+                      struct list_head *head,
+                      struct parse_events_error *err,
+                      config_term_func_t config_term);
+
 int parse_events_add_cache(struct list_head *list, int *idx,
-                          char *type, char *op_result1, char *op_result2)
+                          char *type, char *op_result1, char *op_result2,
+                          struct parse_events_error *err,
+                          struct list_head *head_config)
 {
        struct perf_event_attr attr;
-       char name[MAX_NAME_LEN];
+       LIST_HEAD(config_terms);
+       char name[MAX_NAME_LEN], *config_name;
        int cache_type = -1, cache_op = -1, cache_result = -1;
        char *op_result[2] = { op_result1, op_result2 };
        int i, n;
@@ -351,6 +382,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
        if (cache_type == -1)
                return -EINVAL;
 
+       config_name = get_config_name(head_config);
        n = snprintf(name, MAX_NAME_LEN, "%s", type);
 
        for (i = 0; (i < 2) && (op_result[i]); i++) {
@@ -391,7 +423,16 @@ int parse_events_add_cache(struct list_head *list, int *idx,
        memset(&attr, 0, sizeof(attr));
        attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
        attr.type = PERF_TYPE_HW_CACHE;
-       return add_event(list, idx, &attr, name, NULL);
+
+       if (head_config) {
+               if (config_attr(&attr, head_config, err,
+                               config_term_common))
+                       return -EINVAL;
+
+               if (get_config_terms(head_config, &config_terms))
+                       return -ENOMEM;
+       }
+       return add_event(list, idx, &attr, config_name ? : name, &config_terms);
 }
 
 static void tracepoint_error(struct parse_events_error *e, int err,
@@ -540,6 +581,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 struct __add_bpf_event_param {
        struct parse_events_evlist *data;
        struct list_head *list;
+       struct list_head *head_config;
 };
 
 static int add_bpf_event(struct probe_trace_event *tev, int fd,
@@ -556,7 +598,8 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd,
                 tev->group, tev->event, fd);
 
        err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group,
-                                         tev->event, evlist->error, NULL);
+                                         tev->event, evlist->error,
+                                         param->head_config);
        if (err) {
                struct perf_evsel *evsel, *tmp;
 
@@ -581,11 +624,12 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd,
 
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
                              struct list_head *list,
-                             struct bpf_object *obj)
+                             struct bpf_object *obj,
+                             struct list_head *head_config)
 {
        int err;
        char errbuf[BUFSIZ];
-       struct __add_bpf_event_param param = {data, list};
+       struct __add_bpf_event_param param = {data, list, head_config};
        static bool registered_unprobe_atexit = false;
 
        if (IS_ERR(obj) || !obj) {
@@ -631,17 +675,99 @@ errout:
        return err;
 }
 
+static int
+parse_events_config_bpf(struct parse_events_evlist *data,
+                       struct bpf_object *obj,
+                       struct list_head *head_config)
+{
+       struct parse_events_term *term;
+       int error_pos;
+
+       if (!head_config || list_empty(head_config))
+               return 0;
+
+       list_for_each_entry(term, head_config, list) {
+               char errbuf[BUFSIZ];
+               int err;
+
+               if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+                       snprintf(errbuf, sizeof(errbuf),
+                                "Invalid config term for BPF object");
+                       errbuf[BUFSIZ - 1] = '\0';
+
+                       data->error->idx = term->err_term;
+                       data->error->str = strdup(errbuf);
+                       return -EINVAL;
+               }
+
+               err = bpf__config_obj(obj, term, data->evlist, &error_pos);
+               if (err) {
+                       bpf__strerror_config_obj(obj, term, data->evlist,
+                                                &error_pos, err, errbuf,
+                                                sizeof(errbuf));
+                       data->error->help = strdup(
+"Hint:\tValid config terms:\n"
+"     \tmap:[<arraymap>].value<indices>=[value]\n"
+"     \tmap:[<eventmap>].event<indices>=[event]\n"
+"\n"
+"     \twhere <indices> is something like [0,3...5] or [all]\n"
+"     \t(add -v to see detail)");
+                       data->error->str = strdup(errbuf);
+                       if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+                               data->error->idx = term->err_val;
+                       else
+                               data->error->idx = term->err_term + error_pos;
+                       return err;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Split config terms:
+ * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ...
+ *  'call-graph=fp' is 'evt config', which should be applied to each
+ *  event in bpf.c.
+ * 'map:array.value[0]=1' is 'obj config', should be processed
+ * with parse_events_config_bpf.
+ *
+ * Move object config terms from the first list to obj_head_config.
+ */
+static void
+split_bpf_config_terms(struct list_head *evt_head_config,
+                      struct list_head *obj_head_config)
+{
+       struct parse_events_term *term, *temp;
+
+       /*
+        * Currently, all possible user config terms
+        * belong to the bpf object. parse_events__is_hardcoded_term()
+        * happens to be a good flag.
+        *
+        * See parse_events_config_bpf() and
+        * config_term_tracepoint().
+        */
+       list_for_each_entry_safe(term, temp, evt_head_config, list)
+               if (!parse_events__is_hardcoded_term(term))
+                       list_move_tail(&term->list, obj_head_config);
+}
+
 int parse_events_load_bpf(struct parse_events_evlist *data,
                          struct list_head *list,
                          char *bpf_file_name,
-                         bool source)
+                         bool source,
+                         struct list_head *head_config)
 {
+       int err;
        struct bpf_object *obj;
+       LIST_HEAD(obj_head_config);
+
+       if (head_config)
+               split_bpf_config_terms(head_config, &obj_head_config);
 
        obj = bpf__prepare_load(bpf_file_name, source);
        if (IS_ERR(obj)) {
                char errbuf[BUFSIZ];
-               int err;
 
                err = PTR_ERR(obj);
 
@@ -659,7 +785,18 @@ int parse_events_load_bpf(struct parse_events_evlist *data,
                return err;
        }
 
-       return parse_events_load_bpf_obj(data, list, obj);
+       err = parse_events_load_bpf_obj(data, list, obj, head_config);
+       if (err)
+               return err;
+       err = parse_events_config_bpf(data, obj, &obj_head_config);
+
+       /*
+        * Caller doesn't know anything about obj_head_config,
+        * so combine them together again before returning.
+        */
+       if (head_config)
+               list_splice_tail(&obj_head_config, head_config);
+       return err;
 }
 
 static int
@@ -746,9 +883,59 @@ static int check_type_val(struct parse_events_term *term,
        return -EINVAL;
 }
 
-typedef int config_term_func_t(struct perf_event_attr *attr,
-                              struct parse_events_term *term,
-                              struct parse_events_error *err);
+/*
+ * Update according to parse-events.l
+ */
+static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+       [PARSE_EVENTS__TERM_TYPE_USER]                  = "<sysfs term>",
+       [PARSE_EVENTS__TERM_TYPE_CONFIG]                = "config",
+       [PARSE_EVENTS__TERM_TYPE_CONFIG1]               = "config1",
+       [PARSE_EVENTS__TERM_TYPE_CONFIG2]               = "config2",
+       [PARSE_EVENTS__TERM_TYPE_NAME]                  = "name",
+       [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD]         = "period",
+       [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ]           = "freq",
+       [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE]    = "branch_type",
+       [PARSE_EVENTS__TERM_TYPE_TIME]                  = "time",
+       [PARSE_EVENTS__TERM_TYPE_CALLGRAPH]             = "call-graph",
+       [PARSE_EVENTS__TERM_TYPE_STACKSIZE]             = "stack-size",
+       [PARSE_EVENTS__TERM_TYPE_NOINHERIT]             = "no-inherit",
+       [PARSE_EVENTS__TERM_TYPE_INHERIT]               = "inherit",
+};
+
+static bool config_term_shrinked;
+
+static bool
+config_term_avail(int term_type, struct parse_events_error *err)
+{
+       if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) {
+               err->str = strdup("Invalid term_type");
+               return false;
+       }
+       if (!config_term_shrinked)
+               return true;
+
+       switch (term_type) {
+       case PARSE_EVENTS__TERM_TYPE_CONFIG:
+       case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+       case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+       case PARSE_EVENTS__TERM_TYPE_NAME:
+               return true;
+       default:
+               if (!err)
+                       return false;
+
+               /* term_type is validated so indexing is safe */
+               if (asprintf(&err->str, "'%s' is not usable in 'perf stat'",
+                            config_term_names[term_type]) < 0)
+                       err->str = NULL;
+               return false;
+       }
+}
+
+void parse_events__shrink_config_terms(void)
+{
+       config_term_shrinked = true;
+}
 
 static int config_term_common(struct perf_event_attr *attr,
                              struct parse_events_term *term,
@@ -815,6 +1002,17 @@ do {                                                                         \
                return -EINVAL;
        }
 
+       /*
+        * Check term availability after basic checking so
+        * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
+        *
+        * If we checked availability at the entry of this function,
+        * the user would see "'<sysfs term>' is not usable in 'perf stat'"
+        * if an invalid config term is provided for legacy events
+        * (for example, instructions/badterm/...), which is confusing.
+        */
+       if (!config_term_avail(term->type_term, err))
+               return -EINVAL;
        return 0;
 #undef CHECK_TYPE_VAL
 }
@@ -961,23 +1159,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data,
                        return -ENOMEM;
        }
 
-       return add_event(list, &data->idx, &attr, NULL, &config_terms);
-}
-
-static int parse_events__is_name_term(struct parse_events_term *term)
-{
-       return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
-}
-
-static char *pmu_event_name(struct list_head *head_terms)
-{
-       struct parse_events_term *term;
-
-       list_for_each_entry(term, head_terms, list)
-               if (parse_events__is_name_term(term))
-                       return term->val.str;
-
-       return NULL;
+       return add_event(list, &data->idx, &attr,
+                        get_config_name(head_config), &config_terms);
 }
 
 int parse_events_add_pmu(struct parse_events_evlist *data,
@@ -1024,7 +1207,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
                return -EINVAL;
 
        evsel = __add_event(list, &data->idx, &attr,
-                           pmu_event_name(head_config), pmu->cpus,
+                           get_config_name(head_config), pmu->cpus,
                            &config_terms);
        if (evsel) {
                evsel->unit = info.unit;
@@ -1386,8 +1569,7 @@ int parse_events_terms(struct list_head *terms, const char *str)
                return 0;
        }
 
-       if (data.terms)
-               parse_events__free_terms(data.terms);
+       parse_events_terms__delete(data.terms);
        return ret;
 }
 
@@ -1395,9 +1577,10 @@ int parse_events(struct perf_evlist *evlist, const char *str,
                 struct parse_events_error *err)
 {
        struct parse_events_evlist data = {
-               .list  = LIST_HEAD_INIT(data.list),
-               .idx   = evlist->nr_entries,
-               .error = err,
+               .list   = LIST_HEAD_INIT(data.list),
+               .idx    = evlist->nr_entries,
+               .error  = err,
+               .evlist = evlist,
        };
        int ret;
 
@@ -2068,12 +2251,29 @@ int parse_events_term__clone(struct parse_events_term **new,
                        term->err_term, term->err_val);
 }
 
-void parse_events__free_terms(struct list_head *terms)
+void parse_events_terms__purge(struct list_head *terms)
 {
        struct parse_events_term *term, *h;
 
-       list_for_each_entry_safe(term, h, terms, list)
+       list_for_each_entry_safe(term, h, terms, list) {
+               if (term->array.nr_ranges)
+                       free(term->array.ranges);
+               list_del_init(&term->list);
                free(term);
+       }
+}
+
+void parse_events_terms__delete(struct list_head *terms)
+{
+       if (!terms)
+               return;
+       parse_events_terms__purge(terms);
+       free(terms);
+}
+
+void parse_events__clear_array(struct parse_events_array *a)
+{
+       free(a->ranges);
 }
 
 void parse_events_evlist_error(struct parse_events_evlist *data,
@@ -2088,6 +2288,33 @@ void parse_events_evlist_error(struct parse_events_evlist *data,
        WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
 }
 
+static void config_terms_list(char *buf, size_t buf_sz)
+{
+       int i;
+       bool first = true;
+
+       buf[0] = '\0';
+       for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
+               const char *name = config_term_names[i];
+
+               if (!config_term_avail(i, NULL))
+                       continue;
+               if (!name)
+                       continue;
+               if (name[0] == '<')
+                       continue;
+
+               if (strlen(buf) + strlen(name) + 2 >= buf_sz)
+                       return;
+
+               if (!first)
+                       strcat(buf, ",");
+               else
+                       first = false;
+               strcat(buf, name);
+       }
+}
+
 /*
 * Return a string containing the valid config terms of an event.
  * @additional_terms: For terms such as PMU sysfs terms.
@@ -2095,17 +2322,18 @@ void parse_events_evlist_error(struct parse_events_evlist *data,
 char *parse_events_formats_error_string(char *additional_terms)
 {
        char *str;
-       static const char *static_terms = "config,config1,config2,name,"
-                                         "period,freq,branch_type,time,"
-                                         "call-graph,stack-size\n";
+       /* "branch_type" is the longest name */
+       char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
+                         (sizeof("branch_type") - 1)];
 
+       config_terms_list(static_terms, sizeof(static_terms));
        /* valid terms */
        if (additional_terms) {
-               if (!asprintf(&str, "valid terms: %s,%s",
-                             additional_terms, static_terms))
+               if (asprintf(&str, "valid terms: %s,%s",
+                            additional_terms, static_terms) < 0)
                        goto fail;
        } else {
-               if (!asprintf(&str, "valid terms: %s", static_terms))
+               if (asprintf(&str, "valid terms: %s", static_terms) < 0)
                        goto fail;
        }
        return str;
index f1a6db107241b1c8ffaf03a3514ba1549df6fd1b..67e493088e81c29c17819e94b37da4f83fae073f 100644 (file)
@@ -68,11 +68,21 @@ enum {
        PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
        PARSE_EVENTS__TERM_TYPE_STACKSIZE,
        PARSE_EVENTS__TERM_TYPE_NOINHERIT,
-       PARSE_EVENTS__TERM_TYPE_INHERIT
+       PARSE_EVENTS__TERM_TYPE_INHERIT,
+       __PARSE_EVENTS__TERM_TYPE_NR,
+};
+
+struct parse_events_array {
+       size_t nr_ranges;
+       struct {
+               unsigned int start;
+               size_t length;
+       } *ranges;
 };
 
 struct parse_events_term {
        char *config;
+       struct parse_events_array array;
        union {
                char *str;
                u64  num;
@@ -98,12 +108,14 @@ struct parse_events_evlist {
        int                        idx;
        int                        nr_groups;
        struct parse_events_error *error;
+       struct perf_evlist        *evlist;
 };
 
 struct parse_events_terms {
        struct list_head *terms;
 };
 
+void parse_events__shrink_config_terms(void);
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
 int parse_events_term__num(struct parse_events_term **term,
                           int type_term, char *config, u64 num,
@@ -115,7 +127,9 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
                              char *config, unsigned idx);
 int parse_events_term__clone(struct parse_events_term **new,
                             struct parse_events_term *term);
-void parse_events__free_terms(struct list_head *terms);
+void parse_events_terms__delete(struct list_head *terms);
+void parse_events_terms__purge(struct list_head *terms);
+void parse_events__clear_array(struct parse_events_array *a);
 int parse_events__modifier_event(struct list_head *list, char *str, bool add);
 int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, char *name);
@@ -126,18 +140,22 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
 int parse_events_load_bpf(struct parse_events_evlist *data,
                          struct list_head *list,
                          char *bpf_file_name,
-                         bool source);
+                         bool source,
+                         struct list_head *head_config);
 /* Provide this function for perf test */
 struct bpf_object;
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
                              struct list_head *list,
-                             struct bpf_object *obj);
+                             struct bpf_object *obj,
+                             struct list_head *head_config);
 int parse_events_add_numeric(struct parse_events_evlist *data,
                             struct list_head *list,
                             u32 type, u64 config,
                             struct list_head *head_config);
 int parse_events_add_cache(struct list_head *list, int *idx,
-                          char *type, char *op_result1, char *op_result2);
+                          char *type, char *op_result1, char *op_result2,
+                          struct parse_events_error *error,
+                          struct list_head *head_config);
 int parse_events_add_breakpoint(struct list_head *list, int *idx,
                                void *ptr, char *type, u64 len);
 int parse_events_add_pmu(struct parse_events_evlist *data,
index 58c5831ffd5c22133f48a4c1a3a07721c71362fa..1477fbc78993c7b31d7bb1531f630cca8587d93f 100644 (file)
@@ -9,8 +9,8 @@
 %{
 #include <errno.h>
 #include "../perf.h"
-#include "parse-events-bison.h"
 #include "parse-events.h"
+#include "parse-events-bison.h"
 
 char *parse_events_get_text(yyscan_t yyscanner);
 YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
@@ -111,6 +111,7 @@ do {                                                        \
 %x mem
 %s config
 %x event
+%x array
 
 group          [^,{}/]*[{][^}]*[}][^,{}/]*
 event_pmu      [^,{}/]+[/][^/]*[/][^,{}/]*
@@ -122,7 +123,7 @@ num_dec             [0-9]+
 num_hex                0x[a-fA-F0-9]+
 num_raw_hex    [a-fA-F0-9]+
 name           [a-zA-Z_*?][a-zA-Z0-9_*?.]*
-name_minus     [a-zA-Z_*?][a-zA-Z0-9\-_*?.]*
+name_minus     [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
 /* If you add a modifier you need to update check_modifier() */
 modifier_event [ukhpPGHSDI]+
 modifier_bp    [rwx]{1,3}
@@ -176,10 +177,17 @@ modifier_bp       [rwx]{1,3}
 
 }
 
+<array>{
+"]"                    { BEGIN(config); return ']'; }
+{num_dec}              { return value(yyscanner, 10); }
+{num_hex}              { return value(yyscanner, 16); }
+,                      { return ','; }
+"\.\.\."               { return PE_ARRAY_RANGE; }
+}
+
 <config>{
        /*
-        * Please update parse_events_formats_error_string any time
-        * new static term is added.
+        * Please update config_term_names when new static term is added.
         */
 config                 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
 config1                        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
@@ -196,6 +204,8 @@ no-inherit          { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 ,                      { return ','; }
 "/"                    { BEGIN(INITIAL); return '/'; }
 {name_minus}           { return str(yyscanner, PE_NAME); }
+\[all\]                        { return PE_ARRAY_ALL; }
+"["                    { BEGIN(array); return '['; }
 }
 
 <mem>{
@@ -238,6 +248,7 @@ cpu-migrations|migrations                   { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU
 alignment-faults                               { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
 emulation-faults                               { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
 dummy                                          { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+bpf-output                                     { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
 
        /*
         * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
index ad379968d4c10c0fb7bb2ddc3bacce3dc1166f43..5be4a5f216d6d23889e07e225810fec40df67c30 100644 (file)
@@ -28,7 +28,7 @@ do { \
        INIT_LIST_HEAD(list);         \
 } while (0)
 
-static inc_group_count(struct list_head *list,
+static void inc_group_count(struct list_head *list,
                       struct parse_events_evlist *data)
 {
        /* Only count groups that have more than 1 member */
@@ -48,6 +48,7 @@ static inc_group_count(struct list_head *list,
 %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
 %token PE_ERROR
 %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
 %type <num> PE_VALUE
 %type <num> PE_VALUE_SYM_HW
 %type <num> PE_VALUE_SYM_SW
@@ -64,6 +65,7 @@ static inc_group_count(struct list_head *list,
 %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
 %type <num> value_sym
 %type <head> event_config
+%type <head> opt_event_config
 %type <term> event_term
 %type <head> event_pmu
 %type <head> event_legacy_symbol
@@ -82,6 +84,9 @@ static inc_group_count(struct list_head *list,
 %type <head> group_def
 %type <head> group
 %type <head> groups
+%type <array> array
+%type <array> array_term
+%type <array> array_terms
 
 %union
 {
@@ -93,6 +98,7 @@ static inc_group_count(struct list_head *list,
                char *sys;
                char *event;
        } tracepoint_name;
+       struct parse_events_array array;
 }
 %%
 
@@ -211,24 +217,14 @@ event_def: event_pmu |
           event_bpf_file
 
 event_pmu:
-PE_NAME '/' event_config '/'
+PE_NAME opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_pmu(data, list, $1, $3));
-       parse_events__free_terms($3);
-       $$ = list;
-}
-|
-PE_NAME '/' '/'
-{
-       struct parse_events_evlist *data = _data;
-       struct list_head *list;
-
-       ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_pmu(data, list, $1, NULL));
+       ABORT_ON(parse_events_add_pmu(data, list, $1, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 |
@@ -246,7 +242,7 @@ PE_KERNEL_PMU_EVENT sep_dc
 
        ALLOC_LIST(list);
        ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
-       parse_events__free_terms(head);
+       parse_events_terms__delete(head);
        $$ = list;
 }
 |
@@ -266,7 +262,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
 
        ALLOC_LIST(list);
        ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
-       parse_events__free_terms(head);
+       parse_events_terms__delete(head);
        $$ = list;
 }
 
@@ -285,7 +281,7 @@ value_sym '/' event_config '/'
 
        ALLOC_LIST(list);
        ABORT_ON(parse_events_add_numeric(data, list, type, config, $3));
-       parse_events__free_terms($3);
+       parse_events_terms__delete($3);
        $$ = list;
 }
 |
@@ -302,33 +298,39 @@ value_sym sep_slash_dc
 }
 
 event_legacy_cache:
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config
 {
        struct parse_events_evlist *data = _data;
+       struct parse_events_error *error = data->error;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5));
+       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5, error, $6));
+       parse_events_terms__delete($6);
        $$ = list;
 }
 |
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config
 {
        struct parse_events_evlist *data = _data;
+       struct parse_events_error *error = data->error;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL));
+       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL, error, $4));
+       parse_events_terms__delete($4);
        $$ = list;
 }
 |
-PE_NAME_CACHE_TYPE
+PE_NAME_CACHE_TYPE opt_event_config
 {
        struct parse_events_evlist *data = _data;
+       struct parse_events_error *error = data->error;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL));
+       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL, error, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 
@@ -378,24 +380,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
 }
 
 event_legacy_tracepoint:
-tracepoint_name
-{
-       struct parse_events_evlist *data = _data;
-       struct parse_events_error *error = data->error;
-       struct list_head *list;
-
-       ALLOC_LIST(list);
-       if (error)
-               error->idx = @1.first_column;
-
-       if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
-                                       error, NULL))
-               return -1;
-
-       $$ = list;
-}
-|
-tracepoint_name '/' event_config '/'
+tracepoint_name opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct parse_events_error *error = data->error;
@@ -406,7 +391,7 @@ tracepoint_name '/' event_config '/'
                error->idx = @1.first_column;
 
        if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
-                                       error, $3))
+                                       error, $2))
                return -1;
 
        $$ = list;
@@ -433,49 +418,68 @@ PE_NAME ':' PE_NAME
 }
 
 event_legacy_numeric:
-PE_VALUE ':' PE_VALUE
+PE_VALUE ':' PE_VALUE opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL));
+       ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, $4));
+       parse_events_terms__delete($4);
        $$ = list;
 }
 
 event_legacy_raw:
-PE_RAW
+PE_RAW opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL));
+       ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 
 event_bpf_file:
-PE_BPF_OBJECT
+PE_BPF_OBJECT opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct parse_events_error *error = data->error;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_load_bpf(data, list, $1, false));
+       ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 |
-PE_BPF_SOURCE
+PE_BPF_SOURCE opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_load_bpf(data, list, $1, true));
+       ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 
+opt_event_config:
+'/' event_config '/'
+{
+       $$ = $2;
+}
+|
+'/' '/'
+{
+       $$ = NULL;
+}
+|
+{
+       $$ = NULL;
+}
+
 start_terms: event_config
 {
        struct parse_events_terms *data = _data;
@@ -573,6 +577,86 @@ PE_TERM
        ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
        $$ = term;
 }
+|
+PE_NAME array '=' PE_NAME
+{
+       struct parse_events_term *term;
+       int i;
+
+       ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                       $1, $4, &@1, &@4));
+
+       term->array = $2;
+       $$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+       struct parse_events_term *term;
+
+       ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                       $1, $4, &@1, &@4));
+       term->array = $2;
+       $$ = term;
+}
+
+array:
+'[' array_terms ']'
+{
+       $$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+       $$.nr_ranges = 0;
+       $$.ranges = NULL;
+}
+
+array_terms:
+array_terms ',' array_term
+{
+       struct parse_events_array new_array;
+
+       new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+       new_array.ranges = malloc(sizeof(new_array.ranges[0]) *
+                                 new_array.nr_ranges);
+       ABORT_ON(!new_array.ranges);
+       memcpy(&new_array.ranges[0], $1.ranges,
+              $1.nr_ranges * sizeof(new_array.ranges[0]));
+       memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+              $3.nr_ranges * sizeof(new_array.ranges[0]));
+       free($1.ranges);
+       free($3.ranges);
+       $$ = new_array;
+}
+|
+array_term
+
+array_term:
+PE_VALUE
+{
+       struct parse_events_array array;
+
+       array.nr_ranges = 1;
+       array.ranges = malloc(sizeof(array.ranges[0]));
+       ABORT_ON(!array.ranges);
+       array.ranges[0].start = $1;
+       array.ranges[0].length = 1;
+       $$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+       struct parse_events_array array;
+
+       ABORT_ON($3 < $1);
+       array.nr_ranges = 1;
+       array.ranges = malloc(sizeof(array.ranges[0]));
+       ABORT_ON(!array.ranges);
+       array.ranges[0].start = $1;
+       array.ranges[0].length = $3 - $1 + 1;
+       $$ = array;
+}
 
 sep_dc: ':' |
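
With the opt_event_config rule, config terms can now follow legacy, raw and BPF events as well, and BPF object terms gain the [index], [start...end] and [all] array forms handled above. A hedged sketch of the resulting parse_events() strings (the event names, source file and map name are illustrative):

    #include "evlist.h"
    #include "parse-events.h"

    static int add_example_events(struct perf_evlist *evlist)
    {
            struct parse_events_error err = { .idx = 0 };

            /* legacy cache event, now accepting a /.../ config section */
            if (parse_events(evlist, "L1-dcache-load-misses/name=l1d_miss/", &err))
                    return -1;

            /* BPF source: per-event term plus per-object array config term */
            return parse_events(evlist,
                                "./prog.c/call-graph=fp,map:channel.value[0,2...5]=101/",
                                &err);
    }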
 
index b597bcc8fc781f4fa2c631044bddaab447b756e9..adef23b1352e836fec9f0f9a5290c578bb25cc43 100644 (file)
@@ -98,7 +98,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
        char scale[128];
        int fd, ret = -1;
        char path[PATH_MAX];
-       const char *lc;
+       char *lc;
 
        snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
 
@@ -123,6 +123,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
         */
        lc = setlocale(LC_NUMERIC, NULL);
 
+       /*
+        * The lc string may be allocated in static storage,
+        * so get a dynamic copy to make it survive setlocale
+        * call below.
+        */
+       lc = strdup(lc);
+       if (!lc) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
        /*
         * force to C locale to ensure kernel
         * scale string is converted correctly.
@@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
        /* restore locale */
        setlocale(LC_NUMERIC, lc);
 
+       free(lc);
+
        ret = 0;
 error:
        close(fd);
@@ -153,7 +166,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n
        if (fd == -1)
                return -1;
 
-               sret = read(fd, alias->unit, UNIT_MAX_LEN);
+       sret = read(fd, alias->unit, UNIT_MAX_LEN);
        if (sret < 0)
                goto error;
 
@@ -284,13 +297,12 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
 {
        struct dirent *evt_ent;
        DIR *event_dir;
-       int ret = 0;
 
        event_dir = opendir(dir);
        if (!event_dir)
                return -EINVAL;
 
-       while (!ret && (evt_ent = readdir(event_dir))) {
+       while ((evt_ent = readdir(event_dir))) {
                char path[PATH_MAX];
                char *name = evt_ent->d_name;
                FILE *file;
@@ -306,17 +318,19 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
 
                snprintf(path, PATH_MAX, "%s/%s", dir, name);
 
-               ret = -EINVAL;
                file = fopen(path, "r");
-               if (!file)
-                       break;
+               if (!file) {
+                       pr_debug("Cannot open %s\n", path);
+                       continue;
+               }
 
-               ret = perf_pmu__new_alias(head, dir, name, file);
+               if (perf_pmu__new_alias(head, dir, name, file) < 0)
+                       pr_debug("Cannot set up %s\n", name);
                fclose(file);
        }
 
        closedir(event_dir);
-       return ret;
+       return 0;
 }
 
 /*
@@ -354,7 +368,7 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
        list_for_each_entry(term, &alias->terms, list) {
                ret = parse_events_term__clone(&cloned, term);
                if (ret) {
-                       parse_events__free_terms(&list);
+                       parse_events_terms__purge(&list);
                        return ret;
                }
                list_add_tail(&cloned->list, &list);
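
The strdup() added in the perf_pmu__parse_scale() hunk above matters because setlocale() may return a pointer into static storage that a later setlocale() call overwrites. The save/force-C/restore pattern in isolation (a sketch under that assumption, not the patch code):

    #include <locale.h>
    #include <stdlib.h>

    static double strtod_c_locale(const char *str)
    {
            /* setlocale() may hand back static storage: duplicate it first */
            char *saved = strdup(setlocale(LC_NUMERIC, NULL));
            double val;

            if (!saved)
                    return 0.0;

            setlocale(LC_NUMERIC, "C");     /* sysfs scale strings always use '.' */
            val = strtod(str, NULL);
            setlocale(LC_NUMERIC, saved);   /* restore the caller's locale */
            free(saved);
            return val;
    }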
index 544509c159cec4111e5865007b513e1197c576c4..b3aabc0d4eb0096fff41fb078a09d77988f103ff 100644 (file)
@@ -187,6 +187,9 @@ static void define_event_symbols(struct event_format *event,
                                 const char *ev_name,
                                 struct print_arg *args)
 {
+       if (args == NULL)
+               return;
+
        switch (args->type) {
        case PRINT_NULL:
                break;
index d72fafc1c800db1a699ac37e4ecb4acd0f1e3326..fbd05242b4e59786ca0e081a52729248d780f5a0 100644 (file)
@@ -205,6 +205,9 @@ static void define_event_symbols(struct event_format *event,
                                 const char *ev_name,
                                 struct print_arg *args)
 {
+       if (args == NULL)
+               return;
+
        switch (args->type) {
        case PRINT_NULL:
                break;
@@ -1091,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv)
                goto error;
        }
 
-       free(command_line);
-
        set_table_handlers(tables);
 
        if (tables->db_export_mode) {
@@ -1101,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv)
                        goto error;
        }
 
+       free(command_line);
+
        return err;
 error:
        Py_Finalize();
index 40b7a0d0905b8d7f01972c6e38e225f108b15133..60b3593d210dbc3b52997a693bcc8f9645687f08 100644 (file)
@@ -240,14 +240,6 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-static int process_build_id_stub(struct perf_tool *tool __maybe_unused,
-                                union perf_event *event __maybe_unused,
-                                struct perf_session *session __maybe_unused)
-{
-       dump_printf(": unhandled!\n");
-       return 0;
-}
-
 static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
                                       union perf_event *event __maybe_unused,
                                       struct ordered_events *oe __maybe_unused)
@@ -260,23 +252,6 @@ static int process_finished_round(struct perf_tool *tool,
                                  union perf_event *event,
                                  struct ordered_events *oe);
 
-static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
-                                union perf_event *event __maybe_unused,
-                                struct perf_session *perf_session
-                                __maybe_unused)
-{
-       dump_printf(": unhandled!\n");
-       return 0;
-}
-
-static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused,
-                               union perf_event *event __maybe_unused,
-                               struct perf_session *session __maybe_unused)
-{
-       dump_printf(": unhandled!\n");
-       return 0;
-}
-
 static int skipn(int fd, off_t n)
 {
        char buf[4096];
@@ -303,10 +278,9 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
        return event->auxtrace.size;
 }
 
-static
-int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
-                                     union perf_event *event __maybe_unused,
-                                     struct perf_session *session __maybe_unused)
+static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
+                                 union perf_event *event __maybe_unused,
+                                 struct perf_session *session __maybe_unused)
 {
        dump_printf(": unhandled!\n");
        return 0;
@@ -410,7 +384,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
        if (tool->tracing_data == NULL)
                tool->tracing_data = process_event_synth_tracing_data_stub;
        if (tool->build_id == NULL)
-               tool->build_id = process_build_id_stub;
+               tool->build_id = process_event_op2_stub;
        if (tool->finished_round == NULL) {
                if (tool->ordered_events)
                        tool->finished_round = process_finished_round;
@@ -418,13 +392,13 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
                        tool->finished_round = process_finished_round_stub;
        }
        if (tool->id_index == NULL)
-               tool->id_index = process_id_index_stub;
+               tool->id_index = process_event_op2_stub;
        if (tool->auxtrace_info == NULL)
-               tool->auxtrace_info = process_event_auxtrace_info_stub;
+               tool->auxtrace_info = process_event_op2_stub;
        if (tool->auxtrace == NULL)
                tool->auxtrace = process_event_auxtrace_stub;
        if (tool->auxtrace_error == NULL)
-               tool->auxtrace_error = process_event_auxtrace_error_stub;
+               tool->auxtrace_error = process_event_op2_stub;
        if (tool->thread_map == NULL)
                tool->thread_map = process_event_thread_map_stub;
        if (tool->cpu_map == NULL)
index 1833103768cb99fbdbb92ea8910b08488fd2aac3..c8680984d2d6680f56a974a1b54056a5e74263e1 100644 (file)
@@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 
+src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
@@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI')
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
                                if len(f.strip()) > 0 and f[0] != '#']
 
+# use full paths with source files
+ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)
+
 perf = Extension('perf',
                  sources = ext_sources,
                  include_dirs = ['util/include'],
index ec722346e6ffb8dd6531e94576bb15b548a1487b..93fa136b0025c02e1e4383be523fcf37c092f36e 100644 (file)
@@ -6,6 +6,7 @@
 #include "evsel.h"
 #include "evlist.h"
 #include <traceevent/event-parse.h>
+#include "mem-events.h"
 
 regex_t                parent_regex;
 const char     default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -25,9 +26,19 @@ int          sort__has_parent = 0;
 int            sort__has_sym = 0;
 int            sort__has_dso = 0;
 int            sort__has_socket = 0;
+int            sort__has_thread = 0;
+int            sort__has_comm = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
 
-
+/*
+ * Replaces all occurrences of a char used with the:
+ *
+ * -t, --field-separator
+ *
+ * option, which uses a special separator character and doesn't pad with
+ * spaces, replacing all occurrences of this separator in symbol names (and
+ * other output) with a '.' character, so that it is the only invalid
+ * separator.
+ */
 static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
 {
        int n;
@@ -80,10 +91,21 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
                               width, width, comm ?: "");
 }
 
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+       const struct thread *th = arg;
+
+       if (type != HIST_FILTER__THREAD)
+               return -1;
+
+       return th && he->thread != th;
+}
+
 struct sort_entry sort_thread = {
        .se_header      = "  Pid:Command",
        .se_cmp         = sort__thread_cmp,
        .se_snprintf    = hist_entry__thread_snprintf,
+       .se_filter      = hist_entry__thread_filter,
        .se_width_idx   = HISTC_THREAD,
 };
 
@@ -121,6 +143,7 @@ struct sort_entry sort_comm = {
        .se_collapse    = sort__comm_collapse,
        .se_sort        = sort__comm_sort,
        .se_snprintf    = hist_entry__comm_snprintf,
+       .se_filter      = hist_entry__thread_filter,
        .se_width_idx   = HISTC_COMM,
 };
 
@@ -170,10 +193,21 @@ static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
        return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
 }
 
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+       const struct dso *dso = arg;
+
+       if (type != HIST_FILTER__DSO)
+               return -1;
+
+       return dso && (!he->ms.map || he->ms.map->dso != dso);
+}
+
 struct sort_entry sort_dso = {
        .se_header      = "Shared Object",
        .se_cmp         = sort__dso_cmp,
        .se_snprintf    = hist_entry__dso_snprintf,
+       .se_filter      = hist_entry__dso_filter,
        .se_width_idx   = HISTC_DSO,
 };
 
@@ -246,10 +280,8 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
                        ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
                        ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
                                        ip - map->unmap_ip(map, sym->start));
-                       ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
-                                      width - ret, "");
                } else {
-                       ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
+                       ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
                                               width - ret,
                                               sym->name);
                }
@@ -257,14 +289,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
                size_t len = BITS_PER_LONG / 4;
                ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
                                       len, ip);
-               ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
-                                      width - ret, "");
        }
 
-       if (ret > width)
-               bf[width] = '\0';
-
-       return width;
+       return ret;
 }
 
 static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
@@ -274,46 +301,56 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
                                         he->level, bf, size, width);
 }
 
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+       const char *sym = arg;
+
+       if (type != HIST_FILTER__SYMBOL)
+               return -1;
+
+       return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym));
+}
+
 struct sort_entry sort_sym = {
        .se_header      = "Symbol",
        .se_cmp         = sort__sym_cmp,
        .se_sort        = sort__sym_sort,
        .se_snprintf    = hist_entry__sym_snprintf,
+       .se_filter      = hist_entry__sym_filter,
        .se_width_idx   = HISTC_SYMBOL,
 };
 
 /* --sort srcline */
 
+static char *hist_entry__get_srcline(struct hist_entry *he)
+{
+       struct map *map = he->ms.map;
+
+       if (!map)
+               return SRCLINE_UNKNOWN;
+
+       return get_srcline(map->dso, map__rip_2objdump(map, he->ip),
+                          he->ms.sym, true);
+}
+
 static int64_t
 sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-       if (!left->srcline) {
-               if (!left->ms.map)
-                       left->srcline = SRCLINE_UNKNOWN;
-               else {
-                       struct map *map = left->ms.map;
-                       left->srcline = get_srcline(map->dso,
-                                          map__rip_2objdump(map, left->ip),
-                                                   left->ms.sym, true);
-               }
-       }
-       if (!right->srcline) {
-               if (!right->ms.map)
-                       right->srcline = SRCLINE_UNKNOWN;
-               else {
-                       struct map *map = right->ms.map;
-                       right->srcline = get_srcline(map->dso,
-                                            map__rip_2objdump(map, right->ip),
-                                                    right->ms.sym, true);
-               }
-       }
+       if (!left->srcline)
+               left->srcline = hist_entry__get_srcline(left);
+       if (!right->srcline)
+               right->srcline = hist_entry__get_srcline(right);
+
        return strcmp(right->srcline, left->srcline);
 }
 
 static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
                                        size_t size, unsigned int width)
 {
-       return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline);
+       if (!he->srcline)
+               he->srcline = hist_entry__get_srcline(he);
+
+       return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
 }
 
 struct sort_entry sort_srcline = {
@@ -327,11 +364,14 @@ struct sort_entry sort_srcline = {
 
 static char no_srcfile[1];
 
-static char *get_srcfile(struct hist_entry *e)
+static char *hist_entry__get_srcfile(struct hist_entry *e)
 {
        char *sf, *p;
        struct map *map = e->ms.map;
 
+       if (!map)
+               return no_srcfile;
+
        sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
                         e->ms.sym, false, true);
        if (!strcmp(sf, SRCLINE_UNKNOWN))
@@ -348,25 +388,21 @@ static char *get_srcfile(struct hist_entry *e)
 static int64_t
 sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-       if (!left->srcfile) {
-               if (!left->ms.map)
-                       left->srcfile = no_srcfile;
-               else
-                       left->srcfile = get_srcfile(left);
-       }
-       if (!right->srcfile) {
-               if (!right->ms.map)
-                       right->srcfile = no_srcfile;
-               else
-                       right->srcfile = get_srcfile(right);
-       }
+       if (!left->srcfile)
+               left->srcfile = hist_entry__get_srcfile(left);
+       if (!right->srcfile)
+               right->srcfile = hist_entry__get_srcfile(right);
+
        return strcmp(right->srcfile, left->srcfile);
 }
 
 static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
                                        size_t size, unsigned int width)
 {
-       return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile);
+       if (!he->srcfile)
+               he->srcfile = hist_entry__get_srcfile(he);
+
+       return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile);
 }
 
 struct sort_entry sort_srcfile = {
@@ -439,10 +475,21 @@ static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
        return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
 }
 
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+       int sk = *(const int *)arg;
+
+       if (type != HIST_FILTER__SOCKET)
+               return -1;
+
+       return sk >= 0 && he->socket != sk;
+}
+
 struct sort_entry sort_socket = {
        .se_header      = "Socket",
        .se_cmp         = sort__socket_cmp,
        .se_snprintf    = hist_entry__socket_snprintf,
+       .se_filter      = hist_entry__socket_filter,
        .se_width_idx   = HISTC_SOCKET,
 };
 
@@ -483,9 +530,6 @@ sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
        if (right->trace_output == NULL)
                right->trace_output = get_trace_output(right);
 
-       hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output));
-       hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output));
-
        return strcmp(right->trace_output, left->trace_output);
 }
 
@@ -496,11 +540,11 @@ static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
 
        evsel = hists_to_evsel(he->hists);
        if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
-               return scnprintf(bf, size, "%-*.*s", width, width, "N/A");
+               return scnprintf(bf, size, "%-.*s", width, "N/A");
 
        if (he->trace_output == NULL)
                he->trace_output = get_trace_output(he);
-       return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output);
+       return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output);
 }
 
 struct sort_entry sort_trace = {
@@ -532,6 +576,18 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
                return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+                                      const void *arg)
+{
+       const struct dso *dso = arg;
+
+       if (type != HIST_FILTER__DSO)
+               return -1;
+
+       return dso && (!he->branch_info || !he->branch_info->from.map ||
+                      he->branch_info->from.map->dso != dso);
+}
+
 static int64_t
 sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -552,6 +608,18 @@ static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
                return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+                                    const void *arg)
+{
+       const struct dso *dso = arg;
+
+       if (type != HIST_FILTER__DSO)
+               return -1;
+
+       return dso && (!he->branch_info || !he->branch_info->to.map ||
+                      he->branch_info->to.map->dso != dso);
+}
+
 static int64_t
 sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -613,10 +681,35 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
        return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+                                      const void *arg)
+{
+       const char *sym = arg;
+
+       if (type != HIST_FILTER__SYMBOL)
+               return -1;
+
+       return sym && !(he->branch_info && he->branch_info->from.sym &&
+                       strstr(he->branch_info->from.sym->name, sym));
+}
+
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+                                      const void *arg)
+{
+       const char *sym = arg;
+
+       if (type != HIST_FILTER__SYMBOL)
+               return -1;
+
+       return sym && !(he->branch_info && he->branch_info->to.sym &&
+                       strstr(he->branch_info->to.sym->name, sym));
+}
+
 struct sort_entry sort_dso_from = {
        .se_header      = "Source Shared Object",
        .se_cmp         = sort__dso_from_cmp,
        .se_snprintf    = hist_entry__dso_from_snprintf,
+       .se_filter      = hist_entry__dso_from_filter,
        .se_width_idx   = HISTC_DSO_FROM,
 };
 
@@ -624,6 +717,7 @@ struct sort_entry sort_dso_to = {
        .se_header      = "Target Shared Object",
        .se_cmp         = sort__dso_to_cmp,
        .se_snprintf    = hist_entry__dso_to_snprintf,
+       .se_filter      = hist_entry__dso_to_filter,
        .se_width_idx   = HISTC_DSO_TO,
 };
 
@@ -631,6 +725,7 @@ struct sort_entry sort_sym_from = {
        .se_header      = "Source Symbol",
        .se_cmp         = sort__sym_from_cmp,
        .se_snprintf    = hist_entry__sym_from_snprintf,
+       .se_filter      = hist_entry__sym_from_filter,
        .se_width_idx   = HISTC_SYMBOL_FROM,
 };
 
@@ -638,6 +733,7 @@ struct sort_entry sort_sym_to = {
        .se_header      = "Target Symbol",
        .se_cmp         = sort__sym_to_cmp,
        .se_snprintf    = hist_entry__sym_to_snprintf,
+       .se_filter      = hist_entry__sym_to_filter,
        .se_width_idx   = HISTC_SYMBOL_TO,
 };
 
@@ -797,20 +893,10 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
 static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
-       const char *out;
-       u64 mask = PERF_MEM_LOCK_NA;
+       char out[10];
 
-       if (he->mem_info)
-               mask = he->mem_info->data_src.mem_lock;
-
-       if (mask & PERF_MEM_LOCK_NA)
-               out = "N/A";
-       else if (mask & PERF_MEM_LOCK_LOCKED)
-               out = "Yes";
-       else
-               out = "No";
-
-       return repsep_snprintf(bf, size, "%-*s", width, out);
+       perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info);
+       return repsep_snprintf(bf, size, "%.*s", width, out);
 }
 
 static int64_t
@@ -832,54 +918,12 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
        return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
 }
 
-static const char * const tlb_access[] = {
-       "N/A",
-       "HIT",
-       "MISS",
-       "L1",
-       "L2",
-       "Walker",
-       "Fault",
-};
-#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *))
-
 static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
        char out[64];
-       size_t sz = sizeof(out) - 1; /* -1 for null termination */
-       size_t l = 0, i;
-       u64 m = PERF_MEM_TLB_NA;
-       u64 hit, miss;
-
-       out[0] = '\0';
-
-       if (he->mem_info)
-               m = he->mem_info->data_src.mem_dtlb;
-
-       hit = m & PERF_MEM_TLB_HIT;
-       miss = m & PERF_MEM_TLB_MISS;
-
-       /* already taken care of */
-       m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
-
-       for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) {
-               if (!(m & 0x1))
-                       continue;
-               if (l) {
-                       strcat(out, " or ");
-                       l += 4;
-               }
-               strncat(out, tlb_access[i], sz - l);
-               l += strlen(tlb_access[i]);
-       }
-       if (*out == '\0')
-               strcpy(out, "N/A");
-       if (hit)
-               strncat(out, " hit", sz - l);
-       if (miss)
-               strncat(out, " miss", sz - l);
 
+       perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info);
        return repsep_snprintf(bf, size, "%-*s", width, out);
 }
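
The open-coded lock/TLB/level/snoop decoders removed in this and the following hunks are replaced by shared perf_mem__{lck,tlb,lvl,snp}_scnprintf() helpers, presumably so other code (e.g. 'perf mem') can reuse them. They implement the same flag-walk the removed code did; a minimal generic sketch of that pattern, with hypothetical names rather than the actual perf implementation:

    static size_t flags_scnprintf(char *out, size_t sz, u64 flags,
                                  const char * const names[], size_t n)
    {
            size_t i, l = 0;

            for (i = 0; flags && i < n; i++, flags >>= 1) {
                    if (!(flags & 0x1))
                            continue;       /* bit not set: skip this name */
                    if (l)
                            l += scnprintf(out + l, sz - l, " or ");
                    l += scnprintf(out + l, sz - l, "%s", names[i]);
            }
            if (!l)
                    l = scnprintf(out, sz, "N/A");
            return l;
    }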
 
@@ -902,61 +946,12 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
        return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
 }
 
-static const char * const mem_lvl[] = {
-       "N/A",
-       "HIT",
-       "MISS",
-       "L1",
-       "LFB",
-       "L2",
-       "L3",
-       "Local RAM",
-       "Remote RAM (1 hop)",
-       "Remote RAM (2 hops)",
-       "Remote Cache (1 hop)",
-       "Remote Cache (2 hops)",
-       "I/O",
-       "Uncached",
-};
-#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *))
-
 static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
        char out[64];
-       size_t sz = sizeof(out) - 1; /* -1 for null termination */
-       size_t i, l = 0;
-       u64 m =  PERF_MEM_LVL_NA;
-       u64 hit, miss;
-
-       if (he->mem_info)
-               m  = he->mem_info->data_src.mem_lvl;
-
-       out[0] = '\0';
-
-       hit = m & PERF_MEM_LVL_HIT;
-       miss = m & PERF_MEM_LVL_MISS;
-
-       /* already taken care of */
-       m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
-
-       for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) {
-               if (!(m & 0x1))
-                       continue;
-               if (l) {
-                       strcat(out, " or ");
-                       l += 4;
-               }
-               strncat(out, mem_lvl[i], sz - l);
-               l += strlen(mem_lvl[i]);
-       }
-       if (*out == '\0')
-               strcpy(out, "N/A");
-       if (hit)
-               strncat(out, " hit", sz - l);
-       if (miss)
-               strncat(out, " miss", sz - l);
 
+       perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info);
        return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -979,51 +974,15 @@ sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
        return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
 }
 
-static const char * const snoop_access[] = {
-       "N/A",
-       "None",
-       "Miss",
-       "Hit",
-       "HitM",
-};
-#define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *))
-
 static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
        char out[64];
-       size_t sz = sizeof(out) - 1; /* -1 for null termination */
-       size_t i, l = 0;
-       u64 m = PERF_MEM_SNOOP_NA;
-
-       out[0] = '\0';
-
-       if (he->mem_info)
-               m = he->mem_info->data_src.mem_snoop;
-
-       for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) {
-               if (!(m & 0x1))
-                       continue;
-               if (l) {
-                       strcat(out, " or ");
-                       l += 4;
-               }
-               strncat(out, snoop_access[i], sz - l);
-               l += strlen(snoop_access[i]);
-       }
-
-       if (*out == '\0')
-               strcpy(out, "N/A");
 
+       perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info);
        return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
-static inline  u64 cl_address(u64 address)
-{
-       /* return the cacheline of the address */
-       return (address & ~(cacheline_size - 1));
-}
-
 static int64_t
 sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -1440,20 +1399,6 @@ struct hpp_sort_entry {
        struct sort_entry *se;
 };
 
-bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
-{
-       struct hpp_sort_entry *hse_a;
-       struct hpp_sort_entry *hse_b;
-
-       if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
-               return false;
-
-       hse_a = container_of(a, struct hpp_sort_entry, hpp);
-       hse_b = container_of(b, struct hpp_sort_entry, hpp);
-
-       return hse_a->se == hse_b->se;
-}
-
 void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
 {
        struct hpp_sort_entry *hse;
@@ -1539,8 +1484,56 @@ static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt,
        return sort_fn(a, b);
 }
 
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+{
+       return format->header == __sort__hpp_header;
+}
+
+#define MK_SORT_ENTRY_CHK(key)                                 \
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)  \
+{                                                              \
+       struct hpp_sort_entry *hse;                             \
+                                                               \
+       if (!perf_hpp__is_sort_entry(fmt))                      \
+               return false;                                   \
+                                                               \
+       hse = container_of(fmt, struct hpp_sort_entry, hpp);    \
+       return hse->se == &sort_ ## key ;                       \
+}
+
+MK_SORT_ENTRY_CHK(trace)
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
+
+
+static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+       struct hpp_sort_entry *hse_a;
+       struct hpp_sort_entry *hse_b;
+
+       if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
+               return false;
+
+       hse_a = container_of(a, struct hpp_sort_entry, hpp);
+       hse_b = container_of(b, struct hpp_sort_entry, hpp);
+
+       return hse_a->se == hse_b->se;
+}
+
+static void hse_free(struct perf_hpp_fmt *fmt)
+{
+       struct hpp_sort_entry *hse;
+
+       hse = container_of(fmt, struct hpp_sort_entry, hpp);
+       free(hse);
+}
+
 static struct hpp_sort_entry *
-__sort_dimension__alloc_hpp(struct sort_dimension *sd)
+__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level)
 {
        struct hpp_sort_entry *hse;
 
@@ -1560,40 +1553,92 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd)
        hse->hpp.cmp = __sort__hpp_cmp;
        hse->hpp.collapse = __sort__hpp_collapse;
        hse->hpp.sort = __sort__hpp_sort;
+       hse->hpp.equal = __sort__hpp_equal;
+       hse->hpp.free = hse_free;
 
        INIT_LIST_HEAD(&hse->hpp.list);
        INIT_LIST_HEAD(&hse->hpp.sort_list);
        hse->hpp.elide = false;
        hse->hpp.len = 0;
        hse->hpp.user_len = 0;
+       hse->hpp.level = level;
 
        return hse;
 }
 
-bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+static void hpp_free(struct perf_hpp_fmt *fmt)
 {
-       return format->header == __sort__hpp_header;
+       free(fmt);
+}
+
+static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd,
+                                                      int level)
+{
+       struct perf_hpp_fmt *fmt;
+
+       fmt = memdup(hd->fmt, sizeof(*fmt));
+       if (fmt) {
+               INIT_LIST_HEAD(&fmt->list);
+               INIT_LIST_HEAD(&fmt->sort_list);
+               fmt->free = hpp_free;
+               fmt->level = level;
+       }
+
+       return fmt;
+}
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+       struct perf_hpp_fmt *fmt;
+       struct hpp_sort_entry *hse;
+       int ret = -1;
+       int r;
+
+       perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+               if (!perf_hpp__is_sort_entry(fmt))
+                       continue;
+
+               hse = container_of(fmt, struct hpp_sort_entry, hpp);
+               if (hse->se->se_filter == NULL)
+                       continue;
+
+       /*
+        * A hist entry is filtered out if any sort key in the hpp list
+        * applies the filter.  Filter types a key does not handle are skipped.
+        */
+               r = hse->se->se_filter(he, type, arg);
+               if (r >= 0) {
+                       if (ret < 0)
+                               ret = 0;
+                       ret |= r;
+               }
+       }
+
+       return ret;
 }
 
-static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd)
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+                                         struct perf_hpp_list *list,
+                                         int level)
 {
-       struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
+       struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
 
        if (hse == NULL)
                return -1;
 
-       perf_hpp__register_sort_field(&hse->hpp);
+       perf_hpp_list__register_sort_field(list, &hse->hpp);
        return 0;
 }
 
-static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+                                           struct perf_hpp_list *list)
 {
-       struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
+       struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
 
        if (hse == NULL)
                return -1;
 
-       perf_hpp__column_register(&hse->hpp);
+       perf_hpp_list__column_register(list, &hse->hpp);
        return 0;
 }
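
hist_entry__filter() combines the per-key ->se_filter results: keys that do not handle the given filter type return -1 and are ignored, and if at least one key in the entry's hpp list does handle it, the entry is filtered when any of those keys says so (the results are OR-ed); -1 comes back only when no key handled the type at all. A rough sketch of how a caller could apply a DSO filter over a hists tree, assuming the usual hists/hist_entry fields (this caller is illustrative and not part of this patch):

    static void apply_dso_filter(struct hists *hists, const struct dso *dso)
    {
            struct rb_node *nd;

            for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
                    struct hist_entry *he;
                    int r;

                    he = rb_entry(nd, struct hist_entry, rb_node);
                    r = hist_entry__filter(he, HIST_FILTER__DSO, dso);

                    if (r > 0)
                            he->filtered |= (1 << HIST_FILTER__DSO);
                    else if (r == 0)
                            he->filtered &= ~(1 << HIST_FILTER__DSO);
                    /* r < 0: no sort key handled this type; leave the entry alone */
            }
    }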
 
@@ -1727,6 +1772,9 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
        if (hde->raw_trace)
                goto raw_field;
 
+       if (!he->trace_output)
+               he->trace_output = get_trace_output(he);
+
        field = hde->field;
        namelen = strlen(field->name);
        str = he->trace_output;
@@ -1776,6 +1824,11 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
 
        hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
 
+       if (b == NULL) {
+               update_dynamic_len(hde, a);
+               return 0;
+       }
+
        field = hde->field;
        if (field->flags & FIELD_IS_DYNAMIC) {
                unsigned long long dyn;
@@ -1790,9 +1843,6 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
        } else {
                offset = field->offset;
                size = field->size;
-
-               update_dynamic_len(hde, a);
-               update_dynamic_len(hde, b);
        }
 
        return memcmp(a->raw_data + offset, b->raw_data + offset, size);
@@ -1803,8 +1853,31 @@ bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
        return fmt->cmp == __sort__hde_cmp;
 }
 
+static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+       struct hpp_dynamic_entry *hde_a;
+       struct hpp_dynamic_entry *hde_b;
+
+       if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b))
+               return false;
+
+       hde_a = container_of(a, struct hpp_dynamic_entry, hpp);
+       hde_b = container_of(b, struct hpp_dynamic_entry, hpp);
+
+       return hde_a->field == hde_b->field;
+}
+
+static void hde_free(struct perf_hpp_fmt *fmt)
+{
+       struct hpp_dynamic_entry *hde;
+
+       hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+       free(hde);
+}
+
 static struct hpp_dynamic_entry *
-__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field,
+                     int level)
 {
        struct hpp_dynamic_entry *hde;
 
@@ -1827,16 +1900,47 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
        hde->hpp.cmp = __sort__hde_cmp;
        hde->hpp.collapse = __sort__hde_cmp;
        hde->hpp.sort = __sort__hde_cmp;
+       hde->hpp.equal = __sort__hde_equal;
+       hde->hpp.free = hde_free;
 
        INIT_LIST_HEAD(&hde->hpp.list);
        INIT_LIST_HEAD(&hde->hpp.sort_list);
        hde->hpp.elide = false;
        hde->hpp.len = 0;
        hde->hpp.user_len = 0;
+       hde->hpp.level = level;
 
        return hde;
 }
 
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
+{
+       struct perf_hpp_fmt *new_fmt = NULL;
+
+       if (perf_hpp__is_sort_entry(fmt)) {
+               struct hpp_sort_entry *hse, *new_hse;
+
+               hse = container_of(fmt, struct hpp_sort_entry, hpp);
+               new_hse = memdup(hse, sizeof(*hse));
+               if (new_hse)
+                       new_fmt = &new_hse->hpp;
+       } else if (perf_hpp__is_dynamic_entry(fmt)) {
+               struct hpp_dynamic_entry *hde, *new_hde;
+
+               hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+               new_hde = memdup(hde, sizeof(*hde));
+               if (new_hde)
+                       new_fmt = &new_hde->hpp;
+       } else {
+               new_fmt = memdup(fmt, sizeof(*fmt));
+       }
+
+       INIT_LIST_HEAD(&new_fmt->list);
+       INIT_LIST_HEAD(&new_fmt->sort_list);
+
+       return new_fmt;
+}
+
 static int parse_field_name(char *str, char **event, char **field, char **opt)
 {
        char *event_name, *field_name, *opt_name;
@@ -1908,11 +2012,11 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam
 
 static int __dynamic_dimension__add(struct perf_evsel *evsel,
                                    struct format_field *field,
-                                   bool raw_trace)
+                                   bool raw_trace, int level)
 {
        struct hpp_dynamic_entry *hde;
 
-       hde = __alloc_dynamic_entry(evsel, field);
+       hde = __alloc_dynamic_entry(evsel, field, level);
        if (hde == NULL)
                return -ENOMEM;
 
@@ -1922,14 +2026,14 @@ static int __dynamic_dimension__add(struct perf_evsel *evsel,
        return 0;
 }
 
-static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
+static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level)
 {
        int ret;
        struct format_field *field;
 
        field = evsel->tp_format->format.fields;
        while (field) {
-               ret = __dynamic_dimension__add(evsel, field, raw_trace);
+               ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
                if (ret < 0)
                        return ret;
 
@@ -1938,7 +2042,8 @@ static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
        return 0;
 }
 
-static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
+static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace,
+                                 int level)
 {
        int ret;
        struct perf_evsel *evsel;
@@ -1947,7 +2052,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
                if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
                        continue;
 
-               ret = add_evsel_fields(evsel, raw_trace);
+               ret = add_evsel_fields(evsel, raw_trace, level);
                if (ret < 0)
                        return ret;
        }
@@ -1955,7 +2060,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
 }
 
 static int add_all_matching_fields(struct perf_evlist *evlist,
-                                  char *field_name, bool raw_trace)
+                                  char *field_name, bool raw_trace, int level)
 {
        int ret = -ESRCH;
        struct perf_evsel *evsel;
@@ -1969,14 +2074,15 @@ static int add_all_matching_fields(struct perf_evlist *evlist,
                if (field == NULL)
                        continue;
 
-               ret = __dynamic_dimension__add(evsel, field, raw_trace);
+               ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
                if (ret < 0)
                        break;
        }
        return ret;
 }
 
-static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
+                            int level)
 {
        char *str, *event_name, *field_name, *opt_name;
        struct perf_evsel *evsel;
@@ -2006,12 +2112,12 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
        }
 
        if (!strcmp(field_name, "trace_fields")) {
-               ret = add_all_dynamic_fields(evlist, raw_trace);
+               ret = add_all_dynamic_fields(evlist, raw_trace, level);
                goto out;
        }
 
        if (event_name == NULL) {
-               ret = add_all_matching_fields(evlist, field_name, raw_trace);
+               ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
                goto out;
        }
 
@@ -2029,7 +2135,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
        }
 
        if (!strcmp(field_name, "*")) {
-               ret = add_evsel_fields(evsel, raw_trace);
+               ret = add_evsel_fields(evsel, raw_trace, level);
        } else {
                field = pevent_find_any_field(evsel->tp_format, field_name);
                if (field == NULL) {
@@ -2038,7 +2144,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
                        return -ENOENT;
                }
 
-               ret = __dynamic_dimension__add(evsel, field, raw_trace);
+               ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
        }
 
 out:
@@ -2046,12 +2152,14 @@ out:
        return ret;
 }
 
-static int __sort_dimension__add(struct sort_dimension *sd)
+static int __sort_dimension__add(struct sort_dimension *sd,
+                                struct perf_hpp_list *list,
+                                int level)
 {
        if (sd->taken)
                return 0;
 
-       if (__sort_dimension__add_hpp_sort(sd) < 0)
+       if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
                return -1;
 
        if (sd->entry->se_collapse)
@@ -2062,46 +2170,63 @@ static int __sort_dimension__add(struct sort_dimension *sd)
        return 0;
 }
 
-static int __hpp_dimension__add(struct hpp_dimension *hd)
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+                               struct perf_hpp_list *list,
+                               int level)
 {
-       if (!hd->taken) {
-               hd->taken = 1;
+       struct perf_hpp_fmt *fmt;
 
-               perf_hpp__register_sort_field(hd->fmt);
-       }
+       if (hd->taken)
+               return 0;
+
+       fmt = __hpp_dimension__alloc_hpp(hd, level);
+       if (!fmt)
+               return -1;
+
+       hd->taken = 1;
+       perf_hpp_list__register_sort_field(list, fmt);
        return 0;
 }
 
-static int __sort_dimension__add_output(struct sort_dimension *sd)
+static int __sort_dimension__add_output(struct perf_hpp_list *list,
+                                       struct sort_dimension *sd)
 {
        if (sd->taken)
                return 0;
 
-       if (__sort_dimension__add_hpp_output(sd) < 0)
+       if (__sort_dimension__add_hpp_output(sd, list) < 0)
                return -1;
 
        sd->taken = 1;
        return 0;
 }
 
-static int __hpp_dimension__add_output(struct hpp_dimension *hd)
+static int __hpp_dimension__add_output(struct perf_hpp_list *list,
+                                      struct hpp_dimension *hd)
 {
-       if (!hd->taken) {
-               hd->taken = 1;
+       struct perf_hpp_fmt *fmt;
 
-               perf_hpp__column_register(hd->fmt);
-       }
+       if (hd->taken)
+               return 0;
+
+       fmt = __hpp_dimension__alloc_hpp(hd, 0);
+       if (!fmt)
+               return -1;
+
+       hd->taken = 1;
+       perf_hpp_list__column_register(list, fmt);
        return 0;
 }
 
 int hpp_dimension__add_output(unsigned col)
 {
        BUG_ON(col >= PERF_HPP__MAX_INDEX);
-       return __hpp_dimension__add_output(&hpp_sort_dimensions[col]);
+       return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
 }
 
-static int sort_dimension__add(const char *tok,
-                              struct perf_evlist *evlist __maybe_unused)
+static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+                              struct perf_evlist *evlist __maybe_unused,
+                              int level)
 {
        unsigned int i;
 
@@ -2136,9 +2261,13 @@ static int sort_dimension__add(const char *tok,
                        sort__has_dso = 1;
                } else if (sd->entry == &sort_socket) {
                        sort__has_socket = 1;
+               } else if (sd->entry == &sort_thread) {
+                       sort__has_thread = 1;
+               } else if (sd->entry == &sort_comm) {
+                       sort__has_comm = 1;
                }
 
-               return __sort_dimension__add(sd);
+               return __sort_dimension__add(sd, list, level);
        }
 
        for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2147,7 +2276,7 @@ static int sort_dimension__add(const char *tok,
                if (strncasecmp(tok, hd->name, strlen(tok)))
                        continue;
 
-               return __hpp_dimension__add(hd);
+               return __hpp_dimension__add(hd, list, level);
        }
 
        for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2162,7 +2291,7 @@ static int sort_dimension__add(const char *tok,
                if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
                        sort__has_sym = 1;
 
-               __sort_dimension__add(sd);
+               __sort_dimension__add(sd, list, level);
                return 0;
        }
 
@@ -2178,16 +2307,60 @@ static int sort_dimension__add(const char *tok,
                if (sd->entry == &sort_mem_daddr_sym)
                        sort__has_sym = 1;
 
-               __sort_dimension__add(sd);
+               __sort_dimension__add(sd, list, level);
                return 0;
        }
 
-       if (!add_dynamic_entry(evlist, tok))
+       if (!add_dynamic_entry(evlist, tok, level))
                return 0;
 
        return -ESRCH;
 }
 
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+                          struct perf_evlist *evlist)
+{
+       char *tmp, *tok;
+       int ret = 0;
+       int level = 0;
+       int next_level = 1;
+       bool in_group = false;
+
+       do {
+               tok = str;
+               tmp = strpbrk(str, "{}, ");
+               if (tmp) {
+                       if (in_group)
+                               next_level = level;
+                       else
+                               next_level = level + 1;
+
+                       if (*tmp == '{')
+                               in_group = true;
+                       else if (*tmp == '}')
+                               in_group = false;
+
+                       *tmp = '\0';
+                       str = tmp + 1;
+               }
+
+               if (*tok) {
+                       ret = sort_dimension__add(list, tok, evlist, level);
+                       if (ret == -EINVAL) {
+                               error("Invalid --sort key: `%s'", tok);
+                               break;
+                       } else if (ret == -ESRCH) {
+                               error("Unknown --sort key: `%s'", tok);
+                               break;
+                       }
+               }
+
+               level = next_level;
+       } while (tmp);
+
+       return ret;
+}
+
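
setup_sort_list() replaces the flat strtok_r() loop removed further down so that sort keys can be grouped with braces and each key carries a hierarchy level: keys inside '{...}' keep the current level, while every separator outside a group moves to the next level. As I read the parser, a sort order such as

    {comm,dso},sym   ->   comm, dso : same hierarchy level
                          sym       : one level deeper

would therefore produce a two-level hierarchy in --hierarchy mode, with comm and dso combined on the first level. Error handling is unchanged from the old loop (the same -EINVAL/-ESRCH messages).
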
 static const char *get_default_sort_order(struct perf_evlist *evlist)
 {
        const char *default_sort_orders[] = {
@@ -2282,7 +2455,7 @@ static char *setup_overhead(char *keys)
 
 static int __setup_sorting(struct perf_evlist *evlist)
 {
-       char *tmp, *tok, *str;
+       char *str;
        const char *sort_keys;
        int ret = 0;
 
@@ -2320,17 +2493,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
                }
        }
 
-       for (tok = strtok_r(str, ", ", &tmp);
-                       tok; tok = strtok_r(NULL, ", ", &tmp)) {
-               ret = sort_dimension__add(tok, evlist);
-               if (ret == -EINVAL) {
-                       error("Invalid --sort key: `%s'", tok);
-                       break;
-               } else if (ret == -ESRCH) {
-                       error("Unknown --sort key: `%s'", tok);
-                       break;
-               }
-       }
+       ret = setup_sort_list(&perf_hpp_list, str, evlist);
 
        free(str);
        return ret;
@@ -2341,7 +2504,7 @@ void perf_hpp__set_elide(int idx, bool elide)
        struct perf_hpp_fmt *fmt;
        struct hpp_sort_entry *hse;
 
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
@@ -2401,7 +2564,7 @@ void sort__setup_elide(FILE *output)
        struct perf_hpp_fmt *fmt;
        struct hpp_sort_entry *hse;
 
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
@@ -2413,7 +2576,7 @@ void sort__setup_elide(FILE *output)
         * It makes no sense to elide all of sort entries.
         * Just revert them to show up again.
         */
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
@@ -2421,7 +2584,7 @@ void sort__setup_elide(FILE *output)
                        return;
        }
 
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
@@ -2429,7 +2592,7 @@ void sort__setup_elide(FILE *output)
        }
 }
 
-static int output_field_add(char *tok)
+static int output_field_add(struct perf_hpp_list *list, char *tok)
 {
        unsigned int i;
 
@@ -2439,7 +2602,7 @@ static int output_field_add(char *tok)
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               return __sort_dimension__add_output(sd);
+               return __sort_dimension__add_output(list, sd);
        }
 
        for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2448,7 +2611,7 @@ static int output_field_add(char *tok)
                if (strncasecmp(tok, hd->name, strlen(tok)))
                        continue;
 
-               return __hpp_dimension__add_output(hd);
+               return __hpp_dimension__add_output(list, hd);
        }
 
        for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2457,7 +2620,7 @@ static int output_field_add(char *tok)
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               return __sort_dimension__add_output(sd);
+               return __sort_dimension__add_output(list, sd);
        }
 
        for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
@@ -2466,12 +2629,32 @@ static int output_field_add(char *tok)
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               return __sort_dimension__add_output(sd);
+               return __sort_dimension__add_output(list, sd);
        }
 
        return -ESRCH;
 }
 
+static int setup_output_list(struct perf_hpp_list *list, char *str)
+{
+       char *tmp, *tok;
+       int ret = 0;
+
+       for (tok = strtok_r(str, ", ", &tmp);
+                       tok; tok = strtok_r(NULL, ", ", &tmp)) {
+               ret = output_field_add(list, tok);
+               if (ret == -EINVAL) {
+                       error("Invalid --fields key: `%s'", tok);
+                       break;
+               } else if (ret == -ESRCH) {
+                       error("Unknown --fields key: `%s'", tok);
+                       break;
+               }
+       }
+
+       return ret;
+}
+
 static void reset_dimensions(void)
 {
        unsigned int i;
@@ -2496,7 +2679,7 @@ bool is_strict_order(const char *order)
 
 static int __setup_output_field(void)
 {
-       char *tmp, *tok, *str, *strp;
+       char *str, *strp;
        int ret = -EINVAL;
 
        if (field_order == NULL)
@@ -2516,17 +2699,7 @@ static int __setup_output_field(void)
                goto out;
        }
 
-       for (tok = strtok_r(strp, ", ", &tmp);
-                       tok; tok = strtok_r(NULL, ", ", &tmp)) {
-               ret = output_field_add(tok);
-               if (ret == -EINVAL) {
-                       error("Invalid --fields key: `%s'", tok);
-                       break;
-               } else if (ret == -ESRCH) {
-                       error("Unknown --fields key: `%s'", tok);
-                       break;
-               }
-       }
+       ret = setup_output_list(&perf_hpp_list, strp);
 
 out:
        free(str);
@@ -2542,7 +2715,7 @@ int setup_sorting(struct perf_evlist *evlist)
                return err;
 
        if (parent_pattern != default_parent_pattern) {
-               err = sort_dimension__add("parent", evlist);
+               err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
                if (err < 0)
                        return err;
        }
@@ -2560,9 +2733,13 @@ int setup_sorting(struct perf_evlist *evlist)
                return err;
 
        /* copy sort keys to output fields */
-       perf_hpp__setup_output_field();
+       perf_hpp__setup_output_field(&perf_hpp_list);
        /* and then copy output fields to sort keys */
-       perf_hpp__append_sort_keys();
+       perf_hpp__append_sort_keys(&perf_hpp_list);
+
+       /* setup hists-specific output fields */
+       if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0)
+               return -1;
 
        return 0;
 }
@@ -2578,5 +2755,5 @@ void reset_output_field(void)
        sort_order = NULL;
 
        reset_dimensions();
-       perf_hpp__reset_output_field();
+       perf_hpp__reset_output_field(&perf_hpp_list);
 }
index 687bbb1244281ba65a99c3cb78f5bcb8a1eaa41b..3f4e359981192ac50b56e770aa472749666e1f98 100644 (file)
@@ -32,9 +32,12 @@ extern const char default_sort_order[];
 extern regex_t ignore_callees_regex;
 extern int have_ignore_callees;
 extern int sort__need_collapse;
+extern int sort__has_dso;
 extern int sort__has_parent;
 extern int sort__has_sym;
 extern int sort__has_socket;
+extern int sort__has_thread;
+extern int sort__has_comm;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
@@ -94,9 +97,11 @@ struct hist_entry {
        s32                     socket;
        s32                     cpu;
        u8                      cpumode;
+       u8                      depth;
 
        /* We are added by hists__add_dummy_entry. */
        bool                    dummy;
+       bool                    leaf;
 
        char                    level;
        u8                      filtered;
@@ -113,18 +118,28 @@ struct hist_entry {
                        bool    init_have_children;
                        bool    unfolded;
                        bool    has_children;
+                       bool    has_no_entry;
                };
        };
        char                    *srcline;
        char                    *srcfile;
        struct symbol           *parent;
-       struct rb_root          sorted_chain;
        struct branch_info      *branch_info;
        struct hists            *hists;
        struct mem_info         *mem_info;
        void                    *raw_data;
        u32                     raw_size;
        void                    *trace_output;
+       struct perf_hpp_list    *hpp_list;
+       struct hist_entry       *parent_he;
+       union {
+               /* this is for hierarchical entry structure */
+               struct {
+                       struct rb_root  hroot_in;
+                       struct rb_root  hroot_out;
+               };                              /* non-leaf entries */
+               struct rb_root  sorted_chain;   /* leaf entry has callchains */
+       };
        struct callchain_root   callchain[0]; /* must be last member */
 };
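
The union above is the core of the hierarchy support: a non-leaf entry owns two rbtrees of child entries (hroot_in for the collapsed input, hroot_out for the output-sorted children, as the names suggest), while a leaf entry keeps the callchain tree as before; depth, leaf and parent_he describe the entry's position in that tree. A rough traversal sketch under those assumptions, illustrative only:

    static void walk_hierarchy(struct hist_entry *he, int depth)
    {
            struct rb_node *nd;

            if (he->leaf)
                    return;         /* leaves carry callchains, not children */

            for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) {
                    struct hist_entry *child;

                    child = rb_entry(nd, struct hist_entry, rb_node);
                    printf("%*s<child entry>\n", 2 * depth, "");
                    walk_hierarchy(child, depth + 1);
            }
    }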
 
@@ -160,6 +175,17 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he)
        return period * 100.0 / total_period;
 }
 
+static inline u64 cl_address(u64 address)
+{
+       /* return the cacheline of the address */
+       return (address & ~(cacheline_size - 1));
+}
+
+static inline u64 cl_offset(u64 address)
+{
+       /* return the offset within the cacheline */
+       return (address & (cacheline_size - 1));
+}
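
cl_offset() is the counterpart of cl_address(): the latter masks the low bits off, the former keeps only them. With a 64-byte cacheline_size, for example, an address of 0x1234 gives cl_address() == 0x1200 and cl_offset() == 0x34.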
 
 enum sort_mode {
        SORT_MODE__NORMAL,
@@ -221,6 +247,7 @@ struct sort_entry {
        int64_t (*se_sort)(struct hist_entry *, struct hist_entry *);
        int     (*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
                               unsigned int width);
+       int     (*se_filter)(struct hist_entry *he, int type, const void *arg);
        u8      se_width_idx;
 };
 
index 6ac03146889d29be60707efd6e04c27c919f64de..b33ffb2af2cf5900378fb9fb8ef60e9eddaaf9db 100644 (file)
@@ -2,6 +2,7 @@
 #include "evsel.h"
 #include "stat.h"
 #include "color.h"
+#include "pmu.h"
 
 enum {
        CTX_BIT_USER    = 1 << 0,
@@ -14,6 +15,13 @@ enum {
 
 #define NUM_CTX CTX_BIT_MAX
 
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
@@ -28,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static bool have_frontend_stalled;
 
 struct stats walltime_nsecs_stats;
 
+void perf_stat__init_shadow_stats(void)
+{
+       have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
+}
+
 static int evsel_context(struct perf_evsel *evsel)
 {
        int ctx = 0;
@@ -137,9 +151,10 @@ static const char *get_ratio_color(enum grc_type type, double ratio)
        return color;
 }
 
-static void print_stalled_cycles_frontend(FILE *out, int cpu,
+static void print_stalled_cycles_frontend(int cpu,
                                          struct perf_evsel *evsel
-                                         __maybe_unused, double avg)
+                                         __maybe_unused, double avg,
+                                         struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -152,14 +167,17 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu,
 
        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " frontend cycles idle   ");
+       if (ratio)
+               out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
+                                 ratio);
+       else
+               out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
 }
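
From here on, the stat-shadow printers stop writing to a FILE * themselves and instead hand a (color, printf format, name, value) tuple to a callback supplied through struct perf_stat_output_ctx, so the caller decides how to render it (plain text, CSV, and so on). A minimal text-mode sketch, with the callback signature inferred from the call sites in this file (the real typedef lives in the perf stat headers):

    #include <stdio.h>

    /* inferred from the call sites: ctx, color, printf format, metric name, value */
    typedef void (*print_metric_t)(void *ctx, const char *color,
                                   const char *fmt, const char *unit, double val);

    struct perf_stat_output_ctx {
            void *ctx;
            print_metric_t print_metric;
            void (*new_line)(void *ctx);
    };

    static void text_print_metric(void *ctx, const char *color,
                                  const char *fmt, const char *unit, double val)
    {
            FILE *out = ctx;

            (void)color;                            /* color handling omitted here */
            fputs(" #  ", out);
            if (fmt)
                    fprintf(out, fmt, val);
            else
                    fputs("        ", out);         /* NULL fmt: value not available */
            fprintf(out, " %s", unit);
    }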
 
-static void print_stalled_cycles_backend(FILE *out, int cpu,
+static void print_stalled_cycles_backend(int cpu,
                                         struct perf_evsel *evsel
-                                        __maybe_unused, double avg)
+                                        __maybe_unused, double avg,
+                                        struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -172,14 +190,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu,
 
        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " backend  cycles idle   ");
+       out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
 }
 
-static void print_branch_misses(FILE *out, int cpu,
+static void print_branch_misses(int cpu,
                                struct perf_evsel *evsel __maybe_unused,
-                               double avg)
+                               double avg,
+                               struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -192,14 +209,13 @@ static void print_branch_misses(FILE *out, int cpu,
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all branches        ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
 }
 
-static void print_l1_dcache_misses(FILE *out, int cpu,
+static void print_l1_dcache_misses(int cpu,
                                   struct perf_evsel *evsel __maybe_unused,
-                                  double avg)
+                                  double avg,
+                                  struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -212,14 +228,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu,
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all L1-dcache hits  ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
 }
 
-static void print_l1_icache_misses(FILE *out, int cpu,
+static void print_l1_icache_misses(int cpu,
                                   struct perf_evsel *evsel __maybe_unused,
-                                  double avg)
+                                  double avg,
+                                  struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -231,15 +246,13 @@ static void print_l1_icache_misses(FILE *out, int cpu,
                ratio = avg / total * 100.0;
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all L1-icache hits  ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
 }
 
-static void print_dtlb_cache_misses(FILE *out, int cpu,
+static void print_dtlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel __maybe_unused,
-                                   double avg)
+                                   double avg,
+                                   struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -251,15 +264,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu,
                ratio = avg / total * 100.0;
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all dTLB cache hits ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
 }
 
-static void print_itlb_cache_misses(FILE *out, int cpu,
+static void print_itlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel __maybe_unused,
-                                   double avg)
+                                   double avg,
+                                   struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -271,15 +282,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu,
                ratio = avg / total * 100.0;
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all iTLB cache hits ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
 }
 
-static void print_ll_cache_misses(FILE *out, int cpu,
+static void print_ll_cache_misses(int cpu,
                                  struct perf_evsel *evsel __maybe_unused,
-                                 double avg)
+                                 double avg,
+                                 struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -291,15 +300,15 @@ static void print_ll_cache_misses(FILE *out, int cpu,
                ratio = avg / total * 100.0;
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all LL-cache hits   ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
 }
 
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-                                  double avg, int cpu, enum aggr_mode aggr)
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+                                  double avg, int cpu,
+                                  struct perf_stat_output_ctx *out)
 {
+       void *ctxp = out->ctx;
+       print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
        int ctx = evsel_context(evsel);
 
@@ -307,119 +316,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                if (total) {
                        ratio = avg / total;
-                       fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
+                       print_metric(ctxp, NULL, "%7.2f ",
+                                       "insn per cycle", ratio);
                } else {
-                       fprintf(out, "                                   ");
+                       print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
                }
                total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
                total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
                if (total && avg) {
+                       out->new_line(ctxp);
                        ratio = total / avg;
-                       fprintf(out, "\n");
-                       if (aggr == AGGR_NONE)
-                               fprintf(out, "        ");
-                       fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
+                       print_metric(ctxp, NULL, "%7.2f ",
+                                       "stalled cycles per insn",
+                                       ratio);
+               } else if (have_frontend_stalled) {
+                       print_metric(ctxp, NULL, NULL,
+                                    "stalled cycles per insn", 0);
                }
-
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-                       runtime_branches_stats[ctx][cpu].n != 0) {
-               print_branch_misses(out, cpu, evsel, avg);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+               if (runtime_branches_stats[ctx][cpu].n != 0)
+                       print_branch_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all branches", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_l1_dcache_stats[ctx][cpu].n != 0) {
-               print_l1_dcache_misses(out, cpu, evsel, avg);
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
+                       print_l1_dcache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_l1_icache_stats[ctx][cpu].n != 0) {
-               print_l1_icache_misses(out, cpu, evsel, avg);
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_l1_icache_stats[ctx][cpu].n != 0)
+                       print_l1_icache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
-               print_dtlb_cache_misses(out, cpu, evsel, avg);
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
+                       print_dtlb_cache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_itlb_cache_stats[ctx][cpu].n != 0) {
-               print_itlb_cache_misses(out, cpu, evsel, avg);
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
+                       print_itlb_cache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_ll_cache_stats[ctx][cpu].n != 0) {
-               print_ll_cache_misses(out, cpu, evsel, avg);
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-                       runtime_cacherefs_stats[ctx][cpu].n != 0) {
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_ll_cache_stats[ctx][cpu].n != 0)
+                       print_ll_cache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
                total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
 
                if (total)
                        ratio = avg * 100 / total;
 
-               fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
-
+               if (runtime_cacherefs_stats[ctx][cpu].n != 0)
+                       print_metric(ctxp, NULL, "%8.3f %%",
+                                    "of all cache refs", ratio);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-               print_stalled_cycles_frontend(out, cpu, evsel, avg);
+               print_stalled_cycles_frontend(cpu, evsel, avg, out);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-               print_stalled_cycles_backend(out, cpu, evsel, avg);
+               print_stalled_cycles_backend(cpu, evsel, avg, out);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                total = avg_stats(&runtime_nsecs_stats[cpu]);
 
                if (total) {
                        ratio = avg / total;
-                       fprintf(out, " # %8.3f GHz                    ", ratio);
+                       print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
                } else {
-                       fprintf(out, "                                   ");
+                       print_metric(ctxp, NULL, NULL, "GHz", 0);
                }
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                if (total)
-                       fprintf(out,
-                               " #   %5.2f%% transactional cycles   ",
-                               100.0 * (avg / total));
+                       print_metric(ctxp, NULL,
+                                       "%7.2f%%", "transactional cycles",
+                                       100.0 * (avg / total));
+               else
+                       print_metric(ctxp, NULL, NULL, "transactional cycles",
+                                    0);
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
                if (total2 < avg)
                        total2 = avg;
                if (total)
-                       fprintf(out,
-                               " #   %5.2f%% aborted cycles         ",
+                       print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
                                100.0 * ((total2-avg) / total));
-       } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
-                  runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+               else
+                       print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
+       } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
                if (avg)
                        ratio = total / avg;
 
-               fprintf(out, " # %8.0f cycles / transaction   ", ratio);
-       } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
-                  runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+               if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
+                       print_metric(ctxp, NULL, "%8.0f",
+                                    "cycles / transaction", ratio);
+               else
+                       print_metric(ctxp, NULL, NULL, "cycles / transaction",
+                                    0);
+       } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
                if (avg)
                        ratio = total / avg;
 
-               fprintf(out, " # %8.0f cycles / elision       ", ratio);
+               print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
        } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
                if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
-                       fprintf(out, " # %8.3f CPUs utilized          ", avg / ratio);
+                       print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
+                                    avg / ratio);
                else
-                       fprintf(out, "                                   ");
+                       print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';
+               char unit_buf[10];
 
                total = avg_stats(&runtime_nsecs_stats[cpu]);
 
@@ -429,9 +464,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
                        ratio *= 1000;
                        unit = 'K';
                }
-
-               fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
+               snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+               print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
        } else {
-               fprintf(out, "                                   ");
+               print_metric(ctxp, NULL, NULL, NULL, 0);
        }
 }
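
The stat-shadow.c hunks above replace direct fprintf() output with a caller-supplied print_metric()/new_line() callback pair, so the same shadow-metric logic can feed plain text, CSV or other frontends. Judging from the call sites, the third argument carries a printf format (or NULL when only the label should be shown) and the fourth the unit/description string. A minimal stdio-backed sketch of such a callback pair might look like this; print_metric_stdio and new_line_stdio are illustrative names, not part of the patch:

    #include <stdio.h>

    static void print_metric_stdio(void *ctx, const char *color,
                                   const char *fmt, const char *unit, double val)
    {
            FILE *out = ctx;

            (void)color;                            /* colouring ignored here   */
            if (!fmt || !unit) {
                    fprintf(out, "%-35s", "");      /* keep the columns aligned */
                    return;
            }
            fprintf(out, " # ");
            fprintf(out, fmt, val);
            fprintf(out, " %s", unit);
    }

    static void new_line_stdio(void *ctx)
    {
            fputc('\n', (FILE *)ctx);
    }
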
index afb0c45eba34ba8db7a6cb6d258326f545f7aca1..4d9b481cf3b6edbb7d6161cbd238709241dedc5b 100644 (file)
@@ -97,7 +97,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel)
        }
 }
 
-void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
+static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 {
        int i;
        struct perf_stat_evsel *ps = evsel->priv;
@@ -108,7 +108,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
        perf_stat_evsel_id_init(evsel);
 }
 
-int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 {
        evsel->priv = zalloc(sizeof(struct perf_stat_evsel));
        if (evsel->priv == NULL)
@@ -117,13 +117,13 @@ int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
        return 0;
 }
 
-void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 {
        zfree(&evsel->priv);
 }
 
-int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-                                     int ncpus, int nthreads)
+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
+                                            int ncpus, int nthreads)
 {
        struct perf_counts *counts;
 
@@ -134,13 +134,13 @@ int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
        return counts ? 0 : -ENOMEM;
 }
 
-void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 {
        perf_counts__delete(evsel->prev_raw_counts);
        evsel->prev_raw_counts = NULL;
 }
 
-int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
 {
        int ncpus = perf_evsel__nr_cpus(evsel);
        int nthreads = thread_map__nr(evsel->threads);
index 086f4e128d6351f0e0de862c2ebcc44801cbc2f8..0150e786ccc7c1f48e407323b606f47155a076c9 100644 (file)
@@ -68,21 +68,23 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel);
 
 extern struct stats walltime_nsecs_stats;
 
+typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
+                              const char *fmt, double val);
+typedef void (*new_line_t )(void *ctx);
+
+void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
                                    int cpu);
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-                                  double avg, int cpu, enum aggr_mode aggr);
-
-void perf_evsel__reset_stat_priv(struct perf_evsel *evsel);
-int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel);
-void perf_evsel__free_stat_priv(struct perf_evsel *evsel);
-
-int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-                                     int ncpus, int nthreads);
-void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel);
+struct perf_stat_output_ctx {
+       void *ctx;
+       print_metric_t print_metric;
+       new_line_t new_line;
+};
 
-int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+                                  double avg, int cpu,
+                                  struct perf_stat_output_ctx *out);
 
 int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
 void perf_evlist__free_stats(struct perf_evlist *evlist);
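
The header now exposes the callbacks through struct perf_stat_output_ctx, and perf_stat__print_shadow_stats() takes that context instead of a FILE *. A sketch of how a frontend might wire it up, reusing the hypothetical print_metric_stdio/new_line_stdio helpers from the sketch above (counter, avg and cpu stand for whatever the caller is iterating over):

    struct perf_stat_output_ctx out = {
            .ctx          = stdout,
            .print_metric = print_metric_stdio,
            .new_line     = new_line_stdio,
    };

    perf_stat__print_shadow_stats(counter, avg, cpu, &out);
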
index 25671fa166188413c66758a978082e89a1ba08d2..d3d279275432ee663641a1a6dbb0a0cf8d3b8334 100644 (file)
@@ -51,30 +51,6 @@ void strbuf_grow(struct strbuf *sb, size_t extra)
        ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
 }
 
-static void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
-                                  const void *data, size_t dlen)
-{
-       if (pos + len < pos)
-               die("you want to use way too much memory");
-       if (pos > sb->len)
-               die("`pos' is too far after the end of the buffer");
-       if (pos + len > sb->len)
-               die("`pos + len' is too far after the end of the buffer");
-
-       if (dlen >= len)
-               strbuf_grow(sb, dlen - len);
-       memmove(sb->buf + pos + dlen,
-                       sb->buf + pos + len,
-                       sb->len - pos - len);
-       memcpy(sb->buf + pos, data, dlen);
-       strbuf_setlen(sb, sb->len + dlen - len);
-}
-
-void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
-{
-       strbuf_splice(sb, pos, len, NULL, 0);
-}
-
 void strbuf_add(struct strbuf *sb, const void *data, size_t len)
 {
        strbuf_grow(sb, len);
index 529f2f03524915ab9cae7c5608de444fd875812d..7a32c838884d8de6feaa3223e388199c5c939301 100644 (file)
@@ -77,8 +77,6 @@ static inline void strbuf_addch(struct strbuf *sb, int c) {
        sb->buf[sb->len] = '\0';
 }
 
-extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
-
 extern void strbuf_add(struct strbuf *, const void *, size_t);
 static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
        strbuf_add(sb, s, strlen(s));
index 562b8ebeae5b2414b6bac867c928cb22ee9a5f13..b1dd68f358fcd8e7390b5929ed736a357c7853c1 100644 (file)
@@ -6,6 +6,7 @@
 #include <inttypes.h>
 
 #include "symbol.h"
+#include "demangle-java.h"
 #include "machine.h"
 #include "vdso.h"
 #include <symbol/kallsyms.h>
@@ -1077,6 +1078,8 @@ new_symbol:
                                demangle_flags = DMGL_PARAMS | DMGL_ANSI;
 
                        demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+                       if (demangled == NULL)
+                               demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
                        if (demangled != NULL)
                                elf_name = demangled;
                }
index ab02209a7cf3b162431bfc006287f5fbd8c68f43..e7588dc915181729394c1d2195c78576c47d20df 100644 (file)
@@ -1466,7 +1466,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
         * Read the build id if possible. This is required for
         * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
         */
-       if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
+       if (is_regular_file(name) &&
+           filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
                dso__set_build_id(dso, build_id);
 
        /*
@@ -1487,6 +1488,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
                                                   root_dir, name, PATH_MAX))
                        continue;
 
+               if (!is_regular_file(name))
+                       continue;
+
                /* Name is now the name of the next image to try */
                if (symsrc__init(ss, dso, name, symtab_type) < 0)
                        continue;
@@ -1525,6 +1529,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
        if (!runtime_ss && syms_ss)
                runtime_ss = syms_ss;
 
+       if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
+               if (dso__build_id_is_kmod(dso, name, PATH_MAX))
+                       kmod = true;
+
        if (syms_ss)
                ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod);
        else
index ccd1caa40e116645be37025665388886d7c97546..a937053a0ae07a6214fb03753faa66819ed40884 100644 (file)
@@ -110,7 +110,8 @@ struct symbol_conf {
                        has_filter,
                        show_ref_callgraph,
                        hide_unresolved,
-                       raw_trace;
+                       raw_trace,
+                       report_hierarchy;
        const char      *vmlinux_name,
                        *kallsyms_name,
                        *source_prefix,
index 802bb868d446cafa7c5383982193ad13d87b785a..8ae051e0ec79090e674fdf9a6cc9fbece8275ebb 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
 #include "trace-event.h"
 #include "machine.h"
 #include "util.h"
index 4d4210d4e13d1d7fbc32a5c1f4afff58a0810fd0..1b741646eed00b7754ba0a618b4fb57b8c87d9ac 100644 (file)
@@ -19,7 +19,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
        u64 quot, rem;
 
        quot = cyc >> tc->time_shift;
-       rem  = cyc & ((1 << tc->time_shift) - 1);
+       rem  = cyc & (((u64)1 << tc->time_shift) - 1);
        return tc->time_zero + quot * tc->time_mult +
               ((rem * tc->time_mult) >> tc->time_shift);
 }
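
The tsc.c fix widens the shifted literal: with a plain int constant, (1 << tc->time_shift) is evaluated in 32-bit arithmetic and loses bits once time_shift reaches 31 or more, corrupting the remainder mask. A short illustration of the intended 64-bit mask (assuming time_shift can be that large on some systems):

    u64 mask = ((u64)1 << tc->time_shift) - 1;   /* full 64-bit mask           */
    u64 rem  = cyc & mask;                       /* low time_shift bits of cyc */
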
index ead9509835d23e2aee29b9f3548613973a5937fb..b7766c577b015d978fd3e9960c451692f81daa6e 100644 (file)
@@ -14,6 +14,7 @@
 #include <limits.h>
 #include <byteswap.h>
 #include <linux/kernel.h>
+#include <linux/log2.h>
 #include <unistd.h>
 #include "callchain.h"
 #include "strlist.h"
@@ -507,54 +508,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
        return ret;
 }
 
-int filename__read_str(const char *filename, char **buf, size_t *sizep)
-{
-       size_t size = 0, alloc_size = 0;
-       void *bf = NULL, *nbf;
-       int fd, n, err = 0;
-       char sbuf[STRERR_BUFSIZE];
-
-       fd = open(filename, O_RDONLY);
-       if (fd < 0)
-               return -errno;
-
-       do {
-               if (size == alloc_size) {
-                       alloc_size += BUFSIZ;
-                       nbf = realloc(bf, alloc_size);
-                       if (!nbf) {
-                               err = -ENOMEM;
-                               break;
-                       }
-
-                       bf = nbf;
-               }
-
-               n = read(fd, bf + size, alloc_size - size);
-               if (n < 0) {
-                       if (size) {
-                               pr_warning("read failed %d: %s\n", errno,
-                                        strerror_r(errno, sbuf, sizeof(sbuf)));
-                               err = 0;
-                       } else
-                               err = -errno;
-
-                       break;
-               }
-
-               size += n;
-       } while (n > 0);
-
-       if (!err) {
-               *sizep = size;
-               *buf   = bf;
-       } else
-               free(bf);
-
-       close(fd);
-       return err;
-}
-
 const char *get_filename_for_perf_kvm(void)
 {
        const char *filename;
@@ -691,3 +644,66 @@ out:
 
        return tip;
 }
+
+bool is_regular_file(const char *file)
+{
+       struct stat st;
+
+       if (stat(file, &st))
+               return false;
+
+       return S_ISREG(st.st_mode);
+}
+
+int fetch_current_timestamp(char *buf, size_t sz)
+{
+       struct timeval tv;
+       struct tm tm;
+       char dt[32];
+
+       if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+               return -1;
+
+       if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+               return -1;
+
+       scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+       return 0;
+}
+
+void print_binary(unsigned char *data, size_t len,
+                 size_t bytes_per_line, print_binary_t printer,
+                 void *extra)
+{
+       size_t i, j, mask;
+
+       if (!printer)
+               return;
+
+       bytes_per_line = roundup_pow_of_two(bytes_per_line);
+       mask = bytes_per_line - 1;
+
+       printer(BINARY_PRINT_DATA_BEGIN, 0, extra);
+       for (i = 0; i < len; i++) {
+               if ((i & mask) == 0) {
+                       printer(BINARY_PRINT_LINE_BEGIN, -1, extra);
+                       printer(BINARY_PRINT_ADDR, i, extra);
+               }
+
+               printer(BINARY_PRINT_NUM_DATA, data[i], extra);
+
+               if (((i & mask) == mask) || i == len - 1) {
+                       for (j = 0; j < mask-(i & mask); j++)
+                               printer(BINARY_PRINT_NUM_PAD, -1, extra);
+
+                       printer(BINARY_PRINT_SEP, i, extra);
+                       for (j = i & ~mask; j <= i; j++)
+                               printer(BINARY_PRINT_CHAR_DATA, data[j], extra);
+                       for (j = 0; j < mask-(i & mask); j++)
+                               printer(BINARY_PRINT_CHAR_PAD, i, extra);
+                       printer(BINARY_PRINT_LINE_END, -1, extra);
+               }
+       }
+       printer(BINARY_PRINT_DATA_END, -1, extra);
+}
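
print_binary() drives a caller-supplied callback for every structural element of a dump (address column, hex bytes, padding, separator, printable characters, line breaks), so different frontends can render the same data. A sketch of a callback that turns it into a classic hex/ASCII dump on stdout, following the print_binary_t typedef added to util.h below; hexdump_printer is an illustrative name only:

    #include <stdio.h>
    #include <ctype.h>

    static void hexdump_printer(enum binary_printer_ops op, unsigned int val,
                                void *extra)
    {
            (void)extra;
            switch (op) {
            case BINARY_PRINT_ADDR:
                    printf("%08x: ", val);          /* offset into the buffer */
                    break;
            case BINARY_PRINT_NUM_DATA:
                    printf("%02x ", val);           /* one hex byte           */
                    break;
            case BINARY_PRINT_NUM_PAD:
                    printf("   ");                  /* pad a short last line  */
                    break;
            case BINARY_PRINT_SEP:
                    printf(" | ");
                    break;
            case BINARY_PRINT_CHAR_DATA:
                    putchar(isprint(val) ? val : '.');
                    break;
            case BINARY_PRINT_LINE_END:
                    putchar('\n');
                    break;
            default:                                /* BEGIN/END markers etc. */
                    break;
            }
    }

    /* e.g.: print_binary(buf, size, 16, hexdump_printer, NULL); */
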
index fe915e616f9b65388e15be963d0ef5cf58c325fd..d0d50cef8b2af31703c7d298f68d93b68326c3c5 100644 (file)
@@ -82,6 +82,8 @@
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
+extern const char *spaces;
+extern const char *dots;
 extern char buildid_dir[];
 
 /* On most systems <limits.h> would have given us this, but
@@ -303,7 +305,6 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
                  bool show_sym, bool unwind_inlines);
 void free_srcline(char *srcline);
 
-int filename__read_str(const char *filename, char **buf, size_t *sizep);
 int perf_event_paranoid(void);
 
 void mem_bswap_64(void *src, int byte_size);
@@ -343,5 +344,27 @@ int fetch_kernel_version(unsigned int *puint,
 #define KVER_PARAM(x)  KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
 
 const char *perf_tip(const char *dirpath);
+bool is_regular_file(const char *file);
+int fetch_current_timestamp(char *buf, size_t sz);
+
+enum binary_printer_ops {
+       BINARY_PRINT_DATA_BEGIN,
+       BINARY_PRINT_LINE_BEGIN,
+       BINARY_PRINT_ADDR,
+       BINARY_PRINT_NUM_DATA,
+       BINARY_PRINT_NUM_PAD,
+       BINARY_PRINT_SEP,
+       BINARY_PRINT_CHAR_DATA,
+       BINARY_PRINT_CHAR_PAD,
+       BINARY_PRINT_LINE_END,
+       BINARY_PRINT_DATA_END,
+};
+
+typedef void (*print_binary_t)(enum binary_printer_ops,
+                              unsigned int val,
+                              void *extra);
 
+void print_binary(unsigned char *data, size_t len,
+                 size_t bytes_per_line, print_binary_t printer,
+                 void *extra);
 #endif /* GIT_COMPAT_UTIL_H */
index 0dac7e05a6ac9e5f1500eca1cebbce2d900ab511..3fa94e291d16a2891b3799797c1a8da289d89b20 100644 (file)
@@ -1970,7 +1970,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
 }
 
 static void
-dump_cstate_pstate_config_info(family, model)
+dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
 {
        if (!do_nhm_platform_info)
                return;
@@ -2142,7 +2142,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 #define        RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
 #define        RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
 
-double get_tdp(model)
+double get_tdp(unsigned int model)
 {
        unsigned long long msr;
 
@@ -2256,7 +2256,7 @@ void rapl_probe(unsigned int family, unsigned int model)
        return;
 }
 
-void perf_limit_reasons_probe(family, model)
+void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 {
        if (!genuine_intel)
                return;
@@ -2792,7 +2792,7 @@ void process_cpuid()
        perf_limit_reasons_probe(family, model);
 
        if (debug)
-               dump_cstate_pstate_config_info();
+               dump_cstate_pstate_config_info(family, model);
 
        if (has_skl_msrs(family, model))
                calculate_tsc_tweak();
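
The turbostat hunks convert old-style (K&R) definitions, whose parameters default to int and are never checked against the call sites, into ANSI prototypes; that lack of checking is also why the dump_cstate_pstate_config_info() call in process_cpuid() previously compiled with no arguments at all. A small illustration of the difference, with hypothetical names not taken from the patch (older C dialects accept the first form, newer compilers warn or reject it):

    /* K&R style: 'model' defaults to int and call sites are not checked */
    double get_tdp_old(model)
    {
            return model * 1.0;
    }

    /* ANSI prototype: argument type and count are checked at every call */
    double get_tdp_new(unsigned int model)
    {
            return model * 1.0;
    }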