diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 58f1a94beaf09f6fdab1dd1b12727ad045a58913..94e5b506caa6d13206956095e646bfacbf558fb1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -39,6 +39,7 @@ static enum {
 #define LBR_IND_JMP_BIT                6 /* do not capture indirect jumps */
 #define LBR_REL_JMP_BIT                7 /* do not capture relative jumps */
 #define LBR_FAR_BIT            8 /* do not capture far branches */
+#define LBR_CALL_STACK_BIT     9 /* enable call stack */
 
 #define LBR_KERNEL     (1 << LBR_KERNEL_BIT)
 #define LBR_USER       (1 << LBR_USER_BIT)
@@ -49,6 +50,7 @@ static enum {
 #define LBR_REL_JMP    (1 << LBR_REL_JMP_BIT)
 #define LBR_IND_JMP    (1 << LBR_IND_JMP_BIT)
 #define LBR_FAR                (1 << LBR_FAR_BIT)
+#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
 
 #define LBR_PLM (LBR_KERNEL | LBR_USER)
 
@@ -69,33 +71,31 @@ static enum {
 #define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
 #define LBR_FROM_FLAG_ABORT    (1ULL << 61)
 
-#define for_each_branch_sample_type(x) \
-       for ((x) = PERF_SAMPLE_BRANCH_USER; \
-            (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
-
 /*
  * x86 control flow change classification
  * x86 control flow changes include branches, interrupts, traps, faults
  */
 enum {
-       X86_BR_NONE     = 0,      /* unknown */
-
-       X86_BR_USER     = 1 << 0, /* branch target is user */
-       X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */
-
-       X86_BR_CALL     = 1 << 2, /* call */
-       X86_BR_RET      = 1 << 3, /* return */
-       X86_BR_SYSCALL  = 1 << 4, /* syscall */
-       X86_BR_SYSRET   = 1 << 5, /* syscall return */
-       X86_BR_INT      = 1 << 6, /* sw interrupt */
-       X86_BR_IRET     = 1 << 7, /* return from interrupt */
-       X86_BR_JCC      = 1 << 8, /* conditional */
-       X86_BR_JMP      = 1 << 9, /* jump */
-       X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
-       X86_BR_IND_CALL = 1 << 11,/* indirect calls */
-       X86_BR_ABORT    = 1 << 12,/* transaction abort */
-       X86_BR_IN_TX    = 1 << 13,/* in transaction */
-       X86_BR_NO_TX    = 1 << 14,/* not in transaction */
+       X86_BR_NONE             = 0,      /* unknown */
+
+       X86_BR_USER             = 1 << 0, /* branch target is user */
+       X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */
+
+       X86_BR_CALL             = 1 << 2, /* call */
+       X86_BR_RET              = 1 << 3, /* return */
+       X86_BR_SYSCALL          = 1 << 4, /* syscall */
+       X86_BR_SYSRET           = 1 << 5, /* syscall return */
+       X86_BR_INT              = 1 << 6, /* sw interrupt */
+       X86_BR_IRET             = 1 << 7, /* return from interrupt */
+       X86_BR_JCC              = 1 << 8, /* conditional */
+       X86_BR_JMP              = 1 << 9, /* jump */
+       X86_BR_IRQ              = 1 << 10,/* hw interrupt or trap or fault */
+       X86_BR_IND_CALL         = 1 << 11,/* indirect calls */
+       X86_BR_ABORT            = 1 << 12,/* transaction abort */
+       X86_BR_IN_TX            = 1 << 13,/* in transaction */
+       X86_BR_NO_TX            = 1 << 14,/* not in transaction */
+       X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
+       X86_BR_CALL_STACK       = 1 << 16,/* call stack */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -112,13 +112,15 @@ enum {
         X86_BR_JMP      |\
         X86_BR_IRQ      |\
         X86_BR_ABORT    |\
-        X86_BR_IND_CALL)
+        X86_BR_IND_CALL |\
+        X86_BR_ZERO_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
 
 #define X86_BR_ANY_CALL                 \
        (X86_BR_CALL            |\
         X86_BR_IND_CALL        |\
+        X86_BR_ZERO_CALL       |\
         X86_BR_SYSCALL         |\
         X86_BR_IRQ             |\
         X86_BR_INT)
@@ -130,17 +132,32 @@ static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
  * otherwise it becomes near impossible to get a reliable stack.
  */
 
-static void __intel_pmu_lbr_enable(void)
+static void __intel_pmu_lbr_enable(bool pmi)
 {
-       u64 debugctl;
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       u64 debugctl, lbr_select = 0, orig_debugctl;
 
-       if (cpuc->lbr_sel)
-               wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
+       /*
+        * No need to reprogram LBR_SELECT in a PMI, as it
+        * did not change.
+        */
+       if (cpuc->lbr_sel && !pmi) {
+               lbr_select = cpuc->lbr_sel->config;
+               wrmsrl(MSR_LBR_SELECT, lbr_select);
+       }
 
        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-       debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+       orig_debugctl = debugctl;
+       debugctl |= DEBUGCTLMSR_LBR;
+       /*
+        * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
+        * If FREEZE_LBRS_ON_PMI is set, a PMI that hits near call/return
+        * instructions may cause a spurious increase/decrease of LBR_TOS.
+        */
+       if (!(lbr_select & LBR_CALL_STACK))
+               debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+       if (orig_debugctl != debugctl)
+               wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 }
 
 static void __intel_pmu_lbr_disable(void)
@@ -181,9 +198,116 @@ void intel_pmu_lbr_reset(void)
                intel_pmu_lbr_reset_64();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
+static inline u64 intel_pmu_lbr_tos(void)
+{
+       u64 tos;
+
+       rdmsrl(x86_pmu.lbr_tos, tos);
+       return tos;
+}
+
+enum {
+       LBR_NONE,
+       LBR_VALID,
+};
+
+static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+{
+       int i;
+       unsigned lbr_idx, mask;
+       u64 tos;
+
+       if (task_ctx->lbr_callstack_users == 0 ||
+           task_ctx->lbr_stack_state == LBR_NONE) {
+               intel_pmu_lbr_reset();
+               return;
+       }
+
+       mask = x86_pmu.lbr_nr - 1;
+       tos = intel_pmu_lbr_tos();
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               lbr_idx = (tos - i) & mask;
+               wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+               wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+       }
+       task_ctx->lbr_stack_state = LBR_NONE;
+}
+
+static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+{
+       int i;
+       unsigned lbr_idx, mask;
+       u64 tos;
+
+       if (task_ctx->lbr_callstack_users == 0) {
+               task_ctx->lbr_stack_state = LBR_NONE;
+               return;
+       }
+
+       mask = x86_pmu.lbr_nr - 1;
+       tos = intel_pmu_lbr_tos();
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               lbr_idx = (tos - i) & mask;
+               rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+               rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+       }
+       task_ctx->lbr_stack_state = LBR_VALID;
+}
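Both helpers above treat the LBR MSR pairs as a ring buffer: TOS points at the most recently written entry and (tos - i) & mask walks backwards from it, wrapping correctly because x86_pmu.lbr_nr is a power of two. A minimal standalone sketch of that index arithmetic (illustrative values, not kernel code):

/* Standalone demo of the (tos - i) & mask ring-buffer walk used by
 * __intel_pmu_lbr_save()/__intel_pmu_lbr_restore(). Values are made up. */
#include <stdio.h>

int main(void)
{
	unsigned lbr_nr = 16;            /* power of two, as on HSW */
	unsigned mask = lbr_nr - 1;
	unsigned long long tos = 5;      /* hypothetical top-of-stack index */

	/* i = 0 is the most recent branch; older entries wrap around. */
	for (unsigned i = 0; i < lbr_nr; i++)
		printf("slot %2u -> LBR index %2llu\n", i, (tos - i) & mask);
	return 0;
}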
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
+
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       /*
+        * If the LBR callstack feature is enabled and the stack was saved when
+        * the task was scheduled out, restore the stack. Otherwise flush
+        * the LBR stack.
+        */
+       task_ctx = ctx ? ctx->task_ctx_data : NULL;
+       if (task_ctx) {
+               if (sched_in) {
+                       __intel_pmu_lbr_restore(task_ctx);
+                       cpuc->lbr_context = ctx;
+               } else {
+                       __intel_pmu_lbr_save(task_ctx);
+               }
+               return;
+       }
+
+       /*
+        * When sampling the branch stack in system-wide mode, it may be
+        * necessary to flush the stack on context switch. This happens
+        * when the branch stack does not tag its entries with the pid
+        * of the current task. Otherwise it becomes impossible to
+        * associate a branch entry with a task. This ambiguity is more
+        * likely to appear when the branch stack supports priv level
+        * filtering and the user sets it to monitor only at the user
+        * level (which could be a useful measurement in system-wide
+        * mode). In that case, the risk is high of having a branch
+        * stack with branches from multiple tasks.
+        */
+       if (sched_in) {
+               intel_pmu_lbr_reset();
+               cpuc->lbr_context = ctx;
+       }
+}
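The per-task LBR state that intel_pmu_lbr_sched_task() saves and restores lives in ctx->task_ctx_data. Its layout is not part of this hunk; presumably the companion perf_event.h change defines it roughly like this (a hedged sketch, field names taken from their uses above):

#include <linux/types.h>

#define MAX_LBR_ENTRIES 32	/* assumed; comfortably covers the 16-deep HSW LBRs */

/* Assumed shape of the per-task LBR context saved/restored on context switch. */
struct x86_perf_task_context {
	u64 lbr_from[MAX_LBR_ENTRIES];	/* saved LBR_FROM ring */
	u64 lbr_to[MAX_LBR_ENTRIES];	/* saved LBR_TO ring */
	int lbr_callstack_users;	/* events using user call-stack mode */
	int lbr_stack_state;		/* LBR_NONE or LBR_VALID */
};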
+
+static inline bool branch_user_callstack(unsigned br_sel)
+{
+       return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
+}
+
 void intel_pmu_lbr_enable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
 
        if (!x86_pmu.lbr_nr)
                return;
@@ -198,18 +322,33 @@ void intel_pmu_lbr_enable(struct perf_event *event)
        }
        cpuc->br_sel = event->hw.branch_reg.reg;
 
+       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+                                       event->ctx->task_ctx_data) {
+               task_ctx = event->ctx->task_ctx_data;
+               task_ctx->lbr_callstack_users++;
+       }
+
        cpuc->lbr_users++;
+       perf_sched_cb_inc(event->ctx->pmu);
 }
 
 void intel_pmu_lbr_disable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
 
        if (!x86_pmu.lbr_nr)
                return;
 
+       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+                                       event->ctx->task_ctx_data) {
+               task_ctx = event->ctx->task_ctx_data;
+               task_ctx->lbr_callstack_users--;
+       }
+
        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);
+       perf_sched_cb_dec(event->ctx->pmu);
 
        if (cpuc->enabled && !cpuc->lbr_users) {
                __intel_pmu_lbr_disable();
@@ -218,12 +357,12 @@ void intel_pmu_lbr_disable(struct perf_event *event)
        }
 }
 
-void intel_pmu_lbr_enable_all(void)
+void intel_pmu_lbr_enable_all(bool pmi)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
        if (cpuc->lbr_users)
-               __intel_pmu_lbr_enable();
+               __intel_pmu_lbr_enable(pmi);
 }
 
 void intel_pmu_lbr_disable_all(void)
@@ -234,18 +373,6 @@ void intel_pmu_lbr_disable_all(void)
                __intel_pmu_lbr_disable();
 }
 
-/*
- * TOS = most recently recorded branch
- */
-static inline u64 intel_pmu_lbr_tos(void)
-{
-       u64 tos;
-
-       rdmsrl(x86_pmu.lbr_tos, tos);
-
-       return tos;
-}
-
 static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 {
        unsigned long mask = x86_pmu.lbr_nr - 1;
@@ -350,7 +477,7 @@ void intel_pmu_lbr_read(void)
  * - in case there is no HW filter
  * - in case the HW filter has errata or limitations
  */
-static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 {
        u64 br_type = event->attr.branch_sample_type;
        int mask = 0;
@@ -387,11 +514,21 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
        if (br_type & PERF_SAMPLE_BRANCH_COND)
                mask |= X86_BR_JCC;
 
+       if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+               if (!x86_pmu_has_lbr_callstack())
+                       return -EOPNOTSUPP;
+               if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
+                       return -EINVAL;
+               mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
+                       X86_BR_CALL_STACK;
+       }
+
        /*
         * stash the actual user request into reg; it may
         * be used by fixup code on some CPUs
         */
        event->hw.branch_reg.reg = mask;
+       return 0;
 }
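From user space, the new checks mean PERF_SAMPLE_BRANCH_CALL_STACK is accepted only on hardware whose lbr_sel_map carries a call-stack entry, and only in combination with priv-level bits. A hedged sketch of a request that passes this filter (assumes headers from a kernel carrying this series; the helper name is invented for illustration):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <string.h>
#include <unistd.h>

static int open_lbr_callstack_event(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/*
	 * Only priv-level bits may accompany CALL_STACK; anything else makes
	 * intel_pmu_setup_sw_lbr_filter() return -EINVAL above.
	 */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
				  PERF_SAMPLE_BRANCH_USER;

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}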
 
 /*
@@ -403,14 +540,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 {
        struct hw_perf_event_extra *reg;
        u64 br_type = event->attr.branch_sample_type;
-       u64 mask = 0, m;
-       u64 v;
+       u64 mask = 0, v;
+       int i;
 
-       for_each_branch_sample_type(m) {
-               if (!(br_type & m))
+       for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+               if (!(br_type & (1ULL << i)))
                        continue;
 
-               v = x86_pmu.lbr_sel_map[m];
+               v = x86_pmu.lbr_sel_map[i];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;
 
@@ -420,8 +557,12 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
        reg = &event->hw.branch_reg;
        reg->idx = EXTRA_REG_LBR;
 
-       /* LBR_SELECT operates in suppress mode so invert mask */
-       reg->config = ~mask & x86_pmu.lbr_sel_mask;
+       /*
+        * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
+        * in suppress mode. So LBR_SELECT should be set to
+        * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+        */
+       reg->config = mask ^ x86_pmu.lbr_sel_mask;
 
        return 0;
 }
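The switch from ~mask & x86_pmu.lbr_sel_mask to an XOR matters because bit 9 of MSR_LBR_SELECT is an enable bit (call stack), unlike bits 0-8, which suppress branch types. XOR-ing with the nine-bit LBR_SEL_MASK inverts only the suppress bits and passes LBR_CALL_STACK through as requested. A standalone worked example (bit values mirror the defines at the top of this file; illustrative only):

#include <stdio.h>

#define LBR_SEL_MASK   0x1ffULL		/* the nine suppress-mode bits */
#define LBR_USER       (1ULL << 1)
#define LBR_REL_CALL   (1ULL << 3)
#define LBR_IND_CALL   (1ULL << 4)
#define LBR_RETURN     (1ULL << 5)
#define LBR_CALL_STACK (1ULL << 9)

int main(void)
{
	/* What the HSW map yields for CALL_STACK | USER sampling. */
	unsigned long long mask = LBR_USER | LBR_REL_CALL | LBR_IND_CALL |
				  LBR_RETURN | LBR_CALL_STACK;

	/*
	 * XOR flips only bits 0-8: requested branch types get their suppress
	 * bit cleared (captured), everything else is suppressed, and the
	 * enable-mode CALL_STACK bit is left untouched.
	 */
	printf("LBR_SELECT = %#llx\n", mask ^ LBR_SEL_MASK);
	return 0;
}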
@@ -439,7 +580,9 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
        /*
         * setup SW LBR filter
         */
-       intel_pmu_setup_sw_lbr_filter(event);
+       ret = intel_pmu_setup_sw_lbr_filter(event);
+       if (ret)
+               return ret;
 
        /*
         * setup HW LBR filter, if any
@@ -568,6 +711,12 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
                ret = X86_BR_INT;
                break;
        case 0xe8: /* call near rel */
+               insn_get_immediate(&insn);
+               if (insn.immediate1.value == 0) {
+                       /* zero length call */
+                       ret = X86_BR_ZERO_CALL;
+                       break;
+               }
        case 0x9a: /* call far absolute */
                ret = X86_BR_CALL;
                break;
@@ -678,35 +827,49 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 /*
  * Map interface branch filters onto LBR filters
  */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-       [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
-       [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
-       [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
-       [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
-       [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
-                                       | LBR_IND_JMP | LBR_FAR,
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
+                                               | LBR_IND_JMP | LBR_FAR,
        /*
         * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
         */
-       [PERF_SAMPLE_BRANCH_ANY_CALL] =
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
         LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
        /*
         * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
         */
-       [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
-       [PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
 };
 
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-       [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
-       [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
-       [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
-       [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
-       [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_ANY_CALL]   = LBR_REL_CALL | LBR_IND_CALL
-                                       | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_IND_CALL]   = LBR_IND_CALL,
-       [PERF_SAMPLE_BRANCH_COND]       = LBR_JCC,
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+};
+
+static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_RETURN | LBR_CALL_STACK,
 };
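Note that the maps are now indexed by PERF_SAMPLE_BRANCH_*_SHIFT rather than by the power-of-two PERF_SAMPLE_BRANCH_* values, so they stay PERF_SAMPLE_BRANCH_MAX_SHIFT entries long and intel_pmu_setup_hw_lbr_filter() can walk bit positions directly. This also gives a natural capability test: a CPU supports call-stack mode exactly when its map has a non-zero CALL_STACK entry. The x86_pmu_has_lbr_callstack() helper used by the SW filter presumably reduces to something like the following (sketch; the real definition would live in the companion perf_event.h change, which is not part of this hunk):

/* Sketch of the capability test assumed by intel_pmu_setup_sw_lbr_filter(). */
static inline bool x86_pmu_has_lbr_callstack(void)
{
	return x86_pmu.lbr_sel_map &&
	       x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
}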
 
 /* core */
@@ -765,6 +928,20 @@ void __init intel_pmu_lbr_init_snb(void)
        pr_cont("16-deep LBR, ");
 }
 
+/* haswell */
+void intel_pmu_lbr_init_hsw(void)
+{
+       x86_pmu.lbr_nr   = 16;
+       x86_pmu.lbr_tos  = MSR_LBR_TOS;
+       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+       pr_cont("16-deep LBR, ");
+}
+
 /* atom */
 void __init intel_pmu_lbr_init_atom(void)
 {