]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - drivers/cpufreq/intel_pstate.c
cpufreq: dt: Add support for APM X-Gene 2
[mirror_ubuntu-zesty-kernel.git] / drivers / cpufreq / intel_pstate.c
index 0d82bf320838c5188fd67d99b97bb462f9f721a1..e261438cd6905b537598194622968e4f2136afb6 100644 (file)
@@ -54,6 +54,8 @@
 
 #define EXT_BITS 6
 #define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
+#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
+#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)
 
 static inline int32_t mul_fp(int32_t x, int32_t y)
 {
@@ -247,6 +249,14 @@ struct perf_limits {
  *                     when per cpu controls are enforced
  * @acpi_perf_data:    Stores ACPI perf information read from _PSS
  * @valid_pss_table:   Set to true for valid ACPI _PSS entries found
+ * @epp_powersave:     Last saved HWP energy performance preference
+ *                     (EPP) or energy performance bias (EPB),
+ *                     when policy switched to performance
+ * @epp_policy:                Last saved policy used to set EPP/EPB
+ * @epp_default:       Power on default HWP energy performance
+ *                     preference/bias
+ * @epp_saved:         Saved EPP/EPB during system suspend or CPU offline
+ *                     operation
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -274,6 +284,10 @@ struct cpudata {
        bool valid_pss_table;
 #endif
        unsigned int iowait_boost;
+       s16 epp_powersave;
+       s16 epp_policy;
+       s16 epp_default;
+       s16 epp_saved;
 };
 
 static struct cpudata **all_cpu_data;
@@ -351,9 +365,9 @@ static struct perf_limits performance_limits = {
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
-       .max_perf = int_tofp(1),
+       .max_perf = int_ext_tofp(1),
        .min_perf_pct = 100,
-       .min_perf = int_tofp(1),
+       .min_perf = int_ext_tofp(1),
        .max_policy_pct = 100,
        .max_sysfs_pct = 100,
        .min_policy_pct = 0,
@@ -364,7 +378,7 @@ static struct perf_limits powersave_limits = {
        .no_turbo = 0,
        .turbo_disabled = 0,
        .max_perf_pct = 100,
-       .max_perf = int_tofp(1),
+       .max_perf = int_ext_tofp(1),
        .min_perf_pct = 0,
        .min_perf = 0,
        .max_policy_pct = 100,
@@ -472,11 +486,11 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 }
 
 #else
-static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
 }
 
-static inline int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 {
 }
 #endif
@@ -572,14 +586,233 @@ static inline void update_turbo_state(void)
                 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
 
-static void intel_pstate_hwp_set(const struct cpumask *cpumask)
+static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
+{
+       u64 epb;
+       int ret;
+
+       if (!static_cpu_has(X86_FEATURE_EPB))
+               return -ENXIO;
+
+       ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+       if (ret)
+               return (s16)ret;
+
+       return (s16)(epb & 0x0f);
+}
+
+static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
+{
+       s16 epp;
+
+       if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+               /*
+                * When hwp_req_data is 0, it means the caller didn't read
+                * MSR_HWP_REQUEST, so we need to read it to get the EPP.
+                */
+               if (!hwp_req_data) {
+                       epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
+                                           &hwp_req_data);
+                       if (epp)
+                               return epp;
+               }
+               epp = (hwp_req_data >> 24) & 0xff;
+       } else {
+               /* When there is no EPP present, HWP uses EPB settings */
+               epp = intel_pstate_get_epb(cpu_data);
+       }
+
+       return epp;
+}
+
+static int intel_pstate_set_epb(int cpu, s16 pref)
+{
+       u64 epb;
+       int ret;
+
+       if (!static_cpu_has(X86_FEATURE_EPB))
+               return -ENXIO;
+
+       ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+       if (ret)
+               return ret;
+
+       epb = (epb & ~0x0f) | pref;
+       wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
+
+       return 0;
+}
+
+/*
+ * EPP/EPB display strings corresponding to EPP index in the
+ * energy_perf_strings[]
+ *     index           String
+ *-------------------------------------
+ *     0               default
+ *     1               performance
+ *     2               balance_performance
+ *     3               balance_power
+ *     4               power
+ */
+static const char * const energy_perf_strings[] = {
+       "default",
+       "performance",
+       "balance_performance",
+       "balance_power",
+       "power",
+       NULL
+};
+
+static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
+{
+       s16 epp;
+       int index = -EINVAL;
+
+       epp = intel_pstate_get_epp(cpu_data, 0);
+       if (epp < 0)
+               return epp;
+
+       if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+               /*
+                * Range:
+                *      0x00-0x3F       :       Performance
+                *      0x40-0x7F       :       Balance performance
+                *      0x80-0xBF       :       Balance power
+                *      0xC0-0xFF       :       Power
+                * The EPP is an 8-bit value, but our ranges restrict the
+                * values that can be set. Here only the top two bits are
+                * used effectively.
+                */
+               index = (epp >> 6) + 1;
+       } else if (static_cpu_has(X86_FEATURE_EPB)) {
+               /*
+                * Range:
+                *      0x00-0x03       :       Performance
+                *      0x04-0x07       :       Balance performance
+                *      0x08-0x0B       :       Balance power
+                *      0x0C-0x0F       :       Power
+                * The EPB is a 4-bit value, but our ranges restrict the
+                * values that can be set. Here only the top two bits are
+                * used effectively.
+                */
+               index = (epp >> 2) + 1;
+       }
+
+       return index;
+}
+
+static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
+                                             int pref_index)
+{
+       int epp = -EINVAL;
+       int ret;
+
+       if (!pref_index)
+               epp = cpu_data->epp_default;
+
+       mutex_lock(&intel_pstate_limits_lock);
+
+       if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+               u64 value;
+
+               ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
+               if (ret)
+                       goto return_pref;
+
+               value &= ~GENMASK_ULL(31, 24);
+
+               /*
+                * If epp is not default, convert from index into
+                * energy_perf_strings to epp value, by shifting 6
+                * bits left to use only top two bits in epp.
+                * The resulting epp needs to be shifted left by 24 bits
+                * to the epp position in MSR_HWP_REQUEST.
+                */
+               if (epp == -EINVAL)
+                       epp = (pref_index - 1) << 6;
+
+               value |= (u64)epp << 24;
+               ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
+       } else {
+               if (epp == -EINVAL)
+                       epp = (pref_index - 1) << 2;
+               ret = intel_pstate_set_epb(cpu_data->cpu, epp);
+       }
+return_pref:
+       mutex_unlock(&intel_pstate_limits_lock);
+
+       return ret;
+}
+
+static ssize_t show_energy_performance_available_preferences(
+                               struct cpufreq_policy *policy, char *buf)
+{
+       int i = 0;
+       int ret = 0;
+
+       while (energy_perf_strings[i] != NULL)
+               ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
+
+       ret += sprintf(&buf[ret], "\n");
+
+       return ret;
+}
+
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
+
+static ssize_t store_energy_performance_preference(
+               struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+       struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+       char str_preference[21];
+       int ret, i = 0;
+
+       ret = sscanf(buf, "%20s", str_preference);
+       if (ret != 1)
+               return -EINVAL;
+
+       while (energy_perf_strings[i] != NULL) {
+               if (!strcmp(str_preference, energy_perf_strings[i])) {
+                       intel_pstate_set_energy_pref_index(cpu_data, i);
+                       return count;
+               }
+               ++i;
+       }
+
+       return -EINVAL;
+}
+
+static ssize_t show_energy_performance_preference(
+                               struct cpufreq_policy *policy, char *buf)
+{
+       struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+       int preference;
+
+       preference = intel_pstate_get_energy_pref_index(cpu_data);
+       if (preference < 0)
+               return preference;
+
+       return  sprintf(buf, "%s\n", energy_perf_strings[preference]);
+}
+
+cpufreq_freq_attr_rw(energy_performance_preference);
+
+static struct freq_attr *hwp_cpufreq_attrs[] = {
+       &energy_performance_preference,
+       &energy_performance_available_preferences,
+       NULL,
+};
+
+static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
 {
        int min, hw_min, max, hw_max, cpu, range, adj_range;
        struct perf_limits *perf_limits = limits;
        u64 value, cap;
 
-       for_each_cpu(cpu, cpumask) {
+       for_each_cpu(cpu, policy->cpus) {
                int max_perf_pct, min_perf_pct;
+               struct cpudata *cpu_data = all_cpu_data[cpu];
+               s16 epp;
 
                if (per_cpu_limits)
                        perf_limits = all_cpu_data[cpu]->perf_limits;
@@ -608,6 +841,53 @@ static void intel_pstate_hwp_set(const struct cpumask *cpumask)
 
                value &= ~HWP_MAX_PERF(~0L);
                value |= HWP_MAX_PERF(max);
+
+               if (cpu_data->epp_policy == cpu_data->policy)
+                       goto skip_epp;
+
+               cpu_data->epp_policy = cpu_data->policy;
+
+               if (cpu_data->epp_saved >= 0) {
+                       epp = cpu_data->epp_saved;
+                       cpu_data->epp_saved = -EINVAL;
+                       goto update_epp;
+               }
+
+               if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
+                       epp = intel_pstate_get_epp(cpu_data, value);
+                       cpu_data->epp_powersave = epp;
+                       /* If the EPP read failed, don't try to write */
+                       if (epp < 0)
+                               goto skip_epp;
+
+
+                       epp = 0;
+               } else {
+                       /* Skip setting EPP when the saved value is invalid */
+                       if (cpu_data->epp_powersave < 0)
+                               goto skip_epp;
+
+                       /*
+                        * No need to restore EPP when it is not zero. This
+                        * means:
+                        *  - the policy has not changed
+                        *  - the user has manually changed it
+                        *  - there was an error reading the EPB
+                        */
+                       epp = intel_pstate_get_epp(cpu_data, value);
+                       if (epp)
+                               goto skip_epp;
+
+                       epp = cpu_data->epp_powersave;
+               }
+update_epp:
+               if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+                       value &= ~GENMASK_ULL(31, 24);
+                       value |= (u64)epp << 24;
+               } else {
+                       intel_pstate_set_epb(cpu, epp);
+               }
+skip_epp:
                wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
        }
 }
@@ -615,16 +895,47 @@ static void intel_pstate_hwp_set(const struct cpumask *cpumask)
 static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
 {
        if (hwp_active)
-               intel_pstate_hwp_set(policy->cpus);
+               intel_pstate_hwp_set(policy);
 
        return 0;
 }
 
-static void intel_pstate_hwp_set_online_cpus(void)
+static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
 {
-       get_online_cpus();
-       intel_pstate_hwp_set(cpu_online_mask);
-       put_online_cpus();
+       struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+
+       if (!hwp_active)
+               return 0;
+
+       cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
+
+       return 0;
+}
+
+static int intel_pstate_resume(struct cpufreq_policy *policy)
+{
+       int ret;
+
+       if (!hwp_active)
+               return 0;
+
+       mutex_lock(&intel_pstate_limits_lock);
+
+       all_cpu_data[policy->cpu]->epp_policy = 0;
+
+       ret = intel_pstate_hwp_set_policy(policy);
+
+       mutex_unlock(&intel_pstate_limits_lock);
+
+       return ret;
+}
+
+static void intel_pstate_update_policies(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               cpufreq_update_policy(cpu);
 }
 
 /************************** debugfs begin ************************/
@@ -662,10 +973,6 @@ static void __init intel_pstate_debug_expose_params(void)
        struct dentry *debugfs_parent;
        int i = 0;
 
-       if (hwp_active ||
-           pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load)
-               return;
-
        debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
        if (IS_ERR_OR_NULL(debugfs_parent))
                return;
@@ -751,8 +1058,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
        mutex_unlock(&intel_pstate_limits_lock);
 
-       if (hwp_active)
-               intel_pstate_hwp_set_online_cpus();
+       intel_pstate_update_policies();
 
        return count;
 }
@@ -776,12 +1082,12 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
                                   limits->max_perf_pct);
        limits->max_perf_pct = max(limits->min_perf_pct,
                                   limits->max_perf_pct);
-       limits->max_perf = div_fp(limits->max_perf_pct, 100);
+       limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
 
        mutex_unlock(&intel_pstate_limits_lock);
 
-       if (hwp_active)
-               intel_pstate_hwp_set_online_cpus();
+       intel_pstate_update_policies();
+
        return count;
 }
 
@@ -804,12 +1110,12 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
                                   limits->min_perf_pct);
        limits->min_perf_pct = min(limits->max_perf_pct,
                                   limits->min_perf_pct);
-       limits->min_perf = div_fp(limits->min_perf_pct, 100);
+       limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
 
        mutex_unlock(&intel_pstate_limits_lock);
 
-       if (hwp_active)
-               intel_pstate_hwp_set_online_cpus();
+       intel_pstate_update_policies();
+
        return count;
 }
 
@@ -870,6 +1176,9 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
                wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
 
        wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
+       cpudata->epp_policy = 0;
+       if (cpudata->epp_default == -EINVAL)
+               cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
 }
 
 static int atom_get_min_pstate(void)
@@ -1189,11 +1498,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
         * policy, or by cpu specific default values determined through
         * experimentation.
         */
-       max_perf_adj = fp_toint(max_perf * perf_limits->max_perf);
+       max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf);
        *max = clamp_t(int, max_perf_adj,
                        cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
 
-       min_perf = fp_toint(max_perf * perf_limits->min_perf);
+       min_perf = fp_ext_toint(max_perf * perf_limits->min_perf);
        *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
@@ -1469,6 +1778,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
        ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,        core_params),
        ICPU(INTEL_FAM6_BROADWELL_XEON_D,       core_params),
        ICPU(INTEL_FAM6_XEON_PHI_KNL,           knl_params),
+       ICPU(INTEL_FAM6_XEON_PHI_KNM,           knl_params),
        ICPU(INTEL_FAM6_ATOM_GOLDMONT,          bxt_params),
        {}
 };
@@ -1501,6 +1811,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
                if (per_cpu_limits)
                        cpu->perf_limits = (struct perf_limits *)(cpu + 1);
 
+               cpu->epp_default = -EINVAL;
+               cpu->epp_powersave = -EINVAL;
+               cpu->epp_saved = -EINVAL;
        }
 
        cpu = all_cpu_data[cpunum];
@@ -1557,34 +1870,30 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 
 static void intel_pstate_set_performance_limits(struct perf_limits *limits)
 {
-       mutex_lock(&intel_pstate_limits_lock);
        limits->no_turbo = 0;
        limits->turbo_disabled = 0;
        limits->max_perf_pct = 100;
-       limits->max_perf = int_tofp(1);
+       limits->max_perf = int_ext_tofp(1);
        limits->min_perf_pct = 100;
-       limits->min_perf = int_tofp(1);
+       limits->min_perf = int_ext_tofp(1);
        limits->max_policy_pct = 100;
        limits->max_sysfs_pct = 100;
        limits->min_policy_pct = 0;
        limits->min_sysfs_pct = 0;
-       mutex_unlock(&intel_pstate_limits_lock);
 }
 
 static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
                                            struct perf_limits *limits)
 {
 
-       mutex_lock(&intel_pstate_limits_lock);
-
        limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
                                              policy->cpuinfo.max_freq);
        limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
        if (policy->max == policy->min) {
                limits->min_policy_pct = limits->max_policy_pct;
        } else {
-               limits->min_policy_pct = (policy->min * 100) /
-                                               policy->cpuinfo.max_freq;
+               limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
+                                                     policy->cpuinfo.max_freq);
                limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
                                                 0, 100);
        }
@@ -1602,11 +1911,10 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
        /* Make sure min_perf_pct <= max_perf_pct */
        limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
 
-       limits->min_perf = div_fp(limits->min_perf_pct, 100);
-       limits->max_perf = div_fp(limits->max_perf_pct, 100);
-       limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
-
-       mutex_unlock(&intel_pstate_limits_lock);
+       limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+       limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+       limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
+       limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
 
        pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
                 limits->max_perf_pct, limits->min_perf_pct);
@@ -1636,6 +1944,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
        if (per_cpu_limits)
                perf_limits = cpu->perf_limits;
 
+       mutex_lock(&intel_pstate_limits_lock);
+
        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
                if (!perf_limits) {
                        limits = &performance_limits;
@@ -1670,6 +1980,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
        intel_pstate_hwp_set_policy(policy);
 
+       mutex_unlock(&intel_pstate_limits_lock);
+
        return 0;
 }
 
@@ -1694,7 +2006,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
        pr_debug("CPU %d exiting\n", policy->cpu);
 
        intel_pstate_clear_update_util_hook(policy->cpu);
-       if (!hwp_active)
+       if (hwp_active)
+               intel_pstate_hwp_save_state(policy);
+       else
                intel_cpufreq_stop_cpu(policy);
 }
 
@@ -1764,7 +2078,8 @@ static struct cpufreq_driver intel_pstate = {
        .flags          = CPUFREQ_CONST_LOOPS,
        .verify         = intel_pstate_verify_policy,
        .setpolicy      = intel_pstate_set_policy,
-       .resume         = intel_pstate_hwp_set_policy,
+       .suspend        = intel_pstate_hwp_save_state,
+       .resume         = intel_pstate_resume,
        .get            = intel_pstate_get,
        .init           = intel_pstate_cpu_init,
        .exit           = intel_pstate_cpu_exit,
@@ -1786,8 +2101,12 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
        if (per_cpu_limits)
                perf_limits = cpu->perf_limits;
 
+       mutex_lock(&intel_pstate_limits_lock);
+
        intel_pstate_update_perf_limits(policy, perf_limits);
 
+       mutex_unlock(&intel_pstate_limits_lock);
+
        return 0;
 }
 
@@ -2081,6 +2400,7 @@ static int __init intel_pstate_init(void)
        if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
                copy_cpu_funcs(&core_params.funcs);
                hwp_active++;
+               intel_pstate.attr = hwp_cpufreq_attrs;
                goto hwp_cpu_matched;
        }
 
@@ -2119,7 +2439,10 @@ hwp_cpu_matched:
        if (rc)
                goto out;
 
-       intel_pstate_debug_expose_params();
+       if (intel_pstate_driver == &intel_pstate && !hwp_active &&
+           pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+               intel_pstate_debug_expose_params();
+
        intel_pstate_sysfs_expose_params();
 
        if (hwp_active)