* when per cpu controls are enforced
* @acpi_perf_data: Stores ACPI perf information read from _PSS
* @valid_pss_table: Set to true for valid ACPI _PSS entries found
+ * @epp_powersave: Last saved HWP energy performance preference
+ * (EPP) or energy performance bias (EPB),
+ * when policy switched to performance
+ * @epp_policy: Last saved policy used to set EPP/EPB
+ * @epp_default: Power on default HWP energy performance
+ * preference/bias
+ * @epp_saved: Saved EPP/EPB during system suspend or CPU offline
+ * operation
*
* This structure stores per CPU instance data for all CPUs.
*/
bool valid_pss_table;
#endif
unsigned int iowait_boost;
+ s16 epp_powersave;
+ s16 epp_policy;
+ s16 epp_default;
+ s16 epp_saved;
};
static struct cpudata **all_cpu_data;
}
#else
-static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
}
-static inline int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}
#endif
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
+static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
+{
+ u64 epb;
+ int ret;
+
+ if (!static_cpu_has(X86_FEATURE_EPB))
+ return -ENXIO;
+
+ ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret)
+ return (s16)ret;
+
+ return (s16)(epb & 0x0f);
+}
+
+static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
+{
+ s16 epp;
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ /*
+ * When hwp_req_data is 0, means that caller didn't read
+ * MSR_HWP_REQUEST, so need to read and get EPP.
+ */
+ if (!hwp_req_data) {
+ epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
+ &hwp_req_data);
+ if (epp)
+ return epp;
+ }
+ epp = (hwp_req_data >> 24) & 0xff;
+ } else {
+ /* When there is no EPP present, HWP uses EPB settings */
+ epp = intel_pstate_get_epb(cpu_data);
+ }
+
+ return epp;
+}
+
+static int intel_pstate_set_epb(int cpu, s16 pref)
+{
+ u64 epb;
+ int ret;
+
+ if (!static_cpu_has(X86_FEATURE_EPB))
+ return -ENXIO;
+
+ ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret)
+ return ret;
+
+ epb = (epb & ~0x0f) | pref;
+ wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
+
+ return 0;
+}
+
+/*
+ * EPP/EPB display strings corresponding to EPP index in the
+ * energy_perf_strings[]
+ * index String
+ *-------------------------------------
+ * 0 default
+ * 1 performance
+ * 2 balance_performance
+ * 3 balance_power
+ * 4 power
+ */
+static const char * const energy_perf_strings[] = {
+ "default",
+ "performance",
+ "balance_performance",
+ "balance_power",
+ "power",
+ NULL
+};
+
+static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
+{
+ s16 epp;
+ int index = -EINVAL;
+
+ epp = intel_pstate_get_epp(cpu_data, 0);
+ if (epp < 0)
+ return epp;
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ /*
+ * Range:
+ * 0x00-0x3F : Performance
+ * 0x40-0x7F : Balance performance
+ * 0x80-0xBF : Balance power
+ * 0xC0-0xFF : Power
+ * The EPP is a 8 bit value, but our ranges restrict the
+ * value which can be set. Here only using top two bits
+ * effectively.
+ */
+ index = (epp >> 6) + 1;
+ } else if (static_cpu_has(X86_FEATURE_EPB)) {
+ /*
+ * Range:
+ * 0x00-0x03 : Performance
+ * 0x04-0x07 : Balance performance
+ * 0x08-0x0B : Balance power
+ * 0x0C-0x0F : Power
+ * The EPB is a 4 bit value, but our ranges restrict the
+ * value which can be set. Here only using top two bits
+ * effectively.
+ */
+ index = (epp >> 2) + 1;
+ }
+
+ return index;
+}
+
+static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
+ int pref_index)
+{
+ int epp = -EINVAL;
+ int ret;
+
+ if (!pref_index)
+ epp = cpu_data->epp_default;
+
+ mutex_lock(&intel_pstate_limits_lock);
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ u64 value;
+
+ ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
+ if (ret)
+ goto return_pref;
+
+ value &= ~GENMASK_ULL(31, 24);
+
+ /*
+ * If epp is not default, convert from index into
+ * energy_perf_strings to epp value, by shifting 6
+ * bits left to use only top two bits in epp.
+ * The resultant epp need to shifted by 24 bits to
+ * epp position in MSR_HWP_REQUEST.
+ */
+ if (epp == -EINVAL)
+ epp = (pref_index - 1) << 6;
+
+ value |= (u64)epp << 24;
+ ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
+ } else {
+ if (epp == -EINVAL)
+ epp = (pref_index - 1) << 2;
+ ret = intel_pstate_set_epb(cpu_data->cpu, epp);
+ }
+return_pref:
+ mutex_unlock(&intel_pstate_limits_lock);
+
+ return ret;
+}
+
+static ssize_t show_energy_performance_available_preferences(
+ struct cpufreq_policy *policy, char *buf)
+{
+ int i = 0;
+ int ret = 0;
+
+ while (energy_perf_strings[i] != NULL)
+ ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
+
+ ret += sprintf(&buf[ret], "\n");
+
+ return ret;
+}
+
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
+
+static ssize_t store_energy_performance_preference(
+ struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+ char str_preference[21];
+ int ret, i = 0;
+
+ ret = sscanf(buf, "%20s", str_preference);
+ if (ret != 1)
+ return -EINVAL;
+
+ while (energy_perf_strings[i] != NULL) {
+ if (!strcmp(str_preference, energy_perf_strings[i])) {
+ intel_pstate_set_energy_pref_index(cpu_data, i);
+ return count;
+ }
+ ++i;
+ }
+
+ return -EINVAL;
+}
+
+static ssize_t show_energy_performance_preference(
+ struct cpufreq_policy *policy, char *buf)
+{
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+ int preference;
+
+ preference = intel_pstate_get_energy_pref_index(cpu_data);
+ if (preference < 0)
+ return preference;
+
+ return sprintf(buf, "%s\n", energy_perf_strings[preference]);
+}
+
+cpufreq_freq_attr_rw(energy_performance_preference);
+
+static struct freq_attr *hwp_cpufreq_attrs[] = {
+ &energy_performance_preference,
+ &energy_performance_available_preferences,
+ NULL,
+};
+
static void intel_pstate_hwp_set(const struct cpumask *cpumask)
{
int min, hw_min, max, hw_max, cpu, range, adj_range;
for_each_cpu(cpu, cpumask) {
int max_perf_pct, min_perf_pct;
+ struct cpudata *cpu_data = all_cpu_data[cpu];
+ s16 epp;
if (per_cpu_limits)
perf_limits = all_cpu_data[cpu]->perf_limits;
value &= ~HWP_MAX_PERF(~0L);
value |= HWP_MAX_PERF(max);
+
+ if (cpu_data->epp_policy == cpu_data->policy)
+ goto skip_epp;
+
+ cpu_data->epp_policy = cpu_data->policy;
+
+ if (cpu_data->epp_saved >= 0) {
+ epp = cpu_data->epp_saved;
+ cpu_data->epp_saved = -EINVAL;
+ goto update_epp;
+ }
+
+ if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ epp = intel_pstate_get_epp(cpu_data, value);
+ cpu_data->epp_powersave = epp;
+ /* If EPP read was failed, then don't try to write */
+ if (epp < 0)
+ goto skip_epp;
+
+
+ epp = 0;
+ } else {
+ /* skip setting EPP, when saved value is invalid */
+ if (cpu_data->epp_powersave < 0)
+ goto skip_epp;
+
+ /*
+ * No need to restore EPP when it is not zero. This
+ * means:
+ * - Policy is not changed
+ * - user has manually changed
+ * - Error reading EPB
+ */
+ epp = intel_pstate_get_epp(cpu_data, value);
+ if (epp)
+ goto skip_epp;
+
+ epp = cpu_data->epp_powersave;
+ }
+update_epp:
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ value &= ~GENMASK_ULL(31, 24);
+ value |= (u64)epp << 24;
+ } else {
+ intel_pstate_set_epb(cpu, epp);
+ }
+skip_epp:
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
}
return 0;
}
+static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
+{
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+
+ if (!hwp_active)
+ return 0;
+
+ cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
+
+ return 0;
+}
+
+static int intel_pstate_resume(struct cpufreq_policy *policy)
+{
+ if (!hwp_active)
+ return 0;
+
+ all_cpu_data[policy->cpu]->epp_policy = 0;
+
+ return intel_pstate_hwp_set_policy(policy);
+}
+
static void intel_pstate_hwp_set_online_cpus(void)
{
get_online_cpus();
limits->no_turbo = clamp_t(int, input, 0, 1);
- mutex_unlock(&intel_pstate_limits_lock);
-
if (hwp_active)
intel_pstate_hwp_set_online_cpus();
+ mutex_unlock(&intel_pstate_limits_lock);
+
return count;
}
limits->max_perf_pct);
limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
- mutex_unlock(&intel_pstate_limits_lock);
-
if (hwp_active)
intel_pstate_hwp_set_online_cpus();
+
+ mutex_unlock(&intel_pstate_limits_lock);
+
return count;
}
limits->min_perf_pct);
limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
- mutex_unlock(&intel_pstate_limits_lock);
-
if (hwp_active)
intel_pstate_hwp_set_online_cpus();
+
+ mutex_unlock(&intel_pstate_limits_lock);
+
return count;
}
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
+ cpudata->epp_policy = 0;
+ if (cpudata->epp_default == -EINVAL)
+ cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
}
static int atom_get_min_pstate(void)
ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params),
ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params),
+ ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_params),
ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params),
{}
};
if (per_cpu_limits)
cpu->perf_limits = (struct perf_limits *)(cpu + 1);
+ cpu->epp_default = -EINVAL;
+ cpu->epp_powersave = -EINVAL;
+ cpu->epp_saved = -EINVAL;
}
cpu = all_cpu_data[cpunum];
static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
- mutex_lock(&intel_pstate_limits_lock);
limits->no_turbo = 0;
limits->turbo_disabled = 0;
limits->max_perf_pct = 100;
limits->max_sysfs_pct = 100;
limits->min_policy_pct = 0;
limits->min_sysfs_pct = 0;
- mutex_unlock(&intel_pstate_limits_lock);
}
static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
struct perf_limits *limits)
{
- mutex_lock(&intel_pstate_limits_lock);
-
limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
policy->cpuinfo.max_freq);
limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
- mutex_unlock(&intel_pstate_limits_lock);
-
pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
limits->max_perf_pct, limits->min_perf_pct);
}
if (per_cpu_limits)
perf_limits = cpu->perf_limits;
+ mutex_lock(&intel_pstate_limits_lock);
+
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
if (!perf_limits) {
limits = &performance_limits;
intel_pstate_hwp_set_policy(policy);
+ mutex_unlock(&intel_pstate_limits_lock);
+
return 0;
}
pr_debug("CPU %d exiting\n", policy->cpu);
intel_pstate_clear_update_util_hook(policy->cpu);
- if (!hwp_active)
+ if (hwp_active)
+ intel_pstate_hwp_save_state(policy);
+ else
intel_cpufreq_stop_cpu(policy);
}
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
- .resume = intel_pstate_hwp_set_policy,
+ .suspend = intel_pstate_hwp_save_state,
+ .resume = intel_pstate_resume,
.get = intel_pstate_get,
.init = intel_pstate_cpu_init,
.exit = intel_pstate_cpu_exit,
if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
copy_cpu_funcs(&core_params.funcs);
hwp_active++;
+ intel_pstate.attr = hwp_cpufreq_attrs;
goto hwp_cpu_matched;
}