#define EXT_BITS 6
#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
+#define fp_ext_toint(X) ((X) >> EXT_FRAC_BITS)
+#define int_ext_tofp(X) ((int64_t)(X) << EXT_FRAC_BITS)
static inline int32_t mul_fp(int32_t x, int32_t y)
{
* when per cpu controls are enforced
* @acpi_perf_data: Stores ACPI perf information read from _PSS
* @valid_pss_table: Set to true for valid ACPI _PSS entries found
+ * @epp_powersave: Last saved HWP energy performance preference
+ * (EPP) or energy performance bias (EPB),
+ * when policy switched to performance
+ * @epp_policy: Last saved policy used to set EPP/EPB
+ * @epp_default: Power on default HWP energy performance
+ * preference/bias
+ * @epp_saved: Saved EPP/EPB during system suspend or CPU offline
+ * operation
*
* This structure stores per CPU instance data for all CPUs.
*/
bool valid_pss_table;
#endif
unsigned int iowait_boost;
+ s16 epp_powersave;
+ s16 epp_policy;
+ s16 epp_default;
+ s16 epp_saved;
};
static struct cpudata **all_cpu_data;
.no_turbo = 0,
.turbo_disabled = 0,
.max_perf_pct = 100,
- .max_perf = int_tofp(1),
+ .max_perf = int_ext_tofp(1),
.min_perf_pct = 100,
- .min_perf = int_tofp(1),
+ .min_perf = int_ext_tofp(1),
.max_policy_pct = 100,
.max_sysfs_pct = 100,
.min_policy_pct = 0,
.no_turbo = 0,
.turbo_disabled = 0,
.max_perf_pct = 100,
- .max_perf = int_tofp(1),
+ .max_perf = int_ext_tofp(1),
.min_perf_pct = 0,
.min_perf = 0,
.max_policy_pct = 100,
}
#else
-static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
}
-static inline int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}
#endif
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
}
-static void intel_pstate_hwp_set(const struct cpumask *cpumask)
+static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
+{
+ u64 epb;
+ int ret;
+
+ if (!static_cpu_has(X86_FEATURE_EPB))
+ return -ENXIO;
+
+ ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret)
+ return (s16)ret;
+
+ return (s16)(epb & 0x0f);
+}
+
+static s16 intel_pstate_get_epp(struct cpudata *cpu_data, u64 hwp_req_data)
+{
+ s16 epp;
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ /*
+ * When hwp_req_data is 0, means that caller didn't read
+ * MSR_HWP_REQUEST, so need to read and get EPP.
+ */
+ if (!hwp_req_data) {
+ epp = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST,
+ &hwp_req_data);
+ if (epp)
+ return epp;
+ }
+ epp = (hwp_req_data >> 24) & 0xff;
+ } else {
+ /* When there is no EPP present, HWP uses EPB settings */
+ epp = intel_pstate_get_epb(cpu_data);
+ }
+
+ return epp;
+}
+
+static int intel_pstate_set_epb(int cpu, s16 pref)
+{
+ u64 epb;
+ int ret;
+
+ if (!static_cpu_has(X86_FEATURE_EPB))
+ return -ENXIO;
+
+ ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret)
+ return ret;
+
+ epb = (epb & ~0x0f) | pref;
+ wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, epb);
+
+ return 0;
+}
+
+/*
+ * EPP/EPB display strings corresponding to EPP index in the
+ * energy_perf_strings[]
+ * index String
+ *-------------------------------------
+ * 0 default
+ * 1 performance
+ * 2 balance_performance
+ * 3 balance_power
+ * 4 power
+ */
+static const char * const energy_perf_strings[] = {
+ "default",
+ "performance",
+ "balance_performance",
+ "balance_power",
+ "power",
+ NULL
+};
+
+static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data)
+{
+ s16 epp;
+ int index = -EINVAL;
+
+ epp = intel_pstate_get_epp(cpu_data, 0);
+ if (epp < 0)
+ return epp;
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ /*
+ * Range:
+ * 0x00-0x3F : Performance
+ * 0x40-0x7F : Balance performance
+ * 0x80-0xBF : Balance power
+ * 0xC0-0xFF : Power
+ * The EPP is a 8 bit value, but our ranges restrict the
+ * value which can be set. Here only using top two bits
+ * effectively.
+ */
+ index = (epp >> 6) + 1;
+ } else if (static_cpu_has(X86_FEATURE_EPB)) {
+ /*
+ * Range:
+ * 0x00-0x03 : Performance
+ * 0x04-0x07 : Balance performance
+ * 0x08-0x0B : Balance power
+ * 0x0C-0x0F : Power
+ * The EPB is a 4 bit value, but our ranges restrict the
+ * value which can be set. Here only using top two bits
+ * effectively.
+ */
+ index = (epp >> 2) + 1;
+ }
+
+ return index;
+}
+
+static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
+ int pref_index)
+{
+ int epp = -EINVAL;
+ int ret;
+
+ if (!pref_index)
+ epp = cpu_data->epp_default;
+
+ mutex_lock(&intel_pstate_limits_lock);
+
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ u64 value;
+
+ ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
+ if (ret)
+ goto return_pref;
+
+ value &= ~GENMASK_ULL(31, 24);
+
+ /*
+ * If epp is not default, convert from index into
+ * energy_perf_strings to epp value, by shifting 6
+ * bits left to use only top two bits in epp.
+ * The resultant epp need to shifted by 24 bits to
+ * epp position in MSR_HWP_REQUEST.
+ */
+ if (epp == -EINVAL)
+ epp = (pref_index - 1) << 6;
+
+ value |= (u64)epp << 24;
+ ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
+ } else {
+ if (epp == -EINVAL)
+ epp = (pref_index - 1) << 2;
+ ret = intel_pstate_set_epb(cpu_data->cpu, epp);
+ }
+return_pref:
+ mutex_unlock(&intel_pstate_limits_lock);
+
+ return ret;
+}
+
+static ssize_t show_energy_performance_available_preferences(
+ struct cpufreq_policy *policy, char *buf)
+{
+ int i = 0;
+ int ret = 0;
+
+ while (energy_perf_strings[i] != NULL)
+ ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
+
+ ret += sprintf(&buf[ret], "\n");
+
+ return ret;
+}
+
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
+
+static ssize_t store_energy_performance_preference(
+ struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+ char str_preference[21];
+ int ret, i = 0;
+
+ ret = sscanf(buf, "%20s", str_preference);
+ if (ret != 1)
+ return -EINVAL;
+
+ while (energy_perf_strings[i] != NULL) {
+ if (!strcmp(str_preference, energy_perf_strings[i])) {
+ intel_pstate_set_energy_pref_index(cpu_data, i);
+ return count;
+ }
+ ++i;
+ }
+
+ return -EINVAL;
+}
+
+static ssize_t show_energy_performance_preference(
+ struct cpufreq_policy *policy, char *buf)
+{
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+ int preference;
+
+ preference = intel_pstate_get_energy_pref_index(cpu_data);
+ if (preference < 0)
+ return preference;
+
+ return sprintf(buf, "%s\n", energy_perf_strings[preference]);
+}
+
+cpufreq_freq_attr_rw(energy_performance_preference);
+
+static struct freq_attr *hwp_cpufreq_attrs[] = {
+ &energy_performance_preference,
+ &energy_performance_available_preferences,
+ NULL,
+};
+
+static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
{
int min, hw_min, max, hw_max, cpu, range, adj_range;
struct perf_limits *perf_limits = limits;
u64 value, cap;
- for_each_cpu(cpu, cpumask) {
+ for_each_cpu(cpu, policy->cpus) {
int max_perf_pct, min_perf_pct;
+ struct cpudata *cpu_data = all_cpu_data[cpu];
+ s16 epp;
if (per_cpu_limits)
perf_limits = all_cpu_data[cpu]->perf_limits;
value &= ~HWP_MAX_PERF(~0L);
value |= HWP_MAX_PERF(max);
+
+ if (cpu_data->epp_policy == cpu_data->policy)
+ goto skip_epp;
+
+ cpu_data->epp_policy = cpu_data->policy;
+
+ if (cpu_data->epp_saved >= 0) {
+ epp = cpu_data->epp_saved;
+ cpu_data->epp_saved = -EINVAL;
+ goto update_epp;
+ }
+
+ if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ epp = intel_pstate_get_epp(cpu_data, value);
+ cpu_data->epp_powersave = epp;
+ /* If EPP read was failed, then don't try to write */
+ if (epp < 0)
+ goto skip_epp;
+
+
+ epp = 0;
+ } else {
+ /* skip setting EPP, when saved value is invalid */
+ if (cpu_data->epp_powersave < 0)
+ goto skip_epp;
+
+ /*
+ * No need to restore EPP when it is not zero. This
+ * means:
+ * - Policy is not changed
+ * - user has manually changed
+ * - Error reading EPB
+ */
+ epp = intel_pstate_get_epp(cpu_data, value);
+ if (epp)
+ goto skip_epp;
+
+ epp = cpu_data->epp_powersave;
+ }
+update_epp:
+ if (static_cpu_has(X86_FEATURE_HWP_EPP)) {
+ value &= ~GENMASK_ULL(31, 24);
+ value |= (u64)epp << 24;
+ } else {
+ intel_pstate_set_epb(cpu, epp);
+ }
+skip_epp:
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
}
static int intel_pstate_hwp_set_policy(struct cpufreq_policy *policy)
{
if (hwp_active)
- intel_pstate_hwp_set(policy->cpus);
+ intel_pstate_hwp_set(policy);
return 0;
}
-static void intel_pstate_hwp_set_online_cpus(void)
+static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
{
- get_online_cpus();
- intel_pstate_hwp_set(cpu_online_mask);
- put_online_cpus();
+ struct cpudata *cpu_data = all_cpu_data[policy->cpu];
+
+ if (!hwp_active)
+ return 0;
+
+ cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
+
+ return 0;
+}
+
+static int intel_pstate_resume(struct cpufreq_policy *policy)
+{
+ int ret;
+
+ if (!hwp_active)
+ return 0;
+
+ mutex_lock(&intel_pstate_limits_lock);
+
+ all_cpu_data[policy->cpu]->epp_policy = 0;
+
+ ret = intel_pstate_hwp_set_policy(policy);
+
+ mutex_unlock(&intel_pstate_limits_lock);
+
+ return ret;
+}
+
+static void intel_pstate_update_policies(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ cpufreq_update_policy(cpu);
}
/************************** debugfs begin ************************/
struct dentry *debugfs_parent;
int i = 0;
- if (hwp_active ||
- pstate_funcs.get_target_pstate == get_target_pstate_use_cpu_load)
- return;
-
debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
if (IS_ERR_OR_NULL(debugfs_parent))
return;
mutex_unlock(&intel_pstate_limits_lock);
- if (hwp_active)
- intel_pstate_hwp_set_online_cpus();
+ intel_pstate_update_policies();
return count;
}
limits->max_perf_pct);
limits->max_perf_pct = max(limits->min_perf_pct,
limits->max_perf_pct);
- limits->max_perf = div_fp(limits->max_perf_pct, 100);
+ limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
mutex_unlock(&intel_pstate_limits_lock);
- if (hwp_active)
- intel_pstate_hwp_set_online_cpus();
+ intel_pstate_update_policies();
+
return count;
}
limits->min_perf_pct);
limits->min_perf_pct = min(limits->max_perf_pct,
limits->min_perf_pct);
- limits->min_perf = div_fp(limits->min_perf_pct, 100);
+ limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
mutex_unlock(&intel_pstate_limits_lock);
- if (hwp_active)
- intel_pstate_hwp_set_online_cpus();
+ intel_pstate_update_policies();
+
return count;
}
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
+ cpudata->epp_policy = 0;
+ if (cpudata->epp_default == -EINVAL)
+ cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
}
static int atom_get_min_pstate(void)
* policy, or by cpu specific default values determined through
* experimentation.
*/
- max_perf_adj = fp_toint(max_perf * perf_limits->max_perf);
+ max_perf_adj = fp_ext_toint(max_perf * perf_limits->max_perf);
*max = clamp_t(int, max_perf_adj,
cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
- min_perf = fp_toint(max_perf * perf_limits->min_perf);
+ min_perf = fp_ext_toint(max_perf * perf_limits->min_perf);
*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
}
ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params),
ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params),
+ ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_params),
ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params),
{}
};
if (per_cpu_limits)
cpu->perf_limits = (struct perf_limits *)(cpu + 1);
+ cpu->epp_default = -EINVAL;
+ cpu->epp_powersave = -EINVAL;
+ cpu->epp_saved = -EINVAL;
}
cpu = all_cpu_data[cpunum];
static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
- mutex_lock(&intel_pstate_limits_lock);
limits->no_turbo = 0;
limits->turbo_disabled = 0;
limits->max_perf_pct = 100;
- limits->max_perf = int_tofp(1);
+ limits->max_perf = int_ext_tofp(1);
limits->min_perf_pct = 100;
- limits->min_perf = int_tofp(1);
+ limits->min_perf = int_ext_tofp(1);
limits->max_policy_pct = 100;
limits->max_sysfs_pct = 100;
limits->min_policy_pct = 0;
limits->min_sysfs_pct = 0;
- mutex_unlock(&intel_pstate_limits_lock);
}
static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
struct perf_limits *limits)
{
- mutex_lock(&intel_pstate_limits_lock);
-
limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
policy->cpuinfo.max_freq);
limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
if (policy->max == policy->min) {
limits->min_policy_pct = limits->max_policy_pct;
} else {
- limits->min_policy_pct = (policy->min * 100) /
- policy->cpuinfo.max_freq;
+ limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100,
+ policy->cpuinfo.max_freq);
limits->min_policy_pct = clamp_t(int, limits->min_policy_pct,
0, 100);
}
/* Make sure min_perf_pct <= max_perf_pct */
limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
- limits->min_perf = div_fp(limits->min_perf_pct, 100);
- limits->max_perf = div_fp(limits->max_perf_pct, 100);
- limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
-
- mutex_unlock(&intel_pstate_limits_lock);
+ limits->min_perf = div_ext_fp(limits->min_perf_pct, 100);
+ limits->max_perf = div_ext_fp(limits->max_perf_pct, 100);
+ limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
+ limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
limits->max_perf_pct, limits->min_perf_pct);
if (per_cpu_limits)
perf_limits = cpu->perf_limits;
+ mutex_lock(&intel_pstate_limits_lock);
+
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
if (!perf_limits) {
limits = &performance_limits;
intel_pstate_hwp_set_policy(policy);
+ mutex_unlock(&intel_pstate_limits_lock);
+
return 0;
}
pr_debug("CPU %d exiting\n", policy->cpu);
intel_pstate_clear_update_util_hook(policy->cpu);
- if (!hwp_active)
+ if (hwp_active)
+ intel_pstate_hwp_save_state(policy);
+ else
intel_cpufreq_stop_cpu(policy);
}
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
- .resume = intel_pstate_hwp_set_policy,
+ .suspend = intel_pstate_hwp_save_state,
+ .resume = intel_pstate_resume,
.get = intel_pstate_get,
.init = intel_pstate_cpu_init,
.exit = intel_pstate_cpu_exit,
if (per_cpu_limits)
perf_limits = cpu->perf_limits;
+ mutex_lock(&intel_pstate_limits_lock);
+
intel_pstate_update_perf_limits(policy, perf_limits);
+ mutex_unlock(&intel_pstate_limits_lock);
+
return 0;
}
if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
copy_cpu_funcs(&core_params.funcs);
hwp_active++;
+ intel_pstate.attr = hwp_cpufreq_attrs;
goto hwp_cpu_matched;
}
if (rc)
goto out;
- intel_pstate_debug_expose_params();
+ if (intel_pstate_driver == &intel_pstate && !hwp_active &&
+ pstate_funcs.get_target_pstate != get_target_pstate_use_cpu_load)
+ intel_pstate_debug_expose_params();
+
intel_pstate_sysfs_expose_params();
if (hwp_active)