// SPDX-License-Identifier: GPL-2.0
/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "sched.h"

#include <linux/sched/cpufreq.h>
#include <trace/events/power.h>
struct sugov_tunables {
	struct gov_attr_set	attr_set;
	unsigned int		rate_limit_us;
};
struct sugov_policy {
	struct cpufreq_policy	*policy;

	struct sugov_tunables	*tunables;
	struct list_head	tunables_hook;

	raw_spinlock_t		update_lock;	/* For shared policies */
	u64			last_freq_update_time;
	s64			freq_update_delay_ns;
	unsigned int		next_freq;
	unsigned int		cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used: */
	struct irq_work		irq_work;
	struct kthread_work	work;
	struct mutex		work_lock;
	struct kthread_worker	worker;
	struct task_struct	*thread;
	bool			work_in_progress;

	bool			need_freq_update;
};
struct sugov_cpu {
	struct update_util_data	update_util;
	struct sugov_policy	*sg_policy;
	unsigned int		cpu;

	bool			iowait_boost_pending;
	unsigned int		iowait_boost;
	unsigned int		iowait_boost_max;
	u64			last_update;

	unsigned long		bw_dl;
	unsigned long		max;

	/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long		saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-CPU data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not for the fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * For the slow switching platforms, the kthread is always scheduled on
	 * the right set of CPUs and any CPU can find the next frequency and
	 * schedule the kthread.
	 */
	if (sg_policy->policy->fast_switch_enabled &&
	    !cpufreq_this_cpu_can_update(sg_policy->policy))
		return false;

	if (unlikely(sg_policy->need_freq_update))
		return true;

	delta_ns = time - sg_policy->last_freq_update_time;

	return delta_ns >= sg_policy->freq_update_delay_ns;
}
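/*
 * Illustrative numbers (not from this file): with rate_limit_us = 500, the
 * governor sets freq_update_delay_ns = 500 * NSEC_PER_USEC = 500000 ns, so
 * at most one frequency update is committed per 0.5 ms for the policy,
 * unless need_freq_update forces an immediate re-evaluation.
 */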
static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				   unsigned int next_freq)
{
	if (sg_policy->next_freq == next_freq)
		return false;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	return true;
}
static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time,
			      unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	if (!sugov_update_next_freq(sg_policy, time, next_freq))
		return;

	next_freq = cpufreq_driver_fast_switch(policy, next_freq);
	if (!next_freq)
		return;

	policy->cur = next_freq;
	trace_cpu_frequency(next_freq, smp_processor_id());
}
static void sugov_deferred_update(struct sugov_policy *sg_policy, u64 time,
				  unsigned int next_freq)
{
	if (!sugov_update_next_freq(sg_policy, time, next_freq))
		return;

	if (!sg_policy->work_in_progress) {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}
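/*
 * Note on the slow path above: when fast switching is not available, the
 * actual frequency change is deferred. The irq_work queued here runs
 * sugov_irq_work(), which queues sugov_work() on the governor kthread, and
 * that finally calls __cpufreq_driver_target() in process context.
 */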
/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal to or greater than the
 * raw next_freq (as calculated above) is returned, subject to policy min/max
 * and cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

	freq = map_util_freq(util, freq, max);

	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
		return sg_policy->next_freq;

	sg_policy->need_freq_update = false;
	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}
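/*
 * Worked example for get_next_freq() (illustrative numbers, not from any
 * particular platform): with frequency-invariant utilization, C = 1.25,
 * policy->cpuinfo.max_freq = 2000000 kHz, util = 512 and max = 1024, the raw
 * frequency is 1.25 * 2000000 * 512 / 1024 = 1250000 kHz, which
 * cpufreq_driver_resolve_freq() then maps to the lowest supported OPP at or
 * above that value, subject to the policy's min/max limits.
 */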
/*
 * This function computes an effective utilization for the given CPU, to be
 * used for frequency selection given the linear relation: f = u * f_max.
 *
 * The scheduler tracks the following metrics:
 *
 *   cpu_util_{cfs,rt,dl,irq}()
 *   cpu_bw_dl()
 *
 * Where the cfs,rt and dl util numbers are tracked with the same metric and
 * synchronized windows and are thus directly comparable.
 *
 * The cfs,rt,dl utilization are the running times measured with rq->clock_task
 * which excludes things like IRQ and steal-time. These latter are then accrued
 * in the irq utilization.
 *
 * The DL bandwidth number otoh is not a measured metric but a value computed
 * based on the task model parameters and gives the minimal utilization
 * required to meet deadlines.
 */
unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
				  unsigned long max, enum schedutil_type type)
{
	unsigned long dl_util, util, irq;
	struct rq *rq = cpu_rq(cpu);

	if (type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt))
		return max;

	/*
	 * Early check to see if IRQ/steal time saturates the CPU, can be
	 * because of inaccuracies in how we track these -- see
	 * update_irq_load_avg().
	 */
	irq = cpu_util_irq(rq);
	if (unlikely(irq >= max))
		return max;

	/*
	 * Because the time spent on RT/DL tasks is visible as 'lost' time to
	 * CFS tasks and we use the same metric to track the effective
	 * utilization (PELT windows are synchronized) we can directly add them
	 * to obtain the CPU's actual utilization.
	 */
	util = util_cfs;
	util += cpu_util_rt(rq);

	dl_util = cpu_util_dl(rq);

	/*
	 * For frequency selection we do not make cpu_util_dl() a permanent part
	 * of this sum because we want to use cpu_bw_dl() later on, but we need
	 * to check if the CFS+RT+DL sum is saturated (ie. no idle time) such
	 * that we select f_max when there is no idle time.
	 *
	 * NOTE: numerical errors or stop class might cause us to not quite hit
	 * saturation when we should -- something for later.
	 */
	if (util + dl_util >= max)
		return max;

	/*
	 * OTOH, for energy computation we need the estimated running time, so
	 * include util_dl and ignore dl_bw.
	 */
	if (type == ENERGY_UTIL)
		util += dl_util;

	/*
	 * There is still idle time; further improve the number by using the
	 * irq metric. Because IRQ/steal time is hidden from the task clock we
	 * need to scale the task numbers:
	 *
	 *              max - irq
	 *   U' = irq + --------- * U
	 *                 max
	 */
	util = scale_irq_capacity(util, irq, max);
	util += irq;

	/*
	 * Bandwidth required by DEADLINE must always be granted while, for
	 * FAIR and RT, we use blocked utilization of IDLE CPUs as a mechanism
	 * to gracefully reduce the frequency when no tasks show up for longer
	 * periods of time.
	 *
	 * Ideally we would like to set bw_dl as min/guaranteed freq and util +
	 * bw_dl as requested freq. However, cpufreq is not yet ready for such
	 * an interface. So, we only do the latter for now.
	 */
	if (type == FREQUENCY_UTIL)
		util += cpu_bw_dl(rq);

	return min(max, util);
}
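/*
 * Illustrative numbers for the IRQ scaling step above (not from any
 * particular platform): with max = 1024, irq = 128 and a task utilization
 * U = 400, U' = 128 + (1024 - 128) / 1024 * 400 = 128 + 350 = 478, which is
 * what scale_irq_capacity() followed by "util += irq" computes.
 */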
static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);
	unsigned long util = cpu_util_cfs(rq);
	unsigned long max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);

	sg_cpu->max = max;
	sg_cpu->bw_dl = cpu_bw_dl(rq);

	return schedutil_freq_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL);
}
/**
 * sugov_iowait_reset() - Reset the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @set_iowait_boost: true if an IO boost has been requested
 *
 * The IO wait boost of a task is disabled after a tick since the last update
 * of a CPU. If a new IO wait boost is requested after more than a tick, then
 * we enable the boost starting from the minimum frequency, which improves
 * energy efficiency by ignoring sporadic wakeups from IO.
 */
static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
			       bool set_iowait_boost)
{
	s64 delta_ns = time - sg_cpu->last_update;

	/* Reset boost only if a tick has elapsed since last request */
	if (delta_ns <= TICK_NSEC)
		return false;

	sg_cpu->iowait_boost = set_iowait_boost
			       ? sg_cpu->sg_policy->policy->min : 0;
	sg_cpu->iowait_boost_pending = set_iowait_boost;

	return true;
}
/**
 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from the utilization of the minimum
 * OPP to the utilization of the maximum OPP.
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
			       unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sg_cpu->iowait_boost &&
	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure boost doubles only one time at each request */
	if (sg_cpu->iowait_boost_pending)
		return;
	sg_cpu->iowait_boost_pending = true;

	/* Double the boost at each request */
	if (sg_cpu->iowait_boost) {
		sg_cpu->iowait_boost <<= 1;
		if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
			sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
}
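/*
 * Example of the doubling behaviour (illustrative values): with
 * policy->min = 400000 and iowait_boost_max = 3200000, IO wakeups arriving
 * at least once per tick raise the boost through 400000, 800000, 1600000,
 * 3200000 and then stay capped, while a gap of more than a tick resets the
 * sequence back to 400000.
 */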
/**
 * sugov_iowait_apply() - Apply the IO boost to a CPU.
 * @sg_cpu: the sugov data for the cpu to boost
 * @time: the update time from the caller
 * @util: the utilization to (eventually) boost
 * @max: the maximum value the utilization can be boosted to
 *
 * A CPU running a task which has woken up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * sugov_iowait_boost(), and it is instead decreased by this function,
 * each time an increase has not been requested (!iowait_boost_pending).
 *
 * A CPU which also appears to have been idle for at least one tick has its
 * IO boost utilization reset as well.
 *
 * This mechanism is designed to boost tasks which do IO frequently, while
 * being more conservative on tasks which do only sporadic IO operations.
 */
static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
			       unsigned long *util, unsigned long *max)
{
	unsigned int boost_util, boost_max;

	/* No boost currently required */
	if (!sg_cpu->iowait_boost)
		return;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sugov_iowait_reset(sg_cpu, time, false))
		return;

	/*
	 * An IO waiting task has just woken up:
	 * allow to further double the boost value
	 */
	if (sg_cpu->iowait_boost_pending) {
		sg_cpu->iowait_boost_pending = false;
	} else {
		/*
		 * Otherwise: reduce the boost value and disable it when we
		 * reach the minimum.
		 */
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
			sg_cpu->iowait_boost = 0;
			return;
		}
	}

	/*
	 * Apply the current boost value: a CPU is boosted only if its current
	 * utilization is smaller than the current IO boost level.
	 */
	boost_util = sg_cpu->iowait_boost;
	boost_max = sg_cpu->iowait_boost_max;
	if (*util * boost_max < *max * boost_util) {
		*util = boost_util;
		*max = boost_max;
	}
}
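/*
 * Illustrative numbers for the comparison above: with *util = 200,
 * *max = 1024 (ratio ~0.195), boost_util = 800000 and boost_max = 3200000
 * (ratio 0.25), 200 * 3200000 < 1024 * 800000 holds, so (*util, *max)
 * becomes (800000, 3200000). The cross-multiplication compares util/max
 * against boost_util/boost_max without doing a division.
 */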
#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */
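/*
 * Note on sugov_cpu_is_busy(): with NO_HZ, tick_nohz_get_idle_calls_cpu()
 * returns a counter that (roughly) advances each time the CPU goes idle, so
 * an unchanged counter between two frequency updates means the CPU has not
 * been idle in between; without NO_HZ the heuristic is disabled.
 */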
/*
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
{
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
		sg_policy->need_freq_update = true;
}
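/*
 * For example, if a new DEADLINE task raises cpu_bw_dl() above the bw_dl
 * snapshot taken in sugov_get_util(), the next update bypasses the
 * rate_limit_us throttling and re-evaluates the frequency immediately.
 */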
static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned long util, max;
	unsigned int next_f;
	bool busy;

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu, sg_policy);

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	busy = sugov_cpu_is_busy(sg_cpu);

	util = sugov_get_util(sg_cpu);
	max = sg_cpu->max;
	sugov_iowait_apply(sg_cpu, time, &util, &max);
	next_f = get_next_freq(sg_policy, util, max);
	/*
	 * Do not reduce the frequency if the CPU has not been idle
	 * recently, as the reduction is likely to be premature then.
	 */
	if (busy && next_f < sg_policy->next_freq) {
		next_f = sg_policy->next_freq;

		/* Reset cached freq as next_freq has changed */
		sg_policy->cached_raw_freq = 0;
	}

	/*
	 * This code runs under rq->lock for the target CPU, so it won't run
	 * concurrently on two different CPUs for the same target and it is not
	 * necessary to acquire the lock in the fast switch case.
	 */
	if (sg_policy->policy->fast_switch_enabled) {
		sugov_fast_switch(sg_policy, time, next_f);
	} else {
		raw_spin_lock(&sg_policy->update_lock);
		sugov_deferred_update(sg_policy, time, next_f);
		raw_spin_unlock(&sg_policy->update_lock);
	}
}
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max = 1;
	unsigned int j;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long j_util, j_max;

		j_util = sugov_get_util(j_sg_cpu);
		j_max = j_sg_cpu->max;
		sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);

		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}
	}

	return get_next_freq(sg_policy, util, max);
}
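/*
 * Illustrative example for the loop above: on a two-CPU policy where CPU0
 * reports util/max = 300/1024 (~0.29) and CPU1 reports 500/1280 (~0.39),
 * the cross-multiplied comparison 500 * 1024 > 1280 * 300 selects CPU1, so
 * get_next_freq() is called with util = 500, max = 1280.
 */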
static void
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu, sg_policy);

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu, time);

		if (sg_policy->policy->fast_switch_enabled)
			sugov_fast_switch(sg_policy, time, next_f);
		else
			sugov_deferred_update(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}
static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
	unsigned int freq;
	unsigned long flags;

	/*
	 * Hold sg_policy->update_lock briefly to handle the following race:
	 * if sg_policy->next_freq is read here and then updated by
	 * sugov_deferred_update() just before work_in_progress is set to false
	 * here, we may miss queueing the new update.
	 *
	 * Note: If a work was queued after the update_lock is released,
	 * sugov_work() will just be called again by kthread_work code; and the
	 * request will be processed before the sugov thread sleeps.
	 */
	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
	freq = sg_policy->next_freq;
	sg_policy->work_in_progress = false;
	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);
}
static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}
/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}
static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}
static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}
static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};
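/*
 * Usage note (assuming the typical sysfs layout with per-policy tunables):
 * the attribute above is exposed as
 * /sys/devices/system/cpu/cpufreq/policy<N>/schedutil/rate_limit_us, and
 * writing e.g. "2000" to it updates freq_update_delay_ns to 2000000 ns for
 * every policy attached to that tunables set.
 */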
/********************** cpufreq governor interface *********************/

struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}
static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size		= sizeof(struct sched_attr),
		.sched_policy	= SCHED_DEADLINE,
		.sched_flags	= SCHED_FLAG_SUGOV,
		.sched_nice	= 0,
		.sched_priority	= 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime	= 1000000,
		.sched_deadline	= 10000000,
		.sched_period	= 10000000,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;
	kthread_bind_mask(thread, policy->related_cpus);
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}
static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}
static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}

	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}
static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

stop_kthread:
	sugov_kthread_stop(sg_policy);
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}
static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}
static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = 0;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;
	sg_policy->cached_raw_freq = 0;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
	}

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
					     policy_is_shared(policy) ?
							sugov_update_shared :
							sugov_update_single);
	}
	return 0;
}
static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_rcu();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}
static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
}
struct cpufreq_governor schedutil_gov = {
	.name			= "schedutil",
	.owner			= THIS_MODULE,
	.dynamic_switching	= true,
	.init			= sugov_init,
	.exit			= sugov_exit,
	.start			= sugov_start,
	.stop			= sugov_stop,
	.limits			= sugov_limits,
};
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif
static int __init sugov_register(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);
#ifdef CONFIG_ENERGY_MODEL
extern bool sched_energy_update;
extern struct mutex sched_energy_mutex;

static void rebuild_sd_workfn(struct work_struct *work)
{
	mutex_lock(&sched_energy_mutex);
	sched_energy_update = true;
	rebuild_sched_domains();
	sched_energy_update = false;
	mutex_unlock(&sched_energy_mutex);
}
static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);

/*
 * EAS shouldn't be attempted without sugov, so rebuild the sched_domains
 * on governor changes to make sure the scheduler knows about it.
 */
void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
				   struct cpufreq_governor *old_gov)
{
	if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) {
		/*
		 * When called from the cpufreq_register_driver() path, the
		 * cpu_hotplug_lock is already held, so use a work item to
		 * avoid nested locking in rebuild_sched_domains().
		 */
		schedule_work(&rebuild_sd_work);
	}
}
#endif
);