]> git.proxmox.com Git - mirror_ubuntu-kernels.git/blame - kernel/sched/cpufreq_schedutil.c
sched: Clean up and harmonize the coding style of the scheduler code base
[mirror_ubuntu-kernels.git] / kernel / sched / cpufreq_schedutil.c
CommitLineData
9bdcb44e
RW
1/*
2 * CPUFreq governor based on scheduler-provided CPU utilization data.
3 *
4 * Copyright (C) 2016, Intel Corporation
5 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
60f05e86
VK
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
9bdcb44e 14#include <linux/cpufreq.h>
02a7b1ee 15#include <linux/kthread.h>
ae7e81c0 16#include <uapi/linux/sched/types.h>
9bdcb44e
RW
17#include <linux/slab.h>
18#include <trace/events/power.h>
19
20#include "sched.h"
21
22struct sugov_tunables {
97fb7a0a
IM
23 struct gov_attr_set attr_set;
24 unsigned int rate_limit_us;
9bdcb44e
RW
25};
26
27struct sugov_policy {
97fb7a0a
IM
28 struct cpufreq_policy *policy;
29
30 struct sugov_tunables *tunables;
31 struct list_head tunables_hook;
32
33 raw_spinlock_t update_lock; /* For shared policies */
34 u64 last_freq_update_time;
35 s64 freq_update_delay_ns;
36 unsigned int next_freq;
37 unsigned int cached_raw_freq;
38
39 /* The next fields are only needed if fast switch cannot be used: */
40 struct irq_work irq_work;
41 struct kthread_work work;
42 struct mutex work_lock;
43 struct kthread_worker worker;
44 struct task_struct *thread;
45 bool work_in_progress;
46
47 bool need_freq_update;
9bdcb44e
RW
48};
49
50struct sugov_cpu {
97fb7a0a
IM
51 struct update_util_data update_util;
52 struct sugov_policy *sg_policy;
53 unsigned int cpu;
9bdcb44e 54
97fb7a0a
IM
55 bool iowait_boost_pending;
56 unsigned int iowait_boost;
57 unsigned int iowait_boost_max;
21ca6d2c 58 u64 last_update;
5cbea469 59
97fb7a0a
IM
60 /* The fields below are only needed when sharing a policy: */
61 unsigned long util_cfs;
62 unsigned long util_dl;
63 unsigned long max;
64 unsigned int flags;
b7eaf1aa 65
97fb7a0a 66 /* The field below is for single-CPU policies only: */
b7eaf1aa 67#ifdef CONFIG_NO_HZ_COMMON
97fb7a0a 68 unsigned long saved_idle_calls;
b7eaf1aa 69#endif
9bdcb44e
RW
70};
71
72static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
73
74/************************ Governor internals ***********************/
75
76static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
77{
78 s64 delta_ns;
79
674e7541
VK
80 /*
81 * Since cpufreq_update_util() is called with rq->lock held for
97fb7a0a 82 * the @target_cpu, our per-CPU data is fully serialized.
674e7541 83 *
97fb7a0a 84 * However, drivers cannot in general deal with cross-CPU
674e7541 85 * requests, so while get_next_freq() will work, our
c49cbc19 86 * sugov_update_commit() call may not for the fast switching platforms.
674e7541
VK
87 *
88 * Hence stop here for remote requests if they aren't supported
89 * by the hardware, as calculating the frequency is pointless if
90 * we cannot in fact act on it.
c49cbc19
VK
91 *
92 * For the slow switching platforms, the kthread is always scheduled on
93 * the right set of CPUs and any CPU can find the next frequency and
94 * schedule the kthread.
674e7541 95 */
c49cbc19
VK
96 if (sg_policy->policy->fast_switch_enabled &&
97 !cpufreq_can_do_remote_dvfs(sg_policy->policy))
674e7541
VK
98 return false;
99
9bdcb44e
RW
100 if (sg_policy->work_in_progress)
101 return false;
102
103 if (unlikely(sg_policy->need_freq_update)) {
104 sg_policy->need_freq_update = false;
105 /*
106 * This happens when limits change, so forget the previous
107 * next_freq value and force an update.
108 */
109 sg_policy->next_freq = UINT_MAX;
110 return true;
111 }
112
113 delta_ns = time - sg_policy->last_freq_update_time;
97fb7a0a 114
9bdcb44e
RW
115 return delta_ns >= sg_policy->freq_update_delay_ns;
116}
117
118static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
119 unsigned int next_freq)
120{
121 struct cpufreq_policy *policy = sg_policy->policy;
122
38d4ea22
RW
123 if (sg_policy->next_freq == next_freq)
124 return;
125
126 sg_policy->next_freq = next_freq;
9bdcb44e
RW
127 sg_policy->last_freq_update_time = time;
128
129 if (policy->fast_switch_enabled) {
9bdcb44e 130 next_freq = cpufreq_driver_fast_switch(policy, next_freq);
209887e6 131 if (!next_freq)
9bdcb44e
RW
132 return;
133
134 policy->cur = next_freq;
135 trace_cpu_frequency(next_freq, smp_processor_id());
38d4ea22 136 } else {
9bdcb44e
RW
137 sg_policy->work_in_progress = true;
138 irq_work_queue(&sg_policy->irq_work);
139 }
140}
141
142/**
143 * get_next_freq - Compute a new frequency for a given cpufreq policy.
655cb1eb 144 * @sg_policy: schedutil policy object to compute the new frequency for.
9bdcb44e
RW
145 * @util: Current CPU utilization.
146 * @max: CPU capacity.
147 *
148 * If the utilization is frequency-invariant, choose the new frequency to be
149 * proportional to it, that is
150 *
151 * next_freq = C * max_freq * util / max
152 *
153 * Otherwise, approximate the would-be frequency-invariant utilization by
154 * util_raw * (curr_freq / max_freq) which leads to
155 *
156 * next_freq = C * curr_freq * util_raw / max
157 *
158 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
5cbea469
SM
159 *
160 * The lowest driver-supported frequency which is equal or greater than the raw
161 * next_freq (as calculated above) is returned, subject to policy min/max and
162 * cpufreq driver limitations.
9bdcb44e 163 */
655cb1eb
VK
164static unsigned int get_next_freq(struct sugov_policy *sg_policy,
165 unsigned long util, unsigned long max)
9bdcb44e 166{
5cbea469 167 struct cpufreq_policy *policy = sg_policy->policy;
9bdcb44e
RW
168 unsigned int freq = arch_scale_freq_invariant() ?
169 policy->cpuinfo.max_freq : policy->cur;
170
5cbea469
SM
171 freq = (freq + (freq >> 2)) * util / max;
172
6c4f0fa6 173 if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
5cbea469 174 return sg_policy->next_freq;
6c4f0fa6 175 sg_policy->cached_raw_freq = freq;
5cbea469 176 return cpufreq_driver_resolve_freq(policy, freq);
9bdcb44e
RW
177}
178
d18be45d 179static void sugov_get_util(struct sugov_cpu *sg_cpu)
58919e83 180{
d18be45d 181 struct rq *rq = cpu_rq(sg_cpu->cpu);
8314bc83 182
d18be45d
JL
183 sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
184 sg_cpu->util_cfs = cpu_util_cfs(rq);
185 sg_cpu->util_dl = cpu_util_dl(rq);
186}
58919e83 187
d18be45d
JL
188static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
189{
d4edd662
JL
190 /*
191 * Ideally we would like to set util_dl as min/guaranteed freq and
192 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
193 * ready for such an interface. So, we only do the latter for now.
194 */
d18be45d 195 return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
58919e83
RW
196}
197
5083452f 198static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
21ca6d2c 199{
5083452f 200 if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
a5a0809b
JF
201 if (sg_cpu->iowait_boost_pending)
202 return;
203
204 sg_cpu->iowait_boost_pending = true;
205
206 if (sg_cpu->iowait_boost) {
207 sg_cpu->iowait_boost <<= 1;
208 if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
209 sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
210 } else {
211 sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
212 }
21ca6d2c
RW
213 } else if (sg_cpu->iowait_boost) {
214 s64 delta_ns = time - sg_cpu->last_update;
215
216 /* Clear iowait_boost if the CPU apprears to have been idle. */
a5a0809b 217 if (delta_ns > TICK_NSEC) {
21ca6d2c 218 sg_cpu->iowait_boost = 0;
a5a0809b
JF
219 sg_cpu->iowait_boost_pending = false;
220 }
21ca6d2c
RW
221 }
222}
223
224static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
225 unsigned long *max)
226{
251accf9 227 unsigned int boost_util, boost_max;
21ca6d2c 228
a5a0809b 229 if (!sg_cpu->iowait_boost)
21ca6d2c
RW
230 return;
231
a5a0809b
JF
232 if (sg_cpu->iowait_boost_pending) {
233 sg_cpu->iowait_boost_pending = false;
234 } else {
235 sg_cpu->iowait_boost >>= 1;
236 if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
237 sg_cpu->iowait_boost = 0;
238 return;
239 }
240 }
241
242 boost_util = sg_cpu->iowait_boost;
243 boost_max = sg_cpu->iowait_boost_max;
244
21ca6d2c
RW
245 if (*util * boost_max < *max * boost_util) {
246 *util = boost_util;
247 *max = boost_max;
248 }
21ca6d2c
RW
249}
250
b7eaf1aa
RW
251#ifdef CONFIG_NO_HZ_COMMON
252static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
253{
466a2b42 254 unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
b7eaf1aa
RW
255 bool ret = idle_calls == sg_cpu->saved_idle_calls;
256
257 sg_cpu->saved_idle_calls = idle_calls;
258 return ret;
259}
260#else
261static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
262#endif /* CONFIG_NO_HZ_COMMON */
263
9bdcb44e 264static void sugov_update_single(struct update_util_data *hook, u64 time,
58919e83 265 unsigned int flags)
9bdcb44e
RW
266{
267 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
268 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
269 struct cpufreq_policy *policy = sg_policy->policy;
58919e83 270 unsigned long util, max;
9bdcb44e 271 unsigned int next_f;
b7eaf1aa 272 bool busy;
9bdcb44e 273
5083452f 274 sugov_set_iowait_boost(sg_cpu, time);
21ca6d2c
RW
275 sg_cpu->last_update = time;
276
9bdcb44e
RW
277 if (!sugov_should_update_freq(sg_policy, time))
278 return;
279
b7eaf1aa
RW
280 busy = sugov_cpu_is_busy(sg_cpu);
281
d4edd662 282 if (flags & SCHED_CPUFREQ_RT) {
58919e83
RW
283 next_f = policy->cpuinfo.max_freq;
284 } else {
d18be45d
JL
285 sugov_get_util(sg_cpu);
286 max = sg_cpu->max;
287 util = sugov_aggregate_util(sg_cpu);
21ca6d2c 288 sugov_iowait_boost(sg_cpu, &util, &max);
655cb1eb 289 next_f = get_next_freq(sg_policy, util, max);
b7eaf1aa
RW
290 /*
291 * Do not reduce the frequency if the CPU has not been idle
292 * recently, as the reduction is likely to be premature then.
293 */
07458f6a 294 if (busy && next_f < sg_policy->next_freq) {
b7eaf1aa 295 next_f = sg_policy->next_freq;
07458f6a
VK
296
297 /* Reset cached freq as next_freq has changed */
298 sg_policy->cached_raw_freq = 0;
299 }
58919e83 300 }
9bdcb44e
RW
301 sugov_update_commit(sg_policy, time, next_f);
302}
303
d86ab9cf 304static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
9bdcb44e 305{
5cbea469 306 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
9bdcb44e 307 struct cpufreq_policy *policy = sg_policy->policy;
cba1dfb5 308 unsigned long util = 0, max = 1;
9bdcb44e
RW
309 unsigned int j;
310
9bdcb44e 311 for_each_cpu(j, policy->cpus) {
cba1dfb5 312 struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
9bdcb44e
RW
313 unsigned long j_util, j_max;
314 s64 delta_ns;
315
9bdcb44e 316 /*
0fa7d181
JL
317 * If the CFS CPU utilization was last updated before the
318 * previous frequency update and the time elapsed between the
319 * last update of the CPU utilization and the last frequency
320 * update is long enough, reset iowait_boost and util_cfs, as
321 * they are now probably stale. However, still consider the
322 * CPU contribution if it has some DEADLINE utilization
323 * (util_dl).
9bdcb44e 324 */
d86ab9cf 325 delta_ns = time - j_sg_cpu->last_update;
21ca6d2c
RW
326 if (delta_ns > TICK_NSEC) {
327 j_sg_cpu->iowait_boost = 0;
a5a0809b 328 j_sg_cpu->iowait_boost_pending = false;
0fa7d181
JL
329 j_sg_cpu->util_cfs = 0;
330 if (j_sg_cpu->util_dl == 0)
331 continue;
21ca6d2c 332 }
d4edd662 333 if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
cba1dfb5 334 return policy->cpuinfo.max_freq;
9bdcb44e
RW
335
336 j_max = j_sg_cpu->max;
d18be45d 337 j_util = sugov_aggregate_util(j_sg_cpu);
9bdcb44e
RW
338 if (j_util * max > j_max * util) {
339 util = j_util;
340 max = j_max;
341 }
21ca6d2c
RW
342
343 sugov_iowait_boost(j_sg_cpu, &util, &max);
9bdcb44e
RW
344 }
345
655cb1eb 346 return get_next_freq(sg_policy, util, max);
9bdcb44e
RW
347}
348
97fb7a0a
IM
349static void
350sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
9bdcb44e
RW
351{
352 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
353 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
354 unsigned int next_f;
355
356 raw_spin_lock(&sg_policy->update_lock);
357
d18be45d 358 sugov_get_util(sg_cpu);
58919e83 359 sg_cpu->flags = flags;
21ca6d2c 360
5083452f 361 sugov_set_iowait_boost(sg_cpu, time);
9bdcb44e
RW
362 sg_cpu->last_update = time;
363
364 if (sugov_should_update_freq(sg_policy, time)) {
d4edd662 365 if (flags & SCHED_CPUFREQ_RT)
cba1dfb5
VK
366 next_f = sg_policy->policy->cpuinfo.max_freq;
367 else
d86ab9cf 368 next_f = sugov_next_freq_shared(sg_cpu, time);
cba1dfb5 369
9bdcb44e
RW
370 sugov_update_commit(sg_policy, time, next_f);
371 }
372
373 raw_spin_unlock(&sg_policy->update_lock);
374}
375
02a7b1ee 376static void sugov_work(struct kthread_work *work)
9bdcb44e
RW
377{
378 struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
379
380 mutex_lock(&sg_policy->work_lock);
381 __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
382 CPUFREQ_RELATION_L);
383 mutex_unlock(&sg_policy->work_lock);
384
385 sg_policy->work_in_progress = false;
386}
387
388static void sugov_irq_work(struct irq_work *irq_work)
389{
390 struct sugov_policy *sg_policy;
391
392 sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
02a7b1ee
VK
393
394 /*
d4edd662
JL
395 * For RT tasks, the schedutil governor shoots the frequency to maximum.
396 * Special care must be taken to ensure that this kthread doesn't result
397 * in the same behavior.
02a7b1ee
VK
398 *
399 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
d06e622d
VK
400 * updated only at the end of the sugov_work() function and before that
401 * the schedutil governor rejects all other frequency scaling requests.
02a7b1ee 402 *
d06e622d 403 * There is a very rare case though, where the RT thread yields right
02a7b1ee
VK
404 * after the work_in_progress flag is cleared. The effects of that are
405 * neglected for now.
406 */
407 kthread_queue_work(&sg_policy->worker, &sg_policy->work);
9bdcb44e
RW
408}
409
410/************************** sysfs interface ************************/
411
412static struct sugov_tunables *global_tunables;
413static DEFINE_MUTEX(global_tunables_lock);
414
415static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
416{
417 return container_of(attr_set, struct sugov_tunables, attr_set);
418}
419
420static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
421{
422 struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
423
424 return sprintf(buf, "%u\n", tunables->rate_limit_us);
425}
426
97fb7a0a
IM
427static ssize_t
428rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
9bdcb44e
RW
429{
430 struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
431 struct sugov_policy *sg_policy;
432 unsigned int rate_limit_us;
433
434 if (kstrtouint(buf, 10, &rate_limit_us))
435 return -EINVAL;
436
437 tunables->rate_limit_us = rate_limit_us;
438
439 list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
440 sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
441
442 return count;
443}
444
445static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
446
447static struct attribute *sugov_attributes[] = {
448 &rate_limit_us.attr,
449 NULL
450};
451
452static struct kobj_type sugov_tunables_ktype = {
453 .default_attrs = sugov_attributes,
454 .sysfs_ops = &governor_sysfs_ops,
455};
456
457/********************** cpufreq governor interface *********************/
458
459static struct cpufreq_governor schedutil_gov;
460
461static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
462{
463 struct sugov_policy *sg_policy;
464
465 sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
466 if (!sg_policy)
467 return NULL;
468
469 sg_policy->policy = policy;
9bdcb44e
RW
470 raw_spin_lock_init(&sg_policy->update_lock);
471 return sg_policy;
472}
473
/* Release a sugov_policy obtained from sugov_policy_alloc(). */
static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}
478
02a7b1ee
VK
479static int sugov_kthread_create(struct sugov_policy *sg_policy)
480{
481 struct task_struct *thread;
794a56eb 482 struct sched_attr attr = {
97fb7a0a
IM
483 .size = sizeof(struct sched_attr),
484 .sched_policy = SCHED_DEADLINE,
485 .sched_flags = SCHED_FLAG_SUGOV,
486 .sched_nice = 0,
487 .sched_priority = 0,
794a56eb
JL
488 /*
489 * Fake (unused) bandwidth; workaround to "fix"
490 * priority inheritance.
491 */
492 .sched_runtime = 1000000,
493 .sched_deadline = 10000000,
494 .sched_period = 10000000,
495 };
02a7b1ee
VK
496 struct cpufreq_policy *policy = sg_policy->policy;
497 int ret;
498
499 /* kthread only required for slow path */
500 if (policy->fast_switch_enabled)
501 return 0;
502
503 kthread_init_work(&sg_policy->work, sugov_work);
504 kthread_init_worker(&sg_policy->worker);
505 thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
506 "sugov:%d",
507 cpumask_first(policy->related_cpus));
508 if (IS_ERR(thread)) {
509 pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
510 return PTR_ERR(thread);
511 }
512
794a56eb 513 ret = sched_setattr_nocheck(thread, &attr);
02a7b1ee
VK
514 if (ret) {
515 kthread_stop(thread);
794a56eb 516 pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
02a7b1ee
VK
517 return ret;
518 }
519
520 sg_policy->thread = thread;
e2cabe48
VK
521
522 /* Kthread is bound to all CPUs by default */
523 if (!policy->dvfs_possible_from_any_cpu)
524 kthread_bind_mask(thread, policy->related_cpus);
525
21ef5729
VK
526 init_irq_work(&sg_policy->irq_work, sugov_irq_work);
527 mutex_init(&sg_policy->work_lock);
528
02a7b1ee
VK
529 wake_up_process(thread);
530
531 return 0;
532}
533
534static void sugov_kthread_stop(struct sugov_policy *sg_policy)
535{
536 /* kthread only required for slow path */
537 if (sg_policy->policy->fast_switch_enabled)
538 return;
539
540 kthread_flush_worker(&sg_policy->worker);
541 kthread_stop(sg_policy->thread);
21ef5729 542 mutex_destroy(&sg_policy->work_lock);
02a7b1ee
VK
543}
544
9bdcb44e
RW
545static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
546{
547 struct sugov_tunables *tunables;
548
549 tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
550 if (tunables) {
551 gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
552 if (!have_governor_per_policy())
553 global_tunables = tunables;
554 }
555 return tunables;
556}
557
558static void sugov_tunables_free(struct sugov_tunables *tunables)
559{
560 if (!have_governor_per_policy())
561 global_tunables = NULL;
562
563 kfree(tunables);
564}
565
566static int sugov_init(struct cpufreq_policy *policy)
567{
568 struct sugov_policy *sg_policy;
569 struct sugov_tunables *tunables;
9bdcb44e
RW
570 int ret = 0;
571
572 /* State should be equivalent to EXIT */
573 if (policy->governor_data)
574 return -EBUSY;
575
4a71ce43
VK
576 cpufreq_enable_fast_switch(policy);
577
9bdcb44e 578 sg_policy = sugov_policy_alloc(policy);
4a71ce43
VK
579 if (!sg_policy) {
580 ret = -ENOMEM;
581 goto disable_fast_switch;
582 }
9bdcb44e 583
02a7b1ee
VK
584 ret = sugov_kthread_create(sg_policy);
585 if (ret)
586 goto free_sg_policy;
587
9bdcb44e
RW
588 mutex_lock(&global_tunables_lock);
589
590 if (global_tunables) {
591 if (WARN_ON(have_governor_per_policy())) {
592 ret = -EINVAL;
02a7b1ee 593 goto stop_kthread;
9bdcb44e
RW
594 }
595 policy->governor_data = sg_policy;
596 sg_policy->tunables = global_tunables;
597
598 gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
599 goto out;
600 }
601
602 tunables = sugov_tunables_alloc(sg_policy);
603 if (!tunables) {
604 ret = -ENOMEM;
02a7b1ee 605 goto stop_kthread;
9bdcb44e
RW
606 }
607
aa7519af 608 tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
9bdcb44e
RW
609
610 policy->governor_data = sg_policy;
611 sg_policy->tunables = tunables;
612
613 ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
614 get_governor_parent_kobj(policy), "%s",
615 schedutil_gov.name);
616 if (ret)
617 goto fail;
618
8e2ddb03 619out:
9bdcb44e 620 mutex_unlock(&global_tunables_lock);
9bdcb44e
RW
621 return 0;
622
8e2ddb03 623fail:
9bdcb44e
RW
624 policy->governor_data = NULL;
625 sugov_tunables_free(tunables);
626
02a7b1ee
VK
627stop_kthread:
628 sugov_kthread_stop(sg_policy);
629
8e2ddb03 630free_sg_policy:
9bdcb44e
RW
631 mutex_unlock(&global_tunables_lock);
632
633 sugov_policy_free(sg_policy);
4a71ce43
VK
634
635disable_fast_switch:
636 cpufreq_disable_fast_switch(policy);
637
60f05e86 638 pr_err("initialization failed (error %d)\n", ret);
9bdcb44e
RW
639 return ret;
640}
641
e788892b 642static void sugov_exit(struct cpufreq_policy *policy)
9bdcb44e
RW
643{
644 struct sugov_policy *sg_policy = policy->governor_data;
645 struct sugov_tunables *tunables = sg_policy->tunables;
646 unsigned int count;
647
648 mutex_lock(&global_tunables_lock);
649
650 count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
651 policy->governor_data = NULL;
652 if (!count)
653 sugov_tunables_free(tunables);
654
655 mutex_unlock(&global_tunables_lock);
656
02a7b1ee 657 sugov_kthread_stop(sg_policy);
9bdcb44e 658 sugov_policy_free(sg_policy);
4a71ce43 659 cpufreq_disable_fast_switch(policy);
9bdcb44e
RW
660}
661
662static int sugov_start(struct cpufreq_policy *policy)
663{
664 struct sugov_policy *sg_policy = policy->governor_data;
665 unsigned int cpu;
666
97fb7a0a
IM
667 sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
668 sg_policy->last_freq_update_time = 0;
669 sg_policy->next_freq = UINT_MAX;
670 sg_policy->work_in_progress = false;
671 sg_policy->need_freq_update = false;
672 sg_policy->cached_raw_freq = 0;
9bdcb44e
RW
673
674 for_each_cpu(cpu, policy->cpus) {
675 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
676
4296f23e 677 memset(sg_cpu, 0, sizeof(*sg_cpu));
97fb7a0a
IM
678 sg_cpu->cpu = cpu;
679 sg_cpu->sg_policy = sg_policy;
680 sg_cpu->flags = 0;
681 sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
ab2f7cf1
VM
682 }
683
684 for_each_cpu(cpu, policy->cpus) {
685 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
686
4296f23e
RW
687 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
688 policy_is_shared(policy) ?
689 sugov_update_shared :
690 sugov_update_single);
9bdcb44e
RW
691 }
692 return 0;
693}
694
e788892b 695static void sugov_stop(struct cpufreq_policy *policy)
9bdcb44e
RW
696{
697 struct sugov_policy *sg_policy = policy->governor_data;
698 unsigned int cpu;
699
700 for_each_cpu(cpu, policy->cpus)
701 cpufreq_remove_update_util_hook(cpu);
702
703 synchronize_sched();
704
21ef5729
VK
705 if (!policy->fast_switch_enabled) {
706 irq_work_sync(&sg_policy->irq_work);
707 kthread_cancel_work_sync(&sg_policy->work);
708 }
9bdcb44e
RW
709}
710
e788892b 711static void sugov_limits(struct cpufreq_policy *policy)
9bdcb44e
RW
712{
713 struct sugov_policy *sg_policy = policy->governor_data;
714
715 if (!policy->fast_switch_enabled) {
716 mutex_lock(&sg_policy->work_lock);
bf2be2de 717 cpufreq_policy_apply_limits(policy);
9bdcb44e
RW
718 mutex_unlock(&sg_policy->work_lock);
719 }
720
721 sg_policy->need_freq_update = true;
9bdcb44e
RW
722}
723
724static struct cpufreq_governor schedutil_gov = {
97fb7a0a
IM
725 .name = "schedutil",
726 .owner = THIS_MODULE,
727 .dynamic_switching = true,
728 .init = sugov_init,
729 .exit = sugov_exit,
730 .start = sugov_start,
731 .stop = sugov_stop,
732 .limits = sugov_limits,
9bdcb44e
RW
733};
734
9bdcb44e
RW
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
/* Return schedutil as the default governor when configured as such. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif
58919e83
RW
741
742static int __init sugov_register(void)
743{
744 return cpufreq_register_governor(&schedutil_gov);
745}
746fs_initcall(sugov_register);