/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpufreq.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/slab.h>
#include <trace/events/power.h>

#include "sched.h"

struct sugov_tunables {
        struct gov_attr_set     attr_set;
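        /* Minimum time between two consecutive frequency updates, in microseconds: */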
        unsigned int            rate_limit_us;
};

struct sugov_policy {
        struct cpufreq_policy   *policy;

        struct sugov_tunables   *tunables;
        struct list_head        tunables_hook;

        raw_spinlock_t          update_lock;    /* For shared policies */
        u64                     last_freq_update_time;
        s64                     freq_update_delay_ns;
        unsigned int            next_freq;
        unsigned int            cached_raw_freq;

        /* The next fields are only needed if fast switch cannot be used: */
        struct irq_work         irq_work;
        struct kthread_work     work;
        struct mutex            work_lock;
        struct kthread_worker   worker;
        struct task_struct      *thread;
        bool                    work_in_progress;

        bool                    need_freq_update;
};

struct sugov_cpu {
        struct update_util_data update_util;
        struct sugov_policy     *sg_policy;
        unsigned int            cpu;

        bool                    iowait_boost_pending;
        unsigned int            iowait_boost;
        unsigned int            iowait_boost_max;
        u64                     last_update;

        /* The fields below are only needed when sharing a policy: */
        unsigned long           util_cfs;
        unsigned long           util_dl;
        unsigned long           max;
        unsigned int            flags;

        /* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
        unsigned long           saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
        s64 delta_ns;

        /*
         * Since cpufreq_update_util() is called with rq->lock held for
         * the @target_cpu, our per-CPU data is fully serialized.
         *
         * However, drivers cannot in general deal with cross-CPU
         * requests, so while get_next_freq() will work, our
         * sugov_update_commit() call may not for the fast switching platforms.
         *
         * Hence stop here for remote requests if they aren't supported
         * by the hardware, as calculating the frequency is pointless if
         * we cannot in fact act on it.
         *
         * For the slow switching platforms, the kthread is always scheduled on
         * the right set of CPUs and any CPU can find the next frequency and
         * schedule the kthread.
         */
        if (sg_policy->policy->fast_switch_enabled &&
            !cpufreq_can_do_remote_dvfs(sg_policy->policy))
                return false;

        if (sg_policy->work_in_progress)
                return false;

        if (unlikely(sg_policy->need_freq_update)) {
                sg_policy->need_freq_update = false;
                /*
                 * This happens when limits change, so forget the previous
                 * next_freq value and force an update.
                 */
                sg_policy->next_freq = UINT_MAX;
                return true;
        }

        delta_ns = time - sg_policy->last_freq_update_time;

        return delta_ns >= sg_policy->freq_update_delay_ns;
}

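/*
 * Commit the new frequency: on fast-switch capable drivers the change is
 * made directly from this (scheduler) context, otherwise irq_work is
 * queued to wake the kthread that performs it in process context.
 */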
static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
                                unsigned int next_freq)
{
        struct cpufreq_policy *policy = sg_policy->policy;

        if (sg_policy->next_freq == next_freq)
                return;

        sg_policy->next_freq = next_freq;
        sg_policy->last_freq_update_time = time;

        if (policy->fast_switch_enabled) {
                next_freq = cpufreq_driver_fast_switch(policy, next_freq);
                if (!next_freq)
                        return;

                policy->cur = next_freq;
                trace_cpu_frequency(next_freq, smp_processor_id());
        } else {
                sg_policy->work_in_progress = true;
                irq_work_queue(&sg_policy->irq_work);
        }
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
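 *
 * For example, at (util / max) = 0.8 this gives next_freq = 1.25 * 0.8 *
 * max_freq = max_freq, i.e. 80% utilization already requests the maximum
 * frequency; higher utilization is clamped to max_freq by the frequency
 * resolution step below.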
 *
 * The lowest driver-supported frequency which is equal or greater than the raw
 * next_freq (as calculated above) is returned, subject to policy min/max and
 * cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
                                  unsigned long util, unsigned long max)
{
        struct cpufreq_policy *policy = sg_policy->policy;
        unsigned int freq = arch_scale_freq_invariant() ?
                                policy->cpuinfo.max_freq : policy->cur;

        freq = (freq + (freq >> 2)) * util / max;

        if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
                return sg_policy->next_freq;
        sg_policy->cached_raw_freq = freq;
        return cpufreq_driver_resolve_freq(policy, freq);
}

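/* Snapshot this CPU's capacity and its current CFS and DL utilization. */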
static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
        struct rq *rq = cpu_rq(sg_cpu->cpu);

        sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
        sg_cpu->util_cfs = cpu_util_cfs(rq);
        sg_cpu->util_dl = cpu_util_dl(rq);
}

static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
{
        /*
         * Ideally we would like to set util_dl as min/guaranteed freq and
         * util_cfs + util_dl as requested freq. However, cpufreq is not yet
         * ready for such an interface. So, we only do the latter for now.
         */
        return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
}

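/*
 * On a wakeup from I/O (SCHED_CPUFREQ_IOWAIT), raise the iowait boost:
 * start at the policy's minimum frequency and double it on every
 * consecutive iowait wakeup, capped at iowait_boost_max. The boost is
 * cleared when the CPU appears to have been idle for more than a tick.
 */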
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
{
        if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
                if (sg_cpu->iowait_boost_pending)
                        return;

                sg_cpu->iowait_boost_pending = true;

                if (sg_cpu->iowait_boost) {
                        sg_cpu->iowait_boost <<= 1;
                        if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
                                sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
                } else {
                        sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
                }
        } else if (sg_cpu->iowait_boost) {
                s64 delta_ns = time - sg_cpu->last_update;

                /* Clear iowait_boost if the CPU appears to have been idle. */
                if (delta_ns > TICK_NSEC) {
                        sg_cpu->iowait_boost = 0;
                        sg_cpu->iowait_boost_pending = false;
                }
        }
}

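/*
 * Apply the current iowait boost to the (util, max) pair used for
 * frequency selection. Unless a fresh boost is pending, the boost decays
 * by half on every use and is dropped once below the policy minimum.
 */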
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
                               unsigned long *max)
{
        unsigned int boost_util, boost_max;

        if (!sg_cpu->iowait_boost)
                return;

        if (sg_cpu->iowait_boost_pending) {
                sg_cpu->iowait_boost_pending = false;
        } else {
                sg_cpu->iowait_boost >>= 1;
                if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
                        sg_cpu->iowait_boost = 0;
                        return;
                }
        }

        boost_util = sg_cpu->iowait_boost;
        boost_max = sg_cpu->iowait_boost_max;

        if (*util * boost_max < *max * boost_util) {
                *util = boost_util;
                *max = boost_max;
        }
}

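/*
 * The CPU is considered busy when it has not entered idle since this
 * function last ran, i.e. when the NOHZ idle-calls counter has not
 * advanced; without NOHZ the stub below always reports "not busy".
 */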
#ifdef CONFIG_NO_HZ_COMMON
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
        unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
        bool ret = idle_calls == sg_cpu->saved_idle_calls;

        sg_cpu->saved_idle_calls = idle_calls;
        return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

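/*
 * Update hook for single-CPU policies, invoked by the scheduler with the
 * target CPU's rq lock held (see the comment in sugov_should_update_freq()),
 * so no additional locking is needed here.
 */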
static void sugov_update_single(struct update_util_data *hook, u64 time,
                                unsigned int flags)
{
        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
        struct cpufreq_policy *policy = sg_policy->policy;
        unsigned long util, max;
        unsigned int next_f;
        bool busy;

        sugov_set_iowait_boost(sg_cpu, time);
        sg_cpu->last_update = time;

        if (!sugov_should_update_freq(sg_policy, time))
                return;

        busy = sugov_cpu_is_busy(sg_cpu);

        if (flags & SCHED_CPUFREQ_RT) {
                next_f = policy->cpuinfo.max_freq;
        } else {
                sugov_get_util(sg_cpu);
                max = sg_cpu->max;
                util = sugov_aggregate_util(sg_cpu);
                sugov_iowait_boost(sg_cpu, &util, &max);
                next_f = get_next_freq(sg_policy, util, max);
                /*
                 * Do not reduce the frequency if the CPU has not been idle
                 * recently, as the reduction is likely to be premature then.
                 */
                if (busy && next_f < sg_policy->next_freq) {
                        next_f = sg_policy->next_freq;

                        /* Reset cached freq as next_freq has changed */
                        sg_policy->cached_raw_freq = 0;
                }
        }
        sugov_update_commit(sg_policy, time, next_f);
}

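/*
 * For a policy shared by multiple CPUs, evaluate every CPU in the policy
 * and select the frequency for the one with the highest boosted
 * utilization-to-capacity ratio, so the shared policy always satisfies
 * its busiest CPU. Must be called with sg_policy->update_lock held.
 */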
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
        struct cpufreq_policy *policy = sg_policy->policy;
        unsigned long util = 0, max = 1;
        unsigned int j;

        for_each_cpu(j, policy->cpus) {
                struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
                unsigned long j_util, j_max;
                s64 delta_ns;

                /*
                 * If the CFS CPU utilization was last updated before the
                 * previous frequency update and the time elapsed between the
                 * last update of the CPU utilization and the last frequency
                 * update is long enough, reset iowait_boost and util_cfs, as
                 * they are now probably stale. However, still consider the
                 * CPU contribution if it has some DEADLINE utilization
                 * (util_dl).
                 */
                delta_ns = time - j_sg_cpu->last_update;
                if (delta_ns > TICK_NSEC) {
                        j_sg_cpu->iowait_boost = 0;
                        j_sg_cpu->iowait_boost_pending = false;
                        j_sg_cpu->util_cfs = 0;
                        if (j_sg_cpu->util_dl == 0)
                                continue;
                }
                if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
                        return policy->cpuinfo.max_freq;

                j_max = j_sg_cpu->max;
                j_util = sugov_aggregate_util(j_sg_cpu);
                if (j_util * max > j_max * util) {
                        util = j_util;
                        max = j_max;
                }

                sugov_iowait_boost(j_sg_cpu, &util, &max);
        }

        return get_next_freq(sg_policy, util, max);
}

static void
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
        unsigned int next_f;

        raw_spin_lock(&sg_policy->update_lock);

        sugov_get_util(sg_cpu);
        sg_cpu->flags = flags;

        sugov_set_iowait_boost(sg_cpu, time);
        sg_cpu->last_update = time;

        if (sugov_should_update_freq(sg_policy, time)) {
                if (flags & SCHED_CPUFREQ_RT)
                        next_f = sg_policy->policy->cpuinfo.max_freq;
                else
                        next_f = sugov_next_freq_shared(sg_cpu, time);

                sugov_update_commit(sg_policy, time, next_f);
        }

        raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
        struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

        mutex_lock(&sg_policy->work_lock);
        __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
                                CPUFREQ_RELATION_L);
        mutex_unlock(&sg_policy->work_lock);

        sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
        struct sugov_policy *sg_policy;

        sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

        /*
         * For RT tasks, the schedutil governor shoots the frequency to maximum.
         * Special care must be taken to ensure that this kthread doesn't result
         * in the same behavior.
         *
         * This is (mostly) guaranteed by the work_in_progress flag. The flag is
         * updated only at the end of the sugov_work() function and before that
         * the schedutil governor rejects all other frequency scaling requests.
         *
         * There is a very rare case though, where the RT thread yields right
         * after the work_in_progress flag is cleared. The effects of that are
         * neglected for now.
         */
        kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
        return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
        struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

        return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
        struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
        struct sugov_policy *sg_policy;
        unsigned int rate_limit_us;

        if (kstrtouint(buf, 10, &rate_limit_us))
                return -EINVAL;

        tunables->rate_limit_us = rate_limit_us;

        list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
                sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

        return count;
}

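/*
 * rate_limit_us is exposed via sysfs; with per-policy governor tunables
 * this is e.g. /sys/devices/system/cpu/cpufreq/policy0/schedutil/rate_limit_us.
 */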
static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
        &rate_limit_us.attr,
        NULL
};

static struct kobj_type sugov_tunables_ktype = {
        .default_attrs = sugov_attributes,
        .sysfs_ops = &governor_sysfs_ops,
};

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy;

        sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
        if (!sg_policy)
                return NULL;

        sg_policy->policy = policy;
        raw_spin_lock_init(&sg_policy->update_lock);
        return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
        kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
        struct task_struct *thread;
        struct sched_attr attr = {
                .size           = sizeof(struct sched_attr),
                .sched_policy   = SCHED_DEADLINE,
                .sched_flags    = SCHED_FLAG_SUGOV,
                .sched_nice     = 0,
                .sched_priority = 0,
                /*
                 * Fake (unused) bandwidth; workaround to "fix"
                 * priority inheritance.
                 */
                .sched_runtime  = 1000000,
                .sched_deadline = 10000000,
                .sched_period   = 10000000,
        };
        struct cpufreq_policy *policy = sg_policy->policy;
        int ret;

        /* kthread only required for slow path */
        if (policy->fast_switch_enabled)
                return 0;

        kthread_init_work(&sg_policy->work, sugov_work);
        kthread_init_worker(&sg_policy->worker);
        thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
                                "sugov:%d",
                                cpumask_first(policy->related_cpus));
        if (IS_ERR(thread)) {
                pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
                return PTR_ERR(thread);
        }

        ret = sched_setattr_nocheck(thread, &attr);
        if (ret) {
                kthread_stop(thread);
                pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
                return ret;
        }

        sg_policy->thread = thread;

        /* Kthread is bound to all CPUs by default */
        if (!policy->dvfs_possible_from_any_cpu)
                kthread_bind_mask(thread, policy->related_cpus);

        init_irq_work(&sg_policy->irq_work, sugov_irq_work);
        mutex_init(&sg_policy->work_lock);

        wake_up_process(thread);

        return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
        /* kthread only required for slow path */
        if (sg_policy->policy->fast_switch_enabled)
                return;

        kthread_flush_worker(&sg_policy->worker);
        kthread_stop(sg_policy->thread);
        mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
        struct sugov_tunables *tunables;

        tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
        if (tunables) {
                gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
                if (!have_governor_per_policy())
                        global_tunables = tunables;
        }
        return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
        if (!have_governor_per_policy())
                global_tunables = NULL;

        kfree(tunables);
}

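/*
 * Governor callbacks: ->init() and ->exit() allocate and free the
 * per-policy state (including the slow-path kthread and the tunables),
 * while ->start() and ->stop() install and remove the per-CPU scheduler
 * update hooks.
 */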
static int sugov_init(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy;
        struct sugov_tunables *tunables;
        int ret = 0;

        /* State should be equivalent to EXIT */
        if (policy->governor_data)
                return -EBUSY;

        cpufreq_enable_fast_switch(policy);

        sg_policy = sugov_policy_alloc(policy);
        if (!sg_policy) {
                ret = -ENOMEM;
                goto disable_fast_switch;
        }

        ret = sugov_kthread_create(sg_policy);
        if (ret)
                goto free_sg_policy;

        mutex_lock(&global_tunables_lock);

        if (global_tunables) {
                if (WARN_ON(have_governor_per_policy())) {
                        ret = -EINVAL;
                        goto stop_kthread;
                }
                policy->governor_data = sg_policy;
                sg_policy->tunables = global_tunables;

                gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
                goto out;
        }

        tunables = sugov_tunables_alloc(sg_policy);
        if (!tunables) {
                ret = -ENOMEM;
                goto stop_kthread;
        }

        tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

        policy->governor_data = sg_policy;
        sg_policy->tunables = tunables;

        ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
                                   get_governor_parent_kobj(policy), "%s",
                                   schedutil_gov.name);
        if (ret)
                goto fail;

out:
        mutex_unlock(&global_tunables_lock);
        return 0;

fail:
        policy->governor_data = NULL;
        sugov_tunables_free(tunables);

stop_kthread:
        sugov_kthread_stop(sg_policy);
        mutex_unlock(&global_tunables_lock);

free_sg_policy:
        sugov_policy_free(sg_policy);

disable_fast_switch:
        cpufreq_disable_fast_switch(policy);

        pr_err("initialization failed (error %d)\n", ret);
        return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy = policy->governor_data;
        struct sugov_tunables *tunables = sg_policy->tunables;
        unsigned int count;

        mutex_lock(&global_tunables_lock);

        count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
        policy->governor_data = NULL;
        if (!count)
                sugov_tunables_free(tunables);

        mutex_unlock(&global_tunables_lock);

        sugov_kthread_stop(sg_policy);
        sugov_policy_free(sg_policy);
        cpufreq_disable_fast_switch(policy);
}

static int sugov_start(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy = policy->governor_data;
        unsigned int cpu;

        sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
        sg_policy->last_freq_update_time = 0;
        sg_policy->next_freq = UINT_MAX;
        sg_policy->work_in_progress = false;
        sg_policy->need_freq_update = false;
        sg_policy->cached_raw_freq = 0;

        for_each_cpu(cpu, policy->cpus) {
                struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

                memset(sg_cpu, 0, sizeof(*sg_cpu));
                sg_cpu->cpu = cpu;
                sg_cpu->sg_policy = sg_policy;
                sg_cpu->flags = 0;
                sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
        }

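        /*
         * All per-CPU data must be fully initialized before the update
         * hooks are registered below, since a callback on one CPU of a
         * shared policy may access another CPU's sugov_cpu data.
         */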
        for_each_cpu(cpu, policy->cpus) {
                struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

                cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
                                             policy_is_shared(policy) ?
                                                        sugov_update_shared :
                                                        sugov_update_single);
        }
        return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy = policy->governor_data;
        unsigned int cpu;

        for_each_cpu(cpu, policy->cpus)
                cpufreq_remove_update_util_hook(cpu);

        synchronize_sched();

        if (!policy->fast_switch_enabled) {
                irq_work_sync(&sg_policy->irq_work);
                kthread_cancel_work_sync(&sg_policy->work);
        }
}

static void sugov_limits(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy = policy->governor_data;

        if (!policy->fast_switch_enabled) {
                mutex_lock(&sg_policy->work_lock);
                cpufreq_policy_apply_limits(policy);
                mutex_unlock(&sg_policy->work_lock);
        }

        sg_policy->need_freq_update = true;
}

static struct cpufreq_governor schedutil_gov = {
        .name                   = "schedutil",
        .owner                  = THIS_MODULE,
        .dynamic_switching      = true,
        .init                   = sugov_init,
        .exit                   = sugov_exit,
        .start                  = sugov_start,
        .stop                   = sugov_stop,
        .limits                 = sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
        return &schedutil_gov;
}
#endif

static int __init sugov_register(void)
{
        return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);