/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/power.h>

#include "sched.h"

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;  /* For shared policies */
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;

	/* The next fields are only needed if fast switch cannot be used. */
	struct irq_work irq_work;
	struct work_struct work;
	struct mutex work_lock;
	bool work_in_progress;

	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;

	/* The fields below are only needed when sharing a policy. */
	unsigned long util;
	unsigned long max;
	u64 last_update;
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	if (sg_policy->work_in_progress)
		return false;

	if (unlikely(sg_policy->need_freq_update)) {
		sg_policy->need_freq_update = false;
		/*
		 * This happens when limits change, so forget the previous
		 * next_freq value and force an update.
		 */
		sg_policy->next_freq = UINT_MAX;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;
	return delta_ns >= sg_policy->freq_update_delay_ns;
}
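
/*
 * For example, with rate_limit_us set to 10000, freq_update_delay_ns is
 * 10000 * NSEC_PER_USEC = 10^7, so sugov_should_update_freq() allows at
 * most one frequency update every 10 ms, unless a limits update has set
 * need_freq_update to force an immediate one.
 */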

static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	sg_policy->last_freq_update_time = time;

	if (policy->fast_switch_enabled) {
		if (sg_policy->next_freq == next_freq) {
			trace_cpu_frequency(policy->cur, smp_processor_id());
			return;
		}
		sg_policy->next_freq = next_freq;
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (next_freq == CPUFREQ_ENTRY_INVALID)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else if (sg_policy->next_freq != next_freq) {
		sg_policy->next_freq = next_freq;
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}
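
/*
 * Note the two commit paths above: when the driver supports fast switching,
 * the frequency is changed directly from scheduler context without sleeping;
 * otherwise the request is bounced through irq_work to a process-context
 * work item (sugov_work() below), because __cpufreq_driver_target() may sleep.
 */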

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @policy: cpufreq policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 */
static unsigned int get_next_freq(struct cpufreq_policy *policy,
				  unsigned long util, unsigned long max)
{
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

	return (freq + (freq >> 2)) * util / max;
}
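
/*
 * Worked example (hypothetical numbers): on a frequency-invariant system
 * with max_freq = 2000000 kHz, util = 512 and max = 1024, the result is
 * (2000000 + 500000) * 512 / 1024 = 1250000 kHz, i.e. 1.25 * max_freq *
 * (util / max). The factor C = 1.25 is implemented as freq + (freq >> 2).
 */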

static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned long util, unsigned long max)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int next_f;

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
			get_next_freq(policy, util, max);
	sugov_update_commit(sg_policy, time, next_f);
}

static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
					   unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int max_f = policy->cpuinfo.max_freq;
	u64 last_freq_update_time = sg_policy->last_freq_update_time;
	unsigned int j;

	if (util == ULONG_MAX)
		return max_f;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu;
		unsigned long j_util, j_max;
		s64 delta_ns;

		if (j == smp_processor_id())
			continue;

		j_sg_cpu = &per_cpu(sugov_cpu, j);
		/*
		 * If the CPU utilization was last updated before the previous
		 * frequency update and the time elapsed between the last update
		 * of the CPU utilization and the last frequency update is long
		 * enough, don't take the CPU into account as it probably is
		 * idle now.
		 */
		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
		if (delta_ns > TICK_NSEC)
			continue;

		j_util = j_sg_cpu->util;
		if (j_util == ULONG_MAX)
			return max_f;

		j_max = j_sg_cpu->max;
		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}
	}

	return get_next_freq(policy, util, max);
}
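
/*
 * For example (hypothetical values), if CPU0 reports util/max = 200/1024 and
 * CPU1 reports 800/1024 within the same policy, the cross-multiplied
 * comparison j_util * max > j_max * util selects CPU1's ratio, so the shared
 * frequency follows the busiest CPU rather than the one running this update.
 */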

static void sugov_update_shared(struct update_util_data *hook, u64 time,
				unsigned long util, unsigned long max)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sg_cpu->util = util;
	sg_cpu->max = max;
	sg_cpu->last_update = time;

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_policy, util, max);
		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct work_struct *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
				CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);

	sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
	schedule_work_on(smp_processor_id(), &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
				   size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};
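
/*
 * The rate_limit_us tunable is exposed through the standard cpufreq sysfs
 * hierarchy. For example (the exact path depends on whether the driver uses
 * per-policy tunables):
 *
 *   cat /sys/devices/system/cpu/cpufreq/schedutil/rate_limit_us
 *   echo 20000 > /sys/devices/system/cpu/cpufreq/schedutil/rate_limit_us
 *
 * With per-policy tunables the attribute lives under each policy instead,
 * e.g. .../cpufreq/policy0/schedutil/rate_limit_us.
 */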

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	INIT_WORK(&sg_policy->work, sugov_work);
	mutex_init(&sg_policy->work_lock);
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	mutex_destroy(&sg_policy->work_lock);
	kfree(sg_policy);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	unsigned int lat;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy)
		return -ENOMEM;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto free_sg_policy;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto free_sg_policy;
	}

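	/*
	 * Default rate limit: LATENCY_MULTIPLIER (1000) times the driver's
	 * transition latency in microseconds. For example, a (hypothetical)
	 * transition_latency of 10000 ns gives lat = 10, so rate_limit_us
	 * defaults to 10000, i.e. 10 ms between frequency updates.
	 */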
	tunables->rate_limit_us = LATENCY_MULTIPLIER;
	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
	if (lat)
		tunables->rate_limit_us *= lat;

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

 out:
	mutex_unlock(&global_tunables_lock);

	cpufreq_enable_fast_switch(policy);
	return 0;

 fail:
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

 free_sg_policy:
	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);
	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static int sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	cpufreq_disable_fast_switch(policy);

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);
	return 0;
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = UINT_MAX;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		sg_cpu->sg_policy = sg_policy;
		if (policy_is_shared(policy)) {
			sg_cpu->util = ULONG_MAX;
			sg_cpu->max = 0;
			sg_cpu->last_update = 0;
			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
						     sugov_update_shared);
		} else {
			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
						     sugov_update_single);
		}
	}
	return 0;
}

static int sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_sched();

	irq_work_sync(&sg_policy->irq_work);
	cancel_work_sync(&sg_policy->work);
	return 0;
}

static int sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);

		if (policy->max < policy->cur)
			__cpufreq_driver_target(policy, policy->max,
						CPUFREQ_RELATION_H);
		else if (policy->min > policy->cur)
			__cpufreq_driver_target(policy, policy->min,
						CPUFREQ_RELATION_L);

		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
	return 0;
}

static int sugov_governor(struct cpufreq_policy *policy, unsigned int event)
{
	if (event == CPUFREQ_GOV_POLICY_INIT) {
		return sugov_init(policy);
	} else if (policy->governor_data) {
		switch (event) {
		case CPUFREQ_GOV_POLICY_EXIT:
			return sugov_exit(policy);
		case CPUFREQ_GOV_START:
			return sugov_start(policy);
		case CPUFREQ_GOV_STOP:
			return sugov_stop(policy);
		case CPUFREQ_GOV_LIMITS:
			return sugov_limits(policy);
		}
	}
	return -EINVAL;
}

static struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.governor = sugov_governor,
	.owner = THIS_MODULE,
};

static int __init sugov_module_init(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}

static void __exit sugov_module_exit(void)
{
	cpufreq_unregister_governor(&schedutil_gov);
}

MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}

fs_initcall(sugov_module_init);
#else
module_init(sugov_module_init);
#endif
module_exit(sugov_module_exit);
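
/*
 * At runtime the governor is selected per policy through the standard
 * cpufreq sysfs interface, e.g.:
 *
 *   echo schedutil > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
 *
 * One scaling_governor file exists per policy; writing to any CPU in a
 * shared policy affects all CPUs belonging to it.
 */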