]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - drivers/thermal/intel_powerclamp.c
BCM270X: Enable the DSI panel node in the VC4 overlay.
[mirror_ubuntu-zesty-kernel.git] / drivers / thermal / intel_powerclamp.c
CommitLineData
d6d71ee4
JP
1/*
2 * intel_powerclamp.c - package c-state idle injection
3 *
4 * Copyright (c) 2012, Intel Corporation.
5 *
6 * Authors:
7 * Arjan van de Ven <arjan@linux.intel.com>
8 * Jacob Pan <jacob.jun.pan@linux.intel.com>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
22 *
23 *
24 * TODO:
25 * 1. better handle wakeup from external interrupts, currently a fixed
26 * compensation is added to clamping duration when excessive amount
27 * of wakeups are observed during idle time. the reason is that in
28 * case of external interrupts without need for ack, clamping down
29 * cpu in non-irq context does not reduce irq. for majority of the
30 * cases, clamping down cpu does help reduce irq as well, we should
31 * be able to differentiate the two cases and give a quantitative
32 * solution for the irqs that we can control. perhaps based on
33 * get_cpu_iowait_time_us()
34 *
35 * 2. synchronization with other hw blocks
36 *
37 *
38 */
39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
42#include <linux/module.h>
43#include <linux/kernel.h>
44#include <linux/delay.h>
45#include <linux/kthread.h>
d6d71ee4
JP
46#include <linux/cpu.h>
47#include <linux/thermal.h>
48#include <linux/slab.h>
49#include <linux/tick.h>
50#include <linux/debugfs.h>
51#include <linux/seq_file.h>
19cc90f5 52#include <linux/sched/rt.h>
d6d71ee4
JP
53
54#include <asm/nmi.h>
55#include <asm/msr.h>
56#include <asm/mwait.h>
57#include <asm/cpu_device_id.h>
d6d71ee4
JP
58#include <asm/hardirq.h>
59
/*
 * Exclusive upper bound for the requested idle ratio; also the number of
 * entries in the calibration table.
 */
#define MAX_TARGET_RATIO (50U)

/*
 * Every clamping period that completes without extra wakeups during idle
 * bumps the confidence counter of its target ratio. Calibration results for
 * a ratio are considered valid once the counter reaches CONFIDENCE_OK.
 */
#define CONFIDENCE_OK (3)

/*
 * Default length of one idle injection. The driver varies the sleep time
 * between injections to hit the target idle ratio, much like frequency
 * modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)
71
72static unsigned int target_mwait;
73static struct dentry *debug_dir;
74
75/* user selected target */
76static unsigned int set_target_ratio;
77static unsigned int current_ratio;
78static bool should_skip;
79static bool reduce_irq;
80static atomic_t idle_wakeup_counter;
81static unsigned int control_cpu; /* The cpu assigned to collect stat and update
82 * control parameters. default to BSP but BSP
83 * can be offlined.
84 */
85static bool clamping;
86
8d962ac7
PM
87static const struct sched_param sparam = {
88 .sched_priority = MAX_USER_RT_PRIO / 2,
89};
90struct powerclamp_worker_data {
91 struct kthread_worker *worker;
92 struct kthread_work balancing_work;
93 struct kthread_delayed_work idle_injection_work;
8d962ac7
PM
94 unsigned int cpu;
95 unsigned int count;
96 unsigned int guard;
97 unsigned int window_size_now;
98 unsigned int target_ratio;
99 unsigned int duration_jiffies;
100 bool clamping;
101};
d6d71ee4 102
8d962ac7 103static struct powerclamp_worker_data * __percpu worker_data;
d6d71ee4
JP
104static struct thermal_cooling_device *cooling_dev;
105static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
8d962ac7 106 * clamping kthread worker
d6d71ee4
JP
107 */
108
109static unsigned int duration;
110static unsigned int pkg_cstate_ratio_cur;
111static unsigned int window_size;
112
113static int duration_set(const char *arg, const struct kernel_param *kp)
114{
115 int ret = 0;
116 unsigned long new_duration;
117
118 ret = kstrtoul(arg, 10, &new_duration);
119 if (ret)
120 goto exit;
121 if (new_duration > 25 || new_duration < 6) {
122 pr_err("Out of recommended range %lu, between 6-25ms\n",
123 new_duration);
124 ret = -EINVAL;
125 }
126
127 duration = clamp(new_duration, 6ul, 25ul);
128 smp_mb();
129
130exit:
131
132 return ret;
133}
134
9c27847d 135static const struct kernel_param_ops duration_ops = {
d6d71ee4
JP
136 .set = duration_set,
137 .get = param_get_int,
138};
139
140
141module_param_cb(duration, &duration_ops, &duration, 0644);
142MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
143
144struct powerclamp_calibration_data {
145 unsigned long confidence; /* used for calibration, basically a counter
146 * gets incremented each time a clamping
147 * period is completed without extra wakeups
148 * once that counter is reached given level,
149 * compensation is deemed usable.
150 */
151 unsigned long steady_comp; /* steady state compensation used when
152 * no extra wakeups occurred.
153 */
154 unsigned long dynamic_comp; /* compensate excessive wakeup from idle
155 * mostly from external interrupts.
156 */
157};
158
159static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
160
161static int window_size_set(const char *arg, const struct kernel_param *kp)
162{
163 int ret = 0;
164 unsigned long new_window_size;
165
166 ret = kstrtoul(arg, 10, &new_window_size);
167 if (ret)
168 goto exit_win;
169 if (new_window_size > 10 || new_window_size < 2) {
170 pr_err("Out of recommended window size %lu, between 2-10\n",
171 new_window_size);
172 ret = -EINVAL;
173 }
174
175 window_size = clamp(new_window_size, 2ul, 10ul);
176 smp_mb();
177
178exit_win:
179
180 return ret;
181}
182
9c27847d 183static const struct kernel_param_ops window_size_ops = {
d6d71ee4
JP
184 .set = window_size_set,
185 .get = param_get_int,
186};
187
188module_param_cb(window_size, &window_size_ops, &window_size, 0644);
189MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
190 "\tpowerclamp controls idle ratio within this window. larger\n"
191 "\twindow size results in slower response time but more smooth\n"
192 "\tclamping results. default to 2.");
193
194static void find_target_mwait(void)
195{
196 unsigned int eax, ebx, ecx, edx;
197 unsigned int highest_cstate = 0;
198 unsigned int highest_subcstate = 0;
199 int i;
200
201 if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
202 return;
203
204 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
205
206 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
207 !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
208 return;
209
210 edx >>= MWAIT_SUBSTATE_SIZE;
211 for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
212 if (edx & MWAIT_SUBSTATE_MASK) {
213 highest_cstate = i;
214 highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
215 }
216 }
217 target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
218 (highest_subcstate - 1);
219
220}
221
d8186113
JP
222struct pkg_cstate_info {
223 bool skip;
224 int msr_index;
225 int cstate_id;
226};
227
228#define PKG_CSTATE_INIT(id) { \
229 .msr_index = MSR_PKG_C##id##_RESIDENCY, \
230 .cstate_id = id \
231 }
232
233static struct pkg_cstate_info pkg_cstates[] = {
234 PKG_CSTATE_INIT(2),
235 PKG_CSTATE_INIT(3),
236 PKG_CSTATE_INIT(6),
237 PKG_CSTATE_INIT(7),
238 PKG_CSTATE_INIT(8),
239 PKG_CSTATE_INIT(9),
240 PKG_CSTATE_INIT(10),
241 {NULL},
242};
243
7734e3ac
YS
244static bool has_pkg_state_counter(void)
245{
d8186113
JP
246 u64 val;
247 struct pkg_cstate_info *info = pkg_cstates;
248
249 /* check if any one of the counter msrs exists */
250 while (info->msr_index) {
251 if (!rdmsrl_safe(info->msr_index, &val))
252 return true;
253 info++;
254 }
255
256 return false;
7734e3ac
YS
257}
258
d6d71ee4
JP
259static u64 pkg_state_counter(void)
260{
261 u64 val;
262 u64 count = 0;
d8186113
JP
263 struct pkg_cstate_info *info = pkg_cstates;
264
265 while (info->msr_index) {
266 if (!info->skip) {
267 if (!rdmsrl_safe(info->msr_index, &val))
268 count += val;
269 else
270 info->skip = true;
271 }
272 info++;
d6d71ee4
JP
273 }
274
275 return count;
276}
277
d6d71ee4
JP
278static unsigned int get_compensation(int ratio)
279{
280 unsigned int comp = 0;
281
282 /* we only use compensation if all adjacent ones are good */
283 if (ratio == 1 &&
284 cal_data[ratio].confidence >= CONFIDENCE_OK &&
285 cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
286 cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
287 comp = (cal_data[ratio].steady_comp +
288 cal_data[ratio + 1].steady_comp +
289 cal_data[ratio + 2].steady_comp) / 3;
290 } else if (ratio == MAX_TARGET_RATIO - 1 &&
291 cal_data[ratio].confidence >= CONFIDENCE_OK &&
292 cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
293 cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
294 comp = (cal_data[ratio].steady_comp +
295 cal_data[ratio - 1].steady_comp +
296 cal_data[ratio - 2].steady_comp) / 3;
297 } else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
298 cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
299 cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
300 comp = (cal_data[ratio].steady_comp +
301 cal_data[ratio - 1].steady_comp +
302 cal_data[ratio + 1].steady_comp) / 3;
303 }
304
305 /* REVISIT: simple penalty of double idle injection */
306 if (reduce_irq)
307 comp = ratio;
308 /* do not exceed limit */
309 if (comp + ratio >= MAX_TARGET_RATIO)
310 comp = MAX_TARGET_RATIO - ratio - 1;
311
312 return comp;
313}
314
315static void adjust_compensation(int target_ratio, unsigned int win)
316{
317 int delta;
318 struct powerclamp_calibration_data *d = &cal_data[target_ratio];
319
320 /*
321 * adjust compensations if confidence level has not been reached or
322 * there are too many wakeups during the last idle injection period, we
323 * cannot trust the data for compensation.
324 */
325 if (d->confidence >= CONFIDENCE_OK ||
326 atomic_read(&idle_wakeup_counter) >
327 win * num_online_cpus())
328 return;
329
330 delta = set_target_ratio - current_ratio;
331 /* filter out bad data */
332 if (delta >= 0 && delta <= (1+target_ratio/10)) {
333 if (d->steady_comp)
334 d->steady_comp =
335 roundup(delta+d->steady_comp, 2)/2;
336 else
337 d->steady_comp = delta;
338 d->confidence++;
339 }
340}
341
342static bool powerclamp_adjust_controls(unsigned int target_ratio,
343 unsigned int guard, unsigned int win)
344{
345 static u64 msr_last, tsc_last;
346 u64 msr_now, tsc_now;
347 u64 val64;
348
349 /* check result for the last window */
350 msr_now = pkg_state_counter();
4ea1636b 351 tsc_now = rdtsc();
d6d71ee4
JP
352
353 /* calculate pkg cstate vs tsc ratio */
354 if (!msr_last || !tsc_last)
355 current_ratio = 1;
356 else if (tsc_now-tsc_last) {
357 val64 = 100*(msr_now-msr_last);
358 do_div(val64, (tsc_now-tsc_last));
359 current_ratio = val64;
360 }
361
362 /* update record */
363 msr_last = msr_now;
364 tsc_last = tsc_now;
365
366 adjust_compensation(target_ratio, win);
367 /*
368 * too many external interrupts, set flag such
369 * that we can take measure later.
370 */
371 reduce_irq = atomic_read(&idle_wakeup_counter) >=
372 2 * win * num_online_cpus();
373
374 atomic_set(&idle_wakeup_counter, 0);
375 /* if we are above target+guard, skip */
376 return set_target_ratio + guard <= current_ratio;
377}
378
8d962ac7 379static void clamp_balancing_func(struct kthread_work *work)
d6d71ee4 380{
8d962ac7
PM
381 struct powerclamp_worker_data *w_data;
382 int sleeptime;
383 unsigned long target_jiffies;
384 unsigned int compensated_ratio;
385 int interval; /* jiffies to sleep for each attempt */
d6d71ee4 386
8d962ac7
PM
387 w_data = container_of(work, struct powerclamp_worker_data,
388 balancing_work);
d6d71ee4 389
8d962ac7
PM
390 /*
391 * make sure user selected ratio does not take effect until
392 * the next round. adjust target_ratio if user has changed
393 * target such that we can converge quickly.
394 */
395 w_data->target_ratio = READ_ONCE(set_target_ratio);
396 w_data->guard = 1 + w_data->target_ratio / 20;
397 w_data->window_size_now = window_size;
398 w_data->duration_jiffies = msecs_to_jiffies(duration);
399 w_data->count++;
400
401 /*
402 * systems may have different ability to enter package level
403 * c-states, thus we need to compensate the injected idle ratio
404 * to achieve the actual target reported by the HW.
405 */
406 compensated_ratio = w_data->target_ratio +
407 get_compensation(w_data->target_ratio);
408 if (compensated_ratio <= 0)
409 compensated_ratio = 1;
410 interval = w_data->duration_jiffies * 100 / compensated_ratio;
411
412 /* align idle time */
413 target_jiffies = roundup(jiffies, interval);
414 sleeptime = target_jiffies - jiffies;
415 if (sleeptime <= 0)
416 sleeptime = 1;
417
418 if (clamping && w_data->clamping && cpu_online(w_data->cpu))
419 kthread_queue_delayed_work(w_data->worker,
420 &w_data->idle_injection_work,
421 sleeptime);
422}
423
424static void clamp_idle_injection_func(struct kthread_work *work)
425{
426 struct powerclamp_worker_data *w_data;
8d962ac7
PM
427
428 w_data = container_of(work, struct powerclamp_worker_data,
429 idle_injection_work.work);
430
431 /*
432 * only elected controlling cpu can collect stats and update
433 * control parameters.
434 */
435 if (w_data->cpu == control_cpu &&
436 !(w_data->count % w_data->window_size_now)) {
437 should_skip =
438 powerclamp_adjust_controls(w_data->target_ratio,
439 w_data->guard,
440 w_data->window_size_now);
441 smp_mb();
d6d71ee4 442 }
d6d71ee4 443
8d962ac7
PM
444 if (should_skip)
445 goto balance;
446
feb6cd6a 447 play_idle(jiffies_to_msecs(w_data->duration_jiffies));
d6d71ee4 448
8d962ac7
PM
449balance:
450 if (clamping && w_data->clamping && cpu_online(w_data->cpu))
451 kthread_queue_work(w_data->worker, &w_data->balancing_work);
d6d71ee4
JP
452}
453
454/*
455 * 1 HZ polling while clamping is active, useful for userspace
456 * to monitor actual idle ratio.
457 */
458static void poll_pkg_cstate(struct work_struct *dummy);
459static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
460static void poll_pkg_cstate(struct work_struct *dummy)
461{
462 static u64 msr_last;
463 static u64 tsc_last;
464 static unsigned long jiffies_last;
465
466 u64 msr_now;
467 unsigned long jiffies_now;
468 u64 tsc_now;
469 u64 val64;
470
471 msr_now = pkg_state_counter();
4ea1636b 472 tsc_now = rdtsc();
d6d71ee4
JP
473 jiffies_now = jiffies;
474
475 /* calculate pkg cstate vs tsc ratio */
476 if (!msr_last || !tsc_last)
477 pkg_cstate_ratio_cur = 1;
478 else {
479 if (tsc_now - tsc_last) {
480 val64 = 100 * (msr_now - msr_last);
481 do_div(val64, (tsc_now - tsc_last));
482 pkg_cstate_ratio_cur = val64;
483 }
484 }
485
486 /* update record */
487 msr_last = msr_now;
488 jiffies_last = jiffies_now;
489 tsc_last = tsc_now;
490
491 if (true == clamping)
492 schedule_delayed_work(&poll_pkg_cstate_work, HZ);
493}
494
8d962ac7 495static void start_power_clamp_worker(unsigned long cpu)
14f3f7d8 496{
8d962ac7
PM
497 struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
498 struct kthread_worker *worker;
499
cb91fef1 500 worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inject/%ld", cpu);
8d962ac7 501 if (IS_ERR(worker))
14f3f7d8
PM
502 return;
503
8d962ac7
PM
504 w_data->worker = worker;
505 w_data->count = 0;
506 w_data->cpu = cpu;
507 w_data->clamping = true;
508 set_bit(cpu, cpu_clamping_mask);
8d962ac7
PM
509 sched_setscheduler(worker->task, SCHED_FIFO, &sparam);
510 kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
511 kthread_init_delayed_work(&w_data->idle_injection_work,
512 clamp_idle_injection_func);
513 kthread_queue_work(w_data->worker, &w_data->balancing_work);
514}
515
516static void stop_power_clamp_worker(unsigned long cpu)
517{
518 struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
519
520 if (!w_data->worker)
521 return;
522
523 w_data->clamping = false;
524 /*
525 * Make sure that all works that get queued after this point see
526 * the clamping disabled. The counter part is not needed because
527 * there is an implicit memory barrier when the queued work
528 * is proceed.
529 */
530 smp_wmb();
531 kthread_cancel_work_sync(&w_data->balancing_work);
532 kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
533 /*
534 * The balancing work still might be queued here because
535 * the handling of the "clapming" variable, cancel, and queue
536 * operations are not synchronized via a lock. But it is not
537 * a big deal. The balancing work is fast and destroy kthread
538 * will wait for it.
539 */
8d962ac7
PM
540 clear_bit(w_data->cpu, cpu_clamping_mask);
541 kthread_destroy_worker(w_data->worker);
542
543 w_data->worker = NULL;
14f3f7d8
PM
544}
545
d6d71ee4
JP
546static int start_power_clamp(void)
547{
548 unsigned long cpu;
d6d71ee4 549
c8165dc0 550 set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
d6d71ee4
JP
551 /* prevent cpu hotplug */
552 get_online_cpus();
553
554 /* prefer BSP */
555 control_cpu = 0;
556 if (!cpu_online(control_cpu))
557 control_cpu = smp_processor_id();
558
559 clamping = true;
560 schedule_delayed_work(&poll_pkg_cstate_work, 0);
561
8d962ac7 562 /* start one kthread worker per online cpu */
d6d71ee4 563 for_each_online_cpu(cpu) {
8d962ac7 564 start_power_clamp_worker(cpu);
d6d71ee4
JP
565 }
566 put_online_cpus();
567
568 return 0;
569}
570
571static void end_power_clamp(void)
572{
573 int i;
d6d71ee4 574
d6d71ee4 575 /*
8d962ac7
PM
576 * Block requeuing in all the kthread workers. They will flush and
577 * stop faster.
d6d71ee4 578 */
8d962ac7 579 clamping = false;
d6d71ee4
JP
580 if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
581 for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
8d962ac7
PM
582 pr_debug("clamping worker for cpu %d alive, destroy\n",
583 i);
584 stop_power_clamp_worker(i);
d6d71ee4
JP
585 }
586 }
587}
588
cb91fef1 589static int powerclamp_cpu_online(unsigned int cpu)
d6d71ee4 590{
cb91fef1
SAS
591 if (clamping == false)
592 return 0;
593 start_power_clamp_worker(cpu);
594 /* prefer BSP as controlling CPU */
595 if (cpu == 0) {
596 control_cpu = 0;
597 smp_mb();
d6d71ee4 598 }
cb91fef1 599 return 0;
d6d71ee4
JP
600}
601
cb91fef1
SAS
602static int powerclamp_cpu_predown(unsigned int cpu)
603{
604 if (clamping == false)
605 return 0;
d6d71ee4 606
cb91fef1
SAS
607 stop_power_clamp_worker(cpu);
608 if (cpu != control_cpu)
609 return 0;
d6d71ee4 610
cb91fef1
SAS
611 control_cpu = cpumask_first(cpu_online_mask);
612 if (control_cpu == cpu)
613 control_cpu = cpumask_next(cpu, cpu_online_mask);
614 smp_mb();
615 return 0;
d6d71ee4 616}
d6d71ee4
JP
617
618static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
619 unsigned long *state)
620{
621 *state = MAX_TARGET_RATIO;
622
623 return 0;
624}
625
626static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
627 unsigned long *state)
628{
629 if (true == clamping)
630 *state = pkg_cstate_ratio_cur;
631 else
632 /* to save power, do not poll idle ratio while not clamping */
633 *state = -1; /* indicates invalid state */
634
635 return 0;
636}
637
638static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
639 unsigned long new_target_ratio)
640{
641 int ret = 0;
642
643 new_target_ratio = clamp(new_target_ratio, 0UL,
644 (unsigned long) (MAX_TARGET_RATIO-1));
645 if (set_target_ratio == 0 && new_target_ratio > 0) {
646 pr_info("Start idle injection to reduce power\n");
647 set_target_ratio = new_target_ratio;
648 ret = start_power_clamp();
649 goto exit_set;
650 } else if (set_target_ratio > 0 && new_target_ratio == 0) {
651 pr_info("Stop forced idle injection\n");
d6d71ee4 652 end_power_clamp();
70c50ee7 653 set_target_ratio = 0;
d6d71ee4
JP
654 } else /* adjust currently running */ {
655 set_target_ratio = new_target_ratio;
656 /* make new set_target_ratio visible to other cpus */
657 smp_mb();
658 }
659
660exit_set:
661 return ret;
662}
663
664/* bind to generic thermal layer as cooling device*/
665static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
666 .get_max_state = powerclamp_get_max_state,
667 .get_cur_state = powerclamp_get_cur_state,
668 .set_cur_state = powerclamp_set_cur_state,
669};
670
ec638db8
JP
671static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
672 { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
673 {}
674};
675MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
676
4d2b6e4a 677static int __init powerclamp_probe(void)
d6d71ee4 678{
ec638db8
JP
679
680 if (!x86_match_cpu(intel_powerclamp_ids)) {
3105f234 681 pr_err("CPU does not support MWAIT");
d6d71ee4
JP
682 return -ENODEV;
683 }
b721ca0d
JP
684
685 /* The goal for idle time alignment is to achieve package cstate. */
686 if (!has_pkg_state_counter()) {
687 pr_info("No package C-state available");
d6d71ee4 688 return -ENODEV;
b721ca0d 689 }
d6d71ee4
JP
690
691 /* find the deepest mwait value */
692 find_target_mwait();
693
694 return 0;
695}
696
697static int powerclamp_debug_show(struct seq_file *m, void *unused)
698{
699 int i = 0;
700
701 seq_printf(m, "controlling cpu: %d\n", control_cpu);
702 seq_printf(m, "pct confidence steady dynamic (compensation)\n");
703 for (i = 0; i < MAX_TARGET_RATIO; i++) {
704 seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
705 i,
706 cal_data[i].confidence,
707 cal_data[i].steady_comp,
708 cal_data[i].dynamic_comp);
709 }
710
711 return 0;
712}
713
714static int powerclamp_debug_open(struct inode *inode,
715 struct file *file)
716{
717 return single_open(file, powerclamp_debug_show, inode->i_private);
718}
719
720static const struct file_operations powerclamp_debug_fops = {
721 .open = powerclamp_debug_open,
722 .read = seq_read,
723 .llseek = seq_lseek,
724 .release = single_release,
725 .owner = THIS_MODULE,
726};
727
728static inline void powerclamp_create_debug_files(void)
729{
730 debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
731 if (!debug_dir)
732 return;
733
734 if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir,
735 cal_data, &powerclamp_debug_fops))
736 goto file_error;
737
738 return;
739
740file_error:
741 debugfs_remove_recursive(debug_dir);
742}
743
cb91fef1
SAS
744static enum cpuhp_state hp_state;
745
4d2b6e4a 746static int __init powerclamp_init(void)
d6d71ee4
JP
747{
748 int retval;
749 int bitmap_size;
750
751 bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
752 cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
753 if (!cpu_clamping_mask)
754 return -ENOMEM;
755
756 /* probe cpu features and ids here */
757 retval = powerclamp_probe();
758 if (retval)
c32a5087 759 goto exit_free;
760
d6d71ee4
JP
761 /* set default limit, maybe adjusted during runtime based on feedback */
762 window_size = 2;
cb91fef1
SAS
763 retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
764 "thermal/intel_powerclamp:online",
765 powerclamp_cpu_online,
766 powerclamp_cpu_predown);
767 if (retval < 0)
768 goto exit_free;
769
770 hp_state = retval;
c32a5087 771
8d962ac7
PM
772 worker_data = alloc_percpu(struct powerclamp_worker_data);
773 if (!worker_data) {
c32a5087 774 retval = -ENOMEM;
775 goto exit_unregister;
776 }
777
d6d71ee4
JP
778 cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
779 &powerclamp_cooling_ops);
c32a5087 780 if (IS_ERR(cooling_dev)) {
781 retval = -ENODEV;
782 goto exit_free_thread;
783 }
d6d71ee4
JP
784
785 if (!duration)
786 duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
c32a5087 787
d6d71ee4
JP
788 powerclamp_create_debug_files();
789
790 return 0;
c32a5087 791
792exit_free_thread:
8d962ac7 793 free_percpu(worker_data);
c32a5087 794exit_unregister:
cb91fef1 795 cpuhp_remove_state_nocalls(hp_state);
c32a5087 796exit_free:
797 kfree(cpu_clamping_mask);
798 return retval;
d6d71ee4
JP
799}
800module_init(powerclamp_init);
801
4d2b6e4a 802static void __exit powerclamp_exit(void)
d6d71ee4 803{
d6d71ee4 804 end_power_clamp();
cb91fef1 805 cpuhp_remove_state_nocalls(hp_state);
8d962ac7 806 free_percpu(worker_data);
d6d71ee4
JP
807 thermal_cooling_device_unregister(cooling_dev);
808 kfree(cpu_clamping_mask);
809
810 cancel_delayed_work_sync(&poll_pkg_cstate_work);
811 debugfs_remove_recursive(debug_dir);
812}
813module_exit(powerclamp_exit);
814
815MODULE_LICENSE("GPL");
816MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
817MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
818MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");