]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - kernel/irq/timings.c
userns: prevent speculative execution
[mirror_ubuntu-artful-kernel.git] / kernel / irq / timings.c
CommitLineData
b2d3d61a
DL
1/*
2 * linux/kernel/irq/timings.c
3 *
4 * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 */
e1c92149 11#include <linux/kernel.h>
b2d3d61a 12#include <linux/percpu.h>
e1c92149 13#include <linux/slab.h>
b2d3d61a
DL
14#include <linux/static_key.h>
15#include <linux/interrupt.h>
e1c92149 16#include <linux/idr.h>
b2d3d61a 17#include <linux/irq.h>
e1c92149
DL
18#include <linux/math64.h>
19
20#include <trace/events/irq.h>
b2d3d61a
DL
21
22#include "internals.h"
23
24DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
25
26DEFINE_PER_CPU(struct irq_timings, irq_timings);
27
e1c92149
DL
28struct irqt_stat {
29 u64 next_evt;
30 u64 last_ts;
31 u64 variance;
32 u32 avg;
33 u32 nr_samples;
34 int anomalies;
35 int valid;
36};
37
38static DEFINE_IDR(irqt_stats);
39
b2d3d61a
DL
40void irq_timings_enable(void)
41{
42 static_branch_enable(&irq_timing_enabled);
43}
44
45void irq_timings_disable(void)
46{
47 static_branch_disable(&irq_timing_enabled);
48}
e1c92149
DL
49
50/**
51 * irqs_update - update the irq timing statistics with a new timestamp
52 *
53 * @irqs: an irqt_stat struct pointer
54 * @ts: the new timestamp
55 *
56 * The statistics are computed online, in other words, the code is
57 * designed to compute the statistics on a stream of values rather
58 * than doing multiple passes on the values to compute the average,
59 * then the variance. The integer division introduces a loss of
60 * precision but with an acceptable error margin regarding the results
61 * we would have with the double floating precision: we are dealing
62 * with nanosec, so big numbers, consequently the mantisse is
63 * negligeable, especially when converting the time in usec
64 * afterwards.
65 *
66 * The computation happens at idle time. When the CPU is not idle, the
67 * interrupts' timestamps are stored in the circular buffer, when the
68 * CPU goes idle and this routine is called, all the buffer's values
69 * are injected in the statistical model continuying to extend the
70 * statistics from the previous busy-idle cycle.
71 *
72 * The observations showed a device will trigger a burst of periodic
73 * interrupts followed by one or two peaks of longer time, for
74 * instance when a SD card device flushes its cache, then the periodic
75 * intervals occur again. A one second inactivity period resets the
76 * stats, that gives us the certitude the statistical values won't
77 * exceed 1x10^9, thus the computation won't overflow.
78 *
79 * Basically, the purpose of the algorithm is to watch the periodic
80 * interrupts and eliminate the peaks.
81 *
82 * An interrupt is considered periodically stable if the interval of
83 * its occurences follow the normal distribution, thus the values
84 * comply with:
85 *
86 * avg - 3 x stddev < value < avg + 3 x stddev
87 *
88 * Which can be simplified to:
89 *
90 * -3 x stddev < value - avg < 3 x stddev
91 *
92 * abs(value - avg) < 3 x stddev
93 *
94 * In order to save a costly square root computation, we use the
95 * variance. For the record, stddev = sqrt(variance). The equation
96 * above becomes:
97 *
98 * abs(value - avg) < 3 x sqrt(variance)
99 *
100 * And finally we square it:
101 *
102 * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
103 *
104 * (value - avg) x (value - avg) < 9 x variance
105 *
106 * Statistically speaking, any values out of this interval is
107 * considered as an anomaly and is discarded. However, a normal
108 * distribution appears when the number of samples is 30 (it is the
109 * rule of thumb in statistics, cf. "30 samples" on Internet). When
110 * there are three consecutive anomalies, the statistics are resetted.
111 *
112 */
113static void irqs_update(struct irqt_stat *irqs, u64 ts)
114{
115 u64 old_ts = irqs->last_ts;
116 u64 variance = 0;
117 u64 interval;
118 s64 diff;
119
120 /*
121 * The timestamps are absolute time values, we need to compute
122 * the timing interval between two interrupts.
123 */
124 irqs->last_ts = ts;
125
126 /*
127 * The interval type is u64 in order to deal with the same
128 * type in our computation, that prevent mindfuck issues with
129 * overflow, sign and division.
130 */
131 interval = ts - old_ts;
132
133 /*
134 * The interrupt triggered more than one second apart, that
135 * ends the sequence as predictible for our purpose. In this
136 * case, assume we have the beginning of a sequence and the
137 * timestamp is the first value. As it is impossible to
138 * predict anything at this point, return.
139 *
140 * Note the first timestamp of the sequence will always fall
141 * in this test because the old_ts is zero. That is what we
142 * want as we need another timestamp to compute an interval.
143 */
144 if (interval >= NSEC_PER_SEC) {
145 memset(irqs, 0, sizeof(*irqs));
146 irqs->last_ts = ts;
147 return;
148 }
149
150 /*
151 * Pre-compute the delta with the average as the result is
152 * used several times in this function.
153 */
154 diff = interval - irqs->avg;
155
156 /*
157 * Increment the number of samples.
158 */
159 irqs->nr_samples++;
160
161 /*
162 * Online variance divided by the number of elements if there
163 * is more than one sample. Normally the formula is division
164 * by nr_samples - 1 but we assume the number of element will be
165 * more than 32 and dividing by 32 instead of 31 is enough
166 * precise.
167 */
168 if (likely(irqs->nr_samples > 1))
169 variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
170
171 /*
172 * The rule of thumb in statistics for the normal distribution
173 * is having at least 30 samples in order to have the model to
174 * apply. Values outside the interval are considered as an
175 * anomaly.
176 */
177 if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
178 /*
179 * After three consecutive anomalies, we reset the
180 * stats as it is no longer stable enough.
181 */
182 if (irqs->anomalies++ >= 3) {
183 memset(irqs, 0, sizeof(*irqs));
184 irqs->last_ts = ts;
185 return;
186 }
187 } else {
188 /*
189 * The anomalies must be consecutives, so at this
190 * point, we reset the anomalies counter.
191 */
192 irqs->anomalies = 0;
193 }
194
195 /*
196 * The interrupt is considered stable enough to try to predict
197 * the next event on it.
198 */
199 irqs->valid = 1;
200
201 /*
202 * Online average algorithm:
203 *
204 * new_average = average + ((value - average) / count)
205 *
206 * The variance computation depends on the new average
207 * to be computed here first.
208 *
209 */
210 irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
211
212 /*
213 * Online variance algorithm:
214 *
215 * new_variance = variance + (value - average) x (value - new_average)
216 *
217 * Warning: irqs->avg is updated with the line above, hence
218 * 'interval - irqs->avg' is no longer equal to 'diff'
219 */
220 irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
221
222 /*
223 * Update the next event
224 */
225 irqs->next_evt = ts + irqs->avg;
226}
227
228/**
229 * irq_timings_next_event - Return when the next event is supposed to arrive
230 *
231 * During the last busy cycle, the number of interrupts is incremented
232 * and stored in the irq_timings structure. This information is
233 * necessary to:
234 *
235 * - know if the index in the table wrapped up:
236 *
237 * If more than the array size interrupts happened during the
238 * last busy/idle cycle, the index wrapped up and we have to
239 * begin with the next element in the array which is the last one
240 * in the sequence, otherwise it is a the index 0.
241 *
242 * - have an indication of the interrupts activity on this CPU
243 * (eg. irq/sec)
244 *
245 * The values are 'consumed' after inserting in the statistical model,
246 * thus the count is reinitialized.
247 *
248 * The array of values **must** be browsed in the time direction, the
249 * timestamp must increase between an element and the next one.
250 *
251 * Returns a nanosec time based estimation of the earliest interrupt,
252 * U64_MAX otherwise.
253 */
254u64 irq_timings_next_event(u64 now)
255{
256 struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
257 struct irqt_stat *irqs;
258 struct irqt_stat __percpu *s;
259 u64 ts, next_evt = U64_MAX;
260 int i, irq = 0;
261
262 /*
263 * This function must be called with the local irq disabled in
264 * order to prevent the timings circular buffer to be updated
265 * while we are reading it.
266 */
267 WARN_ON_ONCE(!irqs_disabled());
268
269 /*
270 * Number of elements in the circular buffer: If it happens it
271 * was flushed before, then the number of elements could be
272 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
273 * otherwise the array size is used as we wrapped. The index
274 * begins from zero when we did not wrap. That could be done
275 * in a nicer way with the proper circular array structure
276 * type but with the cost of extra computation in the
277 * interrupt handler hot path. We choose efficiency.
278 *
279 * Inject measured irq/timestamp to the statistical model
280 * while decrementing the counter because we consume the data
281 * from our circular buffer.
282 */
283 for (i = irqts->count & IRQ_TIMINGS_MASK,
284 irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
285 irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
286
287 irq = irq_timing_decode(irqts->values[i], &ts);
288
289 s = idr_find(&irqt_stats, irq);
290 if (s) {
291 irqs = this_cpu_ptr(s);
292 irqs_update(irqs, ts);
293 }
294 }
295
296 /*
297 * Look in the list of interrupts' statistics, the earliest
298 * next event.
299 */
300 idr_for_each_entry(&irqt_stats, s, i) {
301
302 irqs = this_cpu_ptr(s);
303
304 if (!irqs->valid)
305 continue;
306
307 if (irqs->next_evt <= now) {
308 irq = i;
309 next_evt = now;
310
311 /*
312 * This interrupt mustn't use in the future
313 * until new events occur and update the
314 * statistics.
315 */
316 irqs->valid = 0;
317 break;
318 }
319
320 if (irqs->next_evt < next_evt) {
321 irq = i;
322 next_evt = irqs->next_evt;
323 }
324 }
325
326 return next_evt;
327}
328
329void irq_timings_free(int irq)
330{
331 struct irqt_stat __percpu *s;
332
333 s = idr_find(&irqt_stats, irq);
334 if (s) {
335 free_percpu(s);
336 idr_remove(&irqt_stats, irq);
337 }
338}
339
340int irq_timings_alloc(int irq)
341{
342 struct irqt_stat __percpu *s;
343 int id;
344
345 /*
346 * Some platforms can have the same private interrupt per cpu,
347 * so this function may be be called several times with the
348 * same interrupt number. Just bail out in case the per cpu
349 * stat structure is already allocated.
350 */
351 s = idr_find(&irqt_stats, irq);
352 if (s)
353 return 0;
354
355 s = alloc_percpu(*s);
356 if (!s)
357 return -ENOMEM;
358
359 idr_preload(GFP_KERNEL);
360 id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
361 idr_preload_end();
362
363 if (id < 0) {
364 free_percpu(s);
365 return id;
366 }
367
368 return 0;
369}