/*
 * linux/kernel/timer.c
 *
 * Kernel internal timers
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
 *
 * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
 *            "A Kernel Model for Precision Timekeeping" by Dave Mills
 * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *            serialize accesses to xtime/lost_ticks).
 *            Copyright (C) 1998 Andrea Arcangeli
 * 1999-03-10 Improved NTP compatibility by Ulrich Windl
 * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
 * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
 *            Copyright (C) 2000, 2001, 2002 Ingo Molnar
 *            Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */

#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/slab.h>
#include <linux/compat.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * per-CPU timer vector definitions:
 */
#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))

struct tvec {
        struct list_head vec[TVN_SIZE];
};

struct tvec_root {
        struct list_head vec[TVR_SIZE];
};

struct tvec_base {
        spinlock_t lock;
        struct timer_list *running_timer;
        unsigned long timer_jiffies;
        unsigned long next_timer;
        unsigned long active_timers;
        unsigned long all_timers;
        int cpu;
        struct tvec_root tv1;
        struct tvec tv2;
        struct tvec tv3;
        struct tvec tv4;
        struct tvec tv5;
} ____cacheline_aligned;

/*
 * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
 * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
 * pointer to per-cpu entries because we don't know where we'll map the section,
 * even for the boot cpu.
 *
 * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
 * rest of them.
 */
struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);

static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);

static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

/* Functions below help us manage 'deferrable' flag */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
        return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
}

static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
{
        return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
}

static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
        return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
}

static inline void
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
        unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;

        timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
}

static unsigned long round_jiffies_common(unsigned long j, int cpu,
                bool force_up)
{
        int rem;
        unsigned long original = j;

        /*
         * We don't want all cpus firing their timers at once hitting the
         * same lock or cachelines, so we skew each extra cpu with an extra
         * 3 jiffies. This 3 jiffies came originally from the mm/ code which
         * already did this.
         * The skew is done by adding 3*cpunr, then round, then subtract this
         * extra offset again.
         */
        j += cpu * 3;

        rem = j % HZ;

        /*
         * If the target jiffie is just after a whole second (which can happen
         * due to delays of the timer irq, long irq off times etc etc) then
         * we should round down to the whole second, not up. Use 1/4th second
         * as cutoff for this rounding as an extreme upper bound for this.
         * But never round down if @force_up is set.
         */
        if (rem < HZ/4 && !force_up) /* round down */
                j = j - rem;
        else /* round up */
                j = j - rem + HZ;

        /* now that we have rounded, subtract the extra skew again */
        j -= cpu * 3;

        /*
         * Make sure j is still in the future. Otherwise return the
         * unmodified value.
         */
        return time_is_after_jiffies(j) ? j : original;
}

172/**
173 * __round_jiffies - function to round jiffies to a full second
174 * @j: the time in (absolute) jiffies that should be rounded
175 * @cpu: the processor number on which the timeout will happen
176 *
177 * __round_jiffies() rounds an absolute time in the future (in jiffies)
178 * up or down to (approximately) full seconds. This is useful for timers
179 * for which the exact time they fire does not matter too much, as long as
180 * they fire approximately every X seconds.
181 *
182 * By rounding these timers to whole seconds, all such timers will fire
183 * at the same time, rather than at various times spread out. The goal
184 * of this is to have the CPU wake up less, which saves power.
185 *
186 * The exact rounding is skewed for each processor to avoid all
187 * processors firing at the exact same time, which could lead
188 * to lock contention or spurious cache line bouncing.
189 *
190 * The return value is the rounded version of the @j parameter.
191 */
192unsigned long __round_jiffies(unsigned long j, int cpu)
193{
194 return round_jiffies_common(j, cpu, false);
195}
4c36a5de
AV
196EXPORT_SYMBOL_GPL(__round_jiffies);
197
198/**
199 * __round_jiffies_relative - function to round jiffies to a full second
200 * @j: the time in (relative) jiffies that should be rounded
201 * @cpu: the processor number on which the timeout will happen
202 *
72fd4a35 203 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
4c36a5de
AV
204 * up or down to (approximately) full seconds. This is useful for timers
205 * for which the exact time they fire does not matter too much, as long as
206 * they fire approximately every X seconds.
207 *
208 * By rounding these timers to whole seconds, all such timers will fire
209 * at the same time, rather than at various times spread out. The goal
210 * of this is to have the CPU wake up less, which saves power.
211 *
212 * The exact rounding is skewed for each processor to avoid all
213 * processors firing at the exact same time, which could lead
214 * to lock contention or spurious cache line bouncing.
215 *
72fd4a35 216 * The return value is the rounded version of the @j parameter.
4c36a5de
AV
217 */
218unsigned long __round_jiffies_relative(unsigned long j, int cpu)
219{
9c133c46
AS
220 unsigned long j0 = jiffies;
221
222 /* Use j0 because jiffies might change while we run */
223 return round_jiffies_common(j + j0, cpu, false) - j0;
4c36a5de
AV
224}
225EXPORT_SYMBOL_GPL(__round_jiffies_relative);
226
227/**
228 * round_jiffies - function to round jiffies to a full second
229 * @j: the time in (absolute) jiffies that should be rounded
230 *
72fd4a35 231 * round_jiffies() rounds an absolute time in the future (in jiffies)
4c36a5de
AV
232 * up or down to (approximately) full seconds. This is useful for timers
233 * for which the exact time they fire does not matter too much, as long as
234 * they fire approximately every X seconds.
235 *
236 * By rounding these timers to whole seconds, all such timers will fire
237 * at the same time, rather than at various times spread out. The goal
238 * of this is to have the CPU wake up less, which saves power.
239 *
72fd4a35 240 * The return value is the rounded version of the @j parameter.
4c36a5de
AV
241 */
242unsigned long round_jiffies(unsigned long j)
243{
9c133c46 244 return round_jiffies_common(j, raw_smp_processor_id(), false);
4c36a5de
AV
245}
246EXPORT_SYMBOL_GPL(round_jiffies);
247
248/**
249 * round_jiffies_relative - function to round jiffies to a full second
250 * @j: the time in (relative) jiffies that should be rounded
251 *
72fd4a35 252 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
4c36a5de
AV
253 * up or down to (approximately) full seconds. This is useful for timers
254 * for which the exact time they fire does not matter too much, as long as
255 * they fire approximately every X seconds.
256 *
257 * By rounding these timers to whole seconds, all such timers will fire
258 * at the same time, rather than at various times spread out. The goal
259 * of this is to have the CPU wake up less, which saves power.
260 *
72fd4a35 261 * The return value is the rounded version of the @j parameter.
4c36a5de
AV
262 */
263unsigned long round_jiffies_relative(unsigned long j)
264{
265 return __round_jiffies_relative(j, raw_smp_processor_id());
266}
267EXPORT_SYMBOL_GPL(round_jiffies_relative);
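/*
 * Illustrative example (not part of the original source): a driver that
 * rearms a coarse housekeeping timer roughly every two seconds can round
 * the relative delay so that wakeups from many such timers coalesce on
 * second boundaries. my_dev and my_housekeeping_timer are made-up names
 * for this sketch.
 *
 *      mod_timer(&my_dev->my_housekeeping_timer,
 *                jiffies + round_jiffies_relative(2 * HZ));
 *
 * The relative variant is used because the delay is computed relative to
 * the current jiffies value rather than as an absolute expiry time.
 */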
268
9c133c46
AS
269/**
270 * __round_jiffies_up - function to round jiffies up to a full second
271 * @j: the time in (absolute) jiffies that should be rounded
272 * @cpu: the processor number on which the timeout will happen
273 *
274 * This is the same as __round_jiffies() except that it will never
275 * round down. This is useful for timeouts for which the exact time
276 * of firing does not matter too much, as long as they don't fire too
277 * early.
278 */
279unsigned long __round_jiffies_up(unsigned long j, int cpu)
280{
281 return round_jiffies_common(j, cpu, true);
282}
283EXPORT_SYMBOL_GPL(__round_jiffies_up);
284
285/**
286 * __round_jiffies_up_relative - function to round jiffies up to a full second
287 * @j: the time in (relative) jiffies that should be rounded
288 * @cpu: the processor number on which the timeout will happen
289 *
290 * This is the same as __round_jiffies_relative() except that it will never
291 * round down. This is useful for timeouts for which the exact time
292 * of firing does not matter too much, as long as they don't fire too
293 * early.
294 */
295unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
296{
297 unsigned long j0 = jiffies;
298
299 /* Use j0 because jiffies might change while we run */
300 return round_jiffies_common(j + j0, cpu, true) - j0;
301}
302EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
303
304/**
305 * round_jiffies_up - function to round jiffies up to a full second
306 * @j: the time in (absolute) jiffies that should be rounded
307 *
308 * This is the same as round_jiffies() except that it will never
309 * round down. This is useful for timeouts for which the exact time
310 * of firing does not matter too much, as long as they don't fire too
311 * early.
312 */
313unsigned long round_jiffies_up(unsigned long j)
314{
315 return round_jiffies_common(j, raw_smp_processor_id(), true);
316}
317EXPORT_SYMBOL_GPL(round_jiffies_up);
318
319/**
320 * round_jiffies_up_relative - function to round jiffies up to a full second
321 * @j: the time in (relative) jiffies that should be rounded
322 *
323 * This is the same as round_jiffies_relative() except that it will never
324 * round down. This is useful for timeouts for which the exact time
325 * of firing does not matter too much, as long as they don't fire too
326 * early.
327 */
328unsigned long round_jiffies_up_relative(unsigned long j)
329{
330 return __round_jiffies_up_relative(j, raw_smp_processor_id());
331}
332EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
333
3bbb9ec9
AV
334/**
335 * set_timer_slack - set the allowed slack for a timer
0caa6210 336 * @timer: the timer to be modified
3bbb9ec9
AV
337 * @slack_hz: the amount of time (in jiffies) allowed for rounding
338 *
339 * Set the amount of time, in jiffies, that a certain timer has
340 * in terms of slack. By setting this value, the timer subsystem
341 * will schedule the actual timer somewhere between
342 * the time mod_timer() asks for, and that time plus the slack.
343 *
344 * By setting the slack to -1, a percentage of the delay is used
345 * instead.
346 */
347void set_timer_slack(struct timer_list *timer, int slack_hz)
348{
349 timer->slack = slack_hz;
350}
351EXPORT_SYMBOL_GPL(set_timer_slack);
352
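/*
 * Illustrative example (not part of the original source): a caller that can
 * tolerate its timeout firing up to a second late may widen the slack so
 * that the expiry can be batched with other timers. my_timer and my_timer_fn
 * are made-up names for this sketch.
 *
 *      setup_timer(&my_timer, my_timer_fn, 0);
 *      set_timer_slack(&my_timer, HZ);
 *      mod_timer(&my_timer, jiffies + 10 * HZ);
 *
 * Leaving the slack at its default of -1 instead lets apply_slack() below
 * pick roughly 0.4% (delta/256) of the remaining delay.
 */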
d550e81d
PM
353/*
354 * If the list is empty, catch up ->timer_jiffies to the current time.
355 * The caller must hold the tvec_base lock. Returns true if the list
356 * was empty and therefore ->timer_jiffies was updated.
357 */
358static bool catchup_timer_jiffies(struct tvec_base *base)
359{
360 if (!base->all_timers) {
361 base->timer_jiffies = jiffies;
362 return true;
363 }
364 return false;
365}
366
facbb4a7
TG
367static void
368__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
1da177e4
LT
369{
370 unsigned long expires = timer->expires;
371 unsigned long idx = expires - base->timer_jiffies;
372 struct list_head *vec;
373
374 if (idx < TVR_SIZE) {
375 int i = expires & TVR_MASK;
376 vec = base->tv1.vec + i;
377 } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
378 int i = (expires >> TVR_BITS) & TVN_MASK;
379 vec = base->tv2.vec + i;
380 } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
381 int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
382 vec = base->tv3.vec + i;
383 } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
384 int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
385 vec = base->tv4.vec + i;
386 } else if ((signed long) idx < 0) {
387 /*
388 * Can happen if you add a timer with expires == jiffies,
389 * or you set a timer to go off in the past
390 */
391 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
392 } else {
393 int i;
26cff4e2
HC
394 /* If the timeout is larger than MAX_TVAL (on 64-bit
395 * architectures or with CONFIG_BASE_SMALL=1) then we
396 * use the maximum timeout.
1da177e4 397 */
26cff4e2
HC
398 if (idx > MAX_TVAL) {
399 idx = MAX_TVAL;
1da177e4
LT
400 expires = idx + base->timer_jiffies;
401 }
402 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
403 vec = base->tv5.vec + i;
404 }
405 /*
406 * Timers are FIFO:
407 */
408 list_add_tail(&timer->entry, vec);
409}
410
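/*
 * Worked example (added for illustration, not part of the original source),
 * assuming the default CONFIG_BASE_SMALL=0 geometry (TVR_BITS=8, TVN_BITS=6):
 * tv1 holds timers due within the next 256 jiffies, tv2 within 1<<14, tv3
 * within 1<<20, tv4 within 1<<26, and tv5 everything up to MAX_TVAL.
 *
 * With base->timer_jiffies == 1000 and timer->expires == 1300:
 *      idx = 1300 - 1000 = 300, so the timer belongs in tv2;
 *      i = (1300 >> TVR_BITS) & TVN_MASK = (1300 >> 8) & 63 = 5;
 * the timer is queued on base->tv2.vec[5] and only lands in an exact tv1
 * slot once that tv2 bucket is cascaded down.
 */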
facbb4a7
TG
411static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
412{
18d8cb64 413 (void)catchup_timer_jiffies(base);
facbb4a7
TG
414 __internal_add_timer(base, timer);
415 /*
99d5f3aa 416 * Update base->active_timers and base->next_timer
facbb4a7 417 */
99d5f3aa 418 if (!tbase_get_deferrable(timer->base)) {
aea369b9
ON
419 if (!base->active_timers++ ||
420 time_before(timer->expires, base->next_timer))
99d5f3aa 421 base->next_timer = timer->expires;
99d5f3aa 422 }
fff42158 423 base->all_timers++;
9f6d9baa
VK
424
425 /*
426 * Check whether the other CPU is in dynticks mode and needs
427 * to be triggered to reevaluate the timer wheel.
428 * We are protected against the other CPU fiddling
429 * with the timer by holding the timer base lock. This also
430 * makes sure that a CPU on the way to stop its tick can not
431 * evaluate the timer wheel.
432 *
433 * Spare the IPI for deferrable timers on idle targets though.
434 * The next busy ticks will take care of it. Except full dynticks
435 * require special care against races with idle_cpu(), lets deal
436 * with that later.
437 */
438 if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu))
439 wake_up_nohz_cpu(base->cpu);
facbb4a7
TG
440}
441
82f67cd9
IM
442#ifdef CONFIG_TIMER_STATS
443void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
444{
445 if (timer->start_site)
446 return;
447
448 timer->start_site = addr;
449 memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
450 timer->start_pid = current->pid;
451}
c5c061b8
VP
452
453static void timer_stats_account_timer(struct timer_list *timer)
454{
455 unsigned int flag = 0;
456
507e1231
HC
457 if (likely(!timer->start_site))
458 return;
c5c061b8
VP
459 if (unlikely(tbase_get_deferrable(timer->base)))
460 flag |= TIMER_STATS_FLAG_DEFERRABLE;
461
462 timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
463 timer->function, timer->start_comm, flag);
464}
465
466#else
467static void timer_stats_account_timer(struct timer_list *timer) {}
82f67cd9
IM
468#endif
469
c6f3a97f
TG
470#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
471
472static struct debug_obj_descr timer_debug_descr;
473
99777288
SG
474static void *timer_debug_hint(void *addr)
475{
476 return ((struct timer_list *) addr)->function;
477}
478
c6f3a97f
TG
479/*
480 * fixup_init is called when:
481 * - an active object is initialized
55c888d6 482 */
c6f3a97f
TG
483static int timer_fixup_init(void *addr, enum debug_obj_state state)
484{
485 struct timer_list *timer = addr;
486
487 switch (state) {
488 case ODEBUG_STATE_ACTIVE:
489 del_timer_sync(timer);
490 debug_object_init(timer, &timer_debug_descr);
491 return 1;
492 default:
493 return 0;
494 }
495}
496
fb16b8cf
SB
497/* Stub timer callback for improperly used timers. */
498static void stub_timer(unsigned long data)
499{
500 WARN_ON(1);
501}
502
c6f3a97f
TG
503/*
504 * fixup_activate is called when:
505 * - an active object is activated
506 * - an unknown object is activated (might be a statically initialized object)
507 */
508static int timer_fixup_activate(void *addr, enum debug_obj_state state)
509{
510 struct timer_list *timer = addr;
511
512 switch (state) {
513
514 case ODEBUG_STATE_NOTAVAILABLE:
515 /*
516 * This is not really a fixup. The timer was
517 * statically initialized. We just make sure that it
518 * is tracked in the object tracker.
519 */
520 if (timer->entry.next == NULL &&
521 timer->entry.prev == TIMER_ENTRY_STATIC) {
522 debug_object_init(timer, &timer_debug_descr);
523 debug_object_activate(timer, &timer_debug_descr);
524 return 0;
525 } else {
fb16b8cf
SB
526 setup_timer(timer, stub_timer, 0);
527 return 1;
c6f3a97f
TG
528 }
529 return 0;
530
531 case ODEBUG_STATE_ACTIVE:
532 WARN_ON(1);
533
534 default:
535 return 0;
536 }
537}
538
539/*
540 * fixup_free is called when:
541 * - an active object is freed
542 */
543static int timer_fixup_free(void *addr, enum debug_obj_state state)
544{
545 struct timer_list *timer = addr;
546
547 switch (state) {
548 case ODEBUG_STATE_ACTIVE:
549 del_timer_sync(timer);
550 debug_object_free(timer, &timer_debug_descr);
551 return 1;
552 default:
553 return 0;
554 }
555}
556
dc4218bd
CC
557/*
558 * fixup_assert_init is called when:
559 * - an untracked/uninit-ed object is found
560 */
561static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
562{
563 struct timer_list *timer = addr;
564
565 switch (state) {
566 case ODEBUG_STATE_NOTAVAILABLE:
567 if (timer->entry.prev == TIMER_ENTRY_STATIC) {
568 /*
569 * This is not really a fixup. The timer was
570 * statically initialized. We just make sure that it
571 * is tracked in the object tracker.
572 */
573 debug_object_init(timer, &timer_debug_descr);
574 return 0;
575 } else {
576 setup_timer(timer, stub_timer, 0);
577 return 1;
578 }
579 default:
580 return 0;
581 }
582}
583
c6f3a97f 584static struct debug_obj_descr timer_debug_descr = {
dc4218bd
CC
585 .name = "timer_list",
586 .debug_hint = timer_debug_hint,
587 .fixup_init = timer_fixup_init,
588 .fixup_activate = timer_fixup_activate,
589 .fixup_free = timer_fixup_free,
590 .fixup_assert_init = timer_fixup_assert_init,
c6f3a97f
TG
591};
592
593static inline void debug_timer_init(struct timer_list *timer)
594{
595 debug_object_init(timer, &timer_debug_descr);
596}
597
598static inline void debug_timer_activate(struct timer_list *timer)
599{
600 debug_object_activate(timer, &timer_debug_descr);
601}
602
603static inline void debug_timer_deactivate(struct timer_list *timer)
604{
605 debug_object_deactivate(timer, &timer_debug_descr);
606}
607
608static inline void debug_timer_free(struct timer_list *timer)
609{
610 debug_object_free(timer, &timer_debug_descr);
611}
612
dc4218bd
CC
613static inline void debug_timer_assert_init(struct timer_list *timer)
614{
615 debug_object_assert_init(timer, &timer_debug_descr);
616}
617
fc683995
TH
618static void do_init_timer(struct timer_list *timer, unsigned int flags,
619 const char *name, struct lock_class_key *key);
c6f3a97f 620
fc683995
TH
621void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
622 const char *name, struct lock_class_key *key)
c6f3a97f
TG
623{
624 debug_object_init_on_stack(timer, &timer_debug_descr);
fc683995 625 do_init_timer(timer, flags, name, key);
c6f3a97f 626}
6f2b9b9a 627EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
c6f3a97f
TG
628
629void destroy_timer_on_stack(struct timer_list *timer)
630{
631 debug_object_free(timer, &timer_debug_descr);
632}
633EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
634
635#else
636static inline void debug_timer_init(struct timer_list *timer) { }
637static inline void debug_timer_activate(struct timer_list *timer) { }
638static inline void debug_timer_deactivate(struct timer_list *timer) { }
dc4218bd 639static inline void debug_timer_assert_init(struct timer_list *timer) { }
c6f3a97f
TG
640#endif
641
2b022e3d
XG
642static inline void debug_init(struct timer_list *timer)
643{
644 debug_timer_init(timer);
645 trace_timer_init(timer);
646}
647
648static inline void
649debug_activate(struct timer_list *timer, unsigned long expires)
650{
651 debug_timer_activate(timer);
652 trace_timer_start(timer, expires);
653}
654
655static inline void debug_deactivate(struct timer_list *timer)
656{
657 debug_timer_deactivate(timer);
658 trace_timer_cancel(timer);
659}
660
dc4218bd
CC
661static inline void debug_assert_init(struct timer_list *timer)
662{
663 debug_timer_assert_init(timer);
664}
665
fc683995
TH
666static void do_init_timer(struct timer_list *timer, unsigned int flags,
667 const char *name, struct lock_class_key *key)
55c888d6 668{
22127e93 669 struct tvec_base *base = raw_cpu_read(tvec_bases);
fc683995 670
55c888d6 671 timer->entry.next = NULL;
fc683995 672 timer->base = (void *)((unsigned long)base | flags);
3bbb9ec9 673 timer->slack = -1;
82f67cd9
IM
674#ifdef CONFIG_TIMER_STATS
675 timer->start_site = NULL;
676 timer->start_pid = -1;
677 memset(timer->start_comm, 0, TASK_COMM_LEN);
678#endif
6f2b9b9a 679 lockdep_init_map(&timer->lockdep_map, name, key, 0);
55c888d6 680}
c6f3a97f
TG
681
682/**
633fe795 683 * init_timer_key - initialize a timer
c6f3a97f 684 * @timer: the timer to be initialized
fc683995 685 * @flags: timer flags
633fe795
RD
686 * @name: name of the timer
687 * @key: lockdep class key of the fake lock used for tracking timer
688 * sync lock dependencies
c6f3a97f 689 *
633fe795 690 * init_timer_key() must be done to a timer prior calling *any* of the
c6f3a97f
TG
691 * other timer functions.
692 */
fc683995
TH
693void init_timer_key(struct timer_list *timer, unsigned int flags,
694 const char *name, struct lock_class_key *key)
c6f3a97f 695{
2b022e3d 696 debug_init(timer);
fc683995 697 do_init_timer(timer, flags, name, key);
c6f3a97f 698}
6f2b9b9a 699EXPORT_SYMBOL(init_timer_key);
55c888d6 700
ec44bc7a 701static inline void detach_timer(struct timer_list *timer, bool clear_pending)
55c888d6
ON
702{
703 struct list_head *entry = &timer->entry;
704
2b022e3d 705 debug_deactivate(timer);
c6f3a97f 706
55c888d6
ON
707 __list_del(entry->prev, entry->next);
708 if (clear_pending)
709 entry->next = NULL;
710 entry->prev = LIST_POISON2;
711}
712
99d5f3aa
TG
713static inline void
714detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
715{
716 detach_timer(timer, true);
717 if (!tbase_get_deferrable(timer->base))
e52b1db3 718 base->active_timers--;
fff42158 719 base->all_timers--;
16d937f8 720 (void)catchup_timer_jiffies(base);
99d5f3aa
TG
721}
722
ec44bc7a
TG
723static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
724 bool clear_pending)
725{
726 if (!timer_pending(timer))
727 return 0;
728
729 detach_timer(timer, clear_pending);
99d5f3aa 730 if (!tbase_get_deferrable(timer->base)) {
e52b1db3 731 base->active_timers--;
99d5f3aa
TG
732 if (timer->expires == base->next_timer)
733 base->next_timer = base->timer_jiffies;
734 }
fff42158 735 base->all_timers--;
16d937f8 736 (void)catchup_timer_jiffies(base);
ec44bc7a
TG
737 return 1;
738}
739
55c888d6 740/*
3691c519 741 * We are using hashed locking: holding per_cpu(tvec_bases).lock
55c888d6
ON
742 * means that all timers which are tied to this base via timer->base are
743 * locked, and the base itself is locked too.
744 *
745 * So __run_timers/migrate_timers can safely modify all timers which could
746 * be found on ->tvX lists.
747 *
748 * When the timer's base is locked, and the timer removed from list, it is
749 * possible to set timer->base = NULL and drop the lock: the timer remains
750 * locked.
751 */
a6fa8e5a 752static struct tvec_base *lock_timer_base(struct timer_list *timer,
55c888d6 753 unsigned long *flags)
89e7e374 754 __acquires(timer->base->lock)
55c888d6 755{
a6fa8e5a 756 struct tvec_base *base;
55c888d6
ON
757
758 for (;;) {
a6fa8e5a 759 struct tvec_base *prelock_base = timer->base;
6e453a67 760 base = tbase_get_base(prelock_base);
55c888d6
ON
761 if (likely(base != NULL)) {
762 spin_lock_irqsave(&base->lock, *flags);
6e453a67 763 if (likely(prelock_base == timer->base))
55c888d6
ON
764 return base;
765 /* The timer has migrated to another CPU */
766 spin_unlock_irqrestore(&base->lock, *flags);
767 }
768 cpu_relax();
769 }
770}
771
74019224 772static inline int
597d0275
AB
773__mod_timer(struct timer_list *timer, unsigned long expires,
774 bool pending_only, int pinned)
1da177e4 775{
a6fa8e5a 776 struct tvec_base *base, *new_base;
1da177e4 777 unsigned long flags;
eea08f32 778 int ret = 0 , cpu;
1da177e4 779
82f67cd9 780 timer_stats_timer_set_start_info(timer);
1da177e4 781 BUG_ON(!timer->function);
1da177e4 782
55c888d6
ON
783 base = lock_timer_base(timer, &flags);
784
ec44bc7a
TG
785 ret = detach_if_pending(timer, base, false);
786 if (!ret && pending_only)
787 goto out_unlock;
55c888d6 788
2b022e3d 789 debug_activate(timer, expires);
c6f3a97f 790
6201b4d6 791 cpu = get_nohz_timer_target(pinned);
eea08f32
AB
792 new_base = per_cpu(tvec_bases, cpu);
793
3691c519 794 if (base != new_base) {
1da177e4 795 /*
55c888d6
ON
796 * We are trying to schedule the timer on the local CPU.
797 * However we can't change timer's base while it is running,
798 * otherwise del_timer_sync() can't detect that the timer's
799 * handler yet has not finished. This also guarantees that
800 * the timer is serialized wrt itself.
1da177e4 801 */
a2c348fe 802 if (likely(base->running_timer != timer)) {
55c888d6 803 /* See the comment in lock_timer_base() */
6e453a67 804 timer_set_base(timer, NULL);
55c888d6 805 spin_unlock(&base->lock);
a2c348fe
ON
806 base = new_base;
807 spin_lock(&base->lock);
6e453a67 808 timer_set_base(timer, base);
1da177e4
LT
809 }
810 }
811
1da177e4 812 timer->expires = expires;
a2c348fe 813 internal_add_timer(base, timer);
74019224
IM
814
815out_unlock:
a2c348fe 816 spin_unlock_irqrestore(&base->lock, flags);
1da177e4
LT
817
818 return ret;
819}
820
2aae4a10 821/**
74019224
IM
822 * mod_timer_pending - modify a pending timer's timeout
823 * @timer: the pending timer to be modified
824 * @expires: new timeout in jiffies
1da177e4 825 *
74019224
IM
826 * mod_timer_pending() is the same for pending timers as mod_timer(),
827 * but will not re-activate and modify already deleted timers.
828 *
829 * It is useful for unserialized use of timers.
1da177e4 830 */
74019224 831int mod_timer_pending(struct timer_list *timer, unsigned long expires)
1da177e4 832{
597d0275 833 return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
1da177e4 834}
74019224 835EXPORT_SYMBOL(mod_timer_pending);
1da177e4 836
/*
 * Decide where to put the timer while taking the slack into account
 *
 * Algorithm:
 * 1) calculate the maximum (absolute) time
 * 2) calculate the highest bit where the expires and new max are different
 * 3) use this bit to make a mask
 * 4) use the bitmask to round down the maximum time, so that all last
 *    bits are zeros
 */
static inline
unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
{
        unsigned long expires_limit, mask;
        int bit;

        if (timer->slack >= 0) {
                expires_limit = expires + timer->slack;
        } else {
                long delta = expires - jiffies;

                if (delta < 256)
                        return expires;

                expires_limit = expires + delta / 256;
        }
        mask = expires ^ expires_limit;
        if (mask == 0)
                return expires;

        bit = find_last_bit(&mask, BITS_PER_LONG);

        mask = (1UL << bit) - 1;

        expires_limit = expires_limit & ~(mask);

        return expires_limit;
}
875
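/*
 * Worked example (added for illustration, not part of the original source),
 * using the default slack of -1: with jiffies == 1000 and a requested
 * expiry of 11000, delta is 10000, so expires_limit becomes
 * 11000 + 10000/256 = 11039. The highest bit differing between 11000 and
 * 11039 is bit 8, so the low 8 bits are cleared and apply_slack() returns
 * 11008: the timer may fire up to ~0.4% late, at a value whose low bits
 * are zero so that nearby timers tend to expire together.
 */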
/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
        expires = apply_slack(timer, expires);

        /*
         * This is a common optimization triggered by the
         * networking code - if the timer is re-modified
         * to be the same thing then just return:
         */
        if (timer_pending(timer) && timer->expires == expires)
                return 1;

        return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);
AB
912/**
913 * mod_timer_pinned - modify a timer's timeout
914 * @timer: the timer to be modified
915 * @expires: new timeout in jiffies
916 *
917 * mod_timer_pinned() is a way to update the expire field of an
918 * active timer (if the timer is inactive it will be activated)
048a0e8f
PM
919 * and to ensure that the timer is scheduled on the current CPU.
920 *
921 * Note that this does not prevent the timer from being migrated
922 * when the current CPU goes offline. If this is a problem for
923 * you, use CPU-hotplug notifiers to handle it correctly, for
924 * example, cancelling the timer when the corresponding CPU goes
925 * offline.
597d0275
AB
926 *
927 * mod_timer_pinned(timer, expires) is equivalent to:
928 *
929 * del_timer(timer); timer->expires = expires; add_timer(timer);
930 */
931int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
932{
933 if (timer->expires == expires && timer_pending(timer))
934 return 1;
935
936 return __mod_timer(timer, expires, false, TIMER_PINNED);
937}
938EXPORT_SYMBOL(mod_timer_pinned);
939
74019224
IM
940/**
941 * add_timer - start a timer
942 * @timer: the timer to be added
943 *
944 * The kernel will do a ->function(->data) callback from the
945 * timer interrupt at the ->expires point in the future. The
946 * current time is 'jiffies'.
947 *
948 * The timer's ->expires, ->function (and if the handler uses it, ->data)
949 * fields must be set prior calling this function.
950 *
951 * Timers with an ->expires field in the past will be executed in the next
952 * timer tick.
953 */
954void add_timer(struct timer_list *timer)
955{
956 BUG_ON(timer_pending(timer));
957 mod_timer(timer, timer->expires);
958}
959EXPORT_SYMBOL(add_timer);
960
961/**
962 * add_timer_on - start a timer on a particular CPU
963 * @timer: the timer to be added
964 * @cpu: the CPU to start it on
965 *
966 * This is not very scalable on SMP. Double adds are not possible.
967 */
968void add_timer_on(struct timer_list *timer, int cpu)
969{
970 struct tvec_base *base = per_cpu(tvec_bases, cpu);
971 unsigned long flags;
972
973 timer_stats_timer_set_start_info(timer);
974 BUG_ON(timer_pending(timer) || !timer->function);
975 spin_lock_irqsave(&base->lock, flags);
976 timer_set_base(timer, base);
2b022e3d 977 debug_activate(timer, timer->expires);
74019224 978 internal_add_timer(base, timer);
74019224
IM
979 spin_unlock_irqrestore(&base->lock, flags);
980}
a9862e05 981EXPORT_SYMBOL_GPL(add_timer_on);
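/*
 * Illustrative example (not part of the original source): arming a timer on
 * a specific CPU, e.g. for per-CPU housekeeping. my_cpu_timer and
 * my_cpu_timer_fn are made-up names for this sketch.
 *
 *      setup_timer(&my_cpu_timer, my_cpu_timer_fn, 0);
 *      my_cpu_timer.expires = jiffies + HZ;
 *      add_timer_on(&my_cpu_timer, cpu);
 *
 * The timer is queued directly on the requested CPU's tvec_base; as noted
 * above, double adds are not allowed.
 */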
74019224 982
2aae4a10 983/**
1da177e4
LT
 984 * del_timer - deactivate a timer.
985 * @timer: the timer to be deactivated
986 *
987 * del_timer() deactivates a timer - this works on both active and inactive
988 * timers.
989 *
990 * The function returns whether it has deactivated a pending timer or not.
991 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
992 * active timer returns 1.)
993 */
994int del_timer(struct timer_list *timer)
995{
a6fa8e5a 996 struct tvec_base *base;
1da177e4 997 unsigned long flags;
55c888d6 998 int ret = 0;
1da177e4 999
dc4218bd
CC
1000 debug_assert_init(timer);
1001
82f67cd9 1002 timer_stats_timer_clear_start_info(timer);
55c888d6
ON
1003 if (timer_pending(timer)) {
1004 base = lock_timer_base(timer, &flags);
ec44bc7a 1005 ret = detach_if_pending(timer, base, true);
1da177e4 1006 spin_unlock_irqrestore(&base->lock, flags);
1da177e4 1007 }
1da177e4 1008
55c888d6 1009 return ret;
1da177e4 1010}
1da177e4
LT
1011EXPORT_SYMBOL(del_timer);
1012
2aae4a10
REB
1013/**
1014 * try_to_del_timer_sync - Try to deactivate a timer
 1015 * @timer: the timer to deactivate
1016 *
fd450b73
ON
1017 * This function tries to deactivate a timer. Upon successful (ret >= 0)
1018 * exit the timer is not queued and the handler is not running on any CPU.
fd450b73
ON
1019 */
1020int try_to_del_timer_sync(struct timer_list *timer)
1021{
a6fa8e5a 1022 struct tvec_base *base;
fd450b73
ON
1023 unsigned long flags;
1024 int ret = -1;
1025
dc4218bd
CC
1026 debug_assert_init(timer);
1027
fd450b73
ON
1028 base = lock_timer_base(timer, &flags);
1029
ec44bc7a
TG
1030 if (base->running_timer != timer) {
1031 timer_stats_timer_clear_start_info(timer);
1032 ret = detach_if_pending(timer, base, true);
fd450b73 1033 }
fd450b73
ON
1034 spin_unlock_irqrestore(&base->lock, flags);
1035
1036 return ret;
1037}
e19dff1f
DH
1038EXPORT_SYMBOL(try_to_del_timer_sync);
1039
6f1bc451 1040#ifdef CONFIG_SMP
2aae4a10 1041/**
1da177e4
LT
1042 * del_timer_sync - deactivate a timer and wait for the handler to finish.
1043 * @timer: the timer to be deactivated
1044 *
1045 * This function only differs from del_timer() on SMP: besides deactivating
1046 * the timer it also makes sure the handler has finished executing on other
1047 * CPUs.
1048 *
72fd4a35 1049 * Synchronization rules: Callers must prevent restarting of the timer,
1da177e4 1050 * otherwise this function is meaningless. It must not be called from
c5f66e99
TH
1051 * interrupt contexts unless the timer is an irqsafe one. The caller must
1052 * not hold locks which would prevent completion of the timer's
1053 * handler. The timer's handler must not call add_timer_on(). Upon exit the
1054 * timer is not queued and the handler is not running on any CPU.
1da177e4 1055 *
c5f66e99
TH
1056 * Note: For !irqsafe timers, you must not hold locks that are held in
1057 * interrupt context while calling this function. Even if the lock has
1058 * nothing to do with the timer in question. Here's why:
48228f7b
SR
1059 *
1060 * CPU0 CPU1
1061 * ---- ----
1062 * <SOFTIRQ>
1063 * call_timer_fn();
1064 * base->running_timer = mytimer;
1065 * spin_lock_irq(somelock);
1066 * <IRQ>
1067 * spin_lock(somelock);
1068 * del_timer_sync(mytimer);
1069 * while (base->running_timer == mytimer);
1070 *
1071 * Now del_timer_sync() will never return and never release somelock.
1072 * The interrupt on the other CPU is waiting to grab somelock but
1073 * it has interrupted the softirq that CPU0 is waiting to finish.
1074 *
1da177e4 1075 * The function returns whether it has deactivated a pending timer or not.
1da177e4
LT
1076 */
1077int del_timer_sync(struct timer_list *timer)
1078{
6f2b9b9a 1079#ifdef CONFIG_LOCKDEP
f266a511
PZ
1080 unsigned long flags;
1081
48228f7b
SR
1082 /*
1083 * If lockdep gives a backtrace here, please reference
1084 * the synchronization rules above.
1085 */
7ff20792 1086 local_irq_save(flags);
6f2b9b9a
JB
1087 lock_map_acquire(&timer->lockdep_map);
1088 lock_map_release(&timer->lockdep_map);
7ff20792 1089 local_irq_restore(flags);
6f2b9b9a 1090#endif
466bd303
YZ
1091 /*
1092 * don't use it in hardirq context, because it
1093 * could lead to deadlock.
1094 */
c5f66e99 1095 WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
fd450b73
ON
1096 for (;;) {
1097 int ret = try_to_del_timer_sync(timer);
1098 if (ret >= 0)
1099 return ret;
a0009652 1100 cpu_relax();
fd450b73 1101 }
1da177e4 1102}
55c888d6 1103EXPORT_SYMBOL(del_timer_sync);
1da177e4
LT
1104#endif
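/*
 * Illustrative example (not part of the original source): typical teardown
 * order in a driver's remove path, assuming the handler stops rearming
 * itself once told to shut down. my_dev and free_my_resources are made-up
 * names for this sketch.
 *
 *      dev->shutting_down = true;              prevent further rearming
 *      del_timer_sync(&dev->timer);            wait out a running handler
 *      free_my_resources(dev);                 now safe to free
 *
 * Per the rules above, this must not be called from interrupt context for
 * a !irqsafe timer, nor while holding a lock the handler may take.
 */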
1105
a6fa8e5a 1106static int cascade(struct tvec_base *base, struct tvec *tv, int index)
1da177e4
LT
1107{
1108 /* cascade all the timers from tv up one level */
3439dd86
P
1109 struct timer_list *timer, *tmp;
1110 struct list_head tv_list;
1111
1112 list_replace_init(tv->vec + index, &tv_list);
1da177e4 1113
1da177e4 1114 /*
3439dd86
P
1115 * We are removing _all_ timers from the list, so we
1116 * don't have to detach them individually.
1da177e4 1117 */
3439dd86 1118 list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
6e453a67 1119 BUG_ON(tbase_get_base(timer->base) != base);
facbb4a7
TG
1120 /* No accounting, while moving them */
1121 __internal_add_timer(base, timer);
1da177e4 1122 }
1da177e4
LT
1123
1124 return index;
1125}
1126
576da126
TG
1127static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1128 unsigned long data)
1129{
4a2b4b22 1130 int count = preempt_count();
576da126
TG
1131
1132#ifdef CONFIG_LOCKDEP
1133 /*
1134 * It is permissible to free the timer from inside the
1135 * function that is called from it, this we need to take into
1136 * account for lockdep too. To avoid bogus "held lock freed"
1137 * warnings as well as problems when looking into
1138 * timer->lockdep_map, make a copy and use that here.
1139 */
4d82a1de
PZ
1140 struct lockdep_map lockdep_map;
1141
1142 lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
576da126
TG
1143#endif
1144 /*
1145 * Couple the lock chain with the lock chain at
1146 * del_timer_sync() by acquiring the lock_map around the fn()
1147 * call here and in del_timer_sync().
1148 */
1149 lock_map_acquire(&lockdep_map);
1150
1151 trace_timer_expire_entry(timer);
1152 fn(data);
1153 trace_timer_expire_exit(timer);
1154
1155 lock_map_release(&lockdep_map);
1156
4a2b4b22 1157 if (count != preempt_count()) {
802702e0 1158 WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
4a2b4b22 1159 fn, count, preempt_count());
802702e0
TG
1160 /*
1161 * Restore the preempt count. That gives us a decent
1162 * chance to survive and extract information. If the
1163 * callback kept a lock held, bad luck, but not worse
1164 * than the BUG() we had.
1165 */
4a2b4b22 1166 preempt_count_set(count);
576da126
TG
1167 }
1168}
1169
2aae4a10
REB
1170#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
1171
1172/**
1da177e4
LT
1173 * __run_timers - run all expired timers (if any) on this CPU.
1174 * @base: the timer vector to be processed.
1175 *
1176 * This function cascades all vectors and executes all expired timer
1177 * vectors.
1178 */
a6fa8e5a 1179static inline void __run_timers(struct tvec_base *base)
1da177e4
LT
1180{
1181 struct timer_list *timer;
1182
3691c519 1183 spin_lock_irq(&base->lock);
d550e81d
PM
1184 if (catchup_timer_jiffies(base)) {
1185 spin_unlock_irq(&base->lock);
1186 return;
1187 }
1da177e4 1188 while (time_after_eq(jiffies, base->timer_jiffies)) {
626ab0e6 1189 struct list_head work_list;
1da177e4 1190 struct list_head *head = &work_list;
6819457d 1191 int index = base->timer_jiffies & TVR_MASK;
626ab0e6 1192
1da177e4
LT
1193 /*
1194 * Cascade timers:
1195 */
1196 if (!index &&
1197 (!cascade(base, &base->tv2, INDEX(0))) &&
1198 (!cascade(base, &base->tv3, INDEX(1))) &&
1199 !cascade(base, &base->tv4, INDEX(2)))
1200 cascade(base, &base->tv5, INDEX(3));
626ab0e6 1201 ++base->timer_jiffies;
c41eba7d 1202 list_replace_init(base->tv1.vec + index, head);
55c888d6 1203 while (!list_empty(head)) {
1da177e4
LT
1204 void (*fn)(unsigned long);
1205 unsigned long data;
c5f66e99 1206 bool irqsafe;
1da177e4 1207
b5e61818 1208 timer = list_first_entry(head, struct timer_list,entry);
6819457d
TG
1209 fn = timer->function;
1210 data = timer->data;
c5f66e99 1211 irqsafe = tbase_get_irqsafe(timer->base);
1da177e4 1212
82f67cd9
IM
1213 timer_stats_account_timer(timer);
1214
6f1bc451 1215 base->running_timer = timer;
99d5f3aa 1216 detach_expired_timer(timer, base);
6f2b9b9a 1217
c5f66e99
TH
1218 if (irqsafe) {
1219 spin_unlock(&base->lock);
1220 call_timer_fn(timer, fn, data);
1221 spin_lock(&base->lock);
1222 } else {
1223 spin_unlock_irq(&base->lock);
1224 call_timer_fn(timer, fn, data);
1225 spin_lock_irq(&base->lock);
1226 }
1da177e4
LT
1227 }
1228 }
6f1bc451 1229 base->running_timer = NULL;
3691c519 1230 spin_unlock_irq(&base->lock);
1da177e4
LT
1231}
1232
3451d024 1233#ifdef CONFIG_NO_HZ_COMMON
1da177e4
LT
1234/*
1235 * Find out when the next timer event is due to happen. This
90cba64a
RD
1236 * is used on S/390 to stop all activity when a CPU is idle.
1237 * This function needs to be called with interrupts disabled.
1da177e4 1238 */
a6fa8e5a 1239static unsigned long __next_timer_interrupt(struct tvec_base *base)
1da177e4 1240{
1cfd6849 1241 unsigned long timer_jiffies = base->timer_jiffies;
eaad084b 1242 unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
1cfd6849 1243 int index, slot, array, found = 0;
1da177e4 1244 struct timer_list *nte;
a6fa8e5a 1245 struct tvec *varray[4];
1da177e4
LT
1246
1247 /* Look for timer events in tv1. */
1cfd6849 1248 index = slot = timer_jiffies & TVR_MASK;
1da177e4 1249 do {
1cfd6849 1250 list_for_each_entry(nte, base->tv1.vec + slot, entry) {
6819457d
TG
1251 if (tbase_get_deferrable(nte->base))
1252 continue;
6e453a67 1253
1cfd6849 1254 found = 1;
1da177e4 1255 expires = nte->expires;
1cfd6849
TG
1256 /* Look at the cascade bucket(s)? */
1257 if (!index || slot < index)
1258 goto cascade;
1259 return expires;
1da177e4 1260 }
1cfd6849
TG
1261 slot = (slot + 1) & TVR_MASK;
1262 } while (slot != index);
1263
1264cascade:
1265 /* Calculate the next cascade event */
1266 if (index)
1267 timer_jiffies += TVR_SIZE - index;
1268 timer_jiffies >>= TVR_BITS;
1da177e4
LT
1269
1270 /* Check tv2-tv5. */
1271 varray[0] = &base->tv2;
1272 varray[1] = &base->tv3;
1273 varray[2] = &base->tv4;
1274 varray[3] = &base->tv5;
1cfd6849
TG
1275
1276 for (array = 0; array < 4; array++) {
a6fa8e5a 1277 struct tvec *varp = varray[array];
1cfd6849
TG
1278
1279 index = slot = timer_jiffies & TVN_MASK;
1da177e4 1280 do {
1cfd6849 1281 list_for_each_entry(nte, varp->vec + slot, entry) {
a0419888
JH
1282 if (tbase_get_deferrable(nte->base))
1283 continue;
1284
1cfd6849 1285 found = 1;
1da177e4
LT
1286 if (time_before(nte->expires, expires))
1287 expires = nte->expires;
1cfd6849
TG
1288 }
1289 /*
1290 * Do we still search for the first timer or are
1291 * we looking up the cascade buckets ?
1292 */
1293 if (found) {
1294 /* Look at the cascade bucket(s)? */
1295 if (!index || slot < index)
1296 break;
1297 return expires;
1298 }
1299 slot = (slot + 1) & TVN_MASK;
1300 } while (slot != index);
1301
1302 if (index)
1303 timer_jiffies += TVN_SIZE - index;
1304 timer_jiffies >>= TVN_BITS;
1da177e4 1305 }
1cfd6849
TG
1306 return expires;
1307}
69239749 1308
1cfd6849
TG
1309/*
1310 * Check, if the next hrtimer event is before the next timer wheel
1311 * event:
1312 */
1313static unsigned long cmp_next_hrtimer_event(unsigned long now,
1314 unsigned long expires)
1315{
1316 ktime_t hr_delta = hrtimer_get_next_event();
1317 struct timespec tsdelta;
9501b6cf 1318 unsigned long delta;
1cfd6849
TG
1319
1320 if (hr_delta.tv64 == KTIME_MAX)
1321 return expires;
0662b713 1322
9501b6cf
TG
1323 /*
1324 * Expired timer available, let it expire in the next tick
1325 */
1326 if (hr_delta.tv64 <= 0)
1327 return now + 1;
69239749 1328
1cfd6849 1329 tsdelta = ktime_to_timespec(hr_delta);
9501b6cf 1330 delta = timespec_to_jiffies(&tsdelta);
eaad084b
TG
1331
1332 /*
1333 * Limit the delta to the max value, which is checked in
1334 * tick_nohz_stop_sched_tick():
1335 */
1336 if (delta > NEXT_TIMER_MAX_DELTA)
1337 delta = NEXT_TIMER_MAX_DELTA;
1338
9501b6cf
TG
1339 /*
1340 * Take rounding errors in to account and make sure, that it
1341 * expires in the next tick. Otherwise we go into an endless
1342 * ping pong due to tick_nohz_stop_sched_tick() retriggering
1343 * the timer softirq
1344 */
1345 if (delta < 1)
1346 delta = 1;
1347 now += delta;
1cfd6849
TG
1348 if (time_before(now, expires))
1349 return now;
1da177e4
LT
1350 return expires;
1351}
1cfd6849
TG
1352
1353/**
8dce39c2 1354 * get_next_timer_interrupt - return the jiffy of the next pending timer
05fb6bf0 1355 * @now: current time (in jiffies)
1cfd6849 1356 */
fd064b9b 1357unsigned long get_next_timer_interrupt(unsigned long now)
1cfd6849 1358{
7496351a 1359 struct tvec_base *base = __this_cpu_read(tvec_bases);
e40468a5 1360 unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
1cfd6849 1361
dbd87b5a
HC
1362 /*
1363 * Pretend that there is no timer pending if the cpu is offline.
1364 * Possible pending timers will be migrated later to an active cpu.
1365 */
1366 if (cpu_is_offline(smp_processor_id()))
e40468a5
TG
1367 return expires;
1368
1cfd6849 1369 spin_lock(&base->lock);
e40468a5
TG
1370 if (base->active_timers) {
1371 if (time_before_eq(base->next_timer, base->timer_jiffies))
1372 base->next_timer = __next_timer_interrupt(base);
1373 expires = base->next_timer;
1374 }
1cfd6849
TG
1375 spin_unlock(&base->lock);
1376
1377 if (time_before_eq(expires, now))
1378 return now;
1379
1380 return cmp_next_hrtimer_event(now, expires);
1381}
1da177e4
LT
1382#endif
1383
1da177e4 1384/*
5b4db0c2 1385 * Called from the timer interrupt handler to charge one tick to the current
1da177e4
LT
1386 * process. user_tick is 1 if the tick is user time, 0 for system.
1387 */
1388void update_process_times(int user_tick)
1389{
1390 struct task_struct *p = current;
1da177e4
LT
1391
1392 /* Note: this timer irq context must be accounted for as well. */
fa13a5a1 1393 account_process_tick(p, user_tick);
1da177e4 1394 run_local_timers();
c3377c2d 1395 rcu_check_callbacks(user_tick);
e360adbe
PZ
1396#ifdef CONFIG_IRQ_WORK
1397 if (in_irq())
76a33061 1398 irq_work_tick();
e360adbe 1399#endif
1da177e4 1400 scheduler_tick();
6819457d 1401 run_posix_cpu_timers(p);
1da177e4
LT
1402}
1403
1da177e4
LT
1404/*
1405 * This function runs timers and the timer-tq in bottom half context.
1406 */
1407static void run_timer_softirq(struct softirq_action *h)
1408{
7496351a 1409 struct tvec_base *base = __this_cpu_read(tvec_bases);
1da177e4 1410
d3d74453 1411 hrtimer_run_pending();
82f67cd9 1412
1da177e4
LT
1413 if (time_after_eq(jiffies, base->timer_jiffies))
1414 __run_timers(base);
1415}
1416
1417/*
1418 * Called by the local, per-CPU timer interrupt on SMP.
1419 */
1420void run_local_timers(void)
1421{
d3d74453 1422 hrtimer_run_queues();
1da177e4
LT
1423 raise_softirq(TIMER_SOFTIRQ);
1424}
1425
1da177e4
LT
1426#ifdef __ARCH_WANT_SYS_ALARM
1427
1428/*
1429 * For backwards compatibility? This can be done in libc so Alpha
1430 * and all newer ports shouldn't need it.
1431 */
58fd3aa2 1432SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1da177e4 1433{
c08b8a49 1434 return alarm_setitimer(seconds);
1da177e4
LT
1435}
1436
1437#endif
1438
1da177e4
LT
1439static void process_timeout(unsigned long __data)
1440{
36c8b586 1441 wake_up_process((struct task_struct *)__data);
1da177e4
LT
1442}
1443
1444/**
1445 * schedule_timeout - sleep until timeout
1446 * @timeout: timeout value in jiffies
1447 *
1448 * Make the current task sleep until @timeout jiffies have
1449 * elapsed. The routine will return immediately unless
1450 * the current task state has been set (see set_current_state()).
1451 *
1452 * You can set the task state as follows -
1453 *
1454 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1455 * pass before the routine returns. The routine will return 0
1456 *
1457 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1458 * delivered to the current task. In this case the remaining time
1459 * in jiffies will be returned, or 0 if the timer expired in time
1460 *
1461 * The current task state is guaranteed to be TASK_RUNNING when this
1462 * routine returns.
1463 *
1464 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1465 * the CPU away without a bound on the timeout. In this case the return
1466 * value will be %MAX_SCHEDULE_TIMEOUT.
1467 *
1468 * In all cases the return value is guaranteed to be non-negative.
1469 */
7ad5b3a5 1470signed long __sched schedule_timeout(signed long timeout)
1da177e4
LT
1471{
1472 struct timer_list timer;
1473 unsigned long expire;
1474
1475 switch (timeout)
1476 {
1477 case MAX_SCHEDULE_TIMEOUT:
1478 /*
1479 * These two special cases are useful to be comfortable
1480 * in the caller. Nothing more. We could take
1481 * MAX_SCHEDULE_TIMEOUT from one of the negative value
 1482 * but I'd like to return a valid offset (>=0) to allow
1483 * the caller to do everything it want with the retval.
1484 */
1485 schedule();
1486 goto out;
1487 default:
1488 /*
1489 * Another bit of PARANOID. Note that the retval will be
1490 * 0 since no piece of kernel is supposed to do a check
1491 * for a negative retval of schedule_timeout() (since it
 1492 * should never happen anyway). You just have the printk()
 1493 * that will tell you if something has gone wrong and where.
1494 */
5b149bcc 1495 if (timeout < 0) {
1da177e4 1496 printk(KERN_ERR "schedule_timeout: wrong timeout "
5b149bcc
AM
1497 "value %lx\n", timeout);
1498 dump_stack();
1da177e4
LT
1499 current->state = TASK_RUNNING;
1500 goto out;
1501 }
1502 }
1503
1504 expire = timeout + jiffies;
1505
c6f3a97f 1506 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
597d0275 1507 __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
1da177e4
LT
1508 schedule();
1509 del_singleshot_timer_sync(&timer);
1510
c6f3a97f
TG
1511 /* Remove the timer from the object tracker */
1512 destroy_timer_on_stack(&timer);
1513
1da177e4
LT
1514 timeout = expire - jiffies;
1515
1516 out:
1517 return timeout < 0 ? 0 : timeout;
1518}
1da177e4
LT
1519EXPORT_SYMBOL(schedule_timeout);
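/*
 * Illustrative example (not part of the original source): sleeping for up
 * to one second unless woken earlier by a signal. The task state must be
 * set before calling schedule_timeout(), as documented above.
 *
 *      signed long remaining;
 *
 *      set_current_state(TASK_INTERRUPTIBLE);
 *      remaining = schedule_timeout(HZ);
 *      if (remaining)
 *              a signal or an explicit wake_up_process() ended the sleep early
 *
 * A return value of 0 means the full timeout elapsed; the task is back in
 * TASK_RUNNING either way.
 */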
1520
8a1c1757
AM
1521/*
1522 * We can use __set_current_state() here because schedule_timeout() calls
1523 * schedule() unconditionally.
1524 */
64ed93a2
NA
1525signed long __sched schedule_timeout_interruptible(signed long timeout)
1526{
a5a0d52c
AM
1527 __set_current_state(TASK_INTERRUPTIBLE);
1528 return schedule_timeout(timeout);
64ed93a2
NA
1529}
1530EXPORT_SYMBOL(schedule_timeout_interruptible);
1531
294d5cc2
MW
1532signed long __sched schedule_timeout_killable(signed long timeout)
1533{
1534 __set_current_state(TASK_KILLABLE);
1535 return schedule_timeout(timeout);
1536}
1537EXPORT_SYMBOL(schedule_timeout_killable);
1538
64ed93a2
NA
1539signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1540{
a5a0d52c
AM
1541 __set_current_state(TASK_UNINTERRUPTIBLE);
1542 return schedule_timeout(timeout);
64ed93a2
NA
1543}
1544EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1545
1da177e4 1546#ifdef CONFIG_HOTPLUG_CPU
a6fa8e5a 1547static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
1da177e4
LT
1548{
1549 struct timer_list *timer;
1550
1551 while (!list_empty(head)) {
b5e61818 1552 timer = list_first_entry(head, struct timer_list, entry);
99d5f3aa 1553 /* We ignore the accounting on the dying cpu */
ec44bc7a 1554 detach_timer(timer, false);
6e453a67 1555 timer_set_base(timer, new_base);
1da177e4 1556 internal_add_timer(new_base, timer);
1da177e4 1557 }
1da177e4
LT
1558}
1559
0db0628d 1560static void migrate_timers(int cpu)
1da177e4 1561{
a6fa8e5a
PM
1562 struct tvec_base *old_base;
1563 struct tvec_base *new_base;
1da177e4
LT
1564 int i;
1565
1566 BUG_ON(cpu_online(cpu));
a4a6198b
JB
1567 old_base = per_cpu(tvec_bases, cpu);
1568 new_base = get_cpu_var(tvec_bases);
d82f0b0f
ON
1569 /*
1570 * The caller is globally serialized and nobody else
1571 * takes two locks at once, deadlock is not possible.
1572 */
1573 spin_lock_irq(&new_base->lock);
0d180406 1574 spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
3691c519
ON
1575
1576 BUG_ON(old_base->running_timer);
1da177e4 1577
1da177e4 1578 for (i = 0; i < TVR_SIZE; i++)
55c888d6
ON
1579 migrate_timer_list(new_base, old_base->tv1.vec + i);
1580 for (i = 0; i < TVN_SIZE; i++) {
1581 migrate_timer_list(new_base, old_base->tv2.vec + i);
1582 migrate_timer_list(new_base, old_base->tv3.vec + i);
1583 migrate_timer_list(new_base, old_base->tv4.vec + i);
1584 migrate_timer_list(new_base, old_base->tv5.vec + i);
1585 }
1586
8def9060
VK
1587 old_base->active_timers = 0;
1588 old_base->all_timers = 0;
1589
0d180406 1590 spin_unlock(&old_base->lock);
d82f0b0f 1591 spin_unlock_irq(&new_base->lock);
1da177e4 1592 put_cpu_var(tvec_bases);
1da177e4
LT
1593}
1594#endif /* CONFIG_HOTPLUG_CPU */
1595
0db0628d 1596static int timer_cpu_notify(struct notifier_block *self,
1da177e4
LT
1597 unsigned long action, void *hcpu)
1598{
1da177e4 1599#ifdef CONFIG_HOTPLUG_CPU
8def9060 1600 switch (action) {
1da177e4 1601 case CPU_DEAD:
8bb78442 1602 case CPU_DEAD_FROZEN:
8def9060 1603 migrate_timers((long)hcpu);
1da177e4 1604 break;
1da177e4
LT
1605 default:
1606 break;
1607 }
8def9060 1608#endif
1da177e4
LT
1609 return NOTIFY_OK;
1610}
1611
0db0628d 1612static struct notifier_block timers_nb = {
1da177e4
LT
1613 .notifier_call = timer_cpu_notify,
1614};
1615
8def9060
VK
1616static void __init init_timer_cpu(struct tvec_base *base, int cpu)
1617{
1618 int j;
1da177e4 1619
8def9060
VK
1620 base->cpu = cpu;
1621 per_cpu(tvec_bases, cpu) = base;
1622 spin_lock_init(&base->lock);
1623
1624 for (j = 0; j < TVN_SIZE; j++) {
1625 INIT_LIST_HEAD(base->tv5.vec + j);
1626 INIT_LIST_HEAD(base->tv4.vec + j);
1627 INIT_LIST_HEAD(base->tv3.vec + j);
1628 INIT_LIST_HEAD(base->tv2.vec + j);
1629 }
1630 for (j = 0; j < TVR_SIZE; j++)
1631 INIT_LIST_HEAD(base->tv1.vec + j);
1632
1633 base->timer_jiffies = jiffies;
1634 base->next_timer = base->timer_jiffies;
1635}
1636
1637static void __init init_timer_cpus(void)
1da177e4 1638{
8def9060
VK
1639 struct tvec_base *base;
1640 int local_cpu = smp_processor_id();
1641 int cpu;
1642
1643 for_each_possible_cpu(cpu) {
1644 if (cpu == local_cpu)
1645 base = &boot_tvec_bases;
1646 else
1647 base = per_cpu_ptr(&__tvec_bases, cpu);
1648
1649 init_timer_cpu(base, cpu);
1650 }
1651}
e52b1db3 1652
8def9060
VK
1653void __init init_timers(void)
1654{
e52b1db3
TH
1655 /* ensure there are enough low bits for flags in timer->base pointer */
1656 BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
07dccf33 1657
8def9060 1658 init_timer_cpus();
c24a4a36 1659 init_timer_stats();
1da177e4 1660 register_cpu_notifier(&timers_nb);
962cf36c 1661 open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1da177e4
LT
1662}
1663
1da177e4
LT
1664/**
1665 * msleep - sleep safely even with waitqueue interruptions
1666 * @msecs: Time in milliseconds to sleep for
1667 */
1668void msleep(unsigned int msecs)
1669{
1670 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1671
75bcc8c5
NA
1672 while (timeout)
1673 timeout = schedule_timeout_uninterruptible(timeout);
1da177e4
LT
1674}
1675
1676EXPORT_SYMBOL(msleep);
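/*
 * Illustrative example (not part of the original source): msleep() always
 * sleeps for at least the requested time, so it suits delays that must not
 * be cut short; msleep_interruptible() below returns early when a signal
 * is pending and reports how many milliseconds were left.
 *
 *      msleep(20);                             sleep at least 20 ms
 *
 *      if (msleep_interruptible(100))
 *              a signal arrived before the 100 ms elapsed
 */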
1677
1678/**
96ec3efd 1679 * msleep_interruptible - sleep waiting for signals
1da177e4
LT
1680 * @msecs: Time in milliseconds to sleep for
1681 */
1682unsigned long msleep_interruptible(unsigned int msecs)
1683{
1684 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1685
75bcc8c5
NA
1686 while (timeout && !signal_pending(current))
1687 timeout = schedule_timeout_interruptible(timeout);
1da177e4
LT
1688 return jiffies_to_msecs(timeout);
1689}
1690
1691EXPORT_SYMBOL(msleep_interruptible);
5e7f5a17
PP
1692
1693static int __sched do_usleep_range(unsigned long min, unsigned long max)
1694{
1695 ktime_t kmin;
1696 unsigned long delta;
1697
1698 kmin = ktime_set(0, min * NSEC_PER_USEC);
1699 delta = (max - min) * NSEC_PER_USEC;
1700 return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
1701}
1702
1703/**
1704 * usleep_range - Drop in replacement for udelay where wakeup is flexible
1705 * @min: Minimum time in usecs to sleep
1706 * @max: Maximum time in usecs to sleep
1707 */
1708void usleep_range(unsigned long min, unsigned long max)
1709{
1710 __set_current_state(TASK_UNINTERRUPTIBLE);
1711 do_usleep_range(min, max);
1712}
1713EXPORT_SYMBOL(usleep_range);
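/*
 * Illustrative example (not part of the original source): waiting roughly
 * 100-200 us for hardware to settle without busy-waiting; the range gives
 * the scheduler room to coalesce the wakeup with other hrtimer events.
 * dev->reg and val are made-up names for this sketch.
 *
 *      writel(val, dev->reg);
 *      usleep_range(100, 200);
 *
 * For very short delays (on the order of a few microseconds), udelay() is
 * normally preferred, since arming an hrtimer costs more than spinning.
 */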