/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/gfp.h>
#include <linux/slab.h>

#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP	100000
#define NS_PER_TICK	(1000000000LL / HZ)
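/* e.g. with HZ == 250, NS_PER_TICK works out to 1,000,000,000 / 250 == 4,000,000 ns */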

/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);

/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);

/* unused ns of stolen and blocked time */
static DEFINE_PER_CPU(u64, xen_residual_stolen);
static DEFINE_PER_CPU(u64, xen_residual_blocked);

/* return a consistent snapshot of 64-bit time/counter value */
static u64 get64(const u64 *p)
{
        u64 ret;

        if (BITS_PER_LONG < 64) {
                u32 *p32 = (u32 *)p;
                u32 h, l;

                /*
                 * Read high then low, and then make sure high is
                 * still the same; this will only loop if low wraps
                 * and carries into high.
                 * XXX some clean way to make this endian-proof?
                 */
                do {
                        h = p32[1];
                        barrier();
                        l = p32[0];
                        barrier();
                } while (p32[1] != h);

                ret = (((u64)h) << 32) | l;
        } else
                ret = *p;

        return ret;
}
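
/*
 * Example of the race the loop above closes on 32-bit: if the counter
 * advances from 0x00000000ffffffff to 0x0000000100000000 between the
 * high and low reads, pairing the stale high word with the new low
 * word would yield 0; the re-read of p32[1] spots the carry and
 * retries instead of returning the torn value.
 */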

/*
 * Runstate accounting
 */
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
        u64 state_time;
        struct vcpu_runstate_info *state;

        BUG_ON(preemptible());

        state = &__get_cpu_var(xen_runstate);

        /*
         * The runstate info is always updated by the hypervisor on
         * the current CPU, so there's no need to use anything
         * stronger than a compiler barrier when fetching it.
         */
        do {
                state_time = get64(&state->state_entry_time);
                barrier();
                *res = *state;
                barrier();
        } while (get64(&state->state_entry_time) != state_time);
}

/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
        return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}

void xen_setup_runstate_info(int cpu)
{
        struct vcpu_register_runstate_memory_area area;

        area.addr.v = &per_cpu(xen_runstate, cpu);

        if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
                               cpu, &area))
                BUG();
}

static void do_stolen_accounting(void)
{
        struct vcpu_runstate_info state;
        struct vcpu_runstate_info *snap;
        s64 blocked, runnable, offline, stolen;
        cputime_t ticks;

        get_runstate_snapshot(&state);

        WARN_ON(state.state != RUNSTATE_running);

        snap = &__get_cpu_var(xen_runstate_snapshot);

        /* work out how much time the VCPU has not been runn*ing* */
        blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
        runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
        offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];

        *snap = state;

        /* Add the appropriate number of ticks of stolen time,
           including any left-overs from last time. */
        stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);

        if (stolen < 0)
                stolen = 0;

        ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
        __this_cpu_write(xen_residual_stolen, stolen);
        account_steal_ticks(ticks);

        /* Add the appropriate number of ticks of blocked time,
           including any left-overs from last time. */
        blocked += __this_cpu_read(xen_residual_blocked);

        if (blocked < 0)
                blocked = 0;

        ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
        __this_cpu_write(xen_residual_blocked, blocked);
        account_idle_ticks(ticks);
}
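
/*
 * Worked example of the residual bookkeeping above, assuming HZ == 250
 * (NS_PER_TICK == 4,000,000): with 9,500,000 ns of accumulated stolen
 * time, iter_div_u64_rem() accounts 2 ticks and leaves the 1,500,000 ns
 * remainder in xen_residual_stolen to be picked up on the next pass.
 */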

/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
{
        struct pvclock_vcpu_time_info *info =
                &HYPERVISOR_shared_info->vcpu_info[0].time;

        return pvclock_tsc_khz(info);
}
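
/*
 * Note: the notrace preempt variants below are presumably used because
 * this function also serves as the pv sched_clock (see xen_time_ops),
 * so it may be called from tracing paths where the traced
 * preempt_disable() would recurse.
 */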
cycle_t xen_clocksource_read(void)
{
        struct pvclock_vcpu_time_info *src;
        cycle_t ret;

        preempt_disable_notrace();
        src = &__get_cpu_var(xen_vcpu)->time;
        ret = pvclock_clocksource_read(src);
        preempt_enable_notrace();
        return ret;
}

static cycle_t xen_clocksource_get_cycles(struct clocksource *cs)
{
        return xen_clocksource_read();
}

static void xen_read_wallclock(struct timespec *ts)
{
        struct shared_info *s = HYPERVISOR_shared_info;
        struct pvclock_wall_clock *wall_clock = &(s->wc);
        struct pvclock_vcpu_time_info *vcpu_time;

        vcpu_time = &get_cpu_var(xen_vcpu)->time;
        pvclock_read_wallclock(wall_clock, vcpu_time, ts);
        put_cpu_var(xen_vcpu);
}

static unsigned long xen_get_wallclock(void)
{
        struct timespec ts;

        xen_read_wallclock(&ts);
        return ts.tv_sec;
}

static int xen_set_wallclock(unsigned long now)
{
        struct xen_platform_op op;
        int rc;

        /* do nothing for domU */
        if (!xen_initial_domain())
                return -1;

        op.cmd = XENPF_settime;
        op.u.settime.secs = now;
        op.u.settime.nsecs = 0;
        op.u.settime.system_time = xen_clocksource_read();

        rc = HYPERVISOR_dom0_op(&op);
        WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now);

        return rc;
}
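
/*
 * Rating 400 ranks this above the native TSC clocksource (which
 * registers at rating 300), so the Xen clocksource is preferred
 * whenever it is available.
 */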
static struct clocksource xen_clocksource __read_mostly = {
        .name = "xen",
        .rating = 400,
        .read = xen_clocksource_get_cycles,
        .mask = ~0,
        .flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

/*
  Xen clockevent implementation

  Xen has two clockevent implementations:

  The old timer_op one works with all released versions of Xen prior
  to version 3.0.4.  This version of the hypervisor provides a
  single-shot timer with nanosecond resolution.  However, a 100Hz
  tick, which shares the same event channel, is delivered while the
  vcpu is running.  We don't care about or use this tick, but it will
  cause the core time code to think the timer fired too soon, and
  will end up resetting it each time.  It could be filtered, but
  doing so has complications when the ktime clocksource is not yet
  the xen clocksource (ie, at boot time).

  The new vcpu_op-based timer interface allows the tick timer period
  to be changed or turned off.  The tick timer is not useful as a
  periodic timer because events are only delivered to running vcpus.
  The one-shot timer can report when a timeout is in the past, so
  set_next_event is capable of returning -ETIME when appropriate.
  This interface is used when available.
*/

/*
  Get a hypervisor absolute time.  In theory we could maintain an
  offset between the kernel's time and the hypervisor's time, and
  apply that to a kernel's absolute timeout.  Unfortunately the
  hypervisor and kernel times can drift even if the kernel is using
  the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
        return xen_clocksource_read() + delta;
}

static void xen_timerop_set_mode(enum clock_event_mode mode,
                                 struct clock_event_device *evt)
{
        switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
                /* unsupported */
                WARN_ON(1);
                break;

        case CLOCK_EVT_MODE_ONESHOT:
        case CLOCK_EVT_MODE_RESUME:
                break;

        case CLOCK_EVT_MODE_UNUSED:
        case CLOCK_EVT_MODE_SHUTDOWN:
                HYPERVISOR_set_timer_op(0);  /* cancel timeout */
                break;
        }
}

static int xen_timerop_set_next_event(unsigned long delta,
                                      struct clock_event_device *evt)
{
        WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

        if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
                BUG();

        /* We may have missed the deadline, but there's no real way of
           knowing for sure.  If the event was in the past, then we'll
           get an immediate interrupt. */

        return 0;
}
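
/*
 * With .mult == 1 and .shift == 0, the clockevents core's
 * (delta * mult) >> shift conversion is an identity, so event deltas
 * are programmed into the hypervisor directly in nanoseconds.
 */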
static const struct clock_event_device xen_timerop_clockevent = {
        .name = "xen",
        .features = CLOCK_EVT_FEAT_ONESHOT,

        .max_delta_ns = 0xffffffff,
        .min_delta_ns = TIMER_SLOP,

        .mult = 1,
        .shift = 0,
        .rating = 500,

        .set_mode = xen_timerop_set_mode,
        .set_next_event = xen_timerop_set_next_event,
};

static void xen_vcpuop_set_mode(enum clock_event_mode mode,
                                struct clock_event_device *evt)
{
        int cpu = smp_processor_id();

        switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
                WARN_ON(1);  /* unsupported */
                break;

        case CLOCK_EVT_MODE_ONESHOT:
                if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
                        BUG();
                break;

        case CLOCK_EVT_MODE_UNUSED:
        case CLOCK_EVT_MODE_SHUTDOWN:
                if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
                    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
                        BUG();
                break;
        case CLOCK_EVT_MODE_RESUME:
                break;
        }
}

static int xen_vcpuop_set_next_event(unsigned long delta,
                                     struct clock_event_device *evt)
{
        int cpu = smp_processor_id();
        struct vcpu_set_singleshot_timer single;
        int ret;

        WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

        single.timeout_abs_ns = get_abs_timeout(delta);
        single.flags = VCPU_SSHOTTMR_future;

        ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

        BUG_ON(ret != 0 && ret != -ETIME);
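
        /*
         * Only 0 and -ETIME are expected here: with VCPU_SSHOTTMR_future
         * set, -ETIME means the deadline was already in the past, and it
         * is passed up so the clockevents core can handle the expiry.
         */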
        return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
        .name = "xen",
        .features = CLOCK_EVT_FEAT_ONESHOT,

        .max_delta_ns = 0xffffffff,
        .min_delta_ns = TIMER_SLOP,

        .mult = 1,
        .shift = 0,
        .rating = 500,

        .set_mode = xen_vcpuop_set_mode,
        .set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
        &xen_timerop_clockevent;
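
/*
 * The irq name is kept next to the event device so that
 * xen_teardown_timer() can kfree() it when the cpu goes offline,
 * rather than leaking the kasprintf() allocation on every hotplug
 * cycle.
 */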
struct xen_clock_event_device {
        struct clock_event_device evt;
        char *name;
};
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };

static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
        struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt;
        irqreturn_t ret;

        ret = IRQ_NONE;
        if (evt->event_handler) {
                evt->event_handler(evt);
                ret = IRQ_HANDLED;
        }

        do_stolen_accounting();

        return ret;
}

void xen_setup_timer(int cpu)
{
        char *name;
        struct clock_event_device *evt;
        int irq;

        evt = &per_cpu(xen_clock_events, cpu).evt;
        WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);

        printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

        name = kasprintf(GFP_KERNEL, "timer%d", cpu);
        if (!name)
                name = "<timer kasprintf failed>";

        irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
                                      IRQF_DISABLED|IRQF_PERCPU|
                                      IRQF_NOBALANCING|IRQF_TIMER|
                                      IRQF_FORCE_RESUME,
                                      name, NULL);

        memcpy(evt, xen_clockevent, sizeof(*evt));

        evt->cpumask = cpumask_of(cpu);
        evt->irq = irq;
        per_cpu(xen_clock_events, cpu).name = name;
}

void xen_teardown_timer(int cpu)
{
        struct clock_event_device *evt;
        BUG_ON(cpu == 0);
        evt = &per_cpu(xen_clock_events, cpu).evt;
        unbind_from_irqhandler(evt->irq, NULL);
        evt->irq = -1;
        kfree(per_cpu(xen_clock_events, cpu).name);
        per_cpu(xen_clock_events, cpu).name = NULL;
}

void xen_setup_cpu_clockevents(void)
{
        BUG_ON(preemptible());

        clockevents_register_device(&__get_cpu_var(xen_clock_events).evt);
}

void xen_timer_resume(void)
{
        int cpu;

        pvclock_resume();

        if (xen_clockevent != &xen_vcpuop_clockevent)
                return;
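
        /*
         * The periodic tick may have been re-enabled by the hypervisor
         * across save/restore, so stop it again on every online vcpu
         * before relying on the one-shot timer.
         */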
        for_each_online_cpu(cpu) {
                if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
                        BUG();
        }
}

static const struct pv_time_ops xen_time_ops __initconst = {
        .sched_clock = xen_clocksource_read,
};

static void __init xen_time_init(void)
{
        int cpu = smp_processor_id();
        struct timespec tp;

        clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

        if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
                /* Successfully turned off 100Hz tick, so we have the
                   vcpuop-based timer interface */
                printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
                xen_clockevent = &xen_vcpuop_clockevent;
        }

        /* Set initial system time with full resolution */
        xen_read_wallclock(&tp);
        do_settimeofday(&tp);

        setup_force_cpu_cap(X86_FEATURE_TSC);

        xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
        xen_setup_cpu_clockevents();
}

void __init xen_init_time_ops(void)
{
        pv_time_ops = xen_time_ops;

        x86_init.timers.timer_init = xen_time_init;
        x86_init.timers.setup_percpu_clockev = x86_init_noop;
        x86_cpuinit.setup_percpu_clockev = x86_init_noop;

        x86_platform.calibrate_tsc = xen_tsc_khz;
        x86_platform.get_wallclock = xen_get_wallclock;
        x86_platform.set_wallclock = xen_set_wallclock;
}

#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_setup_cpu_clockevents(void)
{
        int cpu = smp_processor_id();
        xen_setup_runstate_info(cpu);
        /*
         * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
         * doing it in xen_hvm_cpu_notify (which gets called by smp_init
         * during early bootup and also during CPU hotplug events).
         */
        xen_setup_cpu_clockevents();
}

void __init xen_hvm_init_time_ops(void)
{
        /* A vector callback is needed, otherwise we cannot receive
         * interrupts on cpus > 0; at this point we don't know how
         * many cpus are available */
        if (!xen_have_vector_callback)
                return;
        if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
                printk(KERN_INFO "Xen doesn't support pvclock on HVM, "
                       "disabling pv timer\n");
                return;
        }

        pv_time_ops = xen_time_ops;
        x86_init.timers.setup_percpu_clockev = xen_time_init;
        x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;

        x86_platform.calibrate_tsc = xen_tsc_khz;
        x86_platform.get_wallclock = xen_get_wallclock;
        x86_platform.set_wallclock = xen_set_wallclock;
}
#endif