/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>

#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

#define XEN_SHIFT 22

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP 100000

/* These are periodically updated in shared_info, and then copied here. */
struct shadow_time_info {
        u64 tsc_timestamp;     /* TSC at last update of time vals.  */
        u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
        u32 tsc_to_nsec_mul;
        int tsc_shift;
        u32 version;
};

static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);

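/*
 * Derive the CPU frequency from Xen's tsc->ns scaling parameters.
 * Xen converts TSC deltas to nanoseconds as ns = (tsc << tsc_shift) *
 * tsc_to_system_mul >> 32, so kHz = (10^6 << 32) / (mul << shift);
 * the shift adjustment below applies the 2^shift factor.
 */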
unsigned long xen_cpu_khz(void)
{
        u64 cpu_khz = 1000000ULL << 32;
        const struct vcpu_time_info *info =
                &HYPERVISOR_shared_info->vcpu_info[0].time;

        do_div(cpu_khz, info->tsc_to_system_mul);
        if (info->tsc_shift < 0)
                cpu_khz <<= -info->tsc_shift;
        else
                cpu_khz >>= info->tsc_shift;

        return cpu_khz;
}

/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.
 */
static void get_time_values_from_xen(void)
{
        struct vcpu_time_info *src;
        struct shadow_time_info *dst;

        preempt_disable();

        /* src is shared memory with the hypervisor, so we need to
           make sure we get a consistent snapshot, even in the face of
           being preempted. */
        src = &__get_cpu_var(xen_vcpu)->time;
        dst = &__get_cpu_var(shadow_time);

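        /* Xen uses a seqlock-style protocol: the version is odd while
           an update is in progress and changes when one completes.
           Loop until we have copied a stable, even-versioned snapshot. */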
        do {
                dst->version = src->version;
                rmb();          /* fetch version before data */
                dst->tsc_timestamp = src->tsc_timestamp;
                dst->system_timestamp = src->system_time;
                dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
                dst->tsc_shift = src->tsc_shift;
                rmb();          /* test version after fetching data */
        } while ((src->version & 1) | (dst->version ^ src->version));

        preempt_enable();
}

/*
 * Scale a 64-bit delta by shifting it and then multiplying by a 32-bit
 * fraction, yielding a 64-bit result.
 */
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
        u64 product;
#ifdef __i386__
        u32 tmp1, tmp2;
#endif

        if (shift < 0)
                delta >>= -shift;
        else
                delta <<= shift;

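        /* Compute the full 96-bit product delta * mul_frac and keep
           bits [95:32], i.e. (delta * mul_frac) >> 32.  The i386
           version does this with two 32x32->64 multiplies, carrying
           the high word of the low product into the high product;
           x86-64 does it with a single 64x64 multiply and a shrd. */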
#ifdef __i386__
        __asm__ (
                "mul %5       ; "
                "mov %4,%%eax ; "
                "mov %%edx,%4 ; "
                "mul %5       ; "
                "xor %5,%5    ; "
                "add %4,%%eax ; "
                "adc %5,%%edx ; "
                : "=A" (product), "=r" (tmp1), "=r" (tmp2)
                : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#elif defined(__x86_64__)
        __asm__ (
                "mul %%rdx ; shrd $32,%%rdx,%%rax"
                : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
#else
#error implement me!
#endif

        return product;
}

static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
        u64 now, delta;
        rdtscll(now);
        delta = now - shadow->tsc_timestamp;
        return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}

cycle_t xen_clocksource_read(void)
{
        struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
        cycle_t ret;

        get_time_values_from_xen();

        ret = shadow->system_timestamp + get_nsec_offset(shadow);

        put_cpu_var(shadow_time);

        return ret;
}

static void xen_read_wallclock(struct timespec *ts)
{
        const struct shared_info *s = HYPERVISOR_shared_info;
        u32 version;
        u64 delta;
        struct timespec now;

        /* get wallclock at system boot */
        do {
                version = s->wc_version;
                rmb();          /* fetch version before time */
                now.tv_sec = s->wc_sec;
                now.tv_nsec = s->wc_nsec;
                rmb();          /* fetch time before checking version */
        } while ((s->wc_version & 1) | (version ^ s->wc_version));

        delta = xen_clocksource_read();         /* time since system boot */
        delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;

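        /* do_div() divides delta in place and returns the remainder,
           leaving whole seconds in delta and the leftover nanoseconds
           as the return value. */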
        now.tv_nsec = do_div(delta, NSEC_PER_SEC);
        now.tv_sec = delta;

        set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}

unsigned long xen_get_wallclock(void)
{
        struct timespec ts;

        xen_read_wallclock(&ts);

        return ts.tv_sec;
}

int xen_set_wallclock(unsigned long now)
{
        /* do nothing for domU */
        return -1;
}

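/*
 * Since mult is 1 << shift, the clocksource core's conversion
 * ns = (cycles * mult) >> shift is the identity: the values returned
 * by xen_clocksource_read() are already nanoseconds.
 */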
static struct clocksource xen_clocksource __read_mostly = {
        .name = "xen",
        .rating = 400,
        .read = xen_clocksource_read,
        .mask = ~0,
        .mult = 1<<XEN_SHIFT,           /* time directly in nanoseconds */
        .shift = XEN_SHIFT,
        .flags = CLOCK_SOURCE_IS_CONTINUOUS,
};

/*
  Xen clockevent implementation

  Xen has two clockevent implementations:

  The old timer_op one works with all released versions of Xen prior
  to version 3.0.4.  This version of the hypervisor provides a
  single-shot timer with nanosecond resolution.  However, a 100Hz
  tick, delivered while the vcpu is running, shares the same event
  channel.  We don't care about or use this tick, but it will cause
  the core time code to think the timer fired too soon, and it will
  end up resetting the timer each time.  The tick could be filtered,
  but doing so has complications when the ktime clocksource is not
  yet the xen clocksource (i.e., at boot time).

  The new vcpu_op-based timer interface allows the tick timer period
  to be changed or turned off.  The tick timer is not useful as a
  periodic timer because events are only delivered to running vcpus.
  The one-shot timer can report when a timeout is in the past, so
  set_next_event is capable of returning -ETIME when appropriate.
  This interface is used when available.
*/

/*
  Get a hypervisor absolute time.  In theory we could maintain an
  offset between the kernel's time and the hypervisor's time, and
  apply that to a kernel's absolute timeout.  Unfortunately the
  hypervisor and kernel times can drift even if the kernel is using
  the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
        return xen_clocksource_read() + delta;
}

static void xen_timerop_set_mode(enum clock_event_mode mode,
                                 struct clock_event_device *evt)
{
        switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
                /* unsupported */
                WARN_ON(1);
                break;

        case CLOCK_EVT_MODE_ONESHOT:
                break;

        case CLOCK_EVT_MODE_UNUSED:
        case CLOCK_EVT_MODE_SHUTDOWN:
                HYPERVISOR_set_timer_op(0);     /* cancel timeout */
                break;
        }
}

static int xen_timerop_set_next_event(unsigned long delta,
                                      struct clock_event_device *evt)
{
        WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

        if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
                BUG();

        /* We may have missed the deadline, but there's no real way of
           knowing for sure.  If the event was in the past, then we'll
           get an immediate interrupt. */

        return 0;
}

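/*
 * mult = 1, shift = 0: the clockevents core's delta conversion
 * (ns * mult) >> shift is the identity, so both Xen clockevent
 * devices are programmed in nanoseconds directly.
 */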
static const struct clock_event_device xen_timerop_clockevent = {
        .name = "xen",
        .features = CLOCK_EVT_FEAT_ONESHOT,

        .max_delta_ns = 0xffffffff,
        .min_delta_ns = TIMER_SLOP,

        .mult = 1,
        .shift = 0,
        .rating = 500,

        .set_mode = xen_timerop_set_mode,
        .set_next_event = xen_timerop_set_next_event,
};

static void xen_vcpuop_set_mode(enum clock_event_mode mode,
                                struct clock_event_device *evt)
{
        int cpu = smp_processor_id();

        switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
                WARN_ON(1);     /* unsupported */
                break;

        case CLOCK_EVT_MODE_ONESHOT:
                if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
                        BUG();
                break;

        case CLOCK_EVT_MODE_UNUSED:
        case CLOCK_EVT_MODE_SHUTDOWN:
                if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
                    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
                        BUG();
                break;
        }
}

static int xen_vcpuop_set_next_event(unsigned long delta,
                                     struct clock_event_device *evt)
{
        int cpu = smp_processor_id();
        struct vcpu_set_singleshot_timer single;
        int ret;

        WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

        single.timeout_abs_ns = get_abs_timeout(delta);
        single.flags = VCPU_SSHOTTMR_future;

        ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

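        /* With VCPU_SSHOTTMR_future, Xen refuses a timeout that is
           already in the past and returns -ETIME; propagate that so
           the clockevents core can pick a new event. */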
        BUG_ON(ret != 0 && ret != -ETIME);

        return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
        .name = "xen",
        .features = CLOCK_EVT_FEAT_ONESHOT,

        .max_delta_ns = 0xffffffff,
        .min_delta_ns = TIMER_SLOP,

        .mult = 1,
        .shift = 0,
        .rating = 500,

        .set_mode = xen_vcpuop_set_mode,
        .set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
        &xen_timerop_clockevent;
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);

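/*
 * The virq is bound before the clockevent device is registered, so a
 * tick can arrive while event_handler is still NULL; report IRQ_NONE
 * in that case.
 */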
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
        struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
        irqreturn_t ret;

        ret = IRQ_NONE;
        if (evt->event_handler) {
                evt->event_handler(evt);
                ret = IRQ_HANDLED;
        }

        return ret;
}

static void xen_setup_timer(int cpu)
{
        const char *name;
        struct clock_event_device *evt;
        int irq;

        printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

        name = kasprintf(GFP_KERNEL, "timer%d", cpu);
        if (!name)
                name = "<timer kasprintf failed>";

        irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
                                      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
                                      name, NULL);

        evt = &get_cpu_var(xen_clock_events);
        memcpy(evt, xen_clockevent, sizeof(*evt));

        evt->cpumask = cpumask_of_cpu(cpu);
        evt->irq = irq;
        clockevents_register_device(evt);

        put_cpu_var(xen_clock_events);
}

__init void xen_time_init(void)
{
        int cpu = smp_processor_id();

        get_time_values_from_xen();

        clocksource_register(&xen_clocksource);

        if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
                /* Successfully turned off the 100Hz tick, so we have
                   the vcpuop-based timer interface */
                printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
                xen_clockevent = &xen_vcpuop_clockevent;
        }

        /* Set initial system time with full resolution */
        xen_read_wallclock(&xtime);
        set_normalized_timespec(&wall_to_monotonic,
                                -xtime.tv_sec, -xtime.tv_nsec);

        tsc_disable = 0;

        xen_setup_timer(cpu);
}