]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 2012 ARM Ltd. | |
3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms of the GNU General Public License version 2 as | |
7 | * published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 | */ | |
18 | ||
19 | #include <linux/cpu.h> | |
20 | #include <linux/of_irq.h> | |
21 | #include <linux/kvm.h> | |
22 | #include <linux/kvm_host.h> | |
23 | #include <linux/interrupt.h> | |
24 | ||
25 | #include <clocksource/arm_arch_timer.h> | |
26 | #include <asm/arch_timer.h> | |
27 | ||
28 | #include <kvm/arm_vgic.h> | |
29 | #include <kvm/arm_arch_timer.h> | |
30 | ||
31 | #include "trace.h" | |
32 | ||
/* Host virtual timer per-cpu interrupt, recorded by kvm_timer_hyp_init(). */
static unsigned int host_vtimer_irq;
/* Workqueue on which expired background timers queue their work item. */
static struct workqueue_struct *wqueue;
/* Host timecounter; its cyclecounter is used to read the physical counter. */
static struct timecounter *timecounter;
36 | ||
37 | void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | |
38 | { | |
39 | vcpu->arch.timer_cpu.active_cleared_last = false; | |
40 | } | |
41 | ||
42 | static cycle_t kvm_phys_timer_read(void) | |
43 | { | |
44 | return timecounter->cc->read(timecounter->cc); | |
45 | } | |
46 | ||
47 | static bool timer_is_armed(struct arch_timer_cpu *timer) | |
48 | { | |
49 | return timer->armed; | |
50 | } | |
51 | ||
52 | /* timer_arm: as in "arm the timer", not as in ARM the company */ | |
53 | static void timer_arm(struct arch_timer_cpu *timer, u64 ns) | |
54 | { | |
55 | timer->armed = true; | |
56 | hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), | |
57 | HRTIMER_MODE_ABS); | |
58 | } | |
59 | ||
60 | static void timer_disarm(struct arch_timer_cpu *timer) | |
61 | { | |
62 | if (timer_is_armed(timer)) { | |
63 | hrtimer_cancel(&timer->timer); | |
64 | cancel_work_sync(&timer->expired); | |
65 | timer->armed = false; | |
66 | } | |
67 | } | |
68 | ||
69 | static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) | |
70 | { | |
71 | struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; | |
72 | ||
73 | /* | |
74 | * We disable the timer in the world switch and let it be | |
75 | * handled by kvm_timer_sync_hwstate(). Getting a timer | |
76 | * interrupt at this point is a sure sign of some major | |
77 | * breakage. | |
78 | */ | |
79 | pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); | |
80 | return IRQ_HANDLED; | |
81 | } | |
82 | ||
83 | /* | |
84 | * Work function for handling the backup timer that we schedule when a vcpu is | |
85 | * no longer running, but had a timer programmed to fire in the future. | |
86 | */ | |
87 | static void kvm_timer_inject_irq_work(struct work_struct *work) | |
88 | { | |
89 | struct kvm_vcpu *vcpu; | |
90 | ||
91 | vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); | |
92 | vcpu->arch.timer_cpu.armed = false; | |
93 | ||
94 | WARN_ON(!kvm_timer_should_fire(vcpu)); | |
95 | ||
96 | /* | |
97 | * If the vcpu is blocked we want to wake it up so that it will see | |
98 | * the timer has expired when entering the guest. | |
99 | */ | |
100 | kvm_vcpu_kick(vcpu); | |
101 | } | |
102 | ||
103 | static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) | |
104 | { | |
105 | cycle_t cval, now; | |
106 | ||
107 | cval = vcpu->arch.timer_cpu.cntv_cval; | |
108 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | |
109 | ||
110 | if (now < cval) { | |
111 | u64 ns; | |
112 | ||
113 | ns = cyclecounter_cyc2ns(timecounter->cc, | |
114 | cval - now, | |
115 | timecounter->mask, | |
116 | &timecounter->frac); | |
117 | return ns; | |
118 | } | |
119 | ||
120 | return 0; | |
121 | } | |
122 | ||
123 | static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) | |
124 | { | |
125 | struct arch_timer_cpu *timer; | |
126 | struct kvm_vcpu *vcpu; | |
127 | u64 ns; | |
128 | ||
129 | timer = container_of(hrt, struct arch_timer_cpu, timer); | |
130 | vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); | |
131 | ||
132 | /* | |
133 | * Check that the timer has really expired from the guest's | |
134 | * PoV (NTP on the host may have forced it to expire | |
135 | * early). If we should have slept longer, restart it. | |
136 | */ | |
137 | ns = kvm_timer_compute_delta(vcpu); | |
138 | if (unlikely(ns)) { | |
139 | hrtimer_forward_now(hrt, ns_to_ktime(ns)); | |
140 | return HRTIMER_RESTART; | |
141 | } | |
142 | ||
143 | queue_work(wqueue, &timer->expired); | |
144 | return HRTIMER_NORESTART; | |
145 | } | |
146 | ||
147 | static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) | |
148 | { | |
149 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
150 | ||
151 | return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && | |
152 | (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); | |
153 | } | |
154 | ||
155 | bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) | |
156 | { | |
157 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
158 | cycle_t cval, now; | |
159 | ||
160 | if (!kvm_timer_irq_can_fire(vcpu)) | |
161 | return false; | |
162 | ||
163 | cval = timer->cntv_cval; | |
164 | now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | |
165 | ||
166 | return cval <= now; | |
167 | } | |
168 | ||
169 | static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) | |
170 | { | |
171 | int ret; | |
172 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
173 | ||
174 | BUG_ON(!vgic_initialized(vcpu->kvm)); | |
175 | ||
176 | timer->active_cleared_last = false; | |
177 | timer->irq.level = new_level; | |
178 | trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, | |
179 | timer->irq.level); | |
180 | ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, | |
181 | timer->map, | |
182 | timer->irq.level); | |
183 | WARN_ON(ret); | |
184 | } | |
185 | ||
186 | /* | |
187 | * Check if there was a change in the timer state (should we raise or lower | |
188 | * the line level to the GIC). | |
189 | */ | |
190 | static int kvm_timer_update_state(struct kvm_vcpu *vcpu) | |
191 | { | |
192 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
193 | ||
194 | /* | |
195 | * If userspace modified the timer registers via SET_ONE_REG before | |
196 | * the vgic was initialized, we mustn't set the timer->irq.level value | |
197 | * because the guest would never see the interrupt. Instead wait | |
198 | * until we call this function from kvm_timer_flush_hwstate. | |
199 | */ | |
200 | if (!vgic_initialized(vcpu->kvm)) | |
201 | return -ENODEV; | |
202 | ||
203 | if (kvm_timer_should_fire(vcpu) != timer->irq.level) | |
204 | kvm_timer_update_irq(vcpu, !timer->irq.level); | |
205 | ||
206 | return 0; | |
207 | } | |
208 | ||
209 | /* | |
210 | * Schedule the background timer before calling kvm_vcpu_block, so that this | |
211 | * thread is removed from its waitqueue and made runnable when there's a timer | |
212 | * interrupt to handle. | |
213 | */ | |
214 | void kvm_timer_schedule(struct kvm_vcpu *vcpu) | |
215 | { | |
216 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
217 | ||
218 | BUG_ON(timer_is_armed(timer)); | |
219 | ||
220 | /* | |
221 | * No need to schedule a background timer if the guest timer has | |
222 | * already expired, because kvm_vcpu_block will return before putting | |
223 | * the thread to sleep. | |
224 | */ | |
225 | if (kvm_timer_should_fire(vcpu)) | |
226 | return; | |
227 | ||
228 | /* | |
229 | * If the timer is not capable of raising interrupts (disabled or | |
230 | * masked), then there's no more work for us to do. | |
231 | */ | |
232 | if (!kvm_timer_irq_can_fire(vcpu)) | |
233 | return; | |
234 | ||
235 | /* The timer has not yet expired, schedule a background timer */ | |
236 | timer_arm(timer, kvm_timer_compute_delta(vcpu)); | |
237 | } | |
238 | ||
239 | void kvm_timer_unschedule(struct kvm_vcpu *vcpu) | |
240 | { | |
241 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
242 | timer_disarm(timer); | |
243 | } | |
244 | ||
245 | /** | |
246 | * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu | |
247 | * @vcpu: The vcpu pointer | |
248 | * | |
249 | * Check if the virtual timer has expired while we were running in the host, | |
250 | * and inject an interrupt if that was the case. | |
251 | */ | |
252 | void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) | |
253 | { | |
254 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
255 | bool phys_active; | |
256 | int ret; | |
257 | ||
258 | if (kvm_timer_update_state(vcpu)) | |
259 | return; | |
260 | ||
261 | /* | |
262 | * If we enter the guest with the virtual input level to the VGIC | |
263 | * asserted, then we have already told the VGIC what we need to, and | |
264 | * we don't need to exit from the guest until the guest deactivates | |
265 | * the already injected interrupt, so therefore we should set the | |
266 | * hardware active state to prevent unnecessary exits from the guest. | |
267 | * | |
268 | * Also, if we enter the guest with the virtual timer interrupt active, | |
269 | * then it must be active on the physical distributor, because we set | |
270 | * the HW bit and the guest must be able to deactivate the virtual and | |
271 | * physical interrupt at the same time. | |
272 | * | |
273 | * Conversely, if the virtual input level is deasserted and the virtual | |
274 | * interrupt is not active, then always clear the hardware active state | |
275 | * to ensure that hardware interrupts from the timer triggers a guest | |
276 | * exit. | |
277 | */ | |
278 | if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map)) | |
279 | phys_active = true; | |
280 | else | |
281 | phys_active = false; | |
282 | ||
283 | /* | |
284 | * We want to avoid hitting the (re)distributor as much as | |
285 | * possible, as this is a potentially expensive MMIO access | |
286 | * (not to mention locks in the irq layer), and a solution for | |
287 | * this is to cache the "active" state in memory. | |
288 | * | |
289 | * Things to consider: we cannot cache an "active set" state, | |
290 | * because the HW can change this behind our back (it becomes | |
291 | * "clear" in the HW). We must then restrict the caching to | |
292 | * the "clear" state. | |
293 | * | |
294 | * The cache is invalidated on: | |
295 | * - vcpu put, indicating that the HW cannot be trusted to be | |
296 | * in a sane state on the next vcpu load, | |
297 | * - any change in the interrupt state | |
298 | * | |
299 | * Usage conditions: | |
300 | * - cached value is "active clear" | |
301 | * - value to be programmed is "active clear" | |
302 | */ | |
303 | if (timer->active_cleared_last && !phys_active) | |
304 | return; | |
305 | ||
306 | ret = irq_set_irqchip_state(timer->map->irq, | |
307 | IRQCHIP_STATE_ACTIVE, | |
308 | phys_active); | |
309 | WARN_ON(ret); | |
310 | ||
311 | timer->active_cleared_last = !phys_active; | |
312 | } | |
313 | ||
314 | /** | |
315 | * kvm_timer_sync_hwstate - sync timer state from cpu | |
316 | * @vcpu: The vcpu pointer | |
317 | * | |
318 | * Check if the virtual timer has expired while we were running in the guest, | |
319 | * and inject an interrupt if that was the case. | |
320 | */ | |
321 | void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) | |
322 | { | |
323 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
324 | ||
325 | BUG_ON(timer_is_armed(timer)); | |
326 | ||
327 | /* | |
328 | * The guest could have modified the timer registers or the timer | |
329 | * could have expired, update the timer state. | |
330 | */ | |
331 | kvm_timer_update_state(vcpu); | |
332 | } | |
333 | ||
334 | int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, | |
335 | const struct kvm_irq_level *irq) | |
336 | { | |
337 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
338 | struct irq_phys_map *map; | |
339 | ||
340 | /* | |
341 | * The vcpu timer irq number cannot be determined in | |
342 | * kvm_timer_vcpu_init() because it is called much before | |
343 | * kvm_vcpu_set_target(). To handle this, we determine | |
344 | * vcpu timer irq number when the vcpu is reset. | |
345 | */ | |
346 | timer->irq.irq = irq->irq; | |
347 | ||
348 | /* | |
349 | * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 | |
350 | * and to 0 for ARMv7. We provide an implementation that always | |
351 | * resets the timer to be disabled and unmasked and is compliant with | |
352 | * the ARMv7 architecture. | |
353 | */ | |
354 | timer->cntv_ctl = 0; | |
355 | kvm_timer_update_state(vcpu); | |
356 | ||
357 | /* | |
358 | * Tell the VGIC that the virtual interrupt is tied to a | |
359 | * physical interrupt. We do that once per VCPU. | |
360 | */ | |
361 | map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq); | |
362 | if (WARN_ON(IS_ERR(map))) | |
363 | return PTR_ERR(map); | |
364 | ||
365 | timer->map = map; | |
366 | return 0; | |
367 | } | |
368 | ||
369 | void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) | |
370 | { | |
371 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
372 | ||
373 | INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); | |
374 | hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
375 | timer->timer.function = kvm_timer_expire; | |
376 | } | |
377 | ||
378 | static void kvm_timer_init_interrupt(void *info) | |
379 | { | |
380 | enable_percpu_irq(host_vtimer_irq, 0); | |
381 | } | |
382 | ||
383 | int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) | |
384 | { | |
385 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
386 | ||
387 | switch (regid) { | |
388 | case KVM_REG_ARM_TIMER_CTL: | |
389 | timer->cntv_ctl = value; | |
390 | break; | |
391 | case KVM_REG_ARM_TIMER_CNT: | |
392 | vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; | |
393 | break; | |
394 | case KVM_REG_ARM_TIMER_CVAL: | |
395 | timer->cntv_cval = value; | |
396 | break; | |
397 | default: | |
398 | return -1; | |
399 | } | |
400 | ||
401 | kvm_timer_update_state(vcpu); | |
402 | return 0; | |
403 | } | |
404 | ||
405 | u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) | |
406 | { | |
407 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
408 | ||
409 | switch (regid) { | |
410 | case KVM_REG_ARM_TIMER_CTL: | |
411 | return timer->cntv_ctl; | |
412 | case KVM_REG_ARM_TIMER_CNT: | |
413 | return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; | |
414 | case KVM_REG_ARM_TIMER_CVAL: | |
415 | return timer->cntv_cval; | |
416 | } | |
417 | return (u64)-1; | |
418 | } | |
419 | ||
420 | static int kvm_timer_cpu_notify(struct notifier_block *self, | |
421 | unsigned long action, void *cpu) | |
422 | { | |
423 | switch (action) { | |
424 | case CPU_STARTING: | |
425 | case CPU_STARTING_FROZEN: | |
426 | kvm_timer_init_interrupt(NULL); | |
427 | break; | |
428 | case CPU_DYING: | |
429 | case CPU_DYING_FROZEN: | |
430 | disable_percpu_irq(host_vtimer_irq); | |
431 | break; | |
432 | } | |
433 | ||
434 | return NOTIFY_OK; | |
435 | } | |
436 | ||
437 | static struct notifier_block kvm_timer_cpu_nb = { | |
438 | .notifier_call = kvm_timer_cpu_notify, | |
439 | }; | |
440 | ||
441 | static const struct of_device_id arch_timer_of_match[] = { | |
442 | { .compatible = "arm,armv7-timer", }, | |
443 | { .compatible = "arm,armv8-timer", }, | |
444 | {}, | |
445 | }; | |
446 | ||
447 | int kvm_timer_hyp_init(void) | |
448 | { | |
449 | struct device_node *np; | |
450 | unsigned int ppi; | |
451 | int err; | |
452 | ||
453 | timecounter = arch_timer_get_timecounter(); | |
454 | if (!timecounter) | |
455 | return -ENODEV; | |
456 | ||
457 | np = of_find_matching_node(NULL, arch_timer_of_match); | |
458 | if (!np) { | |
459 | kvm_err("kvm_arch_timer: can't find DT node\n"); | |
460 | return -ENODEV; | |
461 | } | |
462 | ||
463 | ppi = irq_of_parse_and_map(np, 2); | |
464 | if (!ppi) { | |
465 | kvm_err("kvm_arch_timer: no virtual timer interrupt\n"); | |
466 | err = -EINVAL; | |
467 | goto out; | |
468 | } | |
469 | ||
470 | err = request_percpu_irq(ppi, kvm_arch_timer_handler, | |
471 | "kvm guest timer", kvm_get_running_vcpus()); | |
472 | if (err) { | |
473 | kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", | |
474 | ppi, err); | |
475 | goto out; | |
476 | } | |
477 | ||
478 | host_vtimer_irq = ppi; | |
479 | ||
480 | err = __register_cpu_notifier(&kvm_timer_cpu_nb); | |
481 | if (err) { | |
482 | kvm_err("Cannot register timer CPU notifier\n"); | |
483 | goto out_free; | |
484 | } | |
485 | ||
486 | wqueue = create_singlethread_workqueue("kvm_arch_timer"); | |
487 | if (!wqueue) { | |
488 | err = -ENOMEM; | |
489 | goto out_free; | |
490 | } | |
491 | ||
492 | kvm_info("%s IRQ%d\n", np->name, ppi); | |
493 | on_each_cpu(kvm_timer_init_interrupt, NULL, 1); | |
494 | ||
495 | goto out; | |
496 | out_free: | |
497 | free_percpu_irq(ppi, kvm_get_running_vcpus()); | |
498 | out: | |
499 | of_node_put(np); | |
500 | return err; | |
501 | } | |
502 | ||
503 | void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) | |
504 | { | |
505 | struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; | |
506 | ||
507 | timer_disarm(timer); | |
508 | if (timer->map) | |
509 | kvm_vgic_unmap_phys_irq(vcpu, timer->map); | |
510 | } | |
511 | ||
512 | void kvm_timer_enable(struct kvm *kvm) | |
513 | { | |
514 | if (kvm->arch.timer.enabled) | |
515 | return; | |
516 | ||
517 | /* | |
518 | * There is a potential race here between VCPUs starting for the first | |
519 | * time, which may be enabling the timer multiple times. That doesn't | |
520 | * hurt though, because we're just setting a variable to the same | |
521 | * variable that it already was. The important thing is that all | |
522 | * VCPUs have the enabled variable set, before entering the guest, if | |
523 | * the arch timers are enabled. | |
524 | */ | |
525 | if (timecounter && wqueue) | |
526 | kvm->arch.timer.enabled = 1; | |
527 | } | |
528 | ||
529 | void kvm_timer_init(struct kvm *kvm) | |
530 | { | |
531 | kvm->arch.timer.cntvoff = kvm_phys_timer_read(); | |
532 | } |