virt/kvm/arm/arch_timer.c

   1 /*
   2  * Copyright (C) 2012 ARM Ltd.
   3  * Author: Marc Zyngier <marc.zyngier@arm.com>
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License version 2 as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17  */
  18
  19 #include <linux/cpu.h>
  20 #include <linux/kvm.h>
  21 #include <linux/kvm_host.h>
  22 #include <linux/interrupt.h>
  23 #include <linux/irq.h>
  24 #include <linux/uaccess.h>
  25
  26 #include <clocksource/arm_arch_timer.h>
  27 #include <asm/arch_timer.h>
  28 #include <asm/kvm_hyp.h>
  29
  30 #include <kvm/arm_vgic.h>
  31 #include <kvm/arm_arch_timer.h>
  32
  33 #include "trace.h"
  34
  35 static struct timecounter *timecounter;
  36 static unsigned int host_vtimer_irq;
  37 static u32 host_vtimer_irq_flags;
  38
  39 static const struct kvm_irq_level default_ptimer_irq = {
  40         .irq    = 30,
  41         .level  = 1,
  42 };
  43
  44 static const struct kvm_irq_level default_vtimer_irq = {
  45         .irq    = 27,
  46         .level  = 1,
  47 };
  48
  49 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
  50 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
  51                                  struct arch_timer_context *timer_ctx);
  52 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
  53
  54 u64 kvm_phys_timer_read(void)
  55 {
  56         return timecounter->cc->read(timecounter->cc);
  57 }
  58
  59 static void soft_timer_start(struct hrtimer *hrt, u64 ns)
  60 {
  61         hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
  62                       HRTIMER_MODE_ABS);
  63 }
  64
  65 static void soft_timer_cancel(struct hrtimer *hrt, struct work_struct *work)
  66 {
  67         hrtimer_cancel(hrt);
  68         if (work)
  69                 cancel_work_sync(work);
  70 }
  71
  72 static void kvm_vtimer_update_mask_user(struct kvm_vcpu *vcpu)
  73 {
  74         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
  75
  76         /*
  77          * When using a userspace irqchip with the architected timers, we must
  78          * prevent continuously exiting from the guest, and therefore mask the
  79          * physical interrupt by disabling it on the host interrupt controller
  80          * when the virtual level is high, such that the guest can make
  81          * forward progress.  Once we detect the output level being
  82          * de-asserted, we unmask the interrupt again so that we exit from the
  83          * guest when the timer fires.
  84          */
  85         if (vtimer->irq.level)
  86                 disable_percpu_irq(host_vtimer_irq);
  87         else
  88                 enable_percpu_irq(host_vtimer_irq, 0);
  89 }
  90
  91 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
  92 {
  93         struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
  94         struct arch_timer_context *vtimer;
  95
  96         if (!vcpu) {
  97                 pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
  98                 return IRQ_NONE;
  99         }
 100         vtimer = vcpu_vtimer(vcpu);
 101
 102         if (!vtimer->irq.level) {
 103                 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
 104                 if (kvm_timer_irq_can_fire(vtimer))
 105                         kvm_timer_update_irq(vcpu, true, vtimer);
 106         }
 107
 108         if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 109                 kvm_vtimer_update_mask_user(vcpu);
 110
 111         return IRQ_HANDLED;
 112 }
 113
 114 /*
 115  * Work function for handling the backup timer that we schedule when a vcpu is
 116  * no longer running, but had a timer programmed to fire in the future.
 117  */
 118 static void kvm_timer_inject_irq_work(struct work_struct *work)
 119 {
 120         struct kvm_vcpu *vcpu;
 121
 122         vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
 123
 124         /*
 125          * If the vcpu is blocked we want to wake it up so that it will see
 126          * the timer has expired when entering the guest.
 127          */
 128         kvm_vcpu_wake_up(vcpu);
 129 }
 130
 131 static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
 132 {
 133         u64 cval, now;
 134
 135         cval = timer_ctx->cnt_cval;
 136         now = kvm_phys_timer_read() - timer_ctx->cntvoff;
 137
 138         if (now < cval) {
 139                 u64 ns;
 140
 141                 ns = cyclecounter_cyc2ns(timecounter->cc,
 142                                          cval - now,
 143                                          timecounter->mask,
 144                                          &timecounter->frac);
 145                 return ns;
 146         }
 147
 148         return 0;
 149 }
 150
 151 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
 152 {
 153         return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
 154                 (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
 155 }
 156
 157 /*
 158  * Returns the earliest expiration time in ns among guest timers.
 159  * Note that it will return 0 if none of timers can fire.
 160  */
 161 static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
 162 {
 163         u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX;
 164         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 165         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 166
 167         if (kvm_timer_irq_can_fire(vtimer))
 168                 min_virt = kvm_timer_compute_delta(vtimer);
 169
 170         if (kvm_timer_irq_can_fire(ptimer))
 171                 min_phys = kvm_timer_compute_delta(ptimer);
 172
 173         /* If none of timers can fire, then return 0 */
 174         if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX))
 175                 return 0;
 176
 177         return min(min_virt, min_phys);
 178 }
 179
 180 static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
 181 {
 182         struct arch_timer_cpu *timer;
 183         struct kvm_vcpu *vcpu;
 184         u64 ns;
 185
 186         timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
 187         vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
 188
 189         /*
 190          * Check that the timer has really expired from the guest's
 191          * PoV (NTP on the host may have forced it to expire
 192          * early). If we should have slept longer, restart it.
 193          */
 194         ns = kvm_timer_earliest_exp(vcpu);
 195         if (unlikely(ns)) {
 196                 hrtimer_forward_now(hrt, ns_to_ktime(ns));
 197                 return HRTIMER_RESTART;
 198         }
 199
 200         schedule_work(&timer->expired);
 201         return HRTIMER_NORESTART;
 202 }
 203
 204 static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
 205 {
 206         struct arch_timer_context *ptimer;
 207         struct arch_timer_cpu *timer;
 208         struct kvm_vcpu *vcpu;
 209         u64 ns;
 210
 211         timer = container_of(hrt, struct arch_timer_cpu, phys_timer);
 212         vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
 213         ptimer = vcpu_ptimer(vcpu);
 214
 215         /*
 216          * Check that the timer has really expired from the guest's
 217          * PoV (NTP on the host may have forced it to expire
 218          * early). If not ready, schedule for a later time.
 219          */
 220         ns = kvm_timer_compute_delta(ptimer);
 221         if (unlikely(ns)) {
 222                 hrtimer_forward_now(hrt, ns_to_ktime(ns));
 223                 return HRTIMER_RESTART;
 224         }
 225
 226         kvm_timer_update_irq(vcpu, true, ptimer);
 227         return HRTIMER_NORESTART;
 228 }
 229
 230 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 231 {
 232         u64 cval, now;
 233
 234         if (!kvm_timer_irq_can_fire(timer_ctx))
 235                 return false;
 236
 237         cval = timer_ctx->cnt_cval;
 238         now = kvm_phys_timer_read() - timer_ctx->cntvoff;
 239
 240         return cval <= now;
 241 }
 242
 243 bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
 244 {
 245         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 246         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 247
 248         if (vtimer->irq.level || ptimer->irq.level)
 249                 return true;
 250
 251         /*
 252          * When this is called from withing the wait loop of kvm_vcpu_block(),
 253          * the software view of the timer state is up to date (timer->loaded
 254          * is false), and so we can simply check if the timer should fire now.
 255          */
 256         if (!vtimer->loaded && kvm_timer_should_fire(vtimer))
 257                 return true;
 258
 259         return kvm_timer_should_fire(ptimer);
 260 }
 261
 262 /*
 263  * Reflect the timer output level into the kvm_run structure
 264  */
 265 void kvm_timer_update_run(struct kvm_vcpu *vcpu)
 266 {
 267         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 268         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 269         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
 270
 271         /* Populate the device bitmap with the timer states */
 272         regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
 273                                     KVM_ARM_DEV_EL1_PTIMER);
 274         if (vtimer->irq.level)
 275                 regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
 276         if (ptimer->irq.level)
 277                 regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
 278 }
 279
 280 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 281                                  struct arch_timer_context *timer_ctx)
 282 {
 283         int ret;
 284
 285         timer_ctx->irq.level = new_level;
 286         trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
 287                                    timer_ctx->irq.level);
 288
 289         if (likely(irqchip_in_kernel(vcpu->kvm))) {
 290                 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
 291                                           timer_ctx->irq.irq,
 292                                           timer_ctx->irq.level,
 293                                           timer_ctx);
 294                 WARN_ON(ret);
 295         }
 296 }
 297
 298 /* Schedule the background timer for the emulated timer. */
 299 static void phys_timer_emulate(struct kvm_vcpu *vcpu)
 300 {
 301         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 302         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 303
 304         /*
 305          * If the timer can fire now we have just raised the IRQ line and we
 306          * don't need to have a soft timer scheduled for the future.  If the
 307          * timer cannot fire at all, then we also don't need a soft timer.
 308          */
 309         if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
 310                 soft_timer_cancel(&timer->phys_timer, NULL);
 311                 return;
 312         }
 313
 314         soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer));
 315 }
 316
 317 /*
 318  * Check if there was a change in the timer state, so that we should either
 319  * raise or lower the line level to the GIC or schedule a background timer to
 320  * emulate the physical timer.
 321  */
 322 static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
 323 {
 324         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 325         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 326         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 327
 328         if (unlikely(!timer->enabled))
 329                 return;
 330
 331         if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
 332                 kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
 333
 334         if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
 335                 kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 336
 337         phys_timer_emulate(vcpu);
 338 }
 339
 340 static void vtimer_save_state(struct kvm_vcpu *vcpu)
 341 {
 342         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 343         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 344         unsigned long flags;
 345
 346         local_irq_save(flags);
 347
 348         if (!vtimer->loaded)
 349                 goto out;
 350
 351         if (timer->enabled) {
 352                 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
 353                 vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
 354         }
 355
 356         /* Disable the virtual timer */
 357         write_sysreg_el0(0, cntv_ctl);
 358
 359         vtimer->loaded = false;
 360 out:
 361         local_irq_restore(flags);
 362 }
 363
 364 /*
 365  * Schedule the background timer before calling kvm_vcpu_block, so that this
 366  * thread is removed from its waitqueue and made runnable when there's a timer
 367  * interrupt to handle.
 368  */
 369 void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 370 {
 371         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 372         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 373         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 374
 375         vtimer_save_state(vcpu);
 376
 377         /*
 378          * No need to schedule a background timer if any guest timer has
 379          * already expired, because kvm_vcpu_block will return before putting
 380          * the thread to sleep.
 381          */
 382         if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
 383                 return;
 384
 385         /*
 386          * If both timers are not capable of raising interrupts (disabled or
 387          * masked), then there's no more work for us to do.
 388          */
 389         if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer))
 390                 return;
 391
 392         /*
 393          * The guest timers have not yet expired, schedule a background timer.
 394          * Set the earliest expiration time among the guest timers.
 395          */
 396         soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 397 }
 398
 399 static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 400 {
 401         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 402         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 403         unsigned long flags;
 404
 405         local_irq_save(flags);
 406
 407         if (vtimer->loaded)
 408                 goto out;
 409
 410         if (timer->enabled) {
 411                 write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
 412                 isb();
 413                 write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
 414         }
 415
 416         vtimer->loaded = true;
 417 out:
 418         local_irq_restore(flags);
 419 }
 420
 421 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
 422 {
 423         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 424
 425         vtimer_restore_state(vcpu);
 426
 427         soft_timer_cancel(&timer->bg_timer, &timer->expired);
 428 }
 429
 430 static void set_cntvoff(u64 cntvoff)
 431 {
 432         u32 low = lower_32_bits(cntvoff);
 433         u32 high = upper_32_bits(cntvoff);
 434
 435         /*
 436          * Since kvm_call_hyp doesn't fully support the ARM PCS especially on
 437          * 32-bit systems, but rather passes register by register shifted one
 438          * place (we put the function address in r0/x0), we cannot simply pass
 439          * a 64-bit value as an argument, but have to split the value in two
 440          * 32-bit halves.
 441          */
 442         kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 443 }
 444
 445 static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
 446 {
 447         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 448         bool phys_active;
 449         int ret;
 450
 451         phys_active = vtimer->irq.level ||
 452                       kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 453
 454         ret = irq_set_irqchip_state(host_vtimer_irq,
 455                                     IRQCHIP_STATE_ACTIVE,
 456                                     phys_active);
 457         WARN_ON(ret);
 458 }
 459
 460 static void kvm_timer_vcpu_load_user(struct kvm_vcpu *vcpu)
 461 {
 462         kvm_vtimer_update_mask_user(vcpu);
 463 }
 464
 465 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 466 {
 467         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 468         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 469
 470         if (unlikely(!timer->enabled))
 471                 return;
 472
 473         if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 474                 kvm_timer_vcpu_load_user(vcpu);
 475         else
 476                 kvm_timer_vcpu_load_vgic(vcpu);
 477
 478         set_cntvoff(vtimer->cntvoff);
 479
 480         vtimer_restore_state(vcpu);
 481
 482         if (has_vhe())
 483                 disable_el1_phys_timer_access();
 484
 485         /* Set the background timer for the physical timer emulation. */
 486         phys_timer_emulate(vcpu);
 487 }
 488
 489 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 490 {
 491         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 492         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 493         struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
 494         bool vlevel, plevel;
 495
 496         if (likely(irqchip_in_kernel(vcpu->kvm)))
 497                 return false;
 498
 499         vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
 500         plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
 501
 502         return vtimer->irq.level != vlevel ||
 503                ptimer->irq.level != plevel;
 504 }
 505
 506 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 507 {
 508         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 509
 510         if (unlikely(!timer->enabled))
 511                 return;
 512
 513         if (has_vhe())
 514                 enable_el1_phys_timer_access();
 515
 516         vtimer_save_state(vcpu);
 517
 518         /*
 519          * Cancel the physical timer emulation, because the only case where we
 520          * need it after a vcpu_put is in the context of a sleeping VCPU, and
 521          * in that case we already factor in the deadline for the physical
 522          * timer when scheduling the bg_timer.
 523          *
 524          * In any case, we re-schedule the hrtimer for the physical timer when
 525          * coming back to the VCPU thread in kvm_timer_vcpu_load().
 526          */
 527         soft_timer_cancel(&timer->phys_timer, NULL);
 528
 529         /*
 530          * The kernel may decide to run userspace after calling vcpu_put, so
 531          * we reset cntvoff to 0 to ensure a consistent read between user
 532          * accesses to the virtual counter and kernel access to the physical
 533          * counter.
 534          */
 535         set_cntvoff(0);
 536 }
 537
 538 static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
 539 {
 540         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 541
 542         if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
 543                 kvm_vtimer_update_mask_user(vcpu);
 544                 return;
 545         }
 546
 547         /*
 548          * If the guest disabled the timer without acking the interrupt, then
 549          * we must make sure the physical and virtual active states are in
 550          * sync by deactivating the physical interrupt, because otherwise we
 551          * wouldn't see the next timer interrupt in the host.
 552          */
 553         if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
 554                 int ret;
 555                 ret = irq_set_irqchip_state(host_vtimer_irq,
 556                                             IRQCHIP_STATE_ACTIVE,
 557                                             false);
 558                 WARN_ON(ret);
 559         }
 560 }
 561
 562 /**
 563  * kvm_timer_sync_hwstate - sync timer state from cpu
 564  * @vcpu: The vcpu pointer
 565  *
 566  * Check if any of the timers have expired while we were running in the guest,
 567  * and inject an interrupt if that was the case.
 568  */
 569 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 570 {
 571         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 572
 573         /*
 574          * If we entered the guest with the vtimer output asserted we have to
 575          * check if the guest has modified the timer so that we should lower
 576          * the line at this point.
 577          */
 578         if (vtimer->irq.level) {
 579                 vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
 580                 vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
 581                 if (!kvm_timer_should_fire(vtimer)) {
 582                         kvm_timer_update_irq(vcpu, false, vtimer);
 583                         unmask_vtimer_irq(vcpu);
 584                 }
 585         }
 586 }
 587
 588 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 589 {
 590         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 591         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 592
 593         /*
 594          * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
 595          * and to 0 for ARMv7.  We provide an implementation that always
 596          * resets the timer to be disabled and unmasked and is compliant with
 597          * the ARMv7 architecture.
 598          */
 599         vtimer->cnt_ctl = 0;
 600         ptimer->cnt_ctl = 0;
 601         kvm_timer_update_state(vcpu);
 602
 603         return 0;
 604 }
 605
 606 /* Make the updates of cntvoff for all vtimer contexts atomic */
 607 static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
 608 {
 609         int i;
 610         struct kvm *kvm = vcpu->kvm;
 611         struct kvm_vcpu *tmp;
 612
 613         mutex_lock(&kvm->lock);
 614         kvm_for_each_vcpu(i, tmp, kvm)
 615                 vcpu_vtimer(tmp)->cntvoff = cntvoff;
 616
 617         /*
 618          * When called from the vcpu create path, the CPU being created is not
 619          * included in the loop above, so we just set it here as well.
 620          */
 621         vcpu_vtimer(vcpu)->cntvoff = cntvoff;
 622         mutex_unlock(&kvm->lock);
 623 }
 624
 625 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 626 {
 627         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 628         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 629         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 630
 631         /* Synchronize cntvoff across all vtimers of a VM. */
 632         update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
 633         vcpu_ptimer(vcpu)->cntvoff = 0;
 634
 635         INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
 636         hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 637         timer->bg_timer.function = kvm_bg_timer_expire;
 638
 639         hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 640         timer->phys_timer.function = kvm_phys_timer_expire;
 641
 642         vtimer->irq.irq = default_vtimer_irq.irq;
 643         ptimer->irq.irq = default_ptimer_irq.irq;
 644 }
 645
 646 static void kvm_timer_init_interrupt(void *info)
 647 {
 648         enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 649 }
 650
 651 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 652 {
 653         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 654         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 655
 656         switch (regid) {
 657         case KVM_REG_ARM_TIMER_CTL:
 658                 vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
 659                 break;
 660         case KVM_REG_ARM_TIMER_CNT:
 661                 update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
 662                 break;
 663         case KVM_REG_ARM_TIMER_CVAL:
 664                 vtimer->cnt_cval = value;
 665                 break;
 666         case KVM_REG_ARM_PTIMER_CTL:
 667                 ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
 668                 break;
 669         case KVM_REG_ARM_PTIMER_CVAL:
 670                 ptimer->cnt_cval = value;
 671                 break;
 672
 673         default:
 674                 return -1;
 675         }
 676
 677         kvm_timer_update_state(vcpu);
 678         return 0;
 679 }
 680
 681 static u64 read_timer_ctl(struct arch_timer_context *timer)
 682 {
 683         /*
 684          * Set ISTATUS bit if it's expired.
 685          * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
 686          * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
 687          * regardless of ENABLE bit for our implementation convenience.
 688          */
 689         if (!kvm_timer_compute_delta(timer))
 690                 return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT;
 691         else
 692                 return timer->cnt_ctl;
 693 }
 694
 695 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
 696 {
 697         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 698         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 699
 700         switch (regid) {
 701         case KVM_REG_ARM_TIMER_CTL:
 702                 return read_timer_ctl(vtimer);
 703         case KVM_REG_ARM_TIMER_CNT:
 704                 return kvm_phys_timer_read() - vtimer->cntvoff;
 705         case KVM_REG_ARM_TIMER_CVAL:
 706                 return vtimer->cnt_cval;
 707         case KVM_REG_ARM_PTIMER_CTL:
 708                 return read_timer_ctl(ptimer);
 709         case KVM_REG_ARM_PTIMER_CVAL:
 710                 return ptimer->cnt_cval;
 711         case KVM_REG_ARM_PTIMER_CNT:
 712                 return kvm_phys_timer_read();
 713         }
 714         return (u64)-1;
 715 }
 716
 717 static int kvm_timer_starting_cpu(unsigned int cpu)
 718 {
 719         kvm_timer_init_interrupt(NULL);
 720         return 0;
 721 }
 722
 723 static int kvm_timer_dying_cpu(unsigned int cpu)
 724 {
 725         disable_percpu_irq(host_vtimer_irq);
 726         return 0;
 727 }
 728
 729 int kvm_timer_hyp_init(void)
 730 {
 731         struct arch_timer_kvm_info *info;
 732         int err;
 733
 734         info = arch_timer_get_kvm_info();
 735         timecounter = &info->timecounter;
 736
 737         if (!timecounter->cc) {
 738                 kvm_err("kvm_arch_timer: uninitialized timecounter\n");
 739                 return -ENODEV;
 740         }
 741
 742         if (info->virtual_irq <= 0) {
 743                 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
 744                         info->virtual_irq);
 745                 return -ENODEV;
 746         }
 747         host_vtimer_irq = info->virtual_irq;
 748
 749         host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
 750         if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
 751             host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
 752                 kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
 753                         host_vtimer_irq);
 754                 host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
 755         }
 756
 757         err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
 758                                  "kvm guest timer", kvm_get_running_vcpus());
 759         if (err) {
 760                 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
 761                         host_vtimer_irq, err);
 762                 return err;
 763         }
 764
 765         err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
 766         if (err) {
 767                 kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
 768                 goto out_free_irq;
 769         }
 770
 771         kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
 772
 773         cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
 774                           "kvm/arm/timer:starting", kvm_timer_starting_cpu,
 775                           kvm_timer_dying_cpu);
 776         return 0;
 777 out_free_irq:
 778         free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
 779         return err;
 780 }
 781
 782 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 783 {
 784         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 785         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 786
 787         soft_timer_cancel(&timer->bg_timer, &timer->expired);
 788         soft_timer_cancel(&timer->phys_timer, NULL);
 789         kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
 790 }
 791
 792 static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
 793 {
 794         int vtimer_irq, ptimer_irq;
 795         int i, ret;
 796
 797         vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
 798         ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
 799         if (ret)
 800                 return false;
 801
 802         ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
 803         ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
 804         if (ret)
 805                 return false;
 806
 807         kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
 808                 if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
 809                     vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
 810                         return false;
 811         }
 812
 813         return true;
 814 }
 815
 816 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 817 {
 818         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 819         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 820         struct irq_desc *desc;
 821         struct irq_data *data;
 822         int phys_irq;
 823         int ret;
 824
 825         if (timer->enabled)
 826                 return 0;
 827
 828         /* Without a VGIC we do not map virtual IRQs to physical IRQs */
 829         if (!irqchip_in_kernel(vcpu->kvm))
 830                 goto no_vgic;
 831
 832         if (!vgic_initialized(vcpu->kvm))
 833                 return -ENODEV;
 834
 835         if (!timer_irqs_are_valid(vcpu)) {
 836                 kvm_debug("incorrectly configured timer irqs\n");
 837                 return -EINVAL;
 838         }
 839
 840         /*
 841          * Find the physical IRQ number corresponding to the host_vtimer_irq
 842          */
 843         desc = irq_to_desc(host_vtimer_irq);
 844         if (!desc) {
 845                 kvm_err("%s: no interrupt descriptor\n", __func__);
 846                 return -EINVAL;
 847         }
 848
 849         data = irq_desc_get_irq_data(desc);
 850         while (data->parent_data)
 851                 data = data->parent_data;
 852
 853         phys_irq = data->hwirq;
 854
 855         /*
 856          * Tell the VGIC that the virtual interrupt is tied to a
 857          * physical interrupt. We do that once per VCPU.
 858          */
 859         ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq);
 860         if (ret)
 861                 return ret;
 862
 863 no_vgic:
 864         preempt_disable();
 865         timer->enabled = 1;
 866         kvm_timer_vcpu_load_vgic(vcpu);
 867         preempt_enable();
 868
 869         return 0;
 870 }
 871
 872 /*
 873  * On VHE system, we only need to configure trap on physical timer and counter
 874  * accesses in EL0 and EL1 once, not for every world switch.
 875  * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
 876  * and this makes those bits have no effect for the host kernel execution.
 877  */
 878 void kvm_timer_init_vhe(void)
 879 {
 880         /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */
 881         u32 cnthctl_shift = 10;
 882         u64 val;
 883
 884         /*
 885          * Disallow physical timer access for the guest.
 886          * Physical counter access is allowed.
 887          */
 888         val = read_sysreg(cnthctl_el2);
 889         val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
 890         val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
 891         write_sysreg(val, cnthctl_el2);
 892 }
 893
 894 static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
 895 {
 896         struct kvm_vcpu *vcpu;
 897         int i;
 898
 899         kvm_for_each_vcpu(i, vcpu, kvm) {
 900                 vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
 901                 vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
 902         }
 903 }
 904
 905 int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 906 {
 907         int __user *uaddr = (int __user *)(long)attr->addr;
 908         struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 909         struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 910         int irq;
 911
 912         if (!irqchip_in_kernel(vcpu->kvm))
 913                 return -EINVAL;
 914
 915         if (get_user(irq, uaddr))
 916                 return -EFAULT;
 917
 918         if (!(irq_is_ppi(irq)))
 919                 return -EINVAL;
 920
 921         if (vcpu->arch.timer_cpu.enabled)
 922                 return -EBUSY;
 923
 924         switch (attr->attr) {
 925         case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
 926                 set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
 927                 break;
 928         case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
 929                 set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
 930                 break;
 931         default:
 932                 return -ENXIO;
 933         }
 934
 935         return 0;
 936 }
 937
 938 int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 939 {
 940         int __user *uaddr = (int __user *)(long)attr->addr;
 941         struct arch_timer_context *timer;
 942         int irq;
 943
 944         switch (attr->attr) {
 945         case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
 946                 timer = vcpu_vtimer(vcpu);
 947                 break;
 948         case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
 949                 timer = vcpu_ptimer(vcpu);
 950                 break;
 951         default:
 952                 return -ENXIO;
 953         }
 954
 955         irq = timer->irq.irq;
 956         return put_user(irq, uaddr);
 957 }
 958
 959 int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 960 {
 961         switch (attr->attr) {
 962         case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
 963         case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
 964                 return 0;
 965         }
 966
 967         return -ENXIO;
 968 }