virt/kvm/arm/vgic/vgic.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) 2015, 2016 ARM Ltd.
   4  */
   5
   6 #include <linux/interrupt.h>
   7 #include <linux/irq.h>
   8 #include <linux/kvm.h>
   9 #include <linux/kvm_host.h>
  10 #include <linux/list_sort.h>
  11 #include <linux/nospec.h>
  12
  13 #include <asm/kvm_hyp.h>
  14
  15 #include "vgic.h"
  16
  17 #define CREATE_TRACE_POINTS
  18 #include "trace.h"
  19
/*
 * VGIC state object with external linkage, placed in __ro_after_init
 * memory. The gicv3_cpuif static key starts out false; the save/restore
 * helpers in this file test it to choose between the v2 and v3 paths.
 */
struct vgic_global kvm_vgic_global_state __ro_after_init = {
	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};
  23
  24 /*
  25  * Locking order is always:
  26  * kvm->lock (mutex)
  27  *   its->cmd_lock (mutex)
  28  *     its->its_lock (mutex)
  29  *       vgic_cpu->ap_list_lock         must be taken with IRQs disabled
  30  *         kvm->lpi_list_lock           must be taken with IRQs disabled
  31  *           vgic_irq->irq_lock         must be taken with IRQs disabled
  32  *
  33  * As the ap_list_lock might be taken from the timer interrupt handler,
  34  * we have to disable IRQs before taking this lock and everything lower
  35  * than it.
  36  *
  37  * If you need to take multiple locks, always take the upper lock first,
  38  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
  39  * If you are already holding a lock and need to take a higher one, you
  40  * have to drop the lower ranking lock first and re-acquire it after having
  41  * taken the upper one.
  42  *
  43  * When taking more than one ap_list_lock at the same time, always take the
  44  * lowest numbered VCPU's ap_list_lock first, so:
  45  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
  46  *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
  47  *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
  48  *
  49  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
  50  * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
  51  * spinlocks for any lock that may be taken while injecting an interrupt.
  52  */
  53
  54 /*
  55  * Iterate over the VM's list of mapped LPIs to find the one with a
  56  * matching interrupt ID and return a reference to the IRQ structure.
  57  */
  58 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
  59 {
  60         struct vgic_dist *dist = &kvm->arch.vgic;
  61         struct vgic_irq *irq = NULL;
  62         unsigned long flags;
  63
  64         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
  65
  66         list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
  67                 if (irq->intid != intid)
  68                         continue;
  69
  70                 /*
  71                  * This increases the refcount, the caller is expected to
  72                  * call vgic_put_irq() later once it's finished with the IRQ.
  73                  */
  74                 vgic_get_irq_kref(irq);
  75                 goto out_unlock;
  76         }
  77         irq = NULL;
  78
  79 out_unlock:
  80         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  81
  82         return irq;
  83 }
  84
  85 /*
  86  * This looks up the virtual interrupt ID to get the corresponding
  87  * struct vgic_irq. It also increases the refcount, so any caller is expected
  88  * to call vgic_put_irq() once it's finished with this IRQ.
  89  */
  90 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
  91                               u32 intid)
  92 {
  93         /* SGIs and PPIs */
  94         if (intid <= VGIC_MAX_PRIVATE) {
  95                 intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
  96                 return &vcpu->arch.vgic_cpu.private_irqs[intid];
  97         }
  98
  99         /* SPIs */
 100         if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
 101                 intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
 102                 return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
 103         }
 104
 105         /* LPIs */
 106         if (intid >= VGIC_MIN_LPI)
 107                 return vgic_get_lpi(kvm, intid);
 108
 109         WARN(1, "Looking up struct vgic_irq for reserved INTID");
 110         return NULL;
 111 }
 112
 113 /*
 114  * We can't do anything in here, because we lack the kvm pointer to
 115  * lock and remove the item from the lpi_list. So we keep this function
 116  * empty and use the return value of kref_put() to trigger the freeing.
 117  */
static void vgic_irq_release(struct kref *ref)
{
	/*
	 * Intentionally empty: vgic_put_irq() unlinks and frees the IRQ
	 * itself when kref_put() reports that the last reference is gone.
	 */
}
 121
 122 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 123 {
 124         struct vgic_dist *dist = &kvm->arch.vgic;
 125         unsigned long flags;
 126
 127         if (irq->intid < VGIC_MIN_LPI)
 128                 return;
 129
 130         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
 131         if (!kref_put(&irq->refcount, vgic_irq_release)) {
 132                 raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 133                 return;
 134         };
 135
 136         list_del(&irq->lpi_list);
 137         dist->lpi_list_count--;
 138         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 139
 140         kfree(irq);
 141 }
 142
 143 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
 144 {
 145         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 146         struct vgic_irq *irq, *tmp;
 147         unsigned long flags;
 148
 149         raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
 150
 151         list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
 152                 if (irq->intid >= VGIC_MIN_LPI) {
 153                         raw_spin_lock(&irq->irq_lock);
 154                         list_del(&irq->ap_list);
 155                         irq->vcpu = NULL;
 156                         raw_spin_unlock(&irq->irq_lock);
 157                         vgic_put_irq(vcpu->kvm, irq);
 158                 }
 159         }
 160
 161         raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
 162 }
 163
 164 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
 165 {
 166         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 167                                       IRQCHIP_STATE_PENDING,
 168                                       pending));
 169 }
 170
 171 bool vgic_get_phys_line_level(struct vgic_irq *irq)
 172 {
 173         bool line_level;
 174
 175         BUG_ON(!irq->hw);
 176
 177         if (irq->get_input_level)
 178                 return irq->get_input_level(irq->intid);
 179
 180         WARN_ON(irq_get_irqchip_state(irq->host_irq,
 181                                       IRQCHIP_STATE_PENDING,
 182                                       &line_level));
 183         return line_level;
 184 }
 185
 186 /* Set/Clear the physical active state */
 187 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
 188 {
 189
 190         BUG_ON(!irq->hw);
 191         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 192                                       IRQCHIP_STATE_ACTIVE,
 193                                       active));
 194 }
 195
 196 /**
  197  * vgic_target_oracle - compute the target vcpu for an irq
 198  *
 199  * @irq:        The irq to route. Must be already locked.
 200  *
 201  * Based on the current state of the interrupt (enabled, pending,
 202  * active, vcpu and target_vcpu), compute the next vcpu this should be
 203  * given to. Return NULL if this shouldn't be injected at all.
 204  *
 205  * Requires the IRQ lock to be held.
 206  */
 207 static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
 208 {
 209         lockdep_assert_held(&irq->irq_lock);
 210
 211         /* If the interrupt is active, it must stay on the current vcpu */
 212         if (irq->active)
 213                 return irq->vcpu ? : irq->target_vcpu;
 214
 215         /*
 216          * If the IRQ is not active but enabled and pending, we should direct
 217          * it to its configured target VCPU.
 218          * If the distributor is disabled, pending interrupts shouldn't be
 219          * forwarded.
 220          */
 221         if (irq->enabled && irq_is_pending(irq)) {
 222                 if (unlikely(irq->target_vcpu &&
 223                              !irq->target_vcpu->kvm->arch.vgic.enabled))
 224                         return NULL;
 225
 226                 return irq->target_vcpu;
 227         }
 228
 229         /* If neither active nor pending and enabled, then this IRQ should not
 230          * be queued to any VCPU.
 231          */
 232         return NULL;
 233 }
 234
 235 /*
 236  * The order of items in the ap_lists defines how we'll pack things in LRs as
 237  * well, the first items in the list being the first things populated in the
 238  * LRs.
 239  *
 240  * A hard rule is that active interrupts can never be pushed out of the LRs
 241  * (and therefore take priority) since we cannot reliably trap on deactivation
 242  * of IRQs and therefore they have to be present in the LRs.
 243  *
 244  * Otherwise things should be sorted by the priority field and the GIC
 245  * hardware support will take care of preemption of priority groups etc.
 246  *
 247  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 248  * to sort "b" before "a".
 249  */
 250 static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
 251 {
 252         struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
 253         struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
 254         bool penda, pendb;
 255         int ret;
 256
 257         /*
 258          * list_sort may call this function with the same element when
 259          * the list is fairly long.
 260          */
 261         if (unlikely(irqa == irqb))
 262                 return 0;
 263
 264         raw_spin_lock(&irqa->irq_lock);
 265         raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
 266
 267         if (irqa->active || irqb->active) {
 268                 ret = (int)irqb->active - (int)irqa->active;
 269                 goto out;
 270         }
 271
 272         penda = irqa->enabled && irq_is_pending(irqa);
 273         pendb = irqb->enabled && irq_is_pending(irqb);
 274
 275         if (!penda || !pendb) {
 276                 ret = (int)pendb - (int)penda;
 277                 goto out;
 278         }
 279
 280         /* Both pending and enabled, sort by priority */
 281         ret = irqa->priority - irqb->priority;
 282 out:
 283         raw_spin_unlock(&irqb->irq_lock);
 284         raw_spin_unlock(&irqa->irq_lock);
 285         return ret;
 286 }
 287
 288 /* Must be called with the ap_list_lock held */
 289 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
 290 {
 291         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 292
 293         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 294
 295         list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
 296 }
 297
 298 /*
 299  * Only valid injection if changing level for level-triggered IRQs or for a
 300  * rising edge, and in-kernel connected IRQ lines can only be controlled by
 301  * their owner.
 302  */
 303 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
 304 {
 305         if (irq->owner != owner)
 306                 return false;
 307
 308         switch (irq->config) {
 309         case VGIC_CONFIG_LEVEL:
 310                 return irq->line_level != level;
 311         case VGIC_CONFIG_EDGE:
 312                 return level;
 313         }
 314
 315         return false;
 316 }
 317
 318 /*
 319  * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 320  * Do the queuing if necessary, taking the right locks in the right order.
 321  * Returns true when the IRQ was queued, false otherwise.
 322  *
 323  * Needs to be entered with the IRQ lock already held, but will return
 324  * with all locks dropped.
 325  */
 326 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
 327                            unsigned long flags)
 328 {
 329         struct kvm_vcpu *vcpu;
 330
 331         lockdep_assert_held(&irq->irq_lock);
 332
 333 retry:
 334         vcpu = vgic_target_oracle(irq);
 335         if (irq->vcpu || !vcpu) {
 336                 /*
 337                  * If this IRQ is already on a VCPU's ap_list, then it
 338                  * cannot be moved or modified and there is no more work for
 339                  * us to do.
 340                  *
 341                  * Otherwise, if the irq is not pending and enabled, it does
 342                  * not need to be inserted into an ap_list and there is also
 343                  * no more work for us to do.
 344                  */
 345                 raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 346
 347                 /*
 348                  * We have to kick the VCPU here, because we could be
 349                  * queueing an edge-triggered interrupt for which we
 350                  * get no EOI maintenance interrupt. In that case,
 351                  * while the IRQ is already on the VCPU's AP list, the
 352                  * VCPU could have EOI'ed the original interrupt and
 353                  * won't see this one until it exits for some other
 354                  * reason.
 355                  */
 356                 if (vcpu) {
 357                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 358                         kvm_vcpu_kick(vcpu);
 359                 }
 360                 return false;
 361         }
 362
 363         /*
 364          * We must unlock the irq lock to take the ap_list_lock where
 365          * we are going to insert this new pending interrupt.
 366          */
 367         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 368
 369         /* someone can do stuff here, which we re-check below */
 370
 371         raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 372         raw_spin_lock(&irq->irq_lock);
 373
 374         /*
 375          * Did something change behind our backs?
 376          *
 377          * There are two cases:
 378          * 1) The irq lost its pending state or was disabled behind our
 379          *    backs and/or it was queued to another VCPU's ap_list.
 380          * 2) Someone changed the affinity on this irq behind our
 381          *    backs and we are now holding the wrong ap_list_lock.
 382          *
 383          * In both cases, drop the locks and retry.
 384          */
 385
 386         if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
 387                 raw_spin_unlock(&irq->irq_lock);
 388                 raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
 389                                            flags);
 390
 391                 raw_spin_lock_irqsave(&irq->irq_lock, flags);
 392                 goto retry;
 393         }
 394
 395         /*
 396          * Grab a reference to the irq to reflect the fact that it is
 397          * now in the ap_list.
 398          */
 399         vgic_get_irq_kref(irq);
 400         list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
 401         irq->vcpu = vcpu;
 402
 403         raw_spin_unlock(&irq->irq_lock);
 404         raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 405
 406         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 407         kvm_vcpu_kick(vcpu);
 408
 409         return true;
 410 }
 411
 412 /**
 413  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 414  * @kvm:     The VM structure pointer
 415  * @cpuid:   The CPU for PPIs
 416  * @intid:   The INTID to inject a new state to.
 417  * @level:   Edge-triggered:  true:  to trigger the interrupt
 418  *                            false: to ignore the call
 419  *           Level-sensitive  true:  raise the input signal
 420  *                            false: lower the input signal
 421  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
 422  *           that the caller is allowed to inject this IRQ.  Userspace
 423  *           injections will have owner == NULL.
 424  *
 425  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 426  * level-sensitive interrupts.  You can think of the level parameter as 1
 427  * being HIGH and 0 being LOW and all devices being active-HIGH.
 428  */
 429 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 430                         bool level, void *owner)
 431 {
 432         struct kvm_vcpu *vcpu;
 433         struct vgic_irq *irq;
 434         unsigned long flags;
 435         int ret;
 436
 437         trace_vgic_update_irq_pending(cpuid, intid, level);
 438
 439         ret = vgic_lazy_init(kvm);
 440         if (ret)
 441                 return ret;
 442
 443         vcpu = kvm_get_vcpu(kvm, cpuid);
 444         if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
 445                 return -EINVAL;
 446
 447         irq = vgic_get_irq(kvm, vcpu, intid);
 448         if (!irq)
 449                 return -EINVAL;
 450
 451         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 452
 453         if (!vgic_validate_injection(irq, level, owner)) {
 454                 /* Nothing to see here, move along... */
 455                 raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 456                 vgic_put_irq(kvm, irq);
 457                 return 0;
 458         }
 459
 460         if (irq->config == VGIC_CONFIG_LEVEL)
 461                 irq->line_level = level;
 462         else
 463                 irq->pending_latch = true;
 464
 465         vgic_queue_irq_unlock(kvm, irq, flags);
 466         vgic_put_irq(kvm, irq);
 467
 468         return 0;
 469 }
 470
 471 /* @irq->irq_lock must be held */
 472 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 473                             unsigned int host_irq,
 474                             bool (*get_input_level)(int vindid))
 475 {
 476         struct irq_desc *desc;
 477         struct irq_data *data;
 478
 479         /*
 480          * Find the physical IRQ number corresponding to @host_irq
 481          */
 482         desc = irq_to_desc(host_irq);
 483         if (!desc) {
 484                 kvm_err("%s: no interrupt descriptor\n", __func__);
 485                 return -EINVAL;
 486         }
 487         data = irq_desc_get_irq_data(desc);
 488         while (data->parent_data)
 489                 data = data->parent_data;
 490
 491         irq->hw = true;
 492         irq->host_irq = host_irq;
 493         irq->hwintid = data->hwirq;
 494         irq->get_input_level = get_input_level;
 495         return 0;
 496 }
 497
 498 /* @irq->irq_lock must be held */
 499 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 500 {
 501         irq->hw = false;
 502         irq->hwintid = 0;
 503         irq->get_input_level = NULL;
 504 }
 505
 506 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
 507                           u32 vintid, bool (*get_input_level)(int vindid))
 508 {
 509         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 510         unsigned long flags;
 511         int ret;
 512
 513         BUG_ON(!irq);
 514
 515         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 516         ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
 517         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 518         vgic_put_irq(vcpu->kvm, irq);
 519
 520         return ret;
 521 }
 522
 523 /**
 524  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 525  * @vcpu: The VCPU pointer
 526  * @vintid: The INTID of the interrupt
 527  *
 528  * Reset the active and pending states of a mapped interrupt.  Kernel
 529  * subsystems injecting mapped interrupts should reset their interrupt lines
 530  * when we are doing a reset of the VM.
 531  */
 532 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
 533 {
 534         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 535         unsigned long flags;
 536
 537         if (!irq->hw)
 538                 goto out;
 539
 540         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 541         irq->active = false;
 542         irq->pending_latch = false;
 543         irq->line_level = false;
 544         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 545 out:
 546         vgic_put_irq(vcpu->kvm, irq);
 547 }
 548
 549 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 550 {
 551         struct vgic_irq *irq;
 552         unsigned long flags;
 553
 554         if (!vgic_initialized(vcpu->kvm))
 555                 return -EAGAIN;
 556
 557         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 558         BUG_ON(!irq);
 559
 560         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 561         kvm_vgic_unmap_irq(irq);
 562         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 563         vgic_put_irq(vcpu->kvm, irq);
 564
 565         return 0;
 566 }
 567
 568 /**
 569  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 570  *
 571  * @vcpu:   Pointer to the VCPU (used for PPIs)
 572  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
 573  * @owner:  Opaque pointer to the owner
 574  *
 575  * Returns 0 if intid is not already used by another in-kernel device and the
 576  * owner is set, otherwise returns an error code.
 577  */
 578 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 579 {
 580         struct vgic_irq *irq;
 581         unsigned long flags;
 582         int ret = 0;
 583
 584         if (!vgic_initialized(vcpu->kvm))
 585                 return -EAGAIN;
 586
 587         /* SGIs and LPIs cannot be wired up to any device */
 588         if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
 589                 return -EINVAL;
 590
 591         irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
 592         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 593         if (irq->owner && irq->owner != owner)
 594                 ret = -EEXIST;
 595         else
 596                 irq->owner = owner;
 597         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 598
 599         return ret;
 600 }
 601
 602 /**
 603  * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 604  *
 605  * @vcpu: The VCPU pointer
 606  *
 607  * Go over the list of "interesting" interrupts, and prune those that we
 608  * won't have to consider in the near future.
 609  */
 610 static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
 611 {
 612         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 613         struct vgic_irq *irq, *tmp;
 614
 615         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 616
 617 retry:
 618         raw_spin_lock(&vgic_cpu->ap_list_lock);
 619
 620         list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
 621                 struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
 622                 bool target_vcpu_needs_kick = false;
 623
 624                 raw_spin_lock(&irq->irq_lock);
 625
 626                 BUG_ON(vcpu != irq->vcpu);
 627
 628                 target_vcpu = vgic_target_oracle(irq);
 629
 630                 if (!target_vcpu) {
 631                         /*
 632                          * We don't need to process this interrupt any
 633                          * further, move it off the list.
 634                          */
 635                         list_del(&irq->ap_list);
 636                         irq->vcpu = NULL;
 637                         raw_spin_unlock(&irq->irq_lock);
 638
 639                         /*
 640                          * This vgic_put_irq call matches the
 641                          * vgic_get_irq_kref in vgic_queue_irq_unlock,
 642                          * where we added the LPI to the ap_list. As
 643                          * we remove the irq from the list, we drop
 644                          * also drop the refcount.
 645                          */
 646                         vgic_put_irq(vcpu->kvm, irq);
 647                         continue;
 648                 }
 649
 650                 if (target_vcpu == vcpu) {
 651                         /* We're on the right CPU */
 652                         raw_spin_unlock(&irq->irq_lock);
 653                         continue;
 654                 }
 655
 656                 /* This interrupt looks like it has to be migrated. */
 657
 658                 raw_spin_unlock(&irq->irq_lock);
 659                 raw_spin_unlock(&vgic_cpu->ap_list_lock);
 660
 661                 /*
 662                  * Ensure locking order by always locking the smallest
 663                  * ID first.
 664                  */
 665                 if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
 666                         vcpuA = vcpu;
 667                         vcpuB = target_vcpu;
 668                 } else {
 669                         vcpuA = target_vcpu;
 670                         vcpuB = vcpu;
 671                 }
 672
 673                 raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
 674                 raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
 675                                       SINGLE_DEPTH_NESTING);
 676                 raw_spin_lock(&irq->irq_lock);
 677
 678                 /*
 679                  * If the affinity has been preserved, move the
 680                  * interrupt around. Otherwise, it means things have
 681                  * changed while the interrupt was unlocked, and we
 682                  * need to replay this.
 683                  *
 684                  * In all cases, we cannot trust the list not to have
 685                  * changed, so we restart from the beginning.
 686                  */
 687                 if (target_vcpu == vgic_target_oracle(irq)) {
 688                         struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
 689
 690                         list_del(&irq->ap_list);
 691                         irq->vcpu = target_vcpu;
 692                         list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
 693                         target_vcpu_needs_kick = true;
 694                 }
 695
 696                 raw_spin_unlock(&irq->irq_lock);
 697                 raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
 698                 raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
 699
 700                 if (target_vcpu_needs_kick) {
 701                         kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
 702                         kvm_vcpu_kick(target_vcpu);
 703                 }
 704
 705                 goto retry;
 706         }
 707
 708         raw_spin_unlock(&vgic_cpu->ap_list_lock);
 709 }
 710
 711 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
 712 {
 713         if (kvm_vgic_global_state.type == VGIC_V2)
 714                 vgic_v2_fold_lr_state(vcpu);
 715         else
 716                 vgic_v3_fold_lr_state(vcpu);
 717 }
 718
 719 /* Requires the irq_lock to be held. */
 720 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
 721                                     struct vgic_irq *irq, int lr)
 722 {
 723         lockdep_assert_held(&irq->irq_lock);
 724
 725         if (kvm_vgic_global_state.type == VGIC_V2)
 726                 vgic_v2_populate_lr(vcpu, irq, lr);
 727         else
 728                 vgic_v3_populate_lr(vcpu, irq, lr);
 729 }
 730
 731 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
 732 {
 733         if (kvm_vgic_global_state.type == VGIC_V2)
 734                 vgic_v2_clear_lr(vcpu, lr);
 735         else
 736                 vgic_v3_clear_lr(vcpu, lr);
 737 }
 738
 739 static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
 740 {
 741         if (kvm_vgic_global_state.type == VGIC_V2)
 742                 vgic_v2_set_underflow(vcpu);
 743         else
 744                 vgic_v3_set_underflow(vcpu);
 745 }
 746
 747 /* Requires the ap_list_lock to be held. */
 748 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
 749                                  bool *multi_sgi)
 750 {
 751         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 752         struct vgic_irq *irq;
 753         int count = 0;
 754
 755         *multi_sgi = false;
 756
 757         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 758
 759         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 760                 int w;
 761
 762                 raw_spin_lock(&irq->irq_lock);
 763                 /* GICv2 SGIs can count for more than one... */
 764                 w = vgic_irq_get_lr_count(irq);
 765                 raw_spin_unlock(&irq->irq_lock);
 766
 767                 count += w;
 768                 *multi_sgi |= (w > 1);
 769         }
 770         return count;
 771 }
 772
 773 /* Requires the VCPU's ap_list_lock to be held. */
 774 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
 775 {
 776         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 777         struct vgic_irq *irq;
 778         int count;
 779         bool multi_sgi;
 780         u8 prio = 0xff;
 781
 782         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 783
 784         count = compute_ap_list_depth(vcpu, &multi_sgi);
 785         if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
 786                 vgic_sort_ap_list(vcpu);
 787
 788         count = 0;
 789
 790         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 791                 raw_spin_lock(&irq->irq_lock);
 792
 793                 /*
 794                  * If we have multi-SGIs in the pipeline, we need to
 795                  * guarantee that they are all seen before any IRQ of
 796                  * lower priority. In that case, we need to filter out
 797                  * these interrupts by exiting early. This is easy as
 798                  * the AP list has been sorted already.
 799                  */
 800                 if (multi_sgi && irq->priority > prio) {
 801                         _raw_spin_unlock(&irq->irq_lock);
 802                         break;
 803                 }
 804
 805                 if (likely(vgic_target_oracle(irq) == vcpu)) {
 806                         vgic_populate_lr(vcpu, irq, count++);
 807
 808                         if (irq->source)
 809                                 prio = irq->priority;
 810                 }
 811
 812                 raw_spin_unlock(&irq->irq_lock);
 813
 814                 if (count == kvm_vgic_global_state.nr_lr) {
 815                         if (!list_is_last(&irq->ap_list,
 816                                           &vgic_cpu->ap_list_head))
 817                                 vgic_set_underflow(vcpu);
 818                         break;
 819                 }
 820         }
 821
 822         vcpu->arch.vgic_cpu.used_lrs = count;
 823
 824         /* Nuke remaining LRs */
 825         for ( ; count < kvm_vgic_global_state.nr_lr; count++)
 826                 vgic_clear_lr(vcpu, count);
 827 }
 828
 829 static inline bool can_access_vgic_from_kernel(void)
 830 {
 831         /*
 832          * GICv2 can always be accessed from the kernel because it is
 833          * memory-mapped, and VHE systems can access GICv3 EL2 system
 834          * registers.
 835          */
 836         return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
 837 }
 838
 839 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
 840 {
 841         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 842                 vgic_v2_save_state(vcpu);
 843         else
 844                 __vgic_v3_save_state(vcpu);
 845 }
 846
 847 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
 848 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 849 {
 850         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 851
 852         WARN_ON(vgic_v4_sync_hwstate(vcpu));
 853
 854         /* An empty ap_list_head implies used_lrs == 0 */
 855         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 856                 return;
 857
 858         if (can_access_vgic_from_kernel())
 859                 vgic_save_state(vcpu);
 860
 861         if (vgic_cpu->used_lrs)
 862                 vgic_fold_lr_state(vcpu);
 863         vgic_prune_ap_list(vcpu);
 864 }
 865
 866 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
 867 {
 868         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 869                 vgic_v2_restore_state(vcpu);
 870         else
 871                 __vgic_v3_restore_state(vcpu);
 872 }
 873
 874 /* Flush our emulation state into the GIC hardware before entering the guest. */
 875 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 876 {
 877         WARN_ON(vgic_v4_flush_hwstate(vcpu));
 878
 879         /*
 880          * If there are no virtual interrupts active or pending for this
 881          * VCPU, then there is no work to do and we can bail out without
 882          * taking any lock.  There is a potential race with someone injecting
 883          * interrupts to the VCPU, but it is a benign race as the VCPU will
 884          * either observe the new interrupt before or after doing this check,
 885          * and introducing additional synchronization mechanism doesn't change
 886          * this.
 887          *
 888          * Note that we still need to go through the whole thing if anything
 889          * can be directly injected (GICv4).
 890          */
 891         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
 892             !vgic_supports_direct_msis(vcpu->kvm))
 893                 return;
 894
 895         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 896
 897         if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
 898                 raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
 899                 vgic_flush_lr_state(vcpu);
 900                 raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
 901         }
 902
 903         if (can_access_vgic_from_kernel())
 904                 vgic_restore_state(vcpu);
 905 }
 906
 907 void kvm_vgic_load(struct kvm_vcpu *vcpu)
 908 {
 909         if (unlikely(!vgic_initialized(vcpu->kvm)))
 910                 return;
 911
 912         if (kvm_vgic_global_state.type == VGIC_V2)
 913                 vgic_v2_load(vcpu);
 914         else
 915                 vgic_v3_load(vcpu);
 916 }
 917
 918 void kvm_vgic_put(struct kvm_vcpu *vcpu)
 919 {
 920         if (unlikely(!vgic_initialized(vcpu->kvm)))
 921                 return;
 922
 923         if (kvm_vgic_global_state.type == VGIC_V2)
 924                 vgic_v2_put(vcpu);
 925         else
 926                 vgic_v3_put(vcpu);
 927 }
 928
 929 void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
 930 {
 931         if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 932                 return;
 933
 934         if (kvm_vgic_global_state.type == VGIC_V2)
 935                 vgic_v2_vmcr_sync(vcpu);
 936         else
 937                 vgic_v3_vmcr_sync(vcpu);
 938 }
 939
 940 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 941 {
 942         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 943         struct vgic_irq *irq;
 944         bool pending = false;
 945         unsigned long flags;
 946         struct vgic_vmcr vmcr;
 947
 948         if (!vcpu->kvm->arch.vgic.enabled)
 949                 return false;
 950
 951         if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
 952                 return true;
 953
 954         vgic_get_vmcr(vcpu, &vmcr);
 955
 956         raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
 957
 958         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 959                 raw_spin_lock(&irq->irq_lock);
 960                 pending = irq_is_pending(irq) && irq->enabled &&
 961                           !irq->active &&
 962                           irq->priority < vmcr.pmr;
 963                 raw_spin_unlock(&irq->irq_lock);
 964
 965                 if (pending)
 966                         break;
 967         }
 968
 969         raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
 970
 971         return pending;
 972 }
 973
 974 void vgic_kick_vcpus(struct kvm *kvm)
 975 {
 976         struct kvm_vcpu *vcpu;
 977         int c;
 978
 979         /*
 980          * We've injected an interrupt, time to find out who deserves
 981          * a good kick...
 982          */
 983         kvm_for_each_vcpu(c, vcpu, kvm) {
 984                 if (kvm_vgic_vcpu_pending_irq(vcpu)) {
 985                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 986                         kvm_vcpu_kick(vcpu);
 987                 }
 988         }
 989 }
 990
 991 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 992 {
 993         struct vgic_irq *irq;
 994         bool map_is_active;
 995         unsigned long flags;
 996
 997         if (!vgic_initialized(vcpu->kvm))
 998                 return false;
 999
1000         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
1001         raw_spin_lock_irqsave(&irq->irq_lock, flags);
1002         map_is_active = irq->hw && irq->active;
1003         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
1004         vgic_put_irq(vcpu->kvm, irq);
1005
1006         return map_is_active;
1007 }