1 /*
2 * Xen HVM emulation support in KVM
3 *
4 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
5 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
8 * See the COPYING file in the top-level directory.
9 *
10 */
11
12 #include "qemu/osdep.h"
13 #include "qemu/log.h"
14 #include "qemu/main-loop.h"
15 #include "qemu/error-report.h"
16 #include "hw/xen/xen.h"
17 #include "sysemu/kvm_int.h"
18 #include "sysemu/kvm_xen.h"
19 #include "kvm/kvm_i386.h"
20 #include "exec/address-spaces.h"
21 #include "xen-emu.h"
22 #include "trace.h"
23 #include "sysemu/runstate.h"
24
25 #include "hw/pci/msi.h"
26 #include "hw/i386/apic-msidef.h"
27 #include "hw/i386/e820_memory_layout.h"
28 #include "hw/i386/kvm/xen_overlay.h"
29 #include "hw/i386/kvm/xen_evtchn.h"
30 #include "hw/i386/kvm/xen_gnttab.h"
31 #include "hw/i386/kvm/xen_primary_console.h"
32 #include "hw/i386/kvm/xen_xenstore.h"
33
34 #include "hw/xen/interface/version.h"
35 #include "hw/xen/interface/sched.h"
36 #include "hw/xen/interface/memory.h"
37 #include "hw/xen/interface/hvm/hvm_op.h"
38 #include "hw/xen/interface/hvm/params.h"
39 #include "hw/xen/interface/vcpu.h"
40 #include "hw/xen/interface/event_channel.h"
41 #include "hw/xen/interface/grant_table.h"
42
43 #include "xen-compat.h"
44
45 static void xen_vcpu_singleshot_timer_event(void *opaque);
46 static void xen_vcpu_periodic_timer_event(void *opaque);
47 static int vcpuop_stop_singleshot_timer(CPUState *cs);
48
49 #ifdef TARGET_X86_64
50 #define hypercall_compat32(longmode) (!(longmode))
51 #else
52 #define hypercall_compat32(longmode) (false)
53 #endif
54
55 static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
56 size_t *len, bool is_write)
57 {
58 struct kvm_translation tr = {
59 .linear_address = gva,
60 };
61
62 if (len) {
63 *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
64 }
65
66 if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
67 (is_write && !tr.writeable)) {
68 return false;
69 }
70 *gpa = tr.physical_address;
71 return true;
72 }
73
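/*
 * Copy between a guest virtual address range and a QEMU buffer, one
 * page at a time, since the underlying guest-physical pages need not
 * be contiguous. Each page is translated with kvm_gva_to_gpa() above.
 */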
74 static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
75 bool is_write)
76 {
77 uint8_t *buf = (uint8_t *)_buf;
78 uint64_t gpa;
79 size_t len;
80
81 while (sz) {
82 if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
83 return -EFAULT;
84 }
85 if (len > sz) {
86 len = sz;
87 }
88
89 cpu_physical_memory_rw(gpa, buf, len, is_write);
90
91 buf += len;
92 sz -= len;
93 gva += len;
94 }
95
96 return 0;
97 }
98
99 static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
100 size_t sz)
101 {
102 return kvm_gva_rw(cs, gva, buf, sz, false);
103 }
104
105 static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
106 size_t sz)
107 {
108 return kvm_gva_rw(cs, gva, buf, sz, true);
109 }
110
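/*
 * Enable Xen HVM emulation on the KVM VM: check that the required
 * capabilities are present, register the hypercall MSR with the
 * INTERCEPT_HCALL flag so that hypercalls are forwarded to userspace
 * (see kvm_xen_handle_exit() below), insist on the split irqchip, and
 * reserve the E820 "special" area used for the Xen overlay pages.
 * May be called more than once; only the KVM_XEN_HVM_CONFIG ioctl is
 * repeated on subsequent calls.
 */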
111 int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
112 {
113 const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
114 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
115 struct kvm_xen_hvm_config cfg = {
116 .msr = hypercall_msr,
117 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
118 };
119 int xen_caps, ret;
120
121 xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
122 if (required_caps & ~xen_caps) {
123 error_report("kvm: Xen HVM guest support not present or insufficient");
124 return -ENOSYS;
125 }
126
127 if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
128 struct kvm_xen_hvm_attr ha = {
129 .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
130 .u.xen_version = s->xen_version,
131 };
132 (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
133
134 cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
135 }
136
137 ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
138 if (ret < 0) {
139 error_report("kvm: Failed to enable Xen HVM support: %s",
140 strerror(-ret));
141 return ret;
142 }
143
144 /* If called a second time, don't repeat the rest of the setup. */
145 if (s->xen_caps) {
146 return 0;
147 }
148
149 /*
150 * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
151 * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
152 *
153 * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
154 * such things to be polled at precisely the right time. We *could* do
155 * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
156 * the moment the IRQ is acked, and see if it should be reasserted.
157 *
158 * But the in-kernel irqchip is deprecated, so we're unlikely to add
159 * that support in the kernel. Insist on using the split irqchip mode
160 * instead.
161 *
162 * This leaves us polling for the level going low in QEMU, which lacks
163 * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
164 * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
165 * the device (for which it has to unmap the device and trap access, for
166 * some period after an IRQ!!). In the Xen case, we do it on exit from
167 * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
168 * Which is kind of icky, but less so than the VFIO one. I may fix them
169 * both later...
170 */
171 if (!kvm_kernel_irqchip_split()) {
172 error_report("kvm: Xen support requires kernel-irqchip=split");
173 return -EINVAL;
174 }
175
176 s->xen_caps = xen_caps;
177
178 /* Tell fw_cfg to notify the BIOS to reserve the range. */
179 ret = e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE,
180 E820_RESERVED);
181 if (ret < 0) {
182 fprintf(stderr, "e820_add_entry() table is full\n");
183 return ret;
184 }
185
186 /* The pages couldn't be overlaid until KVM was initialized */
187 xen_primary_console_reset();
188 xen_xenstore_reset();
189
190 return 0;
191 }
192
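/*
 * Per-vCPU setup: tell the kernel this vCPU's Xen/ACPI vCPU ID (when
 * the kernel can deliver event channels itself) and create the QEMU
 * timers used to emulate the Xen singleshot and periodic timers.
 */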
193 int kvm_xen_init_vcpu(CPUState *cs)
194 {
195 X86CPU *cpu = X86_CPU(cs);
196 CPUX86State *env = &cpu->env;
197 int err;
198
199 /*
200 * The kernel needs to know the Xen/ACPI vCPU ID because that's
201 * what the guest uses in hypercalls such as timers. It doesn't
202 * match the APIC ID which is generally used for talking to the
203 * kernel about vCPUs. And if vCPU threads race with creating
204 * their KVM vCPUs out of order, it doesn't necessarily match
205 * with the kernel's internal vCPU indices either.
206 */
207 if (kvm_xen_has_cap(EVTCHN_SEND)) {
208 struct kvm_xen_vcpu_attr va = {
209 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
210 .u.vcpu_id = cs->cpu_index,
211 };
212 err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
213 if (err) {
214 error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
215 strerror(-err));
216 return err;
217 }
218 }
219
220 env->xen_vcpu_info_gpa = INVALID_GPA;
221 env->xen_vcpu_info_default_gpa = INVALID_GPA;
222 env->xen_vcpu_time_info_gpa = INVALID_GPA;
223 env->xen_vcpu_runstate_gpa = INVALID_GPA;
224
225 qemu_mutex_init(&env->xen_timers_lock);
226 env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
227 xen_vcpu_singleshot_timer_event,
228 cpu);
229 if (!env->xen_singleshot_timer) {
230 return -ENOMEM;
231 }
232 env->xen_singleshot_timer->opaque = cs;
233
234 env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
235 xen_vcpu_periodic_timer_event,
236 cpu);
237 if (!env->xen_periodic_timer) {
238 return -ENOMEM;
239 }
240 env->xen_periodic_timer->opaque = cs;
241
242 return 0;
243 }
244
245 uint32_t kvm_xen_get_caps(void)
246 {
247 return kvm_state->xen_caps;
248 }
249
250 static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
251 int cmd, uint64_t arg)
252 {
253 int err = 0;
254
255 switch (cmd) {
256 case XENVER_get_features: {
257 struct xen_feature_info fi;
258
259 /* No need for 32/64 compat handling */
260 qemu_build_assert(sizeof(fi) == 8);
261
262 err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
263 if (err) {
264 break;
265 }
266
267 fi.submap = 0;
268 if (fi.submap_idx == 0) {
269 fi.submap |= 1 << XENFEAT_writable_page_tables |
270 1 << XENFEAT_writable_descriptor_tables |
271 1 << XENFEAT_auto_translated_physmap |
272 1 << XENFEAT_hvm_callback_vector |
273 1 << XENFEAT_hvm_safe_pvclock |
274 1 << XENFEAT_hvm_pirqs;
275 }
276
277 err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
278 break;
279 }
280
281 default:
282 return false;
283 }
284
285 exit->u.hcall.result = err;
286 return true;
287 }
288
289 static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
290 {
291 struct kvm_xen_vcpu_attr xhsi;
292
293 xhsi.type = type;
294 xhsi.u.gpa = gpa;
295
296 trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
297
298 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
299 }
300
301 static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
302 {
303 uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
304 struct kvm_xen_vcpu_attr xva;
305
306 xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
307 xva.u.vector = vector;
308
309 trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
310
311 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
312 }
313
314 static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
315 {
316 X86CPU *cpu = X86_CPU(cs);
317 CPUX86State *env = &cpu->env;
318
319 env->xen_vcpu_callback_vector = data.host_int;
320
321 if (kvm_xen_has_cap(EVTCHN_SEND)) {
322 kvm_xen_set_vcpu_callback_vector(cs);
323 }
324 }
325
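/*
 * Point KVM at the guest's vcpu_info structure, and also map it into
 * QEMU's own address space so that evtchn_upcall_pending can be
 * checked directly (see kvm_xen_maybe_deassert_callback() below).
 */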
326 static int set_vcpu_info(CPUState *cs, uint64_t gpa)
327 {
328 X86CPU *cpu = X86_CPU(cs);
329 CPUX86State *env = &cpu->env;
330 MemoryRegionSection mrs = { .mr = NULL };
331 void *vcpu_info_hva = NULL;
332 int ret;
333
334 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
335 if (ret || gpa == INVALID_GPA) {
336 goto out;
337 }
338
339 mrs = memory_region_find(get_system_memory(), gpa,
340 sizeof(struct vcpu_info));
341 if (mrs.mr && mrs.mr->ram_block &&
342 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
343 vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
344 mrs.offset_within_region);
345 }
346 if (!vcpu_info_hva) {
347 if (mrs.mr) {
348 memory_region_unref(mrs.mr);
349 mrs.mr = NULL;
350 }
351 ret = -EINVAL;
352 }
353
354 out:
355 if (env->xen_vcpu_info_mr) {
356 memory_region_unref(env->xen_vcpu_info_mr);
357 }
358 env->xen_vcpu_info_hva = vcpu_info_hva;
359 env->xen_vcpu_info_mr = mrs.mr;
360 return ret;
361 }
362
363 static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
364 {
365 X86CPU *cpu = X86_CPU(cs);
366 CPUX86State *env = &cpu->env;
367
368 env->xen_vcpu_info_default_gpa = data.host_ulong;
369
370 /* Changing the default does nothing if a vcpu_info was explicitly set. */
371 if (env->xen_vcpu_info_gpa == INVALID_GPA) {
372 set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
373 }
374 }
375
376 static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
377 {
378 X86CPU *cpu = X86_CPU(cs);
379 CPUX86State *env = &cpu->env;
380
381 env->xen_vcpu_info_gpa = data.host_ulong;
382
383 set_vcpu_info(cs, env->xen_vcpu_info_gpa);
384 }
385
386 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
387 {
388 CPUState *cs = qemu_get_cpu(vcpu_id);
389 if (!cs) {
390 return NULL;
391 }
392
393 return X86_CPU(cs)->env.xen_vcpu_info_hva;
394 }
395
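/*
 * Polled on exit from KVM_RUN while the GSI/PCI_INTX callback is
 * asserted (see the comment in kvm_xen_init() above): once the guest
 * has cleared evtchn_upcall_pending in its vcpu_info, lower the line.
 */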
396 void kvm_xen_maybe_deassert_callback(CPUState *cs)
397 {
398 CPUX86State *env = &X86_CPU(cs)->env;
399 struct vcpu_info *vi = env->xen_vcpu_info_hva;
400 if (!vi) {
401 return;
402 }
403
404 /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
405 if (!vi->evtchn_upcall_pending) {
406 qemu_mutex_lock_iothread();
407 /*
408 * Check again now we have the lock, because it may have been
409 * asserted in the interim. And we don't want to take the lock
410 * every time because this is a fast path.
411 */
412 if (!vi->evtchn_upcall_pending) {
413 X86_CPU(cs)->env.xen_callback_asserted = false;
414 xen_evtchn_set_callback_level(0);
415 }
416 qemu_mutex_unlock_iothread();
417 }
418 }
419
420 void kvm_xen_set_callback_asserted(void)
421 {
422 CPUState *cs = qemu_get_cpu(0);
423
424 if (cs) {
425 X86_CPU(cs)->env.xen_callback_asserted = true;
426 }
427 }
428
429 bool kvm_xen_has_vcpu_callback_vector(void)
430 {
431 CPUState *cs = qemu_get_cpu(0);
432
433 return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector;
434 }
435
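/*
 * Deliver the event channel upcall using whichever callback mechanism
 * the guest configured: a per-vCPU vector (sent here as an MSI to that
 * vCPU's APIC), the global vector (injected by KVM itself on vCPU
 * entry, so a kick is enough), or the domain-wide GSI/PCI_INTX level,
 * which is only ever raised for vCPU0.
 */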
436 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
437 {
438 CPUState *cs = qemu_get_cpu(vcpu_id);
439 uint8_t vector;
440
441 if (!cs) {
442 return;
443 }
444
445 vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
446 if (vector) {
447 /*
448 * The per-vCPU callback vector is injected via the local APIC.
449 * Just deliver it as an MSI.
450 */
451 MSIMessage msg = {
452 .address = APIC_DEFAULT_ADDRESS |
453 (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
454 .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
455 };
456 kvm_irqchip_send_msi(kvm_state, msg);
457 return;
458 }
459
460 switch (type) {
461 case HVM_PARAM_CALLBACK_TYPE_VECTOR:
462 /*
463 * If the evtchn_upcall_pending field in the vcpu_info is set, then
464 * KVM will automatically deliver the vector on entering the vCPU
465 * so all we have to do is kick it out.
466 */
467 qemu_cpu_kick(cs);
468 break;
469
470 case HVM_PARAM_CALLBACK_TYPE_GSI:
471 case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
472 if (vcpu_id == 0) {
473 xen_evtchn_set_callback_level(1);
474 }
475 break;
476 }
477 }
478
479 /* Must always be called with xen_timers_lock held */
480 static int kvm_xen_set_vcpu_timer(CPUState *cs)
481 {
482 X86CPU *cpu = X86_CPU(cs);
483 CPUX86State *env = &cpu->env;
484
485 struct kvm_xen_vcpu_attr va = {
486 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
487 .u.timer.port = env->xen_virq[VIRQ_TIMER],
488 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
489 .u.timer.expires_ns = env->xen_singleshot_timer_ns,
490 };
491
492 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
493 }
494
495 static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
496 {
497 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
498 kvm_xen_set_vcpu_timer(cs);
499 }
500
501 int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
502 {
503 CPUState *cs = qemu_get_cpu(vcpu_id);
504
505 if (!cs) {
506 return -ENOENT;
507 }
508
509 /* cpu.h doesn't include the actual Xen header. */
510 qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
511
512 if (virq >= NR_VIRQS) {
513 return -EINVAL;
514 }
515
516 if (port && X86_CPU(cs)->env.xen_virq[virq]) {
517 return -EEXIST;
518 }
519
520 X86_CPU(cs)->env.xen_virq[virq] = port;
521 if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
522 async_run_on_cpu(cs, do_set_vcpu_timer_virq,
523 RUN_ON_CPU_HOST_INT(port));
524 }
525 return 0;
526 }
527
528 static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
529 {
530 X86CPU *cpu = X86_CPU(cs);
531 CPUX86State *env = &cpu->env;
532
533 env->xen_vcpu_time_info_gpa = data.host_ulong;
534
535 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
536 env->xen_vcpu_time_info_gpa);
537 }
538
539 static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
540 {
541 X86CPU *cpu = X86_CPU(cs);
542 CPUX86State *env = &cpu->env;
543
544 env->xen_vcpu_runstate_gpa = data.host_ulong;
545
546 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
547 env->xen_vcpu_runstate_gpa);
548 }
549
550 static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
551 {
552 X86CPU *cpu = X86_CPU(cs);
553 CPUX86State *env = &cpu->env;
554
555 env->xen_vcpu_info_gpa = INVALID_GPA;
556 env->xen_vcpu_info_default_gpa = INVALID_GPA;
557 env->xen_vcpu_time_info_gpa = INVALID_GPA;
558 env->xen_vcpu_runstate_gpa = INVALID_GPA;
559 env->xen_vcpu_callback_vector = 0;
560 memset(env->xen_virq, 0, sizeof(env->xen_virq));
561
562 set_vcpu_info(cs, INVALID_GPA);
563 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
564 INVALID_GPA);
565 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
566 INVALID_GPA);
567 if (kvm_xen_has_cap(EVTCHN_SEND)) {
568 kvm_xen_set_vcpu_callback_vector(cs);
569
570 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
571 env->xen_singleshot_timer_ns = 0;
572 kvm_xen_set_vcpu_timer(cs);
573 } else {
574 vcpuop_stop_singleshot_timer(cs);
575 }
576
577 }
578
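/*
 * Map the shared_info page at the given guest frame, and point the
 * default vcpu_info of each of the first XEN_LEGACY_MAX_VCPUS vCPUs
 * at the corresponding embedded slot within that page.
 */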
579 static int xen_set_shared_info(uint64_t gfn)
580 {
581 uint64_t gpa = gfn << TARGET_PAGE_BITS;
582 int i, err;
583
584 QEMU_IOTHREAD_LOCK_GUARD();
585
586 /*
587 * The xen_overlay device tells KVM about it too, since it had to
588 * do that on migration load anyway (unless we're going to jump
589 * through lots of hoops to maintain the fiction that this isn't
590 * KVM-specific).
591 */
592 err = xen_overlay_map_shinfo_page(gpa);
593 if (err) {
594 return err;
595 }
596
597 trace_kvm_xen_set_shared_info(gfn);
598
599 for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
600 CPUState *cpu = qemu_get_cpu(i);
601 if (cpu) {
602 async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
603 RUN_ON_CPU_HOST_ULONG(gpa));
604 }
605 gpa += sizeof(vcpu_info_t);
606 }
607
608 return err;
609 }
610
611 static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
612 {
613 switch (space) {
614 case XENMAPSPACE_shared_info:
615 if (idx > 0) {
616 return -EINVAL;
617 }
618 return xen_set_shared_info(gfn);
619
620 case XENMAPSPACE_grant_table:
621 return xen_gnttab_map_page(idx, gfn);
622
623 case XENMAPSPACE_gmfn:
624 case XENMAPSPACE_gmfn_range:
625 return -ENOTSUP;
626
627 case XENMAPSPACE_gmfn_foreign:
628 case XENMAPSPACE_dev_mmio:
629 return -EPERM;
630
631 default:
632 return -EINVAL;
633 }
634 }
635
636 static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
637 uint64_t arg)
638 {
639 struct xen_add_to_physmap xatp;
640 CPUState *cs = CPU(cpu);
641
642 if (hypercall_compat32(exit->u.hcall.longmode)) {
643 struct compat_xen_add_to_physmap xatp32;
644
645 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
646 if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
647 return -EFAULT;
648 }
649 xatp.domid = xatp32.domid;
650 xatp.size = xatp32.size;
651 xatp.space = xatp32.space;
652 xatp.idx = xatp32.idx;
653 xatp.gpfn = xatp32.gpfn;
654 } else {
655 if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
656 return -EFAULT;
657 }
658 }
659
660 if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
661 return -ESRCH;
662 }
663
664 return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
665 }
666
667 static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
668 uint64_t arg)
669 {
670 struct xen_add_to_physmap_batch xatpb;
671 unsigned long idxs_gva, gpfns_gva, errs_gva;
672 CPUState *cs = CPU(cpu);
673 size_t op_sz;
674
675 if (hypercall_compat32(exit->u.hcall.longmode)) {
676 struct compat_xen_add_to_physmap_batch xatpb32;
677
678 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
679 if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
680 return -EFAULT;
681 }
682 xatpb.domid = xatpb32.domid;
683 xatpb.space = xatpb32.space;
684 xatpb.size = xatpb32.size;
685
686 idxs_gva = xatpb32.idxs.c;
687 gpfns_gva = xatpb32.gpfns.c;
688 errs_gva = xatpb32.errs.c;
689 op_sz = sizeof(uint32_t);
690 } else {
691 if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
692 return -EFAULT;
693 }
694 op_sz = sizeof(unsigned long);
695 idxs_gva = (unsigned long)xatpb.idxs.p;
696 gpfns_gva = (unsigned long)xatpb.gpfns.p;
697 errs_gva = (unsigned long)xatpb.errs.p;
698 }
699
700 if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
701 return -ESRCH;
702 }
703
704 /* Explicitly invalid for the batch op. Not that we implement it anyway. */
705 if (xatpb.space == XENMAPSPACE_gmfn_range) {
706 return -EINVAL;
707 }
708
709 while (xatpb.size--) {
710 unsigned long idx = 0;
711 unsigned long gpfn = 0;
712 int err;
713
714 /* For 32-bit compat this only copies the low 32 bits of each */
715 if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
716 kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
717 return -EFAULT;
718 }
719 idxs_gva += op_sz;
720 gpfns_gva += op_sz;
721
722 err = add_to_physmap_one(xatpb.space, idx, gpfn);
723
724 if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
725 return -EFAULT;
726 }
727 errs_gva += sizeof(err);
728 }
729 return 0;
730 }
731
732 static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
733 int cmd, uint64_t arg)
734 {
735 int err;
736
737 switch (cmd) {
738 case XENMEM_add_to_physmap:
739 err = do_add_to_physmap(exit, cpu, arg);
740 break;
741
742 case XENMEM_add_to_physmap_batch:
743 err = do_add_to_physmap_batch(exit, cpu, arg);
744 break;
745
746 default:
747 return false;
748 }
749
750 exit->u.hcall.result = err;
751 return true;
752 }
753
754 static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
755 uint64_t arg)
756 {
757 CPUState *cs = CPU(cpu);
758 struct xen_hvm_param hp;
759 int err = 0;
760
761 /* No need for 32/64 compat handling */
762 qemu_build_assert(sizeof(hp) == 16);
763
764 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
765 err = -EFAULT;
766 goto out;
767 }
768
769 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
770 err = -ESRCH;
771 goto out;
772 }
773
774 switch (hp.index) {
775 case HVM_PARAM_CALLBACK_IRQ:
776 qemu_mutex_lock_iothread();
777 err = xen_evtchn_set_callback_param(hp.value);
778 qemu_mutex_unlock_iothread();
779 xen_set_long_mode(exit->u.hcall.longmode);
780 break;
781 default:
782 return false;
783 }
784
785 out:
786 exit->u.hcall.result = err;
787 return true;
788 }
789
790 static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
791 uint64_t arg)
792 {
793 CPUState *cs = CPU(cpu);
794 struct xen_hvm_param hp;
795 int err = 0;
796
797 /* No need for 32/64 compat handling */
798 qemu_build_assert(sizeof(hp) == 16);
799
800 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
801 err = -EFAULT;
802 goto out;
803 }
804
805 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
806 err = -ESRCH;
807 goto out;
808 }
809
810 switch (hp.index) {
811 case HVM_PARAM_STORE_PFN:
812 hp.value = XEN_SPECIAL_PFN(XENSTORE);
813 break;
814 case HVM_PARAM_STORE_EVTCHN:
815 hp.value = xen_xenstore_get_port();
816 break;
817 case HVM_PARAM_CONSOLE_PFN:
818 hp.value = xen_primary_console_get_pfn();
819 if (!hp.value) {
820 err = -EINVAL;
821 }
822 break;
823 case HVM_PARAM_CONSOLE_EVTCHN:
824 hp.value = xen_primary_console_get_port();
825 if (!hp.value) {
826 err = -EINVAL;
827 }
828 break;
829 default:
830 return false;
831 }
832
833 if (!err && kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
834 err = -EFAULT;
835 }
836 out:
837 exit->u.hcall.result = err;
838 return true;
839 }
840
841 static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
842 X86CPU *cpu, uint64_t arg)
843 {
844 struct xen_hvm_evtchn_upcall_vector up;
845 CPUState *target_cs;
846
847 /* No need for 32/64 compat handling */
848 qemu_build_assert(sizeof(up) == 8);
849
850 if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
851 return -EFAULT;
852 }
853
854 if (up.vector < 0x10) {
855 return -EINVAL;
856 }
857
858 target_cs = qemu_get_cpu(up.vcpu);
859 if (!target_cs) {
860 return -EINVAL;
861 }
862
863 async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
864 RUN_ON_CPU_HOST_INT(up.vector));
865 return 0;
866 }
867
868 static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
869 int cmd, uint64_t arg)
870 {
871 int ret = -ENOSYS;
872 switch (cmd) {
873 case HVMOP_set_evtchn_upcall_vector:
874 ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg);
875 break;
876
877 case HVMOP_pagetable_dying:
878 ret = -ENOSYS;
879 break;
880
881 case HVMOP_set_param:
882 return handle_set_param(exit, cpu, arg);
883
884 case HVMOP_get_param:
885 return handle_get_param(exit, cpu, arg);
886
887 default:
888 return false;
889 }
890
891 exit->u.hcall.result = ret;
892 return true;
893 }
894
895 static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
896 uint64_t arg)
897 {
898 struct vcpu_register_vcpu_info rvi;
899 uint64_t gpa;
900
901 /* No need for 32/64 compat handling */
902 qemu_build_assert(sizeof(rvi) == 16);
903 qemu_build_assert(sizeof(struct vcpu_info) == 64);
904
905 if (!target) {
906 return -ENOENT;
907 }
908
909 if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
910 return -EFAULT;
911 }
912
913 if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
914 return -EINVAL;
915 }
916
917 gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
918 async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
919 return 0;
920 }
921
922 static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
923 uint64_t arg)
924 {
925 struct vcpu_register_time_memory_area tma;
926 uint64_t gpa;
927 size_t len;
928
929 /* No need for 32/64 compat handling */
930 qemu_build_assert(sizeof(tma) == 8);
931 qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
932
933 if (!target) {
934 return -ENOENT;
935 }
936
937 if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
938 return -EFAULT;
939 }
940
941 /*
942 * Xen actually uses the GVA and does the translation through the guest
943 * page tables each time. But Linux/KVM uses the GPA, on the assumption
944 * that guests only ever use *global* addresses (kernel virtual addresses)
945 * for it. If Linux is changed to redo the GVA→GPA translation each time,
946 * it will offer a new vCPU attribute for that, and we'll use it instead.
947 */
948 if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
949 len < sizeof(struct vcpu_time_info)) {
950 return -EFAULT;
951 }
952
953 async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
954 RUN_ON_CPU_HOST_ULONG(gpa));
955 return 0;
956 }
957
958 static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
959 uint64_t arg)
960 {
961 struct vcpu_register_runstate_memory_area rma;
962 uint64_t gpa;
963 size_t len;
964
965 /* No need for 32/64 compat handling */
966 qemu_build_assert(sizeof(rma) == 8);
967 /* The runstate area actually does change size, but Linux copes. */
968
969 if (!target) {
970 return -ENOENT;
971 }
972
973 if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
974 return -EFAULT;
975 }
976
977 /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
978 if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
979 return -EFAULT;
980 }
981
982 async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
983 RUN_ON_CPU_HOST_ULONG(gpa));
984 return 0;
985 }
986
987 static uint64_t kvm_get_current_ns(void)
988 {
989 struct kvm_clock_data data;
990 int ret;
991
992 ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
993 if (ret < 0) {
994 fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(-ret));
995 abort();
996 }
997
998 return data.clock;
999 }
1000
1001 static void xen_vcpu_singleshot_timer_event(void *opaque)
1002 {
1003 CPUState *cpu = opaque;
1004 CPUX86State *env = &X86_CPU(cpu)->env;
1005 uint16_t port = env->xen_virq[VIRQ_TIMER];
1006
1007 if (likely(port)) {
1008 xen_evtchn_set_port(port);
1009 }
1010
1011 qemu_mutex_lock(&env->xen_timers_lock);
1012 env->xen_singleshot_timer_ns = 0;
1013 qemu_mutex_unlock(&env->xen_timers_lock);
1014 }
1015
1016 static void xen_vcpu_periodic_timer_event(void *opaque)
1017 {
1018 CPUState *cpu = opaque;
1019 CPUX86State *env = &X86_CPU(cpu)->env;
1020 uint16_t port = env->xen_virq[VIRQ_TIMER];
1021 int64_t qemu_now;
1022
1023 if (likely(port)) {
1024 xen_evtchn_set_port(port);
1025 }
1026
1027 qemu_mutex_lock(&env->xen_timers_lock);
1028
1029 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1030 timer_mod_ns(env->xen_periodic_timer,
1031 qemu_now + env->xen_periodic_timer_period);
1032
1033 qemu_mutex_unlock(&env->xen_timers_lock);
1034 }
1035
1036 static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
1037 {
1038 CPUX86State *tenv = &X86_CPU(target)->env;
1039 int64_t qemu_now;
1040
1041 timer_del(tenv->xen_periodic_timer);
1042
1043 qemu_mutex_lock(&tenv->xen_timers_lock);
1044
1045 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1046 timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
1047 tenv->xen_periodic_timer_period = period_ns;
1048
1049 qemu_mutex_unlock(&tenv->xen_timers_lock);
1050 return 0;
1051 }
1052
1053 #define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL))
1054 #define MICROSECS(_us) ((int64_t)((_us) * 1000ULL))
1055 #define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
1056 /* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
1057 #define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
1058
1059 static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
1060 uint64_t arg)
1061 {
1062 struct vcpu_set_periodic_timer spt;
1063
1064 qemu_build_assert(sizeof(spt) == 8);
1065 if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
1066 return -EFAULT;
1067 }
1068
1069 if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
1070 return -EINVAL;
1071 }
1072
1073 return do_set_periodic_timer(target, spt.period_ns);
1074 }
1075
1076 static int vcpuop_stop_periodic_timer(CPUState *target)
1077 {
1078 CPUX86State *tenv = &X86_CPU(target)->env;
1079
1080 qemu_mutex_lock(&tenv->xen_timers_lock);
1081
1082 timer_del(tenv->xen_periodic_timer);
1083 tenv->xen_periodic_timer_period = 0;
1084
1085 qemu_mutex_unlock(&tenv->xen_timers_lock);
1086 return 0;
1087 }
1088
1089 /*
1090 * Userspace handling of timer, for older kernels.
1091 * Must always be called with xen_timers_lock held.
1092 */
1093 static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
1094 bool linux_wa)
1095 {
1096 CPUX86State *env = &X86_CPU(cs)->env;
1097 int64_t now = kvm_get_current_ns();
1098 int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1099 int64_t delta = timeout_abs - now;
1100
1101 if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
1102 (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
1103 /*
1104 * Xen has a 'Linux workaround' in do_set_timer_op() which checks
1105 * for negative absolute timeout values (caused by integer
1106 * overflow), and for values about 13 days in the future (2^50ns)
1107 * which would be caused by jiffies overflow. For those cases, it
1108 * sets the timeout 100ms in the future (not *too* soon, since if
1109 * a guest really did set a long timeout on purpose we don't want
1110 * to keep churning CPU time by waking it up).
1111 */
1112 delta = (100 * SCALE_MS);
1113 timeout_abs = now + delta;
1114 }
1115
1116 timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
1117 env->xen_singleshot_timer_ns = now + delta;
1118 return 0;
1119 }
1120
1121 static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
1122 {
1123 struct vcpu_set_singleshot_timer sst = { 0 };
1124
1125 /*
1126 * The struct is a uint64_t followed by a uint32_t. On 32-bit that
1127 * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
1128 * that get used are identical, and there's four bytes of padding
1129 * unused at the end. For true Xen compatibility we should attempt
1130 * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
1131 * if we can't get the padding too. But that's daft. Just copy what
1132 * we need.
1133 */
1134 qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
1135 qemu_build_assert(sizeof(sst) >= 12);
1136
1137 if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
1138 return -EFAULT;
1139 }
1140
1141 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1142
1143 /*
1144 * We ignore the VCPU_SSHOTTMR_future flag, just as Xen now does.
1145 * The only guest that ever used it got it wrong.
1146 * https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=19c6cbd909
1147 */
1148 return do_set_singleshot_timer(cs, sst.timeout_abs_ns, false);
1149 }
1150
1151 static int vcpuop_stop_singleshot_timer(CPUState *cs)
1152 {
1153 CPUX86State *env = &X86_CPU(cs)->env;
1154
1155 qemu_mutex_lock(&env->xen_timers_lock);
1156
1157 timer_del(env->xen_singleshot_timer);
1158 env->xen_singleshot_timer_ns = 0;
1159
1160 qemu_mutex_unlock(&env->xen_timers_lock);
1161 return 0;
1162 }
1163
1164 static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1165 uint64_t timeout)
1166 {
1167 int err;
1168
1169 if (unlikely(timeout == 0)) {
1170 err = vcpuop_stop_singleshot_timer(CPU(cpu));
1171 } else {
1172 QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
1173 err = do_set_singleshot_timer(CPU(cpu), timeout, true);
1174 }
1175 exit->u.hcall.result = err;
1176 return true;
1177 }
1178
1179 static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1180 int cmd, int vcpu_id, uint64_t arg)
1181 {
1182 CPUState *cs = CPU(cpu);
1183 CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
1184 int err;
1185
1186 if (!dest) {
1187 err = -ENOENT;
1188 goto out;
1189 }
1190
1191 switch (cmd) {
1192 case VCPUOP_register_runstate_memory_area:
1193 err = vcpuop_register_runstate_info(cs, dest, arg);
1194 break;
1195 case VCPUOP_register_vcpu_time_memory_area:
1196 err = vcpuop_register_vcpu_time_info(cs, dest, arg);
1197 break;
1198 case VCPUOP_register_vcpu_info:
1199 err = vcpuop_register_vcpu_info(cs, dest, arg);
1200 break;
1201 case VCPUOP_set_singleshot_timer: {
1202 if (cs->cpu_index == vcpu_id) {
1203 err = vcpuop_set_singleshot_timer(dest, arg);
1204 } else {
1205 err = -EINVAL;
1206 }
1207 break;
1208 }
1209 case VCPUOP_stop_singleshot_timer:
1210 if (cs->cpu_index == vcpu_id) {
1211 err = vcpuop_stop_singleshot_timer(dest);
1212 } else {
1213 err = -EINVAL;
1214 }
1215 break;
1216 case VCPUOP_set_periodic_timer: {
1217 err = vcpuop_set_periodic_timer(cs, dest, arg);
1218 break;
1219 }
1220 case VCPUOP_stop_periodic_timer:
1221 err = vcpuop_stop_periodic_timer(dest);
1222 break;
1223
1224 default:
1225 return false;
1226 }
1227
1228 out:
1229 exit->u.hcall.result = err;
1230 return true;
1231 }
1232
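/*
 * The event channel operations are thin wrappers: copy the argument
 * structure in from the guest, pass it to the xen_evtchn device, and
 * copy the (possibly updated) structure back out on success. The
 * structure layouts are the same for 32-bit and 64-bit guests, as the
 * qemu_build_assert() sizes show, so no compat handling is needed.
 */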
1233 static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1234 int cmd, uint64_t arg)
1235 {
1236 CPUState *cs = CPU(cpu);
1237 int err = -ENOSYS;
1238
1239 switch (cmd) {
1240 case EVTCHNOP_init_control:
1241 case EVTCHNOP_expand_array:
1242 case EVTCHNOP_set_priority:
1243 /* We do not support FIFO channels at this point */
1244 err = -ENOSYS;
1245 break;
1246
1247 case EVTCHNOP_status: {
1248 struct evtchn_status status;
1249
1250 qemu_build_assert(sizeof(status) == 24);
1251 if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
1252 err = -EFAULT;
1253 break;
1254 }
1255
1256 err = xen_evtchn_status_op(&status);
1257 if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
1258 err = -EFAULT;
1259 }
1260 break;
1261 }
1262 case EVTCHNOP_close: {
1263 struct evtchn_close close;
1264
1265 qemu_build_assert(sizeof(close) == 4);
1266 if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
1267 err = -EFAULT;
1268 break;
1269 }
1270
1271 err = xen_evtchn_close_op(&close);
1272 break;
1273 }
1274 case EVTCHNOP_unmask: {
1275 struct evtchn_unmask unmask;
1276
1277 qemu_build_assert(sizeof(unmask) == 4);
1278 if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
1279 err = -EFAULT;
1280 break;
1281 }
1282
1283 err = xen_evtchn_unmask_op(&unmask);
1284 break;
1285 }
1286 case EVTCHNOP_bind_virq: {
1287 struct evtchn_bind_virq virq;
1288
1289 qemu_build_assert(sizeof(virq) == 12);
1290 if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
1291 err = -EFAULT;
1292 break;
1293 }
1294
1295 err = xen_evtchn_bind_virq_op(&virq);
1296 if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
1297 err = -EFAULT;
1298 }
1299 break;
1300 }
1301 case EVTCHNOP_bind_pirq: {
1302 struct evtchn_bind_pirq pirq;
1303
1304 qemu_build_assert(sizeof(pirq) == 12);
1305 if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
1306 err = -EFAULT;
1307 break;
1308 }
1309
1310 err = xen_evtchn_bind_pirq_op(&pirq);
1311 if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
1312 err = -EFAULT;
1313 }
1314 break;
1315 }
1316 case EVTCHNOP_bind_ipi: {
1317 struct evtchn_bind_ipi ipi;
1318
1319 qemu_build_assert(sizeof(ipi) == 8);
1320 if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
1321 err = -EFAULT;
1322 break;
1323 }
1324
1325 err = xen_evtchn_bind_ipi_op(&ipi);
1326 if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
1327 err = -EFAULT;
1328 }
1329 break;
1330 }
1331 case EVTCHNOP_send: {
1332 struct evtchn_send send;
1333
1334 qemu_build_assert(sizeof(send) == 4);
1335 if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
1336 err = -EFAULT;
1337 break;
1338 }
1339
1340 err = xen_evtchn_send_op(&send);
1341 break;
1342 }
1343 case EVTCHNOP_alloc_unbound: {
1344 struct evtchn_alloc_unbound alloc;
1345
1346 qemu_build_assert(sizeof(alloc) == 8);
1347 if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1348 err = -EFAULT;
1349 break;
1350 }
1351
1352 err = xen_evtchn_alloc_unbound_op(&alloc);
1353 if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1354 err = -EFAULT;
1355 }
1356 break;
1357 }
1358 case EVTCHNOP_bind_interdomain: {
1359 struct evtchn_bind_interdomain interdomain;
1360
1361 qemu_build_assert(sizeof(interdomain) == 12);
1362 if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1363 err = -EFAULT;
1364 break;
1365 }
1366
1367 err = xen_evtchn_bind_interdomain_op(&interdomain);
1368 if (!err &&
1369 kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1370 err = -EFAULT;
1371 }
1372 break;
1373 }
1374 case EVTCHNOP_bind_vcpu: {
1375 struct evtchn_bind_vcpu vcpu;
1376
1377 qemu_build_assert(sizeof(vcpu) == 8);
1378 if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
1379 err = -EFAULT;
1380 break;
1381 }
1382
1383 err = xen_evtchn_bind_vcpu_op(&vcpu);
1384 break;
1385 }
1386 case EVTCHNOP_reset: {
1387 struct evtchn_reset reset;
1388
1389 qemu_build_assert(sizeof(reset) == 2);
1390 if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1391 err = -EFAULT;
1392 break;
1393 }
1394
1395 err = xen_evtchn_reset_op(&reset);
1396 break;
1397 }
1398 default:
1399 return false;
1400 }
1401
1402 exit->u.hcall.result = err;
1403 return true;
1404 }
1405
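/*
 * Tear down all Xen state (event channels, callback, grant tables,
 * shared_info, console and xenstore) so that the guest can start from
 * scratch, e.g. for SHUTDOWN_soft_reset when a guest kexecs.
 */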
1406 int kvm_xen_soft_reset(void)
1407 {
1408 CPUState *cpu;
1409 int err;
1410
1411 assert(qemu_mutex_iothread_locked());
1412
1413 trace_kvm_xen_soft_reset();
1414
1415 err = xen_evtchn_soft_reset();
1416 if (err) {
1417 return err;
1418 }
1419
1420 /*
1421 * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
1422 * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses
1423 * to deliver to the timer interrupt and treats that as 'disabled'.
1424 */
1425 err = xen_evtchn_set_callback_param(0);
1426 if (err) {
1427 return err;
1428 }
1429
1430 CPU_FOREACH(cpu) {
1431 async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1432 }
1433
1434 err = xen_overlay_map_shinfo_page(INVALID_GFN);
1435 if (err) {
1436 return err;
1437 }
1438
1439 err = xen_gnttab_reset();
1440 if (err) {
1441 return err;
1442 }
1443
1444 err = xen_primary_console_reset();
1445 if (err) {
1446 return err;
1447 }
1448
1449 err = xen_xenstore_reset();
1450 if (err) {
1451 return err;
1452 }
1453
1454 return 0;
1455 }
1456
1457 static int schedop_shutdown(CPUState *cs, uint64_t arg)
1458 {
1459 struct sched_shutdown shutdown;
1460 int ret = 0;
1461
1462 /* No need for 32/64 compat handling */
1463 qemu_build_assert(sizeof(shutdown) == 4);
1464
1465 if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
1466 return -EFAULT;
1467 }
1468
1469 switch (shutdown.reason) {
1470 case SHUTDOWN_crash:
1471 cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
1472 qemu_system_guest_panicked(NULL);
1473 break;
1474
1475 case SHUTDOWN_reboot:
1476 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
1477 break;
1478
1479 case SHUTDOWN_poweroff:
1480 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
1481 break;
1482
1483 case SHUTDOWN_soft_reset:
1484 qemu_mutex_lock_iothread();
1485 ret = kvm_xen_soft_reset();
1486 qemu_mutex_unlock_iothread();
1487 break;
1488
1489 default:
1490 ret = -EINVAL;
1491 break;
1492 }
1493
1494 return ret;
1495 }
1496
1497 static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1498 int cmd, uint64_t arg)
1499 {
1500 CPUState *cs = CPU(cpu);
1501 int err = -ENOSYS;
1502
1503 switch (cmd) {
1504 case SCHEDOP_shutdown:
1505 err = schedop_shutdown(cs, arg);
1506 break;
1507
1508 case SCHEDOP_poll:
1509 /*
1510 * Linux will panic if this doesn't work. Just yield; it's not
1511 * worth overthinking it because with event channel handling
1512 * in KVM, the kernel will intercept this and it will never
1513 * reach QEMU anyway. The semantics of the hypercall explicitly
1514 * permit spurious wakeups.
1515 */
1516 case SCHEDOP_yield:
1517 sched_yield();
1518 err = 0;
1519 break;
1520
1521 default:
1522 return false;
1523 }
1524
1525 exit->u.hcall.result = err;
1526 return true;
1527 }
1528
1529 static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1530 int cmd, uint64_t arg, int count)
1531 {
1532 CPUState *cs = CPU(cpu);
1533 int err;
1534
1535 switch (cmd) {
1536 case GNTTABOP_set_version: {
1537 struct gnttab_set_version set;
1538
1539 qemu_build_assert(sizeof(set) == 4);
1540 if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
1541 err = -EFAULT;
1542 break;
1543 }
1544
1545 err = xen_gnttab_set_version_op(&set);
1546 if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
1547 err = -EFAULT;
1548 }
1549 break;
1550 }
1551 case GNTTABOP_get_version: {
1552 struct gnttab_get_version get;
1553
1554 qemu_build_assert(sizeof(get) == 8);
1555 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1556 err = -EFAULT;
1557 break;
1558 }
1559
1560 err = xen_gnttab_get_version_op(&get);
1561 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1562 err = -EFAULT;
1563 }
1564 break;
1565 }
1566 case GNTTABOP_query_size: {
1567 struct gnttab_query_size size;
1568
1569 qemu_build_assert(sizeof(size) == 16);
1570 if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
1571 err = -EFAULT;
1572 break;
1573 }
1574
1575 err = xen_gnttab_query_size_op(&size);
1576 if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
1577 err = -EFAULT;
1578 }
1579 break;
1580 }
1581 case GNTTABOP_setup_table:
1582 case GNTTABOP_copy:
1583 case GNTTABOP_map_grant_ref:
1584 case GNTTABOP_unmap_grant_ref:
1585 case GNTTABOP_swap_grant_ref:
1586 return false;
1587
1588 default:
1589 /* Xen explicitly returns -ENOSYS to HVM guests for all others */
1590 err = -ENOSYS;
1591 break;
1592 }
1593
1594 exit->u.hcall.result = err;
1595 return true;
1596 }
1597
1598 static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1599 int cmd, uint64_t arg)
1600 {
1601 CPUState *cs = CPU(cpu);
1602 int err;
1603
1604 switch (cmd) {
1605 case PHYSDEVOP_map_pirq: {
1606 struct physdev_map_pirq map;
1607
1608 if (hypercall_compat32(exit->u.hcall.longmode)) {
1609 struct compat_physdev_map_pirq *map32 = (void *)&map;
1610
1611 if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
1612 err = -EFAULT;
break;
1613 }
1614
1615 /*
1616 * The only thing that's different is the alignment of the
1617 * uint64_t table_base at the end, which gets padding to make
1618 * it 64-bit aligned in the 64-bit version.
1619 */
1620 qemu_build_assert(sizeof(*map32) == 36);
1621 qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
1622 offsetof(struct compat_physdev_map_pirq, entry_nr));
1623 memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
1624 } else {
1625 if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
1626 err = -EFAULT;
1627 break;
1628 }
1629 }
1630 err = xen_physdev_map_pirq(&map);
1631 /*
1632 * Since table_base is an IN parameter and won't be changed, just
1633 * copy the size of the compat structure back to the guest.
1634 */
1635 if (!err && kvm_copy_to_gva(cs, arg, &map,
1636 sizeof(struct compat_physdev_map_pirq))) {
1637 err = -EFAULT;
1638 }
1639 break;
1640 }
1641 case PHYSDEVOP_unmap_pirq: {
1642 struct physdev_unmap_pirq unmap;
1643
1644 qemu_build_assert(sizeof(unmap) == 8);
1645 if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
1646 err = -EFAULT;
1647 break;
1648 }
1649
1650 err = xen_physdev_unmap_pirq(&unmap);
1651 if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
1652 err = -EFAULT;
1653 }
1654 break;
1655 }
1656 case PHYSDEVOP_eoi: {
1657 struct physdev_eoi eoi;
1658
1659 qemu_build_assert(sizeof(eoi) == 4);
1660 if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
1661 err = -EFAULT;
1662 break;
1663 }
1664
1665 err = xen_physdev_eoi_pirq(&eoi);
1666 if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
1667 err = -EFAULT;
1668 }
1669 break;
1670 }
1671 case PHYSDEVOP_irq_status_query: {
1672 struct physdev_irq_status_query query;
1673
1674 qemu_build_assert(sizeof(query) == 8);
1675 if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
1676 err = -EFAULT;
1677 break;
1678 }
1679
1680 err = xen_physdev_query_pirq(&query);
1681 if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
1682 err = -EFAULT;
1683 }
1684 break;
1685 }
1686 case PHYSDEVOP_get_free_pirq: {
1687 struct physdev_get_free_pirq get;
1688
1689 qemu_build_assert(sizeof(get) == 8);
1690 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1691 err = -EFAULT;
1692 break;
1693 }
1694
1695 err = xen_physdev_get_free_pirq(&get);
1696 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1697 err = -EFAULT;
1698 }
1699 break;
1700 }
1701 case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
1702 err = -ENOSYS;
1703 break;
1704
1705 default:
1706 return false;
1707 }
1708
1709 exit->u.hcall.result = err;
1710 return true;
1711 }
1712
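/*
 * Top-level hypercall dispatch. Hypercalls from guest userspace
 * (CPL > 0) are rejected with -EPERM. A false return means the call
 * was not recognised at all; the caller then completes it with
 * -ENOSYS. Handlers returning true have already stored their result
 * in exit->u.hcall.result.
 */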
1713 static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1714 {
1715 uint16_t code = exit->u.hcall.input;
1716
1717 if (exit->u.hcall.cpl > 0) {
1718 exit->u.hcall.result = -EPERM;
1719 return true;
1720 }
1721
1722 switch (code) {
1723 case __HYPERVISOR_set_timer_op:
1724 if (exit->u.hcall.longmode) {
1725 return kvm_xen_hcall_set_timer_op(exit, cpu,
1726 exit->u.hcall.params[0]);
1727 } else {
1728 /* In 32-bit mode, the 64-bit timer value is in two args. */
1729 uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
1730 (uint32_t)exit->u.hcall.params[0];
1731 return kvm_xen_hcall_set_timer_op(exit, cpu, val);
1732 }
1733 case __HYPERVISOR_grant_table_op:
1734 return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
1735 exit->u.hcall.params[1],
1736 exit->u.hcall.params[2]);
1737 case __HYPERVISOR_sched_op:
1738 return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
1739 exit->u.hcall.params[1]);
1740 case __HYPERVISOR_event_channel_op:
1741 return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
1742 exit->u.hcall.params[1]);
1743 case __HYPERVISOR_vcpu_op:
1744 return kvm_xen_hcall_vcpu_op(exit, cpu,
1745 exit->u.hcall.params[0],
1746 exit->u.hcall.params[1],
1747 exit->u.hcall.params[2]);
1748 case __HYPERVISOR_hvm_op:
1749 return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1750 exit->u.hcall.params[1]);
1751 case __HYPERVISOR_memory_op:
1752 return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1753 exit->u.hcall.params[1]);
1754 case __HYPERVISOR_physdev_op:
1755 return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
1756 exit->u.hcall.params[1]);
1757 case __HYPERVISOR_xen_version:
1758 return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1759 exit->u.hcall.params[1]);
1760 default:
1761 return false;
1762 }
1763 }
1764
1765 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1766 {
1767 if (exit->type != KVM_EXIT_XEN_HCALL) {
1768 return -1;
1769 }
1770
1771 /*
1772 * The kernel latches the guest 32/64 mode when the MSR is used to fill
1773 * the hypercall page. So if we see a hypercall in a mode that doesn't
1774 * match our own idea of the guest mode, fetch the kernel's idea of the
1775 * "long mode" to remain in sync.
1776 */
1777 if (exit->u.hcall.longmode != xen_is_long_mode()) {
1778 xen_sync_long_mode();
1779 }
1780
1781 if (!do_kvm_xen_handle_exit(cpu, exit)) {
1782 /*
1783 * Some hypercalls will be deliberately "implemented" by returning
1784 * -ENOSYS. This case is for hypercalls which are unexpected.
1785 */
1786 exit->u.hcall.result = -ENOSYS;
1787 qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
1788 PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
1789 (uint64_t)exit->u.hcall.input,
1790 (uint64_t)exit->u.hcall.params[0],
1791 (uint64_t)exit->u.hcall.params[1],
1792 (uint64_t)exit->u.hcall.params[2]);
1793 }
1794
1795 trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
1796 exit->u.hcall.input, exit->u.hcall.params[0],
1797 exit->u.hcall.params[1], exit->u.hcall.params[2],
1798 exit->u.hcall.result);
1799 return 0;
1800 }
1801
1802 uint16_t kvm_xen_get_gnttab_max_frames(void)
1803 {
1804 KVMState *s = KVM_STATE(current_accel());
1805 return s->xen_gnttab_max_frames;
1806 }
1807
1808 uint16_t kvm_xen_get_evtchn_max_pirq(void)
1809 {
1810 KVMState *s = KVM_STATE(current_accel());
1811 return s->xen_evtchn_max_pirq;
1812 }
1813
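/*
 * Push the per-vCPU Xen state (vcpu_info, time info, runstate area,
 * timers, callback vector) back into KVM, e.g. when vCPU state is
 * restored after migration.
 */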
1814 int kvm_put_xen_state(CPUState *cs)
1815 {
1816 X86CPU *cpu = X86_CPU(cs);
1817 CPUX86State *env = &cpu->env;
1818 uint64_t gpa;
1819 int ret;
1820
1821 gpa = env->xen_vcpu_info_gpa;
1822 if (gpa == INVALID_GPA) {
1823 gpa = env->xen_vcpu_info_default_gpa;
1824 }
1825
1826 if (gpa != INVALID_GPA) {
1827 ret = set_vcpu_info(cs, gpa);
1828 if (ret < 0) {
1829 return ret;
1830 }
1831 }
1832
1833 gpa = env->xen_vcpu_time_info_gpa;
1834 if (gpa != INVALID_GPA) {
1835 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1836 gpa);
1837 if (ret < 0) {
1838 return ret;
1839 }
1840 }
1841
1842 gpa = env->xen_vcpu_runstate_gpa;
1843 if (gpa != INVALID_GPA) {
1844 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
1845 gpa);
1846 if (ret < 0) {
1847 return ret;
1848 }
1849 }
1850
1851 if (env->xen_periodic_timer_period) {
1852 ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
1853 if (ret < 0) {
1854 return ret;
1855 }
1856 }
1857
1858 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1859 /*
1860 * If the kernel has EVTCHN_SEND support then it handles timers too,
1861 * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
1862 */
1863 QEMU_LOCK_GUARD(&env->xen_timers_lock);
1864 if (env->xen_singleshot_timer_ns) {
1865 ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
1866 false);
1867 if (ret < 0) {
1868 return ret;
1869 }
1870 }
1871 return 0;
1872 }
1873
1874 if (env->xen_vcpu_callback_vector) {
1875 ret = kvm_xen_set_vcpu_callback_vector(cs);
1876 if (ret < 0) {
1877 return ret;
1878 }
1879 }
1880
1881 if (env->xen_virq[VIRQ_TIMER]) {
1882 do_set_vcpu_timer_virq(cs,
1883 RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER]));
1884 }
1885 return 0;
1886 }
1887
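/*
 * Sync per-vCPU Xen state back out of KVM before it is saved: mark the
 * vcpu_info page dirty (the kernel writes to it without marking it
 * dirty) and read back the singleshot timer deadline if the kernel is
 * handling the timers.
 */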
1888 int kvm_get_xen_state(CPUState *cs)
1889 {
1890 X86CPU *cpu = X86_CPU(cs);
1891 CPUX86State *env = &cpu->env;
1892 uint64_t gpa;
1893 int ret;
1894
1895 /*
1896 * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1897 * to it. It's up to userspace to *assume* that any page shared thus is
1898 * always considered dirty. The shared_info page is different since it's
1899 * an overlay and migrated separately anyway.
1900 */
1901 gpa = env->xen_vcpu_info_gpa;
1902 if (gpa == INVALID_GPA) {
1903 gpa = env->xen_vcpu_info_default_gpa;
1904 }
1905 if (gpa != INVALID_GPA) {
1906 MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1907 gpa,
1908 sizeof(struct vcpu_info));
1909 if (mrs.mr &&
1910 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1911 memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1912 sizeof(struct vcpu_info));
1913 }
1914 }
1915
1916 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1917 return 0;
1918 }
1919
1920 /*
1921 * If the kernel is accelerating timers, read out the current value of the
1922 * singleshot timer deadline.
1923 */
1924 if (env->xen_virq[VIRQ_TIMER]) {
1925 struct kvm_xen_vcpu_attr va = {
1926 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1927 };
1928 ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1929 if (ret < 0) {
1930 return ret;
1931 }
1932
1933 /*
1934 * This locking is fairly pointless, and is here to appease Coverity.
1935 * There is an unavoidable race condition if a different vCPU sets a
1936 * timer for this vCPU after the value has been read out. But that's
1937 * OK in practice because *all* the vCPUs need to be stopped before
1938 * we set about migrating their state.
1939 */
1940 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1941 env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1942 }
1943
1944 return 0;
1945 }