/*
 * Xen HVM emulation support in KVM
 *
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "hw/xen/xen.h"
#include "sysemu/kvm_int.h"
#include "sysemu/kvm_xen.h"
#include "kvm/kvm_i386.h"
#include "exec/address-spaces.h"
#include "xen-emu.h"
#include "trace.h"
#include "sysemu/runstate.h"

#include "hw/pci/msi.h"
#include "hw/i386/apic-msidef.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/kvm/xen_overlay.h"
#include "hw/i386/kvm/xen_evtchn.h"
#include "hw/i386/kvm/xen_gnttab.h"
#include "hw/i386/kvm/xen_xenstore.h"

#include "hw/xen/interface/version.h"
#include "hw/xen/interface/sched.h"
#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/hvm/hvm_op.h"
#include "hw/xen/interface/hvm/params.h"
#include "hw/xen/interface/vcpu.h"
#include "hw/xen/interface/event_channel.h"
#include "hw/xen/interface/grant_table.h"

#include "xen-compat.h"

static void xen_vcpu_singleshot_timer_event(void *opaque);
static void xen_vcpu_periodic_timer_event(void *opaque);
static int vcpuop_stop_singleshot_timer(CPUState *cs);

#ifdef TARGET_X86_64
#define hypercall_compat32(longmode) (!(longmode))
#else
#define hypercall_compat32(longmode) (false)
#endif

static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
                           size_t *len, bool is_write)
{
    struct kvm_translation tr = {
        .linear_address = gva,
    };

    if (len) {
        *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
    }

    if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
        (is_write && !tr.writeable)) {
        return false;
    }
    *gpa = tr.physical_address;
    return true;
}

static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
                      bool is_write)
{
    uint8_t *buf = (uint8_t *)_buf;
    uint64_t gpa;
    size_t len;

    while (sz) {
        if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
            return -EFAULT;
        }
        if (len > sz) {
            len = sz;
        }

        cpu_physical_memory_rw(gpa, buf, len, is_write);

        buf += len;
        sz -= len;
        gva += len;
    }

    return 0;
}

static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
                                    size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, false);
}

static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
                                  size_t sz)
{
    return kvm_gva_rw(cs, gva, buf, sz, true);
}
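
/*
 * Illustrative note, not from the original file: the hypercall handlers
 * below all use these helpers in the same copy-in/act/copy-out pattern,
 * e.g. (mirroring the EVTCHNOP_status case further down):
 *
 *     struct evtchn_status status;
 *     if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
 *         err = -EFAULT;                          // bad guest pointer
 *     } else {
 *         err = xen_evtchn_status_op(&status);    // act on the local copy
 *         if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
 *             err = -EFAULT;                      // copy the result back
 *         }
 *     }
 */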

int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
{
    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
    struct kvm_xen_hvm_config cfg = {
        .msr = hypercall_msr,
        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
    };
    int xen_caps, ret;

    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
    if (required_caps & ~xen_caps) {
        error_report("kvm: Xen HVM guest support not present or insufficient");
        return -ENOSYS;
    }

    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
        struct kvm_xen_hvm_attr ha = {
            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
            .u.xen_version = s->xen_version,
        };
        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);

        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
    }

    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
    if (ret < 0) {
        error_report("kvm: Failed to enable Xen HVM support: %s",
                     strerror(-ret));
        return ret;
    }

    /* If called a second time, don't repeat the rest of the setup. */
    if (s->xen_caps) {
        return 0;
    }

    /*
     * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
     * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
     *
     * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
     * such things to be polled at precisely the right time. We *could* do
     * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
     * the moment the IRQ is acked, and see if it should be reasserted.
     *
     * But the in-kernel irqchip is deprecated, so we're unlikely to add
     * that support in the kernel. Insist on using the split irqchip mode
     * instead.
     *
     * This leaves us polling for the level going low in QEMU, which lacks
     * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
     * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
     * the device (for which it has to unmap the device and trap access, for
     * some period after an IRQ!!). In the Xen case, we do it on exit from
     * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
     * Which is kind of icky, but less so than the VFIO one. I may fix them
     * both later...
     */
    if (!kvm_kernel_irqchip_split()) {
        error_report("kvm: Xen support requires kernel-irqchip=split");
        return -EINVAL;
    }

    s->xen_caps = xen_caps;

    /* Tell fw_cfg to notify the BIOS to reserve the range. */
    ret = e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE,
                         E820_RESERVED);
    if (ret < 0) {
        fprintf(stderr, "e820_add_entry() table is full\n");
        return ret;
    }

    /* The page couldn't be overlaid until KVM was initialized */
    xen_xenstore_reset();

    return 0;
}
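
/*
 * Usage note, not from the original file: this init path is reached when
 * Xen HVM emulation is enabled via the KVM accelerator properties, e.g.
 * (the version value here is only an example):
 *
 *     qemu-system-x86_64 -accel kvm,xen-version=0x40011,kernel-irqchip=split
 *
 * kernel-irqchip=split is mandatory, as enforced above.
 */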

int kvm_xen_init_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int err;

    /*
     * The kernel needs to know the Xen/ACPI vCPU ID because that's
     * what the guest uses in hypercalls such as timers. It doesn't
     * match the APIC ID which is generally used for talking to the
     * kernel about vCPUs. And if vCPU threads race with creating
     * their KVM vCPUs out of order, it doesn't necessarily match
     * with the kernel's internal vCPU indices either.
     */
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
            .u.vcpu_id = cs->cpu_index,
        };
        err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
        if (err) {
            error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
                         strerror(-err));
            return err;
        }
    }

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;

    qemu_mutex_init(&env->xen_timers_lock);
    env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                             xen_vcpu_singleshot_timer_event,
                                             cpu);
    if (!env->xen_singleshot_timer) {
        return -ENOMEM;
    }
    env->xen_singleshot_timer->opaque = cs;

    env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           xen_vcpu_periodic_timer_event,
                                           cpu);
    if (!env->xen_periodic_timer) {
        return -ENOMEM;
    }
    env->xen_periodic_timer->opaque = cs;

    return 0;
}

uint32_t kvm_xen_get_caps(void)
{
    return kvm_state->xen_caps;
}

static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
                                      int cmd, uint64_t arg)
{
    int err = 0;

    switch (cmd) {
    case XENVER_get_features: {
        struct xen_feature_info fi;

        /* No need for 32/64 compat handling */
        qemu_build_assert(sizeof(fi) == 8);

        err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
        if (err) {
            break;
        }

        fi.submap = 0;
        if (fi.submap_idx == 0) {
            fi.submap |= 1 << XENFEAT_writable_page_tables |
                         1 << XENFEAT_writable_descriptor_tables |
                         1 << XENFEAT_auto_translated_physmap |
                         1 << XENFEAT_hvm_callback_vector |
                         1 << XENFEAT_hvm_safe_pvclock |
                         1 << XENFEAT_hvm_pirqs;
        }

        err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
        break;
    }

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
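
/*
 * For context, not from the original file: the guest-side counterpart of
 * the handler above looks roughly like this (Linux-style hypercall wrapper
 * name assumed for illustration):
 *
 *     struct xen_feature_info fi = { .submap_idx = 0 };
 *     if (HYPERVISOR_xen_version(XENVER_get_features, &fi) == 0) {
 *         bool cb = fi.submap & (1 << XENFEAT_hvm_callback_vector);
 *     }
 *
 * The pointer the guest passes is the 'arg' GVA handled by
 * kvm_copy_from_gva()/kvm_copy_to_gva() above.
 */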

static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
{
    struct kvm_xen_vcpu_attr xhsi;

    xhsi.type = type;
    xhsi.u.gpa = gpa;

    trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
}

static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
{
    uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
    struct kvm_xen_vcpu_attr xva;

    xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
    xva.u.vector = vector;

    trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
}

static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_callback_vector = data.host_int;

    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        kvm_xen_set_vcpu_callback_vector(cs);
    }
}

static int set_vcpu_info(CPUState *cs, uint64_t gpa)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    MemoryRegionSection mrs = { .mr = NULL };
    void *vcpu_info_hva = NULL;
    int ret;

    ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
    if (ret || gpa == INVALID_GPA) {
        goto out;
    }

    mrs = memory_region_find(get_system_memory(), gpa,
                             sizeof(struct vcpu_info));
    if (mrs.mr && mrs.mr->ram_block &&
        !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
        vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
                                         mrs.offset_within_region);
    }
    if (!vcpu_info_hva) {
        if (mrs.mr) {
            memory_region_unref(mrs.mr);
            mrs.mr = NULL;
        }
        ret = -EINVAL;
    }

 out:
    if (env->xen_vcpu_info_mr) {
        memory_region_unref(env->xen_vcpu_info_mr);
    }
    env->xen_vcpu_info_hva = vcpu_info_hva;
    env->xen_vcpu_info_mr = mrs.mr;
    return ret;
}

static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_default_gpa = data.host_ulong;

    /* Changing the default does nothing if a vcpu_info was explicitly set. */
    if (env->xen_vcpu_info_gpa == INVALID_GPA) {
        set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
    }
}

static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = data.host_ulong;

    set_vcpu_info(cs, env->xen_vcpu_info_gpa);
}

void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);
    if (!cs) {
        return NULL;
    }

    return X86_CPU(cs)->env.xen_vcpu_info_hva;
}

void kvm_xen_maybe_deassert_callback(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    struct vcpu_info *vi = env->xen_vcpu_info_hva;
    if (!vi) {
        return;
    }

    /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
    if (!vi->evtchn_upcall_pending) {
        qemu_mutex_lock_iothread();
        /*
         * Check again now we have the lock, because it may have been
         * asserted in the interim. And we don't want to take the lock
         * every time because this is a fast path.
         */
        if (!vi->evtchn_upcall_pending) {
            X86_CPU(cs)->env.xen_callback_asserted = false;
            xen_evtchn_set_callback_level(0);
        }
        qemu_mutex_unlock_iothread();
    }
}

void kvm_xen_set_callback_asserted(void)
{
    CPUState *cs = qemu_get_cpu(0);

    if (cs) {
        X86_CPU(cs)->env.xen_callback_asserted = true;
    }
}

bool kvm_xen_has_vcpu_callback_vector(void)
{
    CPUState *cs = qemu_get_cpu(0);

    return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector;
}

void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);
    uint8_t vector;

    if (!cs) {
        return;
    }

    vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
    if (vector) {
        /*
         * The per-vCPU callback vector injected via lapic. Just
         * deliver it as an MSI.
         */
        MSIMessage msg = {
            .address = APIC_DEFAULT_ADDRESS |
                       (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
            .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
        };
        kvm_irqchip_send_msi(kvm_state, msg);
        return;
    }

    switch (type) {
    case HVM_PARAM_CALLBACK_TYPE_VECTOR:
        /*
         * If the evtchn_upcall_pending field in the vcpu_info is set, then
         * KVM will automatically deliver the vector on entering the vCPU
         * so all we have to do is kick it out.
         */
        qemu_cpu_kick(cs);
        break;

    case HVM_PARAM_CALLBACK_TYPE_GSI:
    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
        if (vcpu_id == 0) {
            xen_evtchn_set_callback_level(1);
        }
        break;
    }
}

/* Must always be called with xen_timers_lock held */
static int kvm_xen_set_vcpu_timer(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    struct kvm_xen_vcpu_attr va = {
        .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
        .u.timer.port = env->xen_virq[VIRQ_TIMER],
        .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
        .u.timer.expires_ns = env->xen_singleshot_timer_ns,
    };

    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
}

static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
{
    QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
    kvm_xen_set_vcpu_timer(cs);
}

int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
{
    CPUState *cs = qemu_get_cpu(vcpu_id);

    if (!cs) {
        return -ENOENT;
    }

    /* cpu.h doesn't include the actual Xen header. */
    qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);

    if (virq >= NR_VIRQS) {
        return -EINVAL;
    }

    if (port && X86_CPU(cs)->env.xen_virq[virq]) {
        return -EEXIST;
    }

    X86_CPU(cs)->env.xen_virq[virq] = port;
    if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
        async_run_on_cpu(cs, do_set_vcpu_timer_virq,
                         RUN_ON_CPU_HOST_INT(port));
    }
    return 0;
}

static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_time_info_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          env->xen_vcpu_time_info_gpa);
}

static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_runstate_gpa = data.host_ulong;

    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                          env->xen_vcpu_runstate_gpa);
}

static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    env->xen_vcpu_info_gpa = INVALID_GPA;
    env->xen_vcpu_info_default_gpa = INVALID_GPA;
    env->xen_vcpu_time_info_gpa = INVALID_GPA;
    env->xen_vcpu_runstate_gpa = INVALID_GPA;
    env->xen_vcpu_callback_vector = 0;
    memset(env->xen_virq, 0, sizeof(env->xen_virq));

    set_vcpu_info(cs, INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                          INVALID_GPA);
    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                          INVALID_GPA);
    if (kvm_xen_has_cap(EVTCHN_SEND)) {
        kvm_xen_set_vcpu_callback_vector(cs);

        QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
        env->xen_singleshot_timer_ns = 0;
        kvm_xen_set_vcpu_timer(cs);
    } else {
        vcpuop_stop_singleshot_timer(cs);
    }
}

static int xen_set_shared_info(uint64_t gfn)
{
    uint64_t gpa = gfn << TARGET_PAGE_BITS;
    int i, err;

    QEMU_IOTHREAD_LOCK_GUARD();

    /*
     * The xen_overlay device tells KVM about it too, since it had to
     * do that on migration load anyway (unless we're going to jump
     * through lots of hoops to maintain the fiction that this isn't
     * KVM-specific).
     */
    err = xen_overlay_map_shinfo_page(gpa);
    if (err) {
        return err;
    }

    trace_kvm_xen_set_shared_info(gfn);

    for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
        CPUState *cpu = qemu_get_cpu(i);
        if (cpu) {
            async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
                             RUN_ON_CPU_HOST_ULONG(gpa));
        }
        gpa += sizeof(vcpu_info_t);
    }

    return err;
}

static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
{
    switch (space) {
    case XENMAPSPACE_shared_info:
        if (idx > 0) {
            return -EINVAL;
        }
        return xen_set_shared_info(gfn);

    case XENMAPSPACE_grant_table:
        return xen_gnttab_map_page(idx, gfn);

    case XENMAPSPACE_gmfn:
    case XENMAPSPACE_gmfn_range:
        return -ENOTSUP;

    case XENMAPSPACE_gmfn_foreign:
    case XENMAPSPACE_dev_mmio:
        return -EPERM;

    default:
        return -EINVAL;
    }
}

static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    struct xen_add_to_physmap xatp;
    CPUState *cs = CPU(cpu);

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap xatp32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
        if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
            return -EFAULT;
        }
        xatp.domid = xatp32.domid;
        xatp.size = xatp32.size;
        xatp.space = xatp32.space;
        xatp.idx = xatp32.idx;
        xatp.gpfn = xatp32.gpfn;
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
            return -EFAULT;
        }
    }

    if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
        return -ESRCH;
    }

    return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
}

static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   uint64_t arg)
{
    struct xen_add_to_physmap_batch xatpb;
    unsigned long idxs_gva, gpfns_gva, errs_gva;
    CPUState *cs = CPU(cpu);
    size_t op_sz;

    if (hypercall_compat32(exit->u.hcall.longmode)) {
        struct compat_xen_add_to_physmap_batch xatpb32;

        qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
        if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
            return -EFAULT;
        }
        xatpb.domid = xatpb32.domid;
        xatpb.space = xatpb32.space;
        xatpb.size = xatpb32.size;

        idxs_gva = xatpb32.idxs.c;
        gpfns_gva = xatpb32.gpfns.c;
        errs_gva = xatpb32.errs.c;
        op_sz = sizeof(uint32_t);
    } else {
        if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
            return -EFAULT;
        }
        op_sz = sizeof(unsigned long);
        idxs_gva = (unsigned long)xatpb.idxs.p;
        gpfns_gva = (unsigned long)xatpb.gpfns.p;
        errs_gva = (unsigned long)xatpb.errs.p;
    }

    if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
        return -ESRCH;
    }

    /* Explicitly invalid for the batch op. Not that we implement it anyway. */
    if (xatpb.space == XENMAPSPACE_gmfn_range) {
        return -EINVAL;
    }

    while (xatpb.size--) {
        unsigned long idx = 0;
        unsigned long gpfn = 0;
        int err;

        /* For 32-bit compat this only copies the low 32 bits of each */
        if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
            kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
            return -EFAULT;
        }
        idxs_gva += op_sz;
        gpfns_gva += op_sz;

        err = add_to_physmap_one(xatpb.space, idx, gpfn);

        if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
            return -EFAULT;
        }
        errs_gva += sizeof(err);
    }
    return 0;
}

static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    int err;

    switch (cmd) {
    case XENMEM_add_to_physmap:
        err = do_add_to_physmap(exit, cpu, arg);
        break;

    case XENMEM_add_to_physmap_batch:
        err = do_add_to_physmap_batch(exit, cpu, arg);
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
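
/*
 * For context, not from the original file: a guest typically reaches
 * do_add_to_physmap() when mapping the shared info page, roughly:
 *
 *     struct xen_add_to_physmap xatp = {
 *         .domid = DOMID_SELF,
 *         .space = XENMAPSPACE_shared_info,
 *         .idx   = 0,
 *         .gpfn  = shared_info_gpfn,     // frame chosen by the guest
 *     };
 *     HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
 *
 * (HYPERVISOR_memory_op is the Linux guest wrapper name, assumed here.)
 */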

static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    struct xen_hvm_param hp;
    int err = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(hp) == 16);

    if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
        err = -EFAULT;
        goto out;
    }

    if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
        err = -ESRCH;
        goto out;
    }

    switch (hp.index) {
    case HVM_PARAM_CALLBACK_IRQ:
        qemu_mutex_lock_iothread();
        err = xen_evtchn_set_callback_param(hp.value);
        qemu_mutex_unlock_iothread();
        xen_set_long_mode(exit->u.hcall.longmode);
        break;
    default:
        return false;
    }

out:
    exit->u.hcall.result = err;
    return true;
}

static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
                             uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    struct xen_hvm_param hp;
    int err = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(hp) == 16);

    if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
        err = -EFAULT;
        goto out;
    }

    if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
        err = -ESRCH;
        goto out;
    }

    switch (hp.index) {
    case HVM_PARAM_STORE_PFN:
        hp.value = XEN_SPECIAL_PFN(XENSTORE);
        break;
    case HVM_PARAM_STORE_EVTCHN:
        hp.value = xen_xenstore_get_port();
        break;
    default:
        return false;
    }

    if (kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
        err = -EFAULT;
    }
out:
    exit->u.hcall.result = err;
    return true;
}

static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
                                              X86CPU *cpu, uint64_t arg)
{
    struct xen_hvm_evtchn_upcall_vector up;
    CPUState *target_cs;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(up) == 8);

    if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
        return -EFAULT;
    }

    if (up.vector < 0x10) {
        return -EINVAL;
    }

    target_cs = qemu_get_cpu(up.vcpu);
    if (!target_cs) {
        return -EINVAL;
    }

    async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
                     RUN_ON_CPU_HOST_INT(up.vector));
    return 0;
}

static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                 int cmd, uint64_t arg)
{
    int ret = -ENOSYS;
    switch (cmd) {
    case HVMOP_set_evtchn_upcall_vector:
        ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg);
        break;

    case HVMOP_pagetable_dying:
        ret = -ENOSYS;
        break;

    case HVMOP_set_param:
        return handle_set_param(exit, cpu, arg);

    case HVMOP_get_param:
        return handle_get_param(exit, cpu, arg);

    default:
        return false;
    }

    exit->u.hcall.result = ret;
    return true;
}

static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
                                     uint64_t arg)
{
    struct vcpu_register_vcpu_info rvi;
    uint64_t gpa;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rvi) == 16);
    qemu_build_assert(sizeof(struct vcpu_info) == 64);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
        return -EFAULT;
    }

    if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
        return -EINVAL;
    }

    gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
    async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
                                          uint64_t arg)
{
    struct vcpu_register_time_memory_area tma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(tma) == 8);
    qemu_build_assert(sizeof(struct vcpu_time_info) == 32);

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
        return -EFAULT;
    }

    /*
     * Xen actually uses the GVA and does the translation through the guest
     * page tables each time. But Linux/KVM uses the GPA, on the assumption
     * that guests only ever use *global* addresses (kernel virtual addresses)
     * for it. If Linux is changed to redo the GVA→GPA translation each time,
     * it will offer a new vCPU attribute for that, and we'll use it instead.
     */
    if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
        len < sizeof(struct vcpu_time_info)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
                                         uint64_t arg)
{
    struct vcpu_register_runstate_memory_area rma;
    uint64_t gpa;
    size_t len;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(rma) == 8);
    /* The runstate area actually does change size, but Linux copes. */

    if (!target) {
        return -ENOENT;
    }

    if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
        return -EFAULT;
    }

    /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
    if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
        return -EFAULT;
    }

    async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
                     RUN_ON_CPU_HOST_ULONG(gpa));
    return 0;
}

static uint64_t kvm_get_current_ns(void)
{
    struct kvm_clock_data data;
    int ret;

    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
    if (ret < 0) {
        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
        abort();
    }

    return data.clock;
}

static void xen_vcpu_singleshot_timer_event(void *opaque)
{
    CPUState *cpu = opaque;
    CPUX86State *env = &X86_CPU(cpu)->env;
    uint16_t port = env->xen_virq[VIRQ_TIMER];

    if (likely(port)) {
        xen_evtchn_set_port(port);
    }

    qemu_mutex_lock(&env->xen_timers_lock);
    env->xen_singleshot_timer_ns = 0;
    qemu_mutex_unlock(&env->xen_timers_lock);
}

static void xen_vcpu_periodic_timer_event(void *opaque)
{
    CPUState *cpu = opaque;
    CPUX86State *env = &X86_CPU(cpu)->env;
    uint16_t port = env->xen_virq[VIRQ_TIMER];
    int64_t qemu_now;

    if (likely(port)) {
        xen_evtchn_set_port(port);
    }

    qemu_mutex_lock(&env->xen_timers_lock);

    qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    timer_mod_ns(env->xen_periodic_timer,
                 qemu_now + env->xen_periodic_timer_period);

    qemu_mutex_unlock(&env->xen_timers_lock);
}

static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
{
    CPUX86State *tenv = &X86_CPU(target)->env;
    int64_t qemu_now;

    timer_del(tenv->xen_periodic_timer);

    qemu_mutex_lock(&tenv->xen_timers_lock);

    qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
    tenv->xen_periodic_timer_period = period_ns;

    qemu_mutex_unlock(&tenv->xen_timers_lock);
    return 0;
}

#define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL))
#define MICROSECS(_us) ((int64_t)((_us) * 1000ULL))
#define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
/* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
#define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))

static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
                                     uint64_t arg)
{
    struct vcpu_set_periodic_timer spt;

    qemu_build_assert(sizeof(spt) == 8);
    if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
        return -EFAULT;
    }

    if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
        return -EINVAL;
    }

    return do_set_periodic_timer(target, spt.period_ns);
}

static int vcpuop_stop_periodic_timer(CPUState *target)
{
    CPUX86State *tenv = &X86_CPU(target)->env;

    qemu_mutex_lock(&tenv->xen_timers_lock);

    timer_del(tenv->xen_periodic_timer);
    tenv->xen_periodic_timer_period = 0;

    qemu_mutex_unlock(&tenv->xen_timers_lock);
    return 0;
}

/*
 * Userspace handling of timer, for older kernels.
 * Must always be called with xen_timers_lock held.
 */
static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
                                   bool future, bool linux_wa)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    int64_t now = kvm_get_current_ns();
    int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    int64_t delta = timeout_abs - now;

    if (future && timeout_abs < now) {
        return -ETIME;
    }

    if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
                             (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
        /*
         * Xen has a 'Linux workaround' in do_set_timer_op() which checks
         * for negative absolute timeout values (caused by integer
         * overflow), and for values about 13 days in the future (2^50 ns
         * is roughly 1.13e15 ns, i.e. just over 13 days) which would be
         * caused by jiffies overflow. For those cases, it sets the timeout
         * 100ms in the future (not *too* soon, since if a guest really did
         * set a long timeout on purpose we don't want to keep churning CPU
         * time by waking it up).
         */
        delta = (100 * SCALE_MS);
        timeout_abs = now + delta;
    }

    timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
    env->xen_singleshot_timer_ns = now + delta;
    return 0;
}

static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
{
    struct vcpu_set_singleshot_timer sst = { 0 };

    /*
     * The struct is a uint64_t followed by a uint32_t. On 32-bit that
     * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
     * that get used are identical, and there's four bytes of padding
     * unused at the end. For true Xen compatibility we should attempt
     * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
     * if we can't get the padding too. But that's daft. Just copy what
     * we need.
     */
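    /*
     * Layout sketch, not from the original file, matching the build
     * asserts below:
     *
     *     struct vcpu_set_singleshot_timer {
     *         uint64_t timeout_abs_ns;   // bytes 0..7
     *         uint32_t flags;            // bytes 8..11
     *     };  // sizeof: 12 on 32-bit; padded to 16 on 64-bit
     */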
    qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
    qemu_build_assert(sizeof(sst) >= 12);

    if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
        return -EFAULT;
    }

    QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
    return do_set_singleshot_timer(cs, sst.timeout_abs_ns,
                                   !!(sst.flags & VCPU_SSHOTTMR_future),
                                   false);
}

static int vcpuop_stop_singleshot_timer(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;

    qemu_mutex_lock(&env->xen_timers_lock);

    timer_del(env->xen_singleshot_timer);
    env->xen_singleshot_timer_ns = 0;

    qemu_mutex_unlock(&env->xen_timers_lock);
    return 0;
}

static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                       uint64_t timeout)
{
    int err;

    if (unlikely(timeout == 0)) {
        err = vcpuop_stop_singleshot_timer(CPU(cpu));
    } else {
        QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
        err = do_set_singleshot_timer(CPU(cpu), timeout, false, true);
    }
    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                  int cmd, int vcpu_id, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
    int err;

    if (!dest) {
        err = -ENOENT;
        goto out;
    }

    switch (cmd) {
    case VCPUOP_register_runstate_memory_area:
        err = vcpuop_register_runstate_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_time_memory_area:
        err = vcpuop_register_vcpu_time_info(cs, dest, arg);
        break;
    case VCPUOP_register_vcpu_info:
        err = vcpuop_register_vcpu_info(cs, dest, arg);
        break;
    case VCPUOP_set_singleshot_timer: {
        if (cs->cpu_index == vcpu_id) {
            err = vcpuop_set_singleshot_timer(dest, arg);
        } else {
            err = -EINVAL;
        }
        break;
    }
    case VCPUOP_stop_singleshot_timer:
        if (cs->cpu_index == vcpu_id) {
            err = vcpuop_stop_singleshot_timer(dest);
        } else {
            err = -EINVAL;
        }
        break;
    case VCPUOP_set_periodic_timer: {
        err = vcpuop_set_periodic_timer(cs, dest, arg);
        break;
    }
    case VCPUOP_stop_periodic_timer:
        err = vcpuop_stop_periodic_timer(dest);
        break;

    default:
        return false;
    }

 out:
    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case EVTCHNOP_init_control:
    case EVTCHNOP_expand_array:
    case EVTCHNOP_set_priority:
        /* We do not support FIFO channels at this point */
        err = -ENOSYS;
        break;

    case EVTCHNOP_status: {
        struct evtchn_status status;

        qemu_build_assert(sizeof(status) == 24);
        if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_status_op(&status);
        if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_close: {
        struct evtchn_close close;

        qemu_build_assert(sizeof(close) == 4);
        if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_close_op(&close);
        break;
    }
    case EVTCHNOP_unmask: {
        struct evtchn_unmask unmask;

        qemu_build_assert(sizeof(unmask) == 4);
        if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_unmask_op(&unmask);
        break;
    }
    case EVTCHNOP_bind_virq: {
        struct evtchn_bind_virq virq;

        qemu_build_assert(sizeof(virq) == 12);
        if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_virq_op(&virq);
        if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_pirq: {
        struct evtchn_bind_pirq pirq;

        qemu_build_assert(sizeof(pirq) == 12);
        if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_pirq_op(&pirq);
        if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_ipi: {
        struct evtchn_bind_ipi ipi;

        qemu_build_assert(sizeof(ipi) == 8);
        if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_ipi_op(&ipi);
        if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_send: {
        struct evtchn_send send;

        qemu_build_assert(sizeof(send) == 4);
        if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_send_op(&send);
        break;
    }
    case EVTCHNOP_alloc_unbound: {
        struct evtchn_alloc_unbound alloc;

        qemu_build_assert(sizeof(alloc) == 8);
        if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_alloc_unbound_op(&alloc);
        if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_interdomain: {
        struct evtchn_bind_interdomain interdomain;

        qemu_build_assert(sizeof(interdomain) == 12);
        if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_interdomain_op(&interdomain);
        if (!err &&
            kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
            err = -EFAULT;
        }
        break;
    }
    case EVTCHNOP_bind_vcpu: {
        struct evtchn_bind_vcpu vcpu;

        qemu_build_assert(sizeof(vcpu) == 8);
        if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_bind_vcpu_op(&vcpu);
        break;
    }
    case EVTCHNOP_reset: {
        struct evtchn_reset reset;

        qemu_build_assert(sizeof(reset) == 2);
        if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
            err = -EFAULT;
            break;
        }

        err = xen_evtchn_reset_op(&reset);
        break;
    }
    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}
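
/*
 * For context, not from the original file: a guest-side event send that
 * lands in the EVTCHNOP_send case above looks roughly like (Linux-style
 * wrapper name assumed):
 *
 *     struct evtchn_send send = { .port = local_port };
 *     HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
 */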

int kvm_xen_soft_reset(void)
{
    CPUState *cpu;
    int err;

    assert(qemu_mutex_iothread_locked());

    trace_kvm_xen_soft_reset();

    err = xen_evtchn_soft_reset();
    if (err) {
        return err;
    }

    /*
     * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
     * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses
     * to deliver to the timer interrupt and treats that as 'disabled'.
     */
    err = xen_evtchn_set_callback_param(0);
    if (err) {
        return err;
    }

    CPU_FOREACH(cpu) {
        async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
    }

    err = xen_overlay_map_shinfo_page(INVALID_GFN);
    if (err) {
        return err;
    }

    err = xen_gnttab_reset();
    if (err) {
        return err;
    }

    err = xen_xenstore_reset();
    if (err) {
        return err;
    }

    return 0;
}

static int schedop_shutdown(CPUState *cs, uint64_t arg)
{
    struct sched_shutdown shutdown;
    int ret = 0;

    /* No need for 32/64 compat handling */
    qemu_build_assert(sizeof(shutdown) == 4);

    if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
        return -EFAULT;
    }

    switch (shutdown.reason) {
    case SHUTDOWN_crash:
        cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
        qemu_system_guest_panicked(NULL);
        break;

    case SHUTDOWN_reboot:
        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
        break;

    case SHUTDOWN_poweroff:
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
        break;

    case SHUTDOWN_soft_reset:
        qemu_mutex_lock_iothread();
        ret = kvm_xen_soft_reset();
        qemu_mutex_unlock_iothread();
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}

static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err = -ENOSYS;

    switch (cmd) {
    case SCHEDOP_shutdown:
        err = schedop_shutdown(cs, arg);
        break;

    case SCHEDOP_poll:
        /*
         * Linux will panic if this doesn't work. Just yield; it's not
         * worth overthinking it because with event channel handling
         * in KVM, the kernel will intercept this and it will never
         * reach QEMU anyway. The semantics of the hypercall explicitly
         * permit spurious wakeups.
         */
    case SCHEDOP_yield:
        sched_yield();
        err = 0;
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                    int cmd, uint64_t arg, int count)
{
    CPUState *cs = CPU(cpu);
    int err;

    switch (cmd) {
    case GNTTABOP_set_version: {
        struct gnttab_set_version set;

        qemu_build_assert(sizeof(set) == 4);
        if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
            err = -EFAULT;
            break;
        }

        err = xen_gnttab_set_version_op(&set);
        if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
            err = -EFAULT;
        }
        break;
    }
    case GNTTABOP_get_version: {
        struct gnttab_get_version get;

        qemu_build_assert(sizeof(get) == 8);
        if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
            err = -EFAULT;
            break;
        }

        err = xen_gnttab_get_version_op(&get);
        if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
            err = -EFAULT;
        }
        break;
    }
    case GNTTABOP_query_size: {
        struct gnttab_query_size size;

        qemu_build_assert(sizeof(size) == 16);
        if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
            err = -EFAULT;
            break;
        }

        err = xen_gnttab_query_size_op(&size);
        if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
            err = -EFAULT;
        }
        break;
    }
    case GNTTABOP_setup_table:
    case GNTTABOP_copy:
    case GNTTABOP_map_grant_ref:
    case GNTTABOP_unmap_grant_ref:
    case GNTTABOP_swap_grant_ref:
        return false;

    default:
        /* Xen explicitly returns -ENOSYS to HVM guests for all others */
        err = -ENOSYS;
        break;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                     int cmd, uint64_t arg)
{
    CPUState *cs = CPU(cpu);
    int err;

    switch (cmd) {
    case PHYSDEVOP_map_pirq: {
        struct physdev_map_pirq map;

        if (hypercall_compat32(exit->u.hcall.longmode)) {
            struct compat_physdev_map_pirq *map32 = (void *)&map;

            if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
                err = -EFAULT;
                break;
            }

            /*
             * The only thing that's different is the alignment of the
             * uint64_t table_base at the end, which gets padding to make
             * it 64-bit aligned in the 64-bit version.
             */
            qemu_build_assert(sizeof(*map32) == 36);
            qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
                              offsetof(struct compat_physdev_map_pirq, entry_nr));
            memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
        } else {
            if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
                err = -EFAULT;
                break;
            }
        }
        err = xen_physdev_map_pirq(&map);
        /*
         * Since table_base is an IN parameter and won't be changed, just
         * copy the size of the compat structure back to the guest.
         */
        if (!err && kvm_copy_to_gva(cs, arg, &map,
                                    sizeof(struct compat_physdev_map_pirq))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_unmap_pirq: {
        struct physdev_unmap_pirq unmap;

        qemu_build_assert(sizeof(unmap) == 8);
        if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
            err = -EFAULT;
            break;
        }

        err = xen_physdev_unmap_pirq(&unmap);
        if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_eoi: {
        struct physdev_eoi eoi;

        qemu_build_assert(sizeof(eoi) == 4);
        if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
            err = -EFAULT;
            break;
        }

        err = xen_physdev_eoi_pirq(&eoi);
        if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_irq_status_query: {
        struct physdev_irq_status_query query;

        qemu_build_assert(sizeof(query) == 8);
        if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
            err = -EFAULT;
            break;
        }

        err = xen_physdev_query_pirq(&query);
        if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_get_free_pirq: {
        struct physdev_get_free_pirq get;

        qemu_build_assert(sizeof(get) == 8);
        if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
            err = -EFAULT;
            break;
        }

        err = xen_physdev_get_free_pirq(&get);
        if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
            err = -EFAULT;
        }
        break;
    }
    case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
        err = -ENOSYS;
        break;

    default:
        return false;
    }

    exit->u.hcall.result = err;
    return true;
}

static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    uint16_t code = exit->u.hcall.input;

    if (exit->u.hcall.cpl > 0) {
        exit->u.hcall.result = -EPERM;
        return true;
    }

    switch (code) {
    case __HYPERVISOR_set_timer_op:
        if (exit->u.hcall.longmode) {
            return kvm_xen_hcall_set_timer_op(exit, cpu,
                                              exit->u.hcall.params[0]);
        } else {
            /* In 32-bit mode, the 64-bit timer value is in two args. */
            uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
                           (uint32_t)exit->u.hcall.params[0];
            return kvm_xen_hcall_set_timer_op(exit, cpu, val);
        }
    case __HYPERVISOR_grant_table_op:
        return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1],
                                       exit->u.hcall.params[2]);
    case __HYPERVISOR_sched_op:
        return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
                                      exit->u.hcall.params[1]);
    case __HYPERVISOR_event_channel_op:
        return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_vcpu_op:
        return kvm_xen_hcall_vcpu_op(exit, cpu,
                                     exit->u.hcall.params[0],
                                     exit->u.hcall.params[1],
                                     exit->u.hcall.params[2]);
    case __HYPERVISOR_hvm_op:
        return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
                                    exit->u.hcall.params[1]);
    case __HYPERVISOR_memory_op:
        return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
                                       exit->u.hcall.params[1]);
    case __HYPERVISOR_physdev_op:
        return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
                                        exit->u.hcall.params[1]);
    case __HYPERVISOR_xen_version:
        return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
                                         exit->u.hcall.params[1]);
    default:
        return false;
    }
}

int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
{
    if (exit->type != KVM_EXIT_XEN_HCALL) {
        return -1;
    }

    /*
     * The kernel latches the guest 32/64 mode when the MSR is used to fill
     * the hypercall page. So if we see a hypercall in a mode that doesn't
     * match our own idea of the guest mode, fetch the kernel's idea of the
     * "long mode" to remain in sync.
     */
    if (exit->u.hcall.longmode != xen_is_long_mode()) {
        xen_sync_long_mode();
    }

    if (!do_kvm_xen_handle_exit(cpu, exit)) {
        /*
         * Some hypercalls will be deliberately "implemented" by returning
         * -ENOSYS. This case is for hypercalls which are unexpected.
         */
        exit->u.hcall.result = -ENOSYS;
        qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
                      PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
                      (uint64_t)exit->u.hcall.input,
                      (uint64_t)exit->u.hcall.params[0],
                      (uint64_t)exit->u.hcall.params[1],
                      (uint64_t)exit->u.hcall.params[2]);
    }

    trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
                            exit->u.hcall.input, exit->u.hcall.params[0],
                            exit->u.hcall.params[1], exit->u.hcall.params[2],
                            exit->u.hcall.result);
    return 0;
}
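
/*
 * For orientation, not from the original file: the (assumed) dispatch site
 * in target/i386/kvm/kvm.c's kvm_arch_handle_exit() looks like:
 *
 *     case KVM_EXIT_XEN:
 *         ret = kvm_xen_handle_exit(cpu, &run->xen);
 *         break;
 *
 * so every KVM_EXIT_XEN vmexit funnels through kvm_xen_handle_exit().
 */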

uint16_t kvm_xen_get_gnttab_max_frames(void)
{
    KVMState *s = KVM_STATE(current_accel());
    return s->xen_gnttab_max_frames;
}

uint16_t kvm_xen_get_evtchn_max_pirq(void)
{
    KVMState *s = KVM_STATE(current_accel());
    return s->xen_evtchn_max_pirq;
}

int kvm_put_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;
    int ret;

    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }

    if (gpa != INVALID_GPA) {
        ret = set_vcpu_info(cs, gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_time_info_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    gpa = env->xen_vcpu_runstate_gpa;
    if (gpa != INVALID_GPA) {
        ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
                                    gpa);
        if (ret < 0) {
            return ret;
        }
    }

    if (env->xen_periodic_timer_period) {
        ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
        if (ret < 0) {
            return ret;
        }
    }

    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
        /*
         * If the kernel has EVTCHN_SEND support then it handles timers too,
         * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
         */
        QEMU_LOCK_GUARD(&env->xen_timers_lock);
        if (env->xen_singleshot_timer_ns) {
            ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
                                          false, false);
            if (ret < 0) {
                return ret;
            }
        }
        return 0;
    }

    if (env->xen_vcpu_callback_vector) {
        ret = kvm_xen_set_vcpu_callback_vector(cs);
        if (ret < 0) {
            return ret;
        }
    }

    if (env->xen_virq[VIRQ_TIMER]) {
        do_set_vcpu_timer_virq(cs,
                               RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER]));
    }
    return 0;
}

int kvm_get_xen_state(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    uint64_t gpa;
    int ret;

    /*
     * The kernel does not mark vcpu_info as dirty when it delivers interrupts
     * to it. It's up to userspace to *assume* that any page shared thus is
     * always considered dirty. The shared_info page is different since it's
     * an overlay and migrated separately anyway.
     */
    gpa = env->xen_vcpu_info_gpa;
    if (gpa == INVALID_GPA) {
        gpa = env->xen_vcpu_info_default_gpa;
    }
    if (gpa != INVALID_GPA) {
        MemoryRegionSection mrs = memory_region_find(get_system_memory(),
                                                     gpa,
                                                     sizeof(struct vcpu_info));
        if (mrs.mr &&
            !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
            memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
                                    sizeof(struct vcpu_info));
        }
    }

    if (!kvm_xen_has_cap(EVTCHN_SEND)) {
        return 0;
    }

    /*
     * If the kernel is accelerating timers, read out the current value of the
     * singleshot timer deadline.
     */
    if (env->xen_virq[VIRQ_TIMER]) {
        struct kvm_xen_vcpu_attr va = {
            .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
        };
        ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
        if (ret < 0) {
            return ret;
        }

        /*
         * This locking is fairly pointless, and is here to appease Coverity.
         * There is an unavoidable race condition if a different vCPU sets a
         * timer for this vCPU after the value has been read out. But that's
         * OK in practice because *all* the vCPUs need to be stopped before
         * we set about migrating their state.
         */
        QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
        env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
    }

    return 0;
}