]> git.proxmox.com Git - mirror_qemu.git/blame - target/i386/kvm/xen-emu.c
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into...
[mirror_qemu.git] / target / i386 / kvm / xen-emu.c
CommitLineData
61491cf4
DW
1/*
2 * Xen HVM emulation support in KVM
3 *
4 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
5 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
8 * See the COPYING file in the top-level directory.
9 *
10 */
11
12#include "qemu/osdep.h"
55a3f666 13#include "qemu/log.h"
79b7067d 14#include "qemu/main-loop.h"
fb0fd2ce 15#include "hw/xen/xen.h"
61491cf4
DW
16#include "sysemu/kvm_int.h"
17#include "sysemu/kvm_xen.h"
18#include "kvm/kvm_i386.h"
bedcc139 19#include "exec/address-spaces.h"
61491cf4 20#include "xen-emu.h"
55a3f666 21#include "trace.h"
79b7067d 22#include "sysemu/runstate.h"
61491cf4 23
27d4075d
DW
24#include "hw/pci/msi.h"
25#include "hw/i386/apic-msidef.h"
8b57d5c5 26#include "hw/i386/e820_memory_layout.h"
110a0ea5 27#include "hw/i386/kvm/xen_overlay.h"
91cce756 28#include "hw/i386/kvm/xen_evtchn.h"
a28b0fc0 29#include "hw/i386/kvm/xen_gnttab.h"
c08f5d0e 30#include "hw/i386/kvm/xen_xenstore.h"
110a0ea5 31
bedcc139 32#include "hw/xen/interface/version.h"
79b7067d 33#include "hw/xen/interface/sched.h"
fb0fd2ce 34#include "hw/xen/interface/memory.h"
671bfdcd 35#include "hw/xen/interface/hvm/hvm_op.h"
105b47fd 36#include "hw/xen/interface/hvm/params.h"
d70bd6a4 37#include "hw/xen/interface/vcpu.h"
3b06f29b 38#include "hw/xen/interface/event_channel.h"
28b7ae94 39#include "hw/xen/interface/grant_table.h"
fb0fd2ce
JM
40
41#include "xen-compat.h"
42
b746a779
JM
43static void xen_vcpu_singleshot_timer_event(void *opaque);
44static void xen_vcpu_periodic_timer_event(void *opaque);
45
fb0fd2ce
JM
46#ifdef TARGET_X86_64
47#define hypercall_compat32(longmode) (!(longmode))
48#else
49#define hypercall_compat32(longmode) (false)
50#endif
bedcc139 51
f0689302
JM
52static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
53 size_t *len, bool is_write)
bedcc139 54{
bedcc139
JM
55 struct kvm_translation tr = {
56 .linear_address = gva,
57 };
58
f0689302
JM
59 if (len) {
60 *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
61 }
62
63 if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
64 (is_write && !tr.writeable)) {
65 return false;
bedcc139 66 }
f0689302
JM
67 *gpa = tr.physical_address;
68 return true;
69}
70
71static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
72 bool is_write)
73{
74 uint8_t *buf = (uint8_t *)_buf;
75 uint64_t gpa;
76 size_t len;
bedcc139 77
f0689302
JM
78 while (sz) {
79 if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
bedcc139
JM
80 return -EFAULT;
81 }
f0689302
JM
82 if (len > sz) {
83 len = sz;
84 }
bedcc139 85
f0689302 86 cpu_physical_memory_rw(gpa, buf, len, is_write);
bedcc139
JM
87
88 buf += len;
89 sz -= len;
90 gva += len;
91 }
92
93 return 0;
94}
95
96static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
97 size_t sz)
98{
99 return kvm_gva_rw(cs, gva, buf, sz, false);
100}
101
102static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
103 size_t sz)
104{
105 return kvm_gva_rw(cs, gva, buf, sz, true);
106}
107
f66b8a83 108int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
61491cf4
DW
109{
110 const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
111 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
112 struct kvm_xen_hvm_config cfg = {
f66b8a83 113 .msr = hypercall_msr,
61491cf4
DW
114 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
115 };
116 int xen_caps, ret;
117
118 xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
119 if (required_caps & ~xen_caps) {
120 error_report("kvm: Xen HVM guest support not present or insufficient");
121 return -ENOSYS;
122 }
123
124 if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
125 struct kvm_xen_hvm_attr ha = {
126 .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
127 .u.xen_version = s->xen_version,
128 };
129 (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
130
131 cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
132 }
133
134 ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
135 if (ret < 0) {
136 error_report("kvm: Failed to enable Xen HVM support: %s",
137 strerror(-ret));
138 return ret;
139 }
140
2aff696b
DW
141 /* If called a second time, don't repeat the rest of the setup. */
142 if (s->xen_caps) {
143 return 0;
144 }
145
146 /*
147 * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
148 * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
149 *
150 * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
151 * such things to be polled at precisely the right time. We *could* do
152 * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
153 * the moment the IRQ is acked, and see if it should be reasserted.
154 *
155 * But the in-kernel irqchip is deprecated, so we're unlikely to add
156 * that support in the kernel. Insist on using the split irqchip mode
157 * instead.
158 *
159 * This leaves us polling for the level going low in QEMU, which lacks
160 * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
161 * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
162 * the device (for which it has to unmap the device and trap access, for
163 * some period after an IRQ!!). In the Xen case, we do it on exit from
164 * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
165 * Which is kind of icky, but less so than the VFIO one. I may fix them
166 * both later...
167 */
168 if (!kvm_kernel_irqchip_split()) {
169 error_report("kvm: Xen support requires kernel-irqchip=split");
170 return -EINVAL;
171 }
172
61491cf4 173 s->xen_caps = xen_caps;
8b57d5c5
DW
174
175 /* Tell fw_cfg to notify the BIOS to reserve the range. */
176 ret = e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE,
177 E820_RESERVED);
178 if (ret < 0) {
179 fprintf(stderr, "e820_add_entry() table is full\n");
180 return ret;
181 }
182
c08f5d0e
DW
183 /* The page couldn't be overlaid until KVM was initialized */
184 xen_xenstore_reset();
185
61491cf4
DW
186 return 0;
187}
188
5e691a95
DW
189int kvm_xen_init_vcpu(CPUState *cs)
190{
c345104c
JM
191 X86CPU *cpu = X86_CPU(cs);
192 CPUX86State *env = &cpu->env;
5e691a95
DW
193 int err;
194
195 /*
196 * The kernel needs to know the Xen/ACPI vCPU ID because that's
197 * what the guest uses in hypercalls such as timers. It doesn't
198 * match the APIC ID which is generally used for talking to the
199 * kernel about vCPUs. And if vCPU threads race with creating
200 * their KVM vCPUs out of order, it doesn't necessarily match
201 * with the kernel's internal vCPU indices either.
202 */
203 if (kvm_xen_has_cap(EVTCHN_SEND)) {
204 struct kvm_xen_vcpu_attr va = {
205 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
206 .u.vcpu_id = cs->cpu_index,
207 };
208 err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
209 if (err) {
210 error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
211 strerror(-err));
212 return err;
213 }
214 }
215
c345104c
JM
216 env->xen_vcpu_info_gpa = INVALID_GPA;
217 env->xen_vcpu_info_default_gpa = INVALID_GPA;
f0689302 218 env->xen_vcpu_time_info_gpa = INVALID_GPA;
5092db87 219 env->xen_vcpu_runstate_gpa = INVALID_GPA;
c345104c 220
b746a779
JM
221 qemu_mutex_init(&env->xen_timers_lock);
222 env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
223 xen_vcpu_singleshot_timer_event,
224 cpu);
225 if (!env->xen_singleshot_timer) {
226 return -ENOMEM;
227 }
228 env->xen_singleshot_timer->opaque = cs;
229
230 env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
231 xen_vcpu_periodic_timer_event,
232 cpu);
233 if (!env->xen_periodic_timer) {
234 return -ENOMEM;
235 }
236 env->xen_periodic_timer->opaque = cs;
237
5e691a95
DW
238 return 0;
239}
240
61491cf4
DW
241uint32_t kvm_xen_get_caps(void)
242{
243 return kvm_state->xen_caps;
244}
55a3f666 245
bedcc139
JM
246static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
247 int cmd, uint64_t arg)
248{
249 int err = 0;
250
251 switch (cmd) {
252 case XENVER_get_features: {
253 struct xen_feature_info fi;
254
255 /* No need for 32/64 compat handling */
256 qemu_build_assert(sizeof(fi) == 8);
257
258 err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
259 if (err) {
260 break;
261 }
262
263 fi.submap = 0;
264 if (fi.submap_idx == 0) {
265 fi.submap |= 1 << XENFEAT_writable_page_tables |
266 1 << XENFEAT_writable_descriptor_tables |
267 1 << XENFEAT_auto_translated_physmap |
105b47fd 268 1 << XENFEAT_supervisor_mode_kernel |
b746a779 269 1 << XENFEAT_hvm_callback_vector |
6096cf78
DW
270 1 << XENFEAT_hvm_safe_pvclock |
271 1 << XENFEAT_hvm_pirqs;
bedcc139
JM
272 }
273
274 err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
275 break;
276 }
277
278 default:
279 return false;
280 }
281
282 exit->u.hcall.result = err;
283 return true;
284}
285
c345104c
JM
286static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
287{
288 struct kvm_xen_vcpu_attr xhsi;
289
290 xhsi.type = type;
291 xhsi.u.gpa = gpa;
292
293 trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
294
295 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
296}
297
105b47fd
AA
298static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
299{
300 uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
301 struct kvm_xen_vcpu_attr xva;
302
303 xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
304 xva.u.vector = vector;
305
306 trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
307
308 return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva);
309}
310
311static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
312{
313 X86CPU *cpu = X86_CPU(cs);
314 CPUX86State *env = &cpu->env;
315
316 env->xen_vcpu_callback_vector = data.host_int;
317
318 if (kvm_xen_has_cap(EVTCHN_SEND)) {
319 kvm_xen_set_vcpu_callback_vector(cs);
320 }
321}
322
27d4075d
DW
323static int set_vcpu_info(CPUState *cs, uint64_t gpa)
324{
325 X86CPU *cpu = X86_CPU(cs);
326 CPUX86State *env = &cpu->env;
327 MemoryRegionSection mrs = { .mr = NULL };
328 void *vcpu_info_hva = NULL;
329 int ret;
330
331 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
332 if (ret || gpa == INVALID_GPA) {
333 goto out;
334 }
335
336 mrs = memory_region_find(get_system_memory(), gpa,
337 sizeof(struct vcpu_info));
338 if (mrs.mr && mrs.mr->ram_block &&
339 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
340 vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
341 mrs.offset_within_region);
342 }
343 if (!vcpu_info_hva) {
344 if (mrs.mr) {
345 memory_region_unref(mrs.mr);
346 mrs.mr = NULL;
347 }
348 ret = -EINVAL;
349 }
350
351 out:
352 if (env->xen_vcpu_info_mr) {
353 memory_region_unref(env->xen_vcpu_info_mr);
354 }
355 env->xen_vcpu_info_hva = vcpu_info_hva;
356 env->xen_vcpu_info_mr = mrs.mr;
357 return ret;
358}
359
c345104c
JM
360static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
361{
362 X86CPU *cpu = X86_CPU(cs);
363 CPUX86State *env = &cpu->env;
364
365 env->xen_vcpu_info_default_gpa = data.host_ulong;
366
367 /* Changing the default does nothing if a vcpu_info was explicitly set. */
368 if (env->xen_vcpu_info_gpa == INVALID_GPA) {
27d4075d 369 set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
c345104c
JM
370 }
371}
372
373static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
374{
375 X86CPU *cpu = X86_CPU(cs);
376 CPUX86State *env = &cpu->env;
377
378 env->xen_vcpu_info_gpa = data.host_ulong;
379
27d4075d
DW
380 set_vcpu_info(cs, env->xen_vcpu_info_gpa);
381}
382
383void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
384{
385 CPUState *cs = qemu_get_cpu(vcpu_id);
386 if (!cs) {
387 return NULL;
388 }
389
390 return X86_CPU(cs)->env.xen_vcpu_info_hva;
391}
392
ddf0fd9a
DW
393void kvm_xen_maybe_deassert_callback(CPUState *cs)
394{
395 CPUX86State *env = &X86_CPU(cs)->env;
396 struct vcpu_info *vi = env->xen_vcpu_info_hva;
397 if (!vi) {
398 return;
399 }
400
401 /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
402 if (!vi->evtchn_upcall_pending) {
403 qemu_mutex_lock_iothread();
404 /*
405 * Check again now we have the lock, because it may have been
406 * asserted in the interim. And we don't want to take the lock
407 * every time because this is a fast path.
408 */
409 if (!vi->evtchn_upcall_pending) {
410 X86_CPU(cs)->env.xen_callback_asserted = false;
411 xen_evtchn_set_callback_level(0);
412 }
413 qemu_mutex_unlock_iothread();
414 }
415}
416
417void kvm_xen_set_callback_asserted(void)
418{
419 CPUState *cs = qemu_get_cpu(0);
420
421 if (cs) {
422 X86_CPU(cs)->env.xen_callback_asserted = true;
423 }
424}
425
27d4075d
DW
426void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
427{
428 CPUState *cs = qemu_get_cpu(vcpu_id);
429 uint8_t vector;
430
431 if (!cs) {
432 return;
433 }
434
435 vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
436 if (vector) {
437 /*
438 * The per-vCPU callback vector injected via lapic. Just
439 * deliver it as an MSI.
440 */
441 MSIMessage msg = {
442 .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id,
443 .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
444 };
445 kvm_irqchip_send_msi(kvm_state, msg);
446 return;
447 }
448
449 switch (type) {
450 case HVM_PARAM_CALLBACK_TYPE_VECTOR:
451 /*
452 * If the evtchn_upcall_pending field in the vcpu_info is set, then
453 * KVM will automatically deliver the vector on entering the vCPU
454 * so all we have to do is kick it out.
455 */
456 qemu_cpu_kick(cs);
457 break;
ddf0fd9a
DW
458
459 case HVM_PARAM_CALLBACK_TYPE_GSI:
460 case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
461 if (vcpu_id == 0) {
462 xen_evtchn_set_callback_level(1);
463 }
464 break;
27d4075d 465 }
c345104c
JM
466}
467
c723d4c1
DW
468static int kvm_xen_set_vcpu_timer(CPUState *cs)
469{
470 X86CPU *cpu = X86_CPU(cs);
471 CPUX86State *env = &cpu->env;
472
473 struct kvm_xen_vcpu_attr va = {
474 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
475 .u.timer.port = env->xen_virq[VIRQ_TIMER],
476 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
477 .u.timer.expires_ns = env->xen_singleshot_timer_ns,
478 };
479
480 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
481}
482
483static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
484{
485 kvm_xen_set_vcpu_timer(cs);
486}
487
488int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
489{
490 CPUState *cs = qemu_get_cpu(vcpu_id);
491
492 if (!cs) {
493 return -ENOENT;
494 }
495
496 /* cpu.h doesn't include the actual Xen header. */
497 qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
498
499 if (virq >= NR_VIRQS) {
500 return -EINVAL;
501 }
502
503 if (port && X86_CPU(cs)->env.xen_virq[virq]) {
504 return -EEXIST;
505 }
506
507 X86_CPU(cs)->env.xen_virq[virq] = port;
508 if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
509 async_run_on_cpu(cs, do_set_vcpu_timer_virq,
510 RUN_ON_CPU_HOST_INT(port));
511 }
512 return 0;
513}
514
f0689302
JM
515static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
516{
517 X86CPU *cpu = X86_CPU(cs);
518 CPUX86State *env = &cpu->env;
519
520 env->xen_vcpu_time_info_gpa = data.host_ulong;
521
522 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
523 env->xen_vcpu_time_info_gpa);
524}
525
5092db87
JM
526static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
527{
528 X86CPU *cpu = X86_CPU(cs);
529 CPUX86State *env = &cpu->env;
530
531 env->xen_vcpu_runstate_gpa = data.host_ulong;
532
533 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
534 env->xen_vcpu_runstate_gpa);
535}
536
c345104c
JM
537static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
538{
539 X86CPU *cpu = X86_CPU(cs);
540 CPUX86State *env = &cpu->env;
541
542 env->xen_vcpu_info_gpa = INVALID_GPA;
543 env->xen_vcpu_info_default_gpa = INVALID_GPA;
f0689302 544 env->xen_vcpu_time_info_gpa = INVALID_GPA;
5092db87 545 env->xen_vcpu_runstate_gpa = INVALID_GPA;
105b47fd 546 env->xen_vcpu_callback_vector = 0;
c723d4c1
DW
547 env->xen_singleshot_timer_ns = 0;
548 memset(env->xen_virq, 0, sizeof(env->xen_virq));
c345104c 549
27d4075d 550 set_vcpu_info(cs, INVALID_GPA);
f0689302
JM
551 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
552 INVALID_GPA);
5092db87
JM
553 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
554 INVALID_GPA);
105b47fd
AA
555 if (kvm_xen_has_cap(EVTCHN_SEND)) {
556 kvm_xen_set_vcpu_callback_vector(cs);
c723d4c1 557 kvm_xen_set_vcpu_timer(cs);
105b47fd 558 }
5092db87 559
c345104c
JM
560}
561
fb0fd2ce
JM
562static int xen_set_shared_info(uint64_t gfn)
563{
564 uint64_t gpa = gfn << TARGET_PAGE_BITS;
c345104c 565 int i, err;
fb0fd2ce
JM
566
567 QEMU_IOTHREAD_LOCK_GUARD();
568
569 /*
570 * The xen_overlay device tells KVM about it too, since it had to
571 * do that on migration load anyway (unless we're going to jump
572 * through lots of hoops to maintain the fiction that this isn't
573 * KVM-specific.
574 */
575 err = xen_overlay_map_shinfo_page(gpa);
576 if (err) {
577 return err;
578 }
579
580 trace_kvm_xen_set_shared_info(gfn);
581
c345104c
JM
582 for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
583 CPUState *cpu = qemu_get_cpu(i);
584 if (cpu) {
585 async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
586 RUN_ON_CPU_HOST_ULONG(gpa));
587 }
588 gpa += sizeof(vcpu_info_t);
589 }
590
fb0fd2ce
JM
591 return err;
592}
593
594static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
595{
596 switch (space) {
597 case XENMAPSPACE_shared_info:
598 if (idx > 0) {
599 return -EINVAL;
600 }
601 return xen_set_shared_info(gfn);
602
603 case XENMAPSPACE_grant_table:
a28b0fc0
DW
604 return xen_gnttab_map_page(idx, gfn);
605
fb0fd2ce
JM
606 case XENMAPSPACE_gmfn:
607 case XENMAPSPACE_gmfn_range:
608 return -ENOTSUP;
609
610 case XENMAPSPACE_gmfn_foreign:
611 case XENMAPSPACE_dev_mmio:
612 return -EPERM;
613
614 default:
615 return -EINVAL;
616 }
617}
618
619static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
620 uint64_t arg)
621{
622 struct xen_add_to_physmap xatp;
623 CPUState *cs = CPU(cpu);
624
625 if (hypercall_compat32(exit->u.hcall.longmode)) {
626 struct compat_xen_add_to_physmap xatp32;
627
628 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
629 if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
630 return -EFAULT;
631 }
632 xatp.domid = xatp32.domid;
633 xatp.size = xatp32.size;
634 xatp.space = xatp32.space;
635 xatp.idx = xatp32.idx;
636 xatp.gpfn = xatp32.gpfn;
637 } else {
638 if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
639 return -EFAULT;
640 }
641 }
642
643 if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
644 return -ESRCH;
645 }
646
647 return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
648}
649
782a7960
DW
650static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
651 uint64_t arg)
652{
653 struct xen_add_to_physmap_batch xatpb;
654 unsigned long idxs_gva, gpfns_gva, errs_gva;
655 CPUState *cs = CPU(cpu);
656 size_t op_sz;
657
658 if (hypercall_compat32(exit->u.hcall.longmode)) {
659 struct compat_xen_add_to_physmap_batch xatpb32;
660
661 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
662 if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
663 return -EFAULT;
664 }
665 xatpb.domid = xatpb32.domid;
666 xatpb.space = xatpb32.space;
667 xatpb.size = xatpb32.size;
668
669 idxs_gva = xatpb32.idxs.c;
670 gpfns_gva = xatpb32.gpfns.c;
671 errs_gva = xatpb32.errs.c;
672 op_sz = sizeof(uint32_t);
673 } else {
674 if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
675 return -EFAULT;
676 }
677 op_sz = sizeof(unsigned long);
678 idxs_gva = (unsigned long)xatpb.idxs.p;
679 gpfns_gva = (unsigned long)xatpb.gpfns.p;
680 errs_gva = (unsigned long)xatpb.errs.p;
681 }
682
683 if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
684 return -ESRCH;
685 }
686
687 /* Explicitly invalid for the batch op. Not that we implement it anyway. */
688 if (xatpb.space == XENMAPSPACE_gmfn_range) {
689 return -EINVAL;
690 }
691
692 while (xatpb.size--) {
693 unsigned long idx = 0;
694 unsigned long gpfn = 0;
695 int err;
696
697 /* For 32-bit compat this only copies the low 32 bits of each */
698 if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
699 kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
700 return -EFAULT;
701 }
702 idxs_gva += op_sz;
703 gpfns_gva += op_sz;
704
705 err = add_to_physmap_one(xatpb.space, idx, gpfn);
706
707 if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
708 return -EFAULT;
709 }
710 errs_gva += sizeof(err);
711 }
712 return 0;
713}
714
fb0fd2ce
JM
715static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
716 int cmd, uint64_t arg)
717{
718 int err;
719
720 switch (cmd) {
721 case XENMEM_add_to_physmap:
722 err = do_add_to_physmap(exit, cpu, arg);
723 break;
724
782a7960
DW
725 case XENMEM_add_to_physmap_batch:
726 err = do_add_to_physmap_batch(exit, cpu, arg);
727 break;
728
fb0fd2ce
JM
729 default:
730 return false;
731 }
732
733 exit->u.hcall.result = err;
734 return true;
735}
736
5dbcd01a
AA
737static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
738 uint64_t arg)
739{
740 CPUState *cs = CPU(cpu);
741 struct xen_hvm_param hp;
742 int err = 0;
743
744 /* No need for 32/64 compat handling */
745 qemu_build_assert(sizeof(hp) == 16);
746
747 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
748 err = -EFAULT;
749 goto out;
750 }
751
752 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
753 err = -ESRCH;
754 goto out;
755 }
756
757 switch (hp.index) {
91cce756 758 case HVM_PARAM_CALLBACK_IRQ:
2aff696b 759 qemu_mutex_lock_iothread();
91cce756 760 err = xen_evtchn_set_callback_param(hp.value);
2aff696b 761 qemu_mutex_unlock_iothread();
91cce756
DW
762 xen_set_long_mode(exit->u.hcall.longmode);
763 break;
5dbcd01a
AA
764 default:
765 return false;
766 }
767
768out:
769 exit->u.hcall.result = err;
770 return true;
771}
772
c6623cc3
JM
773static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
774 uint64_t arg)
775{
776 CPUState *cs = CPU(cpu);
777 struct xen_hvm_param hp;
778 int err = 0;
779
780 /* No need for 32/64 compat handling */
781 qemu_build_assert(sizeof(hp) == 16);
782
783 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
784 err = -EFAULT;
785 goto out;
786 }
787
788 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
789 err = -ESRCH;
790 goto out;
791 }
792
793 switch (hp.index) {
794 case HVM_PARAM_STORE_PFN:
795 hp.value = XEN_SPECIAL_PFN(XENSTORE);
796 break;
c08f5d0e
DW
797 case HVM_PARAM_STORE_EVTCHN:
798 hp.value = xen_xenstore_get_port();
799 break;
c6623cc3
JM
800 default:
801 return false;
802 }
803
804 if (kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
805 err = -EFAULT;
806 }
807out:
808 exit->u.hcall.result = err;
809 return true;
810}
811
105b47fd
AA
812static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
813 X86CPU *cpu, uint64_t arg)
814{
815 struct xen_hvm_evtchn_upcall_vector up;
816 CPUState *target_cs;
817
818 /* No need for 32/64 compat handling */
819 qemu_build_assert(sizeof(up) == 8);
820
821 if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
822 return -EFAULT;
823 }
824
825 if (up.vector < 0x10) {
826 return -EINVAL;
827 }
828
829 target_cs = qemu_get_cpu(up.vcpu);
830 if (!target_cs) {
831 return -EINVAL;
832 }
833
834 async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
835 RUN_ON_CPU_HOST_INT(up.vector));
836 return 0;
837}
838
671bfdcd
JM
839static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
840 int cmd, uint64_t arg)
841{
105b47fd 842 int ret = -ENOSYS;
671bfdcd 843 switch (cmd) {
105b47fd
AA
844 case HVMOP_set_evtchn_upcall_vector:
845 ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu,
846 exit->u.hcall.params[0]);
847 break;
848
671bfdcd 849 case HVMOP_pagetable_dying:
105b47fd
AA
850 ret = -ENOSYS;
851 break;
671bfdcd 852
5dbcd01a
AA
853 case HVMOP_set_param:
854 return handle_set_param(exit, cpu, arg);
855
c6623cc3
JM
856 case HVMOP_get_param:
857 return handle_get_param(exit, cpu, arg);
858
671bfdcd
JM
859 default:
860 return false;
861 }
105b47fd
AA
862
863 exit->u.hcall.result = ret;
864 return true;
671bfdcd
JM
865}
866
c345104c
JM
867static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
868 uint64_t arg)
869{
870 struct vcpu_register_vcpu_info rvi;
871 uint64_t gpa;
872
873 /* No need for 32/64 compat handling */
874 qemu_build_assert(sizeof(rvi) == 16);
875 qemu_build_assert(sizeof(struct vcpu_info) == 64);
876
877 if (!target) {
878 return -ENOENT;
879 }
880
881 if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
882 return -EFAULT;
883 }
884
885 if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
886 return -EINVAL;
887 }
888
889 gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
890 async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
891 return 0;
892}
893
f0689302
JM
894static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
895 uint64_t arg)
896{
897 struct vcpu_register_time_memory_area tma;
898 uint64_t gpa;
899 size_t len;
900
901 /* No need for 32/64 compat handling */
902 qemu_build_assert(sizeof(tma) == 8);
903 qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
904
905 if (!target) {
906 return -ENOENT;
907 }
908
909 if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
910 return -EFAULT;
911 }
912
913 /*
914 * Xen actually uses the GVA and does the translation through the guest
915 * page tables each time. But Linux/KVM uses the GPA, on the assumption
916 * that guests only ever use *global* addresses (kernel virtual addresses)
917 * for it. If Linux is changed to redo the GVA→GPA translation each time,
918 * it will offer a new vCPU attribute for that, and we'll use it instead.
919 */
920 if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
921 len < sizeof(struct vcpu_time_info)) {
922 return -EFAULT;
923 }
924
925 async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
926 RUN_ON_CPU_HOST_ULONG(gpa));
927 return 0;
928}
929
5092db87
JM
930static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
931 uint64_t arg)
932{
933 struct vcpu_register_runstate_memory_area rma;
934 uint64_t gpa;
935 size_t len;
936
937 /* No need for 32/64 compat handling */
938 qemu_build_assert(sizeof(rma) == 8);
939 /* The runstate area actually does change size, but Linux copes. */
940
941 if (!target) {
942 return -ENOENT;
943 }
944
945 if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
946 return -EFAULT;
947 }
948
949 /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
950 if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
951 return -EFAULT;
952 }
953
954 async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
955 RUN_ON_CPU_HOST_ULONG(gpa));
956 return 0;
957}
958
b746a779
JM
959static uint64_t kvm_get_current_ns(void)
960{
961 struct kvm_clock_data data;
962 int ret;
963
964 ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
965 if (ret < 0) {
966 fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
967 abort();
968 }
969
970 return data.clock;
971}
972
973static void xen_vcpu_singleshot_timer_event(void *opaque)
974{
975 CPUState *cpu = opaque;
976 CPUX86State *env = &X86_CPU(cpu)->env;
977 uint16_t port = env->xen_virq[VIRQ_TIMER];
978
979 if (likely(port)) {
980 xen_evtchn_set_port(port);
981 }
982
983 qemu_mutex_lock(&env->xen_timers_lock);
984 env->xen_singleshot_timer_ns = 0;
985 qemu_mutex_unlock(&env->xen_timers_lock);
986}
987
988static void xen_vcpu_periodic_timer_event(void *opaque)
989{
990 CPUState *cpu = opaque;
991 CPUX86State *env = &X86_CPU(cpu)->env;
992 uint16_t port = env->xen_virq[VIRQ_TIMER];
993 int64_t qemu_now;
994
995 if (likely(port)) {
996 xen_evtchn_set_port(port);
997 }
998
999 qemu_mutex_lock(&env->xen_timers_lock);
1000
1001 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1002 timer_mod_ns(env->xen_periodic_timer,
1003 qemu_now + env->xen_periodic_timer_period);
1004
1005 qemu_mutex_unlock(&env->xen_timers_lock);
1006}
1007
1008static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
1009{
1010 CPUX86State *tenv = &X86_CPU(target)->env;
1011 int64_t qemu_now;
1012
1013 timer_del(tenv->xen_periodic_timer);
1014
1015 qemu_mutex_lock(&tenv->xen_timers_lock);
1016
1017 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1018 timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
1019 tenv->xen_periodic_timer_period = period_ns;
1020
1021 qemu_mutex_unlock(&tenv->xen_timers_lock);
1022 return 0;
1023}
1024
1025#define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL))
1026#define MICROSECS(_us) ((int64_t)((_us) * 1000ULL))
1027#define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
1028/* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
1029#define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
1030
1031static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
1032 uint64_t arg)
1033{
1034 struct vcpu_set_periodic_timer spt;
1035
1036 qemu_build_assert(sizeof(spt) == 8);
1037 if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
1038 return -EFAULT;
1039 }
1040
1041 if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
1042 return -EINVAL;
1043 }
1044
1045 return do_set_periodic_timer(target, spt.period_ns);
1046}
1047
1048static int vcpuop_stop_periodic_timer(CPUState *target)
1049{
1050 CPUX86State *tenv = &X86_CPU(target)->env;
1051
1052 qemu_mutex_lock(&tenv->xen_timers_lock);
1053
1054 timer_del(tenv->xen_periodic_timer);
1055 tenv->xen_periodic_timer_period = 0;
1056
1057 qemu_mutex_unlock(&tenv->xen_timers_lock);
1058 return 0;
1059}
1060
1061static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
1062 bool future, bool linux_wa)
1063{
1064 CPUX86State *env = &X86_CPU(cs)->env;
1065 int64_t now = kvm_get_current_ns();
1066 int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1067 int64_t delta = timeout_abs - now;
1068
1069 if (future && timeout_abs < now) {
1070 return -ETIME;
1071 }
1072
1073 if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
1074 (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
1075 /*
1076 * Xen has a 'Linux workaround' in do_set_timer_op() which checks
1077 * for negative absolute timeout values (caused by integer
1078 * overflow), and for values about 13 days in the future (2^50ns)
1079 * which would be caused by jiffies overflow. For those cases, it
1080 * sets the timeout 100ms in the future (not *too* soon, since if
1081 * a guest really did set a long timeout on purpose we don't want
1082 * to keep churning CPU time by waking it up).
1083 */
1084 delta = (100 * SCALE_MS);
1085 timeout_abs = now + delta;
1086 }
1087
1088 qemu_mutex_lock(&env->xen_timers_lock);
1089
1090 timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
1091 env->xen_singleshot_timer_ns = now + delta;
1092
1093 qemu_mutex_unlock(&env->xen_timers_lock);
1094 return 0;
1095}
1096
1097static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
1098{
1099 struct vcpu_set_singleshot_timer sst = { 0 };
1100
1101 /*
1102 * The struct is a uint64_t followed by a uint32_t. On 32-bit that
1103 * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
1104 * that get used are identical, and there's four bytes of padding
1105 * unused at the end. For true Xen compatibility we should attempt
1106 * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
1107 * if we can't get the padding too. But that's daft. Just copy what
1108 * we need.
1109 */
1110 qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
1111 qemu_build_assert(sizeof(sst) >= 12);
1112
1113 if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
1114 return -EFAULT;
1115 }
1116
1117 return do_set_singleshot_timer(cs, sst.timeout_abs_ns,
1118 !!(sst.flags & VCPU_SSHOTTMR_future),
1119 false);
1120}
1121
1122static int vcpuop_stop_singleshot_timer(CPUState *cs)
1123{
1124 CPUX86State *env = &X86_CPU(cs)->env;
1125
1126 qemu_mutex_lock(&env->xen_timers_lock);
1127
1128 timer_del(env->xen_singleshot_timer);
1129 env->xen_singleshot_timer_ns = 0;
1130
1131 qemu_mutex_unlock(&env->xen_timers_lock);
1132 return 0;
1133}
1134
1135static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1136 uint64_t timeout)
1137{
1138 int err;
1139
1140 if (unlikely(timeout == 0)) {
1141 err = vcpuop_stop_singleshot_timer(CPU(cpu));
1142 } else {
1143 err = do_set_singleshot_timer(CPU(cpu), timeout, false, true);
1144 }
1145 exit->u.hcall.result = err;
1146 return true;
1147}
1148
d70bd6a4
JM
1149static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1150 int cmd, int vcpu_id, uint64_t arg)
1151{
c345104c 1152 CPUState *cs = CPU(cpu);
b746a779 1153 CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
d70bd6a4
JM
1154 int err;
1155
b746a779
JM
1156 if (!dest) {
1157 err = -ENOENT;
1158 goto out;
1159 }
1160
d70bd6a4 1161 switch (cmd) {
5092db87
JM
1162 case VCPUOP_register_runstate_memory_area:
1163 err = vcpuop_register_runstate_info(cs, dest, arg);
1164 break;
f0689302
JM
1165 case VCPUOP_register_vcpu_time_memory_area:
1166 err = vcpuop_register_vcpu_time_info(cs, dest, arg);
1167 break;
d70bd6a4 1168 case VCPUOP_register_vcpu_info:
c345104c 1169 err = vcpuop_register_vcpu_info(cs, dest, arg);
d70bd6a4 1170 break;
b746a779
JM
1171 case VCPUOP_set_singleshot_timer: {
1172 if (cs->cpu_index == vcpu_id) {
1173 err = vcpuop_set_singleshot_timer(dest, arg);
1174 } else {
1175 err = -EINVAL;
1176 }
1177 break;
1178 }
1179 case VCPUOP_stop_singleshot_timer:
1180 if (cs->cpu_index == vcpu_id) {
1181 err = vcpuop_stop_singleshot_timer(dest);
1182 } else {
1183 err = -EINVAL;
1184 }
1185 break;
1186 case VCPUOP_set_periodic_timer: {
1187 err = vcpuop_set_periodic_timer(cs, dest, arg);
1188 break;
1189 }
1190 case VCPUOP_stop_periodic_timer:
1191 err = vcpuop_stop_periodic_timer(dest);
1192 break;
d70bd6a4
JM
1193
1194 default:
1195 return false;
1196 }
1197
b746a779 1198 out:
d70bd6a4
JM
1199 exit->u.hcall.result = err;
1200 return true;
1201}
1202
4858ba20 1203static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
3b06f29b
JM
1204 int cmd, uint64_t arg)
1205{
4858ba20 1206 CPUState *cs = CPU(cpu);
3b06f29b
JM
1207 int err = -ENOSYS;
1208
1209 switch (cmd) {
1210 case EVTCHNOP_init_control:
1211 case EVTCHNOP_expand_array:
1212 case EVTCHNOP_set_priority:
1213 /* We do not support FIFO channels at this point */
1214 err = -ENOSYS;
1215 break;
1216
4858ba20
DW
1217 case EVTCHNOP_status: {
1218 struct evtchn_status status;
1219
1220 qemu_build_assert(sizeof(status) == 24);
1221 if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
1222 err = -EFAULT;
1223 break;
1224 }
1225
1226 err = xen_evtchn_status_op(&status);
1227 if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
1228 err = -EFAULT;
1229 }
1230 break;
1231 }
83eb5811
DW
1232 case EVTCHNOP_close: {
1233 struct evtchn_close close;
1234
1235 qemu_build_assert(sizeof(close) == 4);
1236 if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
1237 err = -EFAULT;
1238 break;
1239 }
1240
1241 err = xen_evtchn_close_op(&close);
1242 break;
1243 }
190cc3c0
DW
1244 case EVTCHNOP_unmask: {
1245 struct evtchn_unmask unmask;
1246
1247 qemu_build_assert(sizeof(unmask) == 4);
1248 if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
1249 err = -EFAULT;
1250 break;
1251 }
1252
1253 err = xen_evtchn_unmask_op(&unmask);
1254 break;
1255 }
c723d4c1
DW
1256 case EVTCHNOP_bind_virq: {
1257 struct evtchn_bind_virq virq;
1258
1259 qemu_build_assert(sizeof(virq) == 12);
1260 if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
1261 err = -EFAULT;
1262 break;
1263 }
1264
1265 err = xen_evtchn_bind_virq_op(&virq);
1266 if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
1267 err = -EFAULT;
1268 }
1269 break;
1270 }
aa98ee38
DW
1271 case EVTCHNOP_bind_pirq: {
1272 struct evtchn_bind_pirq pirq;
1273
1274 qemu_build_assert(sizeof(pirq) == 12);
1275 if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
1276 err = -EFAULT;
1277 break;
1278 }
1279
1280 err = xen_evtchn_bind_pirq_op(&pirq);
1281 if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
1282 err = -EFAULT;
1283 }
1284 break;
1285 }
f5417856
DW
1286 case EVTCHNOP_bind_ipi: {
1287 struct evtchn_bind_ipi ipi;
1288
1289 qemu_build_assert(sizeof(ipi) == 8);
1290 if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
1291 err = -EFAULT;
1292 break;
1293 }
1294
1295 err = xen_evtchn_bind_ipi_op(&ipi);
1296 if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
1297 err = -EFAULT;
1298 }
1299 break;
1300 }
cf7679ab
DW
1301 case EVTCHNOP_send: {
1302 struct evtchn_send send;
1303
1304 qemu_build_assert(sizeof(send) == 4);
1305 if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
1306 err = -EFAULT;
1307 break;
1308 }
1309
1310 err = xen_evtchn_send_op(&send);
1311 break;
1312 }
e1db61b8
DW
1313 case EVTCHNOP_alloc_unbound: {
1314 struct evtchn_alloc_unbound alloc;
1315
1316 qemu_build_assert(sizeof(alloc) == 8);
1317 if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1318 err = -EFAULT;
1319 break;
1320 }
1321
1322 err = xen_evtchn_alloc_unbound_op(&alloc);
1323 if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1324 err = -EFAULT;
1325 }
1326 break;
1327 }
84327881
DW
1328 case EVTCHNOP_bind_interdomain: {
1329 struct evtchn_bind_interdomain interdomain;
1330
1331 qemu_build_assert(sizeof(interdomain) == 12);
1332 if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1333 err = -EFAULT;
1334 break;
1335 }
1336
1337 err = xen_evtchn_bind_interdomain_op(&interdomain);
1338 if (!err &&
1339 kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1340 err = -EFAULT;
1341 }
1342 break;
1343 }
30667046
DW
1344 case EVTCHNOP_bind_vcpu: {
1345 struct evtchn_bind_vcpu vcpu;
1346
1347 qemu_build_assert(sizeof(vcpu) == 8);
1348 if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
1349 err = -EFAULT;
1350 break;
1351 }
1352
1353 err = xen_evtchn_bind_vcpu_op(&vcpu);
1354 break;
1355 }
a15b1097
DW
1356 case EVTCHNOP_reset: {
1357 struct evtchn_reset reset;
1358
1359 qemu_build_assert(sizeof(reset) == 2);
1360 if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1361 err = -EFAULT;
1362 break;
1363 }
1364
1365 err = xen_evtchn_reset_op(&reset);
1366 break;
1367 }
3b06f29b
JM
1368 default:
1369 return false;
1370 }
1371
1372 exit->u.hcall.result = err;
1373 return true;
1374}
1375
79b7067d
JM
1376int kvm_xen_soft_reset(void)
1377{
c345104c 1378 CPUState *cpu;
fb0fd2ce
JM
1379 int err;
1380
79b7067d
JM
1381 assert(qemu_mutex_iothread_locked());
1382
1383 trace_kvm_xen_soft_reset();
1384
a15b1097
DW
1385 err = xen_evtchn_soft_reset();
1386 if (err) {
1387 return err;
1388 }
1389
91cce756
DW
1390 /*
1391 * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
1392 * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
1393 * to deliver to the timer interrupt and treats that as 'disabled'.
1394 */
1395 err = xen_evtchn_set_callback_param(0);
1396 if (err) {
1397 return err;
1398 }
1399
c345104c
JM
1400 CPU_FOREACH(cpu) {
1401 async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1402 }
1403
fb0fd2ce
JM
1404 err = xen_overlay_map_shinfo_page(INVALID_GFN);
1405 if (err) {
1406 return err;
1407 }
1408
de26b261
DW
1409 err = xen_gnttab_reset();
1410 if (err) {
1411 return err;
1412 }
1413
c08f5d0e
DW
1414 err = xen_xenstore_reset();
1415 if (err) {
1416 return err;
1417 }
1418
79b7067d
JM
1419 return 0;
1420}
1421
1422static int schedop_shutdown(CPUState *cs, uint64_t arg)
1423{
1424 struct sched_shutdown shutdown;
1425 int ret = 0;
1426
1427 /* No need for 32/64 compat handling */
1428 qemu_build_assert(sizeof(shutdown) == 4);
1429
1430 if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
1431 return -EFAULT;
1432 }
1433
1434 switch (shutdown.reason) {
1435 case SHUTDOWN_crash:
1436 cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
1437 qemu_system_guest_panicked(NULL);
1438 break;
1439
1440 case SHUTDOWN_reboot:
1441 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
1442 break;
1443
1444 case SHUTDOWN_poweroff:
1445 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
1446 break;
1447
1448 case SHUTDOWN_soft_reset:
1449 qemu_mutex_lock_iothread();
1450 ret = kvm_xen_soft_reset();
1451 qemu_mutex_unlock_iothread();
1452 break;
1453
1454 default:
1455 ret = -EINVAL;
1456 break;
1457 }
1458
1459 return ret;
1460}
1461
1462static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1463 int cmd, uint64_t arg)
1464{
1465 CPUState *cs = CPU(cpu);
1466 int err = -ENOSYS;
1467
1468 switch (cmd) {
1469 case SCHEDOP_shutdown:
1470 err = schedop_shutdown(cs, arg);
1471 break;
1472
c789b9ef
DW
1473 case SCHEDOP_poll:
1474 /*
1475 * Linux will panic if this doesn't work. Just yield; it's not
1476 * worth overthinking it because with event channel handling
1477 * in KVM, the kernel will intercept this and it will never
1478 * reach QEMU anyway. The semantics of the hypercall explicltly
1479 * permit spurious wakeups.
1480 */
1481 case SCHEDOP_yield:
1482 sched_yield();
1483 err = 0;
1484 break;
1485
79b7067d
JM
1486 default:
1487 return false;
1488 }
1489
1490 exit->u.hcall.result = err;
1491 return true;
1492}
1493
28b7ae94
DW
1494static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1495 int cmd, uint64_t arg, int count)
1496{
1497 CPUState *cs = CPU(cpu);
1498 int err;
1499
1500 switch (cmd) {
1501 case GNTTABOP_set_version: {
1502 struct gnttab_set_version set;
1503
1504 qemu_build_assert(sizeof(set) == 4);
1505 if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
1506 err = -EFAULT;
1507 break;
1508 }
1509
1510 err = xen_gnttab_set_version_op(&set);
1511 if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
1512 err = -EFAULT;
1513 }
1514 break;
1515 }
1516 case GNTTABOP_get_version: {
1517 struct gnttab_get_version get;
1518
1519 qemu_build_assert(sizeof(get) == 8);
1520 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1521 err = -EFAULT;
1522 break;
1523 }
1524
1525 err = xen_gnttab_get_version_op(&get);
1526 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1527 err = -EFAULT;
1528 }
1529 break;
1530 }
b46f9745
DW
1531 case GNTTABOP_query_size: {
1532 struct gnttab_query_size size;
1533
1534 qemu_build_assert(sizeof(size) == 16);
1535 if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
1536 err = -EFAULT;
1537 break;
1538 }
1539
1540 err = xen_gnttab_query_size_op(&size);
1541 if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
1542 err = -EFAULT;
1543 }
1544 break;
1545 }
28b7ae94
DW
1546 case GNTTABOP_setup_table:
1547 case GNTTABOP_copy:
1548 case GNTTABOP_map_grant_ref:
1549 case GNTTABOP_unmap_grant_ref:
1550 case GNTTABOP_swap_grant_ref:
1551 return false;
1552
1553 default:
1554 /* Xen explicitly returns -ENOSYS to HVM guests for all others */
1555 err = -ENOSYS;
1556 break;
1557 }
1558
1559 exit->u.hcall.result = err;
1560 return true;
1561}
1562
799c2354
DW
1563static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1564 int cmd, uint64_t arg)
1565{
1566 CPUState *cs = CPU(cpu);
1567 int err;
1568
1569 switch (cmd) {
1570 case PHYSDEVOP_map_pirq: {
1571 struct physdev_map_pirq map;
1572
1573 if (hypercall_compat32(exit->u.hcall.longmode)) {
1574 struct compat_physdev_map_pirq *map32 = (void *)&map;
1575
1576 if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
1577 return -EFAULT;
1578 }
1579
1580 /*
1581 * The only thing that's different is the alignment of the
1582 * uint64_t table_base at the end, which gets padding to make
1583 * it 64-bit aligned in the 64-bit version.
1584 */
1585 qemu_build_assert(sizeof(*map32) == 36);
1586 qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
1587 offsetof(struct compat_physdev_map_pirq, entry_nr));
1588 memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
1589 } else {
1590 if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
1591 err = -EFAULT;
1592 break;
1593 }
1594 }
1595 err = xen_physdev_map_pirq(&map);
1596 /*
1597 * Since table_base is an IN parameter and won't be changed, just
1598 * copy the size of the compat structure back to the guest.
1599 */
1600 if (!err && kvm_copy_to_gva(cs, arg, &map,
1601 sizeof(struct compat_physdev_map_pirq))) {
1602 err = -EFAULT;
1603 }
1604 break;
1605 }
1606 case PHYSDEVOP_unmap_pirq: {
1607 struct physdev_unmap_pirq unmap;
1608
1609 qemu_build_assert(sizeof(unmap) == 8);
1610 if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
1611 err = -EFAULT;
1612 break;
1613 }
1614
1615 err = xen_physdev_unmap_pirq(&unmap);
1616 if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
1617 err = -EFAULT;
1618 }
1619 break;
1620 }
1621 case PHYSDEVOP_eoi: {
1622 struct physdev_eoi eoi;
1623
1624 qemu_build_assert(sizeof(eoi) == 4);
1625 if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
1626 err = -EFAULT;
1627 break;
1628 }
1629
1630 err = xen_physdev_eoi_pirq(&eoi);
1631 if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
1632 err = -EFAULT;
1633 }
1634 break;
1635 }
1636 case PHYSDEVOP_irq_status_query: {
1637 struct physdev_irq_status_query query;
1638
1639 qemu_build_assert(sizeof(query) == 8);
1640 if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
1641 err = -EFAULT;
1642 break;
1643 }
1644
1645 err = xen_physdev_query_pirq(&query);
1646 if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
1647 err = -EFAULT;
1648 }
1649 break;
1650 }
1651 case PHYSDEVOP_get_free_pirq: {
1652 struct physdev_get_free_pirq get;
1653
1654 qemu_build_assert(sizeof(get) == 8);
1655 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1656 err = -EFAULT;
1657 break;
1658 }
1659
1660 err = xen_physdev_get_free_pirq(&get);
1661 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1662 err = -EFAULT;
1663 }
1664 break;
1665 }
1666 case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
1667 err = -ENOSYS;
1668 break;
1669
1670 default:
1671 return false;
1672 }
1673
1674 exit->u.hcall.result = err;
1675 return true;
1676}
1677
55a3f666
JM
1678static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1679{
1680 uint16_t code = exit->u.hcall.input;
1681
1682 if (exit->u.hcall.cpl > 0) {
1683 exit->u.hcall.result = -EPERM;
1684 return true;
1685 }
1686
1687 switch (code) {
b746a779
JM
1688 case __HYPERVISOR_set_timer_op:
1689 if (exit->u.hcall.longmode) {
1690 return kvm_xen_hcall_set_timer_op(exit, cpu,
1691 exit->u.hcall.params[0]);
1692 } else {
1693 /* In 32-bit mode, the 64-bit timer value is in two args. */
1694 uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
1695 (uint32_t)exit->u.hcall.params[0];
1696 return kvm_xen_hcall_set_timer_op(exit, cpu, val);
1697 }
28b7ae94
DW
1698 case __HYPERVISOR_grant_table_op:
1699 return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
1700 exit->u.hcall.params[1],
1701 exit->u.hcall.params[2]);
79b7067d
JM
1702 case __HYPERVISOR_sched_op:
1703 return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
1704 exit->u.hcall.params[1]);
3b06f29b 1705 case __HYPERVISOR_event_channel_op:
4858ba20 1706 return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
3b06f29b 1707 exit->u.hcall.params[1]);
d70bd6a4
JM
1708 case __HYPERVISOR_vcpu_op:
1709 return kvm_xen_hcall_vcpu_op(exit, cpu,
1710 exit->u.hcall.params[0],
1711 exit->u.hcall.params[1],
1712 exit->u.hcall.params[2]);
671bfdcd
JM
1713 case __HYPERVISOR_hvm_op:
1714 return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1715 exit->u.hcall.params[1]);
fb0fd2ce
JM
1716 case __HYPERVISOR_memory_op:
1717 return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1718 exit->u.hcall.params[1]);
799c2354
DW
1719 case __HYPERVISOR_physdev_op:
1720 return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
1721 exit->u.hcall.params[1]);
bedcc139
JM
1722 case __HYPERVISOR_xen_version:
1723 return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1724 exit->u.hcall.params[1]);
55a3f666
JM
1725 default:
1726 return false;
1727 }
1728}
1729
1730int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1731{
1732 if (exit->type != KVM_EXIT_XEN_HCALL) {
1733 return -1;
1734 }
1735
110a0ea5
DW
1736 /*
1737 * The kernel latches the guest 32/64 mode when the MSR is used to fill
1738 * the hypercall page. So if we see a hypercall in a mode that doesn't
1739 * match our own idea of the guest mode, fetch the kernel's idea of the
1740 * "long mode" to remain in sync.
1741 */
1742 if (exit->u.hcall.longmode != xen_is_long_mode()) {
1743 xen_sync_long_mode();
1744 }
1745
55a3f666
JM
1746 if (!do_kvm_xen_handle_exit(cpu, exit)) {
1747 /*
1748 * Some hypercalls will be deliberately "implemented" by returning
1749 * -ENOSYS. This case is for hypercalls which are unexpected.
1750 */
1751 exit->u.hcall.result = -ENOSYS;
1752 qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
1753 PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
1754 (uint64_t)exit->u.hcall.input,
1755 (uint64_t)exit->u.hcall.params[0],
1756 (uint64_t)exit->u.hcall.params[1],
1757 (uint64_t)exit->u.hcall.params[2]);
1758 }
1759
1760 trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
1761 exit->u.hcall.input, exit->u.hcall.params[0],
1762 exit->u.hcall.params[1], exit->u.hcall.params[2],
1763 exit->u.hcall.result);
1764 return 0;
1765}
c345104c 1766
6f43f2ee
DW
1767uint16_t kvm_xen_get_gnttab_max_frames(void)
1768{
1769 KVMState *s = KVM_STATE(current_accel());
1770 return s->xen_gnttab_max_frames;
1771}
1772
e16aff4c
DW
1773uint16_t kvm_xen_get_evtchn_max_pirq(void)
1774{
1775 KVMState *s = KVM_STATE(current_accel());
1776 return s->xen_evtchn_max_pirq;
1777}
1778
c345104c
JM
1779int kvm_put_xen_state(CPUState *cs)
1780{
1781 X86CPU *cpu = X86_CPU(cs);
1782 CPUX86State *env = &cpu->env;
1783 uint64_t gpa;
1784 int ret;
1785
1786 gpa = env->xen_vcpu_info_gpa;
1787 if (gpa == INVALID_GPA) {
1788 gpa = env->xen_vcpu_info_default_gpa;
1789 }
1790
1791 if (gpa != INVALID_GPA) {
27d4075d 1792 ret = set_vcpu_info(cs, gpa);
c345104c
JM
1793 if (ret < 0) {
1794 return ret;
1795 }
1796 }
1797
f0689302
JM
1798 gpa = env->xen_vcpu_time_info_gpa;
1799 if (gpa != INVALID_GPA) {
1800 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1801 gpa);
1802 if (ret < 0) {
1803 return ret;
1804 }
1805 }
1806
5092db87
JM
1807 gpa = env->xen_vcpu_runstate_gpa;
1808 if (gpa != INVALID_GPA) {
1809 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
1810 gpa);
1811 if (ret < 0) {
1812 return ret;
1813 }
1814 }
1815
b746a779
JM
1816 if (env->xen_periodic_timer_period) {
1817 ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
1818 if (ret < 0) {
1819 return ret;
1820 }
1821 }
1822
105b47fd 1823 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
b746a779
JM
1824 /*
1825 * If the kernel has EVTCHN_SEND support then it handles timers too,
1826 * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
1827 */
1828 if (env->xen_singleshot_timer_ns) {
1829 ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
1830 false, false);
1831 if (ret < 0) {
1832 return ret;
1833 }
1834 }
105b47fd
AA
1835 return 0;
1836 }
1837
1838 if (env->xen_vcpu_callback_vector) {
1839 ret = kvm_xen_set_vcpu_callback_vector(cs);
1840 if (ret < 0) {
1841 return ret;
1842 }
1843 }
1844
c723d4c1
DW
1845 if (env->xen_virq[VIRQ_TIMER]) {
1846 ret = kvm_xen_set_vcpu_timer(cs);
1847 if (ret < 0) {
1848 return ret;
1849 }
1850 }
c345104c
JM
1851 return 0;
1852}
1853
1854int kvm_get_xen_state(CPUState *cs)
1855{
1856 X86CPU *cpu = X86_CPU(cs);
1857 CPUX86State *env = &cpu->env;
1858 uint64_t gpa;
c723d4c1 1859 int ret;
c345104c
JM
1860
1861 /*
1862 * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1863 * to it. It's up to userspace to *assume* that any page shared thus is
1864 * always considered dirty. The shared_info page is different since it's
1865 * an overlay and migrated separately anyway.
1866 */
1867 gpa = env->xen_vcpu_info_gpa;
1868 if (gpa == INVALID_GPA) {
1869 gpa = env->xen_vcpu_info_default_gpa;
1870 }
1871 if (gpa != INVALID_GPA) {
1872 MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1873 gpa,
1874 sizeof(struct vcpu_info));
1875 if (mrs.mr &&
1876 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1877 memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1878 sizeof(struct vcpu_info));
1879 }
1880 }
1881
c723d4c1
DW
1882 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1883 return 0;
1884 }
1885
1886 /*
1887 * If the kernel is accelerating timers, read out the current value of the
1888 * singleshot timer deadline.
1889 */
1890 if (env->xen_virq[VIRQ_TIMER]) {
1891 struct kvm_xen_vcpu_attr va = {
1892 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1893 };
1894 ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1895 if (ret < 0) {
1896 return ret;
1897 }
1898 env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1899 }
1900
c345104c
JM
1901 return 0;
1902}