2 * Hyper-V guest/hypervisor interaction
4 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "exec/address-spaces.h"
15 #include "sysemu/kvm.h"
16 #include "qemu/bitops.h"
17 #include "qemu/error-report.h"
18 #include "qemu/lockable.h"
19 #include "qemu/queue.h"
21 #include "qemu/rcu_queue.h"
22 #include "hw/hyperv/hyperv.h"
23 #include "qom/object.h"
/*
 * SynIC device state.  The visible members are the QOM parent object, the
 * address at which the event-flag page is mapped into system memory, the
 * MemoryRegions backing the message and event-flag pages, and host pointers
 * to the contents of those pages (set in synic_realize()).
 * NOTE(review): ->cs, ->enabled and ->msg_page_addr are used elsewhere in the
 * file but their declarations are not visible in this excerpt.
 */
26 DeviceState parent_obj
;
32 hwaddr event_page_addr
;
33 MemoryRegion msg_page_mr
;
34 MemoryRegion event_page_mr
;
35 struct hyperv_message_page
*msg_page
;
36 struct hyperv_event_flags_page
*event_page
;
38 typedef struct SynICState SynICState
;
/*
 * QOM type name of the SynIC device, and the instance checker declaration
 * that provides the SYNIC() cast used throughout this file.
 */
40 #define TYPE_SYNIC "hyperv-synic"
41 DECLARE_INSTANCE_CHECKER(SynICState
, SYNIC
,
/* File-wide flag; set to true by hyperv_synic_add() once a SynIC is created. */
44 static bool synic_enabled
;
/*
 * Query whether SynIC is in use.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * returns synic_enabled -- confirm.
 */
46 bool hyperv_is_synic_enabled(void)
/*
 * Find the SynIC device belonging to a vcpu by resolving the "synic" child
 * of the CPU object (the name under which hyperv_synic_add() attaches it)
 * and casting the result with SYNIC().
 */
51 static SynICState
*get_synic(CPUState
*cs
)
53 return SYNIC(object_resolve_path_component(OBJECT(cs
), "synic"));
/*
 * Apply a new SynIC configuration: record the enable flag and, for each of
 * the message page and the event-flag page, remap the backing MemoryRegion
 * in system memory when the requested address differs from the current one.
 * An address of zero is treated as "not mapped": a non-zero old address is
 * unmapped first, and (as visible for the event page) a non-zero new address
 * is then mapped.
 */
56 static void synic_update(SynICState
*synic
, bool enable
,
57 hwaddr msg_page_addr
, hwaddr event_page_addr
)
60 synic
->enabled
= enable
;
/* message page: only touch the mapping if the address actually changed */
61 if (synic
->msg_page_addr
!= msg_page_addr
) {
62 if (synic
->msg_page_addr
) {
63 memory_region_del_subregion(get_system_memory(),
67 memory_region_add_subregion(get_system_memory(), msg_page_addr
,
70 synic
->msg_page_addr
= msg_page_addr
;
/* event-flag page: same procedure as for the message page */
72 if (synic
->event_page_addr
!= event_page_addr
) {
73 if (synic
->event_page_addr
) {
74 memory_region_del_subregion(get_system_memory(),
75 &synic
->event_page_mr
);
77 if (event_page_addr
) {
78 memory_region_add_subregion(get_system_memory(), event_page_addr
,
79 &synic
->event_page_mr
);
81 synic
->event_page_addr
= event_page_addr
;
/*
 * Public entry point: look up the SynIC of vcpu @cs and forward the new
 * enable state and page addresses to synic_update().
 * NOTE(review): any handling of get_synic() returning NULL is not visible in
 * this excerpt -- confirm.
 */
85 void hyperv_synic_update(CPUState
*cs
, bool enable
,
86 hwaddr msg_page_addr
, hwaddr event_page_addr
)
88 SynICState
*synic
= get_synic(cs
);
94 synic_update(synic
, enable
, msg_page_addr
, event_page_addr
);
/*
 * DeviceClass realize hook: create the RAM-backed MemoryRegions for the
 * message and event-flag pages, sized after the respective page structures,
 * and cache host pointers to their contents.  Region names embed the vcpu's
 * VP index.  Allocation failures abort (error_abort); @errp is not used in
 * the visible lines.
 * NOTE(review): msgp_name and eventp_name are heap strings from
 * g_strdup_printf(); their release is not visible in this excerpt -- confirm
 * they are freed.
 */
97 static void synic_realize(DeviceState
*dev
, Error
**errp
)
99 Object
*obj
= OBJECT(dev
);
100 SynICState
*synic
= SYNIC(dev
);
101 char *msgp_name
, *eventp_name
;
104 /* memory region names have to be globally unique */
105 vp_index
= hyperv_vp_index(synic
->cs
);
106 msgp_name
= g_strdup_printf("synic-%u-msg-page", vp_index
);
107 eventp_name
= g_strdup_printf("synic-%u-event-page", vp_index
);
109 memory_region_init_ram(&synic
->msg_page_mr
, obj
, msgp_name
,
110 sizeof(*synic
->msg_page
), &error_abort
);
111 memory_region_init_ram(&synic
->event_page_mr
, obj
, eventp_name
,
112 sizeof(*synic
->event_page
), &error_abort
);
113 synic
->msg_page
= memory_region_get_ram_ptr(&synic
->msg_page_mr
);
114 synic
->event_page
= memory_region_get_ram_ptr(&synic
->event_page_mr
);
/*
 * DeviceClass reset hook: zero the contents of both pages, then disable the
 * SynIC and unmap both pages by calling synic_update() with enable=false and
 * zero addresses.
 */
119 static void synic_reset(DeviceState
*dev
)
121 SynICState
*synic
= SYNIC(dev
);
122 memset(synic
->msg_page
, 0, sizeof(*synic
->msg_page
));
123 memset(synic
->event_page
, 0, sizeof(*synic
->event_page
));
124 synic_update(synic
, false, 0, 0);
/*
 * Class initializer: install the realize and reset hooks and mark the device
 * as not user-creatable (instances are created by hyperv_synic_add()).
 */
127 static void synic_class_init(ObjectClass
*klass
, void *data
)
129 DeviceClass
*dc
= DEVICE_CLASS(klass
);
131 dc
->realize
= synic_realize
;
132 dc
->reset
= synic_reset
;
133 dc
->user_creatable
= false;
/*
 * Create a SynIC device for vcpu @cs: instantiate TYPE_SYNIC, attach it to
 * the CPU object as the "synic" child (the name get_synic() resolves),
 * realize it (aborting on failure), and set the file-wide synic_enabled flag.
 * NOTE(review): the local declarations and the assignment of the device's
 * ->cs back-pointer (read by synic_realize()) are not visible in this
 * excerpt.
 */
136 void hyperv_synic_add(CPUState
*cs
)
141 obj
= object_new(TYPE_SYNIC
);
144 object_property_add_child(OBJECT(cs
), "synic", obj
);
146 qdev_realize(DEVICE(obj
), NULL
, &error_abort
);
147 synic_enabled
= true;
/*
 * Reset the SynIC device of vcpu @cs via device_legacy_reset(), which ends up
 * in synic_reset() through the DeviceClass reset hook installed in
 * synic_class_init().
 * NOTE(review): any guard against get_synic() returning NULL is not visible
 * in this excerpt -- confirm.
 */
150 void hyperv_synic_reset(CPUState
*cs
)
152 SynICState
*synic
= get_synic(cs
);
155 device_legacy_reset(DEVICE(synic
));
/*
 * QOM type description for the SynIC device: a TYPE_DEVICE subclass whose
 * instances are SynICState-sized and whose class is set up by
 * synic_class_init().
 */
159 static const TypeInfo synic_type_info
= {
161 .parent
= TYPE_DEVICE
,
162 .instance_size
= sizeof(SynICState
),
163 .class_init
= synic_class_init
,
/* Register the SynIC type with QOM; hooked up through type_init() below. */
166 static void synic_register_types(void)
168 type_register_static(&synic_type_info
);
171 type_init(synic_register_types
)
174 * KVM has its own message producers (SynIC timers). To guarantee
175 * serialization with both KVM vcpu and the guest cpu, the messages are first
176 * staged in an intermediate area and then posted to the SynIC message page in
 * vcpu context.
/*
 * Staging area for one in-flight message on a SINT route.  It holds the
 * message copy, the completion callback with its data, the posting status,
 * and a state that is handed over between hyperv_post_msg(), cpu_post_msg()
 * and sint_msg_bh() (FREE -> BUSY -> POSTED -> FREE), as the per-state
 * comments below describe.
 * NOTE(review): the declarations of cb, cb_data, status, state and of the
 * FREE/BUSY enumerators are not visible in this excerpt.
 */
179 typedef struct HvSintStagedMessage
{
180 /* message content staged by hyperv_post_msg */
181 struct hyperv_message msg
;
182 /* callback + data (r/o) to complete the processing in a BH */
185 /* message posting status filled by cpu_post_msg */
187 /* passing the buck: */
192 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
193 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
197 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
198 * notify the guest, records the status, marks the posting done (BUSY
199 * -> POSTED), and schedules sint_msg_bh BH
201 HV_STAGED_MSG_POSTED
,
203 * sint_msg_bh (BH) verifies that the posting is done, runs the
204 * callback, and starts over (POSTED -> FREE)
207 } HvSintStagedMessage
;
/*
 * Visible members of the SINT route object:
 *  - sint_set_notifier: signalled by hyperv_sint_route_set_sint() to raise
 *    the SINT;
 *  - sint_ack_notifier: handled by sint_ack_handler();
 *  - staged_msg: message staging area, allocated in hyperv_sint_route_new().
 * NOTE(review): the struct header and the sint, synic, gsi and refcount
 * members used elsewhere in the file are not visible in this excerpt.
 */
213 EventNotifier sint_set_notifier
;
214 EventNotifier sint_ack_notifier
;
216 HvSintStagedMessage
*staged_msg
;
/*
 * Map a Hyper-V VP index to a CPUState.  The VP index is passed directly to
 * qemu_get_cpu(), and the assertion checks that hyperv_vp_index() of the
 * result equals @vp_index.
 * NOTE(review): the assertion passes cs to hyperv_vp_index() without a
 * visible NULL check -- confirm qemu_get_cpu() cannot return NULL here or
 * that hyperv_vp_index() tolerates it.  The return statement is not visible
 * in this excerpt.
 */
221 static CPUState
*hyperv_find_vcpu(uint32_t vp_index
)
223 CPUState
*cs
= qemu_get_cpu(vp_index
);
224 assert(hyperv_vp_index(cs
) == vp_index
);
229 * BH to complete the processing of a staged message.
/*
 * Bottom half finishing a staged message; @opaque is the HvSintRoute.
 * If the staging area has reached the POSTED state, the originator's
 * callback is invoked with the recorded status, the status is cleared, the
 * state returns to FREE, and the route reference taken in hyperv_post_msg()
 * is dropped.
 */
231 static void sint_msg_bh(void *opaque
)
233 HvSintRoute
*sint_route
= opaque
;
234 HvSintStagedMessage
*staged_msg
= sint_route
->staged_msg
;
236 if (atomic_read(&staged_msg
->state
) != HV_STAGED_MSG_POSTED
) {
237 /* status not ready yet (spurious ack from guest?), ignore */
241 staged_msg
->cb(staged_msg
->cb_data
, staged_msg
->status
);
242 staged_msg
->status
= 0;
244 /* staged message processing finished, ready to start over */
245 atomic_set(&staged_msg
->state
, HV_STAGED_MSG_FREE
);
246 /* drop the reference taken in hyperv_post_msg */
247 hyperv_sint_route_unref(sint_route
);
251 * Worker to transfer the message from the staging area into the SynIC message
252 * page in vcpu context.
/*
 * run_on_cpu worker scheduled by hyperv_post_msg(); @data carries the
 * HvSintRoute as a host pointer.  It expects the staging area to be BUSY,
 * then:
 *  - records -ENXIO if the SynIC is disabled or its message page has no
 *    address;
 *  - if the slot for this SINT is occupied, sets the PENDING flag in the
 *    slot, records -EAGAIN and defers completion until the guest's ack;
 *  - otherwise copies the staged message into the slot and signals the SINT,
 *    recording the result of hyperv_sint_route_set_sint().
 * The message page is marked dirty, the state moves to POSTED, and unless an
 * ack is awaited the completion BH is scheduled.
 * NOTE(review): the branch structure (else/goto) between these steps is not
 * visible in this excerpt.
 */
254 static void cpu_post_msg(CPUState
*cs
, run_on_cpu_data data
)
256 HvSintRoute
*sint_route
= data
.host_ptr
;
257 HvSintStagedMessage
*staged_msg
= sint_route
->staged_msg
;
258 SynICState
*synic
= sint_route
->synic
;
259 struct hyperv_message
*dst_msg
;
260 bool wait_for_sint_ack
= false;
262 assert(staged_msg
->state
== HV_STAGED_MSG_BUSY
);
264 if (!synic
->enabled
|| !synic
->msg_page_addr
) {
265 staged_msg
->status
= -ENXIO
;
/* each SINT has its own slot in the message page */
269 dst_msg
= &synic
->msg_page
->slot
[sint_route
->sint
];
271 if (dst_msg
->header
.message_type
!= HV_MESSAGE_NONE
) {
272 dst_msg
->header
.message_flags
|= HV_MESSAGE_FLAG_PENDING
;
273 staged_msg
->status
= -EAGAIN
;
274 wait_for_sint_ack
= true;
276 memcpy(dst_msg
, &staged_msg
->msg
, sizeof(*dst_msg
));
277 staged_msg
->status
= hyperv_sint_route_set_sint(sint_route
);
280 memory_region_set_dirty(&synic
->msg_page_mr
, 0, sizeof(*synic
->msg_page
));
283 atomic_set(&staged_msg
->state
, HV_STAGED_MSG_POSTED
);
285 * Notify the msg originator of the progress made; if the slot was busy we
286 * set msg_pending flag in it so it will be the guest who will do EOM and
287 * trigger the notification from KVM via sint_ack_notifier
289 if (!wait_for_sint_ack
) {
290 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh
,
296 * Post a Hyper-V message to the staging area, for delivery to guest in the
/*
 * Stage @src_msg for delivery on @sint_route.  The staging area is claimed
 * with a compare-and-swap FREE -> BUSY; on success the message is copied in,
 * a route reference is taken (dropped again in sint_msg_bh()), and
 * cpu_post_msg() is queued on the vcpu owning the SynIC.
 * NOTE(review): the return values, including the one used when the staging
 * area is not FREE, are not visible in this excerpt.
 */
299 int hyperv_post_msg(HvSintRoute
*sint_route
, struct hyperv_message
*src_msg
)
301 HvSintStagedMessage
*staged_msg
= sint_route
->staged_msg
;
305 /* grab the staging area */
306 if (atomic_cmpxchg(&staged_msg
->state
, HV_STAGED_MSG_FREE
,
307 HV_STAGED_MSG_BUSY
) != HV_STAGED_MSG_FREE
) {
311 memcpy(&staged_msg
->msg
, src_msg
, sizeof(*src_msg
));
313 /* hold a reference on sint_route until the callback is finished */
314 hyperv_sint_route_ref(sint_route
);
316 /* schedule message posting attempt in vcpu thread */
317 async_run_on_cpu(sint_route
->synic
->cs
, cpu_post_msg
,
318 RUN_ON_CPU_HOST_PTR(sint_route
));
/*
 * Handler installed on a route's sint_ack_notifier (see
 * hyperv_sint_route_new()).  It recovers the owning HvSintRoute from the
 * notifier, clears the notifier, and schedules sint_msg_bh() to complete the
 * staged message.
 */
322 static void sint_ack_handler(EventNotifier
*notifier
)
324 HvSintRoute
*sint_route
= container_of(notifier
, HvSintRoute
,
326 event_notifier_test_and_clear(notifier
);
329 * the guest consumed the previous message so complete the current one with
330 * -EAGAIN and let the msg originator retry
332 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh
, sint_route
);
336 * Set given event flag for a given sint on a given vcpu, and signal the sint.
/*
 * Set event flag @eventno in the event-flag page slot of the route's SINT.
 * The flag is set with an atomic fetch-or; only when the bit was not already
 * set is the event page marked dirty and the SINT signalled through
 * hyperv_sint_route_set_sint().
 * The request is rejected when @eventno is out of range or when the SynIC is
 * disabled or has no event page address.
 * NOTE(review): the error codes returned by the two guards and the return
 * value for an already-set flag are not visible in this excerpt.
 */
338 int hyperv_set_event_flag(HvSintRoute
*sint_route
, unsigned eventno
)
341 SynICState
*synic
= sint_route
->synic
;
342 unsigned long *flags
, set_mask
;
/*
 * NOTE(review): eventno == HV_EVENT_FLAGS_COUNT passes this check.  If valid
 * flag numbers are 0..HV_EVENT_FLAGS_COUNT-1 this is an off-by-one that
 * allows an access one bit past the flags array; the comparison should then
 * be ">=" -- confirm against the size of the flags array.
 */
345 if (eventno
> HV_EVENT_FLAGS_COUNT
) {
348 if (!synic
->enabled
|| !synic
->event_page_addr
) {
/* locate the word and bit of the flag within this SINT's flags array */
352 set_idx
= BIT_WORD(eventno
);
353 set_mask
= BIT_MASK(eventno
);
354 flags
= synic
->event_page
->slot
[sint_route
->sint
].flags
;
356 if ((atomic_fetch_or(&flags
[set_idx
], set_mask
) & set_mask
) != set_mask
) {
357 memory_region_set_dirty(&synic
->event_page_mr
, 0,
358 sizeof(*synic
->event_page
));
359 ret
= hyperv_sint_route_set_sint(sint_route
);
/*
 * Create a route for SINT @sint on the vcpu with VP index @vp_index.
 * Visible steps:
 *  1. find the vcpu and its SynIC;
 *  2. allocate the zeroed route and initialise sint_set_notifier;
 *  3. when a callback @cb is given, use sint_ack_notifier as the ack
 *     notifier (NULL otherwise), allocate the staging area holding
 *     @cb/@cb_data, initialise the ack notifier and attach
 *     sint_ack_handler() to it;
 *  4. obtain a GSI with kvm_irqchip_add_hv_sint_route() and bind the
 *     notifier(s) to it as an irqfd;
 *  5. fill in gsi/synic/sint and start with a reference count of 1.
 * The tail of the function is the error unwind, which releases resources in
 * reverse order of acquisition.
 * NOTE(review): the failure checks, most unwind labels, the condition
 * guarding step 3 and the return statements are not visible in this excerpt.
 */
366 HvSintRoute
*hyperv_sint_route_new(uint32_t vp_index
, uint32_t sint
,
367 HvSintMsgCb cb
, void *cb_data
)
369 HvSintRoute
*sint_route
;
370 EventNotifier
*ack_notifier
;
375 cs
= hyperv_find_vcpu(vp_index
);
380 synic
= get_synic(cs
);
385 sint_route
= g_new0(HvSintRoute
, 1);
386 r
= event_notifier_init(&sint_route
->sint_set_notifier
, false);
/* the ack notifier and the staging area are only needed with a callback */
392 ack_notifier
= cb
? &sint_route
->sint_ack_notifier
: NULL
;
394 sint_route
->staged_msg
= g_new0(HvSintStagedMessage
, 1);
395 sint_route
->staged_msg
->cb
= cb
;
396 sint_route
->staged_msg
->cb_data
= cb_data
;
398 r
= event_notifier_init(ack_notifier
, false);
400 goto err_sint_set_notifier
;
403 event_notifier_set_handler(ack_notifier
, sint_ack_handler
);
406 gsi
= kvm_irqchip_add_hv_sint_route(kvm_state
, vp_index
, sint
);
411 r
= kvm_irqchip_add_irqfd_notifier_gsi(kvm_state
,
412 &sint_route
->sint_set_notifier
,
417 sint_route
->gsi
= gsi
;
418 sint_route
->synic
= synic
;
419 sint_route
->sint
= sint
;
420 sint_route
->refcount
= 1;
/* error unwind */
425 kvm_irqchip_release_virq(kvm_state
, gsi
);
428 event_notifier_set_handler(ack_notifier
, NULL
);
429 event_notifier_cleanup(ack_notifier
);
430 g_free(sint_route
->staged_msg
);
432 err_sint_set_notifier
:
433 event_notifier_cleanup(&sint_route
->sint_set_notifier
);
/*
 * Take an additional reference on @sint_route.
 * NOTE(review): the counter is incremented with a plain (non-atomic) "++";
 * presumably all ref/unref callers are serialized -- confirm.
 */
440 void hyperv_sint_route_ref(HvSintRoute
*sint_route
)
442 sint_route
->refcount
++;
/*
 * Drop a reference on @sint_route.  The teardown that follows the
 * "--refcount" test mirrors hyperv_sint_route_new(): it unbinds the irqfd,
 * releases the GSI, tears down the ack notifier and staging area if a
 * staging area exists, and cleans up sint_set_notifier.
 * NOTE(review): the early-return statements (presumably for a NULL route and
 * for a still-positive count) and the final free of the route are not
 * visible in this excerpt.
 */
445 void hyperv_sint_route_unref(HvSintRoute
*sint_route
)
451 assert(sint_route
->refcount
> 0);
453 if (--sint_route
->refcount
) {
457 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state
,
458 &sint_route
->sint_set_notifier
,
460 kvm_irqchip_release_virq(kvm_state
, sint_route
->gsi
);
/* staged_msg is only allocated for routes created with a callback */
461 if (sint_route
->staged_msg
) {
462 event_notifier_set_handler(&sint_route
->sint_ack_notifier
, NULL
);
463 event_notifier_cleanup(&sint_route
->sint_ack_notifier
);
464 g_free(sint_route
->staged_msg
);
466 event_notifier_cleanup(&sint_route
->sint_set_notifier
);
/*
 * Signal the route's SINT by setting sint_set_notifier, the notifier bound
 * to the route's GSI in hyperv_sint_route_new().  Returns the result of
 * event_notifier_set().
 */
470 int hyperv_sint_route_set_sint(HvSintRoute
*sint_route
)
472 return event_notifier_set(&sint_route
->sint_set_notifier
);
/*
 * Entry of the msg_handlers list: a message handler callback registered for
 * a connection id.
 * NOTE(review): the conn_id and data members used elsewhere in the file are
 * not visible in this excerpt.
 */
475 typedef struct MsgHandler
{
477 QLIST_ENTRY(MsgHandler
) link
;
479 HvMsgHandler handler
;
/*
 * Entry of the event_flag_handlers list: the EventNotifier to set when the
 * guest signals the associated connection id.
 * NOTE(review): the conn_id and rcu members used elsewhere in the file are
 * not visible in this excerpt.
 */
483 typedef struct EventFlagHandler
{
485 QLIST_ENTRY(EventFlagHandler
) link
;
487 EventNotifier
*notifier
;
/*
 * Registered handlers.  Both lists are modified under handlers_mutex using
 * the RCU list operations and are traversed by the hypercall paths under the
 * RCU read lock.
 */
490 static QLIST_HEAD(, MsgHandler
) msg_handlers
;
491 static QLIST_HEAD(, EventFlagHandler
) event_flag_handlers
;
492 static QemuMutex handlers_mutex
;
/*
 * Constructor (runs before main()): initialise both handler lists and the
 * mutex that protects updates to them.
 */
494 static void __attribute__((constructor
)) hv_init(void)
496 QLIST_INIT(&msg_handlers
);
497 QLIST_INIT(&event_flag_handlers
);
498 qemu_mutex_init(&handlers_mutex
);
/*
 * Register or unregister the message handler for connection @conn_id.
 * With handlers_mutex held, the list is searched for an existing entry with
 * that id; the visible lines show such an entry being unlinked with
 * QLIST_REMOVE_RCU, and a newly allocated entry being inserted at the list
 * head.
 * NOTE(review): not visible in this excerpt and to be confirmed: the
 * conditions choosing between removal and insertion, the return values, the
 * RCU-deferred free of a removed entry, and the assignment of the new
 * entry's ->data (which hyperv_hcall_post_message() passes to the handler).
 */
501 int hyperv_set_msg_handler(uint32_t conn_id
, HvMsgHandler handler
, void *data
)
506 QEMU_LOCK_GUARD(&handlers_mutex
);
507 QLIST_FOREACH(mh
, &msg_handlers
, link
) {
508 if (mh
->conn_id
== conn_id
) {
512 QLIST_REMOVE_RCU(mh
, link
);
521 mh
= g_new(MsgHandler
, 1);
522 mh
->conn_id
= conn_id
;
523 mh
->handler
= handler
;
525 QLIST_INSERT_HEAD_RCU(&msg_handlers
, mh
, link
);
/*
 * Handle a post-message hypercall whose input block is at guest-physical
 * address @param.  Visible checks and their status codes:
 *  - HV_STATUS_INVALID_HYPERCALL_CODE (condition not visible; presumably the
 *    "fast" variant -- confirm);
 *  - @param not aligned for the input structure -> HV_STATUS_INVALID_ALIGNMENT;
 *  - fewer than sizeof(*msg) bytes mapped -> HV_STATUS_INSUFFICIENT_MEMORY;
 *  - payload_size larger than the payload buffer ->
 *    HV_STATUS_INVALID_HYPERCALL_INPUT.
 * The handler registered for the masked connection id is then looked up
 * under the RCU read lock and its return value is used as the status;
 * without a match the status stays HV_STATUS_INVALID_CONNECTION_ID.
 * The mapping is released before returning.
 * NOTE(review): the initial value of len, the goto/return statements and any
 * NULL check on the mapped pointer are not visible in this excerpt.
 */
534 uint16_t hyperv_hcall_post_message(uint64_t param
, bool fast
)
538 struct hyperv_post_message_input
*msg
;
542 return HV_STATUS_INVALID_HYPERCALL_CODE
;
544 if (param
& (__alignof__(*msg
) - 1)) {
545 return HV_STATUS_INVALID_ALIGNMENT
;
549 msg
= cpu_physical_memory_map(param
, &len
, 0);
550 if (len
< sizeof(*msg
)) {
551 ret
= HV_STATUS_INSUFFICIENT_MEMORY
;
554 if (msg
->payload_size
> sizeof(msg
->payload
)) {
555 ret
= HV_STATUS_INVALID_HYPERCALL_INPUT
;
/* default result if no handler matches the connection id */
559 ret
= HV_STATUS_INVALID_CONNECTION_ID
;
560 WITH_RCU_READ_LOCK_GUARD() {
561 QLIST_FOREACH_RCU(mh
, &msg_handlers
, link
) {
562 if (mh
->conn_id
== (msg
->connection_id
& HV_CONNECTION_ID_MASK
)) {
563 ret
= mh
->handler(msg
, mh
->data
);
570 cpu_physical_memory_unmap(msg
, len
, 0, 0);
/*
 * Userspace bookkeeping of event-flag handlers, used by
 * hyperv_set_event_flag_handler().  With handlers_mutex held, the list is
 * searched for an entry with @conn_id; the visible lines show such an entry
 * being unlinked and freed after an RCU grace period (g_free_rcu), and a new
 * entry carrying @notifier being inserted at the list head.
 * NOTE(review): the conditions choosing between removal and insertion and
 * the return values are not visible in this excerpt.
 */
574 static int set_event_flag_handler(uint32_t conn_id
, EventNotifier
*notifier
)
577 EventFlagHandler
*handler
;
579 QEMU_LOCK_GUARD(&handlers_mutex
);
580 QLIST_FOREACH(handler
, &event_flag_handlers
, link
) {
581 if (handler
->conn_id
== conn_id
) {
585 QLIST_REMOVE_RCU(handler
, link
);
586 g_free_rcu(handler
, rcu
);
594 handler
= g_new(EventFlagHandler
, 1);
595 handler
->conn_id
= conn_id
;
596 handler
->notifier
= notifier
;
597 QLIST_INSERT_HEAD_RCU(&event_flag_handlers
, handler
, link
);
/* Latched to true once KVM is found to lack KVM_CAP_HYPERV_EVENTFD. */
606 static bool process_event_flags_userspace
;
/*
 * Attach @notifier to connection @conn_id.
 * On the first call on a kernel without KVM_CAP_HYPERV_EVENTFD, a warning is
 * reported and the file switches to userspace processing.  When the
 * capability is available, the request goes to KVM through the
 * KVM_HYPERV_EVENTFD ioctl: a non-NULL notifier passes its fd with flags 0,
 * a NULL notifier passes fd -1 with KVM_HYPERV_EVENTFD_DEASSIGN.  Otherwise
 * the request is recorded by set_event_flag_handler().
 * Returns the result of the ioctl or of set_event_flag_handler().
 */
608 int hyperv_set_event_flag_handler(uint32_t conn_id
, EventNotifier
*notifier
)
610 if (!process_event_flags_userspace
&&
611 !kvm_check_extension(kvm_state
, KVM_CAP_HYPERV_EVENTFD
)) {
612 process_event_flags_userspace
= true;
614 warn_report("Hyper-V event signaling is not supported by this kernel; "
615 "using slower userspace hypercall processing");
618 if (!process_event_flags_userspace
) {
619 struct kvm_hyperv_eventfd hvevfd
= {
621 .fd
= notifier
? event_notifier_get_fd(notifier
) : -1,
622 .flags
= notifier
? 0 : KVM_HYPERV_EVENTFD_DEASSIGN
,
625 return kvm_vm_ioctl(kvm_state
, KVM_HYPERV_EVENTFD
, &hvevfd
);
627 return set_event_flag_handler(conn_id
, notifier
);
/*
 * Handle a signal-event hypercall in userspace.
 * For the non-fast variant the parameter value is first loaded from guest
 * memory with ldq_phys() after an alignment check
 * (HV_STATUS_INVALID_ALIGNMENT on failure).  The value is then validated:
 * non-zero bits 32-47 yield HV_STATUS_INVALID_PORT_ID and any other bit
 * outside HV_CONNECTION_ID_MASK yields HV_STATUS_INVALID_HYPERCALL_INPUT.
 * Finally the handler list is searched under the RCU read lock for a
 * matching connection id and its notifier is set; if nothing matches,
 * HV_STATUS_INVALID_CONNECTION_ID is returned.
 * NOTE(review): the declaration/assignment of addr and the return statement
 * taken after a successful match are not visible in this excerpt.
 */
630 uint16_t hyperv_hcall_signal_event(uint64_t param
, bool fast
)
632 EventFlagHandler
*handler
;
634 if (unlikely(!fast
)) {
637 if (addr
& (__alignof__(addr
) - 1)) {
638 return HV_STATUS_INVALID_ALIGNMENT
;
641 param
= ldq_phys(&address_space_memory
, addr
);
645 * Per spec, bits 32-47 contain the extra "flag number". However, we
646 * have no use for it, and in all known usecases it is zero, so just
647 * report lookup failure if it isn't.
649 if (param
& 0xffff00000000ULL
) {
650 return HV_STATUS_INVALID_PORT_ID
;
652 /* remaining bits are reserved-zero */
653 if (param
& ~HV_CONNECTION_ID_MASK
) {
654 return HV_STATUS_INVALID_HYPERCALL_INPUT
;
657 RCU_READ_LOCK_GUARD();
658 QLIST_FOREACH_RCU(handler
, &event_flag_handlers
, link
) {
659 if (handler
->conn_id
== param
) {
660 event_notifier_set(handler
->notifier
);
664 return HV_STATUS_INVALID_CONNECTION_ID
;