]> git.proxmox.com Git - mirror_qemu.git/blob - hw/hyperv/hyperv.c
Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2020-05-05' into staging
[mirror_qemu.git] / hw / hyperv / hyperv.c
1 /*
2 * Hyper-V guest/hypervisor interaction
3 *
4 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "exec/address-spaces.h"
15 #include "sysemu/kvm.h"
16 #include "qemu/bitops.h"
17 #include "qemu/error-report.h"
18 #include "qemu/lockable.h"
19 #include "qemu/queue.h"
20 #include "qemu/rcu.h"
21 #include "qemu/rcu_queue.h"
22 #include "hw/hyperv/hyperv.h"
23
24 typedef struct SynICState {
25 DeviceState parent_obj;
26
27 CPUState *cs;
28
29 bool enabled;
30 hwaddr msg_page_addr;
31 hwaddr event_page_addr;
32 MemoryRegion msg_page_mr;
33 MemoryRegion event_page_mr;
34 struct hyperv_message_page *msg_page;
35 struct hyperv_event_flags_page *event_page;
36 } SynICState;
37
38 #define TYPE_SYNIC "hyperv-synic"
39 #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
40
41 static SynICState *get_synic(CPUState *cs)
42 {
43 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
44 }
45
46 static void synic_update(SynICState *synic, bool enable,
47 hwaddr msg_page_addr, hwaddr event_page_addr)
48 {
49
50 synic->enabled = enable;
51 if (synic->msg_page_addr != msg_page_addr) {
52 if (synic->msg_page_addr) {
53 memory_region_del_subregion(get_system_memory(),
54 &synic->msg_page_mr);
55 }
56 if (msg_page_addr) {
57 memory_region_add_subregion(get_system_memory(), msg_page_addr,
58 &synic->msg_page_mr);
59 }
60 synic->msg_page_addr = msg_page_addr;
61 }
62 if (synic->event_page_addr != event_page_addr) {
63 if (synic->event_page_addr) {
64 memory_region_del_subregion(get_system_memory(),
65 &synic->event_page_mr);
66 }
67 if (event_page_addr) {
68 memory_region_add_subregion(get_system_memory(), event_page_addr,
69 &synic->event_page_mr);
70 }
71 synic->event_page_addr = event_page_addr;
72 }
73 }
74
75 void hyperv_synic_update(CPUState *cs, bool enable,
76 hwaddr msg_page_addr, hwaddr event_page_addr)
77 {
78 SynICState *synic = get_synic(cs);
79
80 if (!synic) {
81 return;
82 }
83
84 synic_update(synic, enable, msg_page_addr, event_page_addr);
85 }
86
87 static void synic_realize(DeviceState *dev, Error **errp)
88 {
89 Object *obj = OBJECT(dev);
90 SynICState *synic = SYNIC(dev);
91 char *msgp_name, *eventp_name;
92 uint32_t vp_index;
93
94 /* memory region names have to be globally unique */
95 vp_index = hyperv_vp_index(synic->cs);
96 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
97 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
98
99 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
100 sizeof(*synic->msg_page), &error_abort);
101 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
102 sizeof(*synic->event_page), &error_abort);
103 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
104 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
105
106 g_free(msgp_name);
107 g_free(eventp_name);
108 }
109 static void synic_reset(DeviceState *dev)
110 {
111 SynICState *synic = SYNIC(dev);
112 memset(synic->msg_page, 0, sizeof(*synic->msg_page));
113 memset(synic->event_page, 0, sizeof(*synic->event_page));
114 synic_update(synic, false, 0, 0);
115 }
116
117 static void synic_class_init(ObjectClass *klass, void *data)
118 {
119 DeviceClass *dc = DEVICE_CLASS(klass);
120
121 dc->realize = synic_realize;
122 dc->reset = synic_reset;
123 dc->user_creatable = false;
124 }
125
126 void hyperv_synic_add(CPUState *cs)
127 {
128 Object *obj;
129 SynICState *synic;
130
131 obj = object_new(TYPE_SYNIC);
132 synic = SYNIC(obj);
133 synic->cs = cs;
134 object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
135 object_unref(obj);
136 object_property_set_bool(obj, true, "realized", &error_abort);
137 }
138
139 void hyperv_synic_reset(CPUState *cs)
140 {
141 SynICState *synic = get_synic(cs);
142
143 if (synic) {
144 device_legacy_reset(DEVICE(synic));
145 }
146 }
147
148 static const TypeInfo synic_type_info = {
149 .name = TYPE_SYNIC,
150 .parent = TYPE_DEVICE,
151 .instance_size = sizeof(SynICState),
152 .class_init = synic_class_init,
153 };
154
155 static void synic_register_types(void)
156 {
157 type_register_static(&synic_type_info);
158 }
159
160 type_init(synic_register_types)
161
162 /*
163 * KVM has its own message producers (SynIC timers). To guarantee
164 * serialization with both KVM vcpu and the guest cpu, the messages are first
165 * staged in an intermediate area and then posted to the SynIC message page in
166 * the vcpu thread.
167 */
168 typedef struct HvSintStagedMessage {
169 /* message content staged by hyperv_post_msg */
170 struct hyperv_message msg;
171 /* callback + data (r/o) to complete the processing in a BH */
172 HvSintMsgCb cb;
173 void *cb_data;
174 /* message posting status filled by cpu_post_msg */
175 int status;
176 /* passing the buck: */
177 enum {
178 /* initial state */
179 HV_STAGED_MSG_FREE,
180 /*
181 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
182 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
183 */
184 HV_STAGED_MSG_BUSY,
185 /*
186 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
187 * notify the guest, records the status, marks the posting done (BUSY
188 * -> POSTED), and schedules sint_msg_bh BH
189 */
190 HV_STAGED_MSG_POSTED,
191 /*
192 * sint_msg_bh (BH) verifies that the posting is done, runs the
193 * callback, and starts over (POSTED -> FREE)
194 */
195 } state;
196 } HvSintStagedMessage;
197
198 struct HvSintRoute {
199 uint32_t sint;
200 SynICState *synic;
201 int gsi;
202 EventNotifier sint_set_notifier;
203 EventNotifier sint_ack_notifier;
204
205 HvSintStagedMessage *staged_msg;
206
207 unsigned refcount;
208 };
209
210 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
211 {
212 CPUState *cs = qemu_get_cpu(vp_index);
213 assert(hyperv_vp_index(cs) == vp_index);
214 return cs;
215 }
216
217 /*
218 * BH to complete the processing of a staged message.
219 */
220 static void sint_msg_bh(void *opaque)
221 {
222 HvSintRoute *sint_route = opaque;
223 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
224
225 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
226 /* status nor ready yet (spurious ack from guest?), ignore */
227 return;
228 }
229
230 staged_msg->cb(staged_msg->cb_data, staged_msg->status);
231 staged_msg->status = 0;
232
233 /* staged message processing finished, ready to start over */
234 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
235 /* drop the reference taken in hyperv_post_msg */
236 hyperv_sint_route_unref(sint_route);
237 }
238
239 /*
240 * Worker to transfer the message from the staging area into the SynIC message
241 * page in vcpu context.
242 */
243 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
244 {
245 HvSintRoute *sint_route = data.host_ptr;
246 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
247 SynICState *synic = sint_route->synic;
248 struct hyperv_message *dst_msg;
249 bool wait_for_sint_ack = false;
250
251 assert(staged_msg->state == HV_STAGED_MSG_BUSY);
252
253 if (!synic->enabled || !synic->msg_page_addr) {
254 staged_msg->status = -ENXIO;
255 goto posted;
256 }
257
258 dst_msg = &synic->msg_page->slot[sint_route->sint];
259
260 if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
261 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
262 staged_msg->status = -EAGAIN;
263 wait_for_sint_ack = true;
264 } else {
265 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
266 staged_msg->status = hyperv_sint_route_set_sint(sint_route);
267 }
268
269 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
270
271 posted:
272 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
273 /*
274 * Notify the msg originator of the progress made; if the slot was busy we
275 * set msg_pending flag in it so it will be the guest who will do EOM and
276 * trigger the notification from KVM via sint_ack_notifier
277 */
278 if (!wait_for_sint_ack) {
279 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
280 sint_route);
281 }
282 }
283
284 /*
285 * Post a Hyper-V message to the staging area, for delivery to guest in the
286 * vcpu thread.
287 */
288 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
289 {
290 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
291
292 assert(staged_msg);
293
294 /* grab the staging area */
295 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
296 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
297 return -EAGAIN;
298 }
299
300 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
301
302 /* hold a reference on sint_route until the callback is finished */
303 hyperv_sint_route_ref(sint_route);
304
305 /* schedule message posting attempt in vcpu thread */
306 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
307 RUN_ON_CPU_HOST_PTR(sint_route));
308 return 0;
309 }
310
311 static void sint_ack_handler(EventNotifier *notifier)
312 {
313 HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
314 sint_ack_notifier);
315 event_notifier_test_and_clear(notifier);
316
317 /*
318 * the guest consumed the previous message so complete the current one with
319 * -EAGAIN and let the msg originator retry
320 */
321 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
322 }
323
324 /*
325 * Set given event flag for a given sint on a given vcpu, and signal the sint.
326 */
327 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
328 {
329 int ret;
330 SynICState *synic = sint_route->synic;
331 unsigned long *flags, set_mask;
332 unsigned set_idx;
333
334 if (eventno > HV_EVENT_FLAGS_COUNT) {
335 return -EINVAL;
336 }
337 if (!synic->enabled || !synic->event_page_addr) {
338 return -ENXIO;
339 }
340
341 set_idx = BIT_WORD(eventno);
342 set_mask = BIT_MASK(eventno);
343 flags = synic->event_page->slot[sint_route->sint].flags;
344
345 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
346 memory_region_set_dirty(&synic->event_page_mr, 0,
347 sizeof(*synic->event_page));
348 ret = hyperv_sint_route_set_sint(sint_route);
349 } else {
350 ret = 0;
351 }
352 return ret;
353 }
354
355 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
356 HvSintMsgCb cb, void *cb_data)
357 {
358 HvSintRoute *sint_route;
359 EventNotifier *ack_notifier;
360 int r, gsi;
361 CPUState *cs;
362 SynICState *synic;
363
364 cs = hyperv_find_vcpu(vp_index);
365 if (!cs) {
366 return NULL;
367 }
368
369 synic = get_synic(cs);
370 if (!synic) {
371 return NULL;
372 }
373
374 sint_route = g_new0(HvSintRoute, 1);
375 r = event_notifier_init(&sint_route->sint_set_notifier, false);
376 if (r) {
377 goto err;
378 }
379
380
381 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
382 if (ack_notifier) {
383 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
384 sint_route->staged_msg->cb = cb;
385 sint_route->staged_msg->cb_data = cb_data;
386
387 r = event_notifier_init(ack_notifier, false);
388 if (r) {
389 goto err_sint_set_notifier;
390 }
391
392 event_notifier_set_handler(ack_notifier, sint_ack_handler);
393 }
394
395 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
396 if (gsi < 0) {
397 goto err_gsi;
398 }
399
400 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
401 &sint_route->sint_set_notifier,
402 ack_notifier, gsi);
403 if (r) {
404 goto err_irqfd;
405 }
406 sint_route->gsi = gsi;
407 sint_route->synic = synic;
408 sint_route->sint = sint;
409 sint_route->refcount = 1;
410
411 return sint_route;
412
413 err_irqfd:
414 kvm_irqchip_release_virq(kvm_state, gsi);
415 err_gsi:
416 if (ack_notifier) {
417 event_notifier_set_handler(ack_notifier, NULL);
418 event_notifier_cleanup(ack_notifier);
419 g_free(sint_route->staged_msg);
420 }
421 err_sint_set_notifier:
422 event_notifier_cleanup(&sint_route->sint_set_notifier);
423 err:
424 g_free(sint_route);
425
426 return NULL;
427 }
428
429 void hyperv_sint_route_ref(HvSintRoute *sint_route)
430 {
431 sint_route->refcount++;
432 }
433
434 void hyperv_sint_route_unref(HvSintRoute *sint_route)
435 {
436 if (!sint_route) {
437 return;
438 }
439
440 assert(sint_route->refcount > 0);
441
442 if (--sint_route->refcount) {
443 return;
444 }
445
446 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
447 &sint_route->sint_set_notifier,
448 sint_route->gsi);
449 kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
450 if (sint_route->staged_msg) {
451 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
452 event_notifier_cleanup(&sint_route->sint_ack_notifier);
453 g_free(sint_route->staged_msg);
454 }
455 event_notifier_cleanup(&sint_route->sint_set_notifier);
456 g_free(sint_route);
457 }
458
459 int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
460 {
461 return event_notifier_set(&sint_route->sint_set_notifier);
462 }
463
464 typedef struct MsgHandler {
465 struct rcu_head rcu;
466 QLIST_ENTRY(MsgHandler) link;
467 uint32_t conn_id;
468 HvMsgHandler handler;
469 void *data;
470 } MsgHandler;
471
472 typedef struct EventFlagHandler {
473 struct rcu_head rcu;
474 QLIST_ENTRY(EventFlagHandler) link;
475 uint32_t conn_id;
476 EventNotifier *notifier;
477 } EventFlagHandler;
478
479 static QLIST_HEAD(, MsgHandler) msg_handlers;
480 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
481 static QemuMutex handlers_mutex;
482
483 static void __attribute__((constructor)) hv_init(void)
484 {
485 QLIST_INIT(&msg_handlers);
486 QLIST_INIT(&event_flag_handlers);
487 qemu_mutex_init(&handlers_mutex);
488 }
489
490 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
491 {
492 int ret;
493 MsgHandler *mh;
494
495 QEMU_LOCK_GUARD(&handlers_mutex);
496 QLIST_FOREACH(mh, &msg_handlers, link) {
497 if (mh->conn_id == conn_id) {
498 if (handler) {
499 ret = -EEXIST;
500 } else {
501 QLIST_REMOVE_RCU(mh, link);
502 g_free_rcu(mh, rcu);
503 ret = 0;
504 }
505 return ret;
506 }
507 }
508
509 if (handler) {
510 mh = g_new(MsgHandler, 1);
511 mh->conn_id = conn_id;
512 mh->handler = handler;
513 mh->data = data;
514 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
515 ret = 0;
516 } else {
517 ret = -ENOENT;
518 }
519
520 return ret;
521 }
522
523 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
524 {
525 uint16_t ret;
526 hwaddr len;
527 struct hyperv_post_message_input *msg;
528 MsgHandler *mh;
529
530 if (fast) {
531 return HV_STATUS_INVALID_HYPERCALL_CODE;
532 }
533 if (param & (__alignof__(*msg) - 1)) {
534 return HV_STATUS_INVALID_ALIGNMENT;
535 }
536
537 len = sizeof(*msg);
538 msg = cpu_physical_memory_map(param, &len, 0);
539 if (len < sizeof(*msg)) {
540 ret = HV_STATUS_INSUFFICIENT_MEMORY;
541 goto unmap;
542 }
543 if (msg->payload_size > sizeof(msg->payload)) {
544 ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
545 goto unmap;
546 }
547
548 ret = HV_STATUS_INVALID_CONNECTION_ID;
549 WITH_RCU_READ_LOCK_GUARD() {
550 QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
551 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
552 ret = mh->handler(msg, mh->data);
553 break;
554 }
555 }
556 }
557
558 unmap:
559 cpu_physical_memory_unmap(msg, len, 0, 0);
560 return ret;
561 }
562
563 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
564 {
565 int ret;
566 EventFlagHandler *handler;
567
568 QEMU_LOCK_GUARD(&handlers_mutex);
569 QLIST_FOREACH(handler, &event_flag_handlers, link) {
570 if (handler->conn_id == conn_id) {
571 if (notifier) {
572 ret = -EEXIST;
573 } else {
574 QLIST_REMOVE_RCU(handler, link);
575 g_free_rcu(handler, rcu);
576 ret = 0;
577 }
578 return ret;
579 }
580 }
581
582 if (notifier) {
583 handler = g_new(EventFlagHandler, 1);
584 handler->conn_id = conn_id;
585 handler->notifier = notifier;
586 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
587 ret = 0;
588 } else {
589 ret = -ENOENT;
590 }
591
592 return ret;
593 }
594
595 static bool process_event_flags_userspace;
596
597 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
598 {
599 if (!process_event_flags_userspace &&
600 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
601 process_event_flags_userspace = true;
602
603 warn_report("Hyper-V event signaling is not supported by this kernel; "
604 "using slower userspace hypercall processing");
605 }
606
607 if (!process_event_flags_userspace) {
608 struct kvm_hyperv_eventfd hvevfd = {
609 .conn_id = conn_id,
610 .fd = notifier ? event_notifier_get_fd(notifier) : -1,
611 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
612 };
613
614 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
615 }
616 return set_event_flag_handler(conn_id, notifier);
617 }
618
619 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
620 {
621 EventFlagHandler *handler;
622
623 if (unlikely(!fast)) {
624 hwaddr addr = param;
625
626 if (addr & (__alignof__(addr) - 1)) {
627 return HV_STATUS_INVALID_ALIGNMENT;
628 }
629
630 param = ldq_phys(&address_space_memory, addr);
631 }
632
633 /*
634 * Per spec, bits 32-47 contain the extra "flag number". However, we
635 * have no use for it, and in all known usecases it is zero, so just
636 * report lookup failure if it isn't.
637 */
638 if (param & 0xffff00000000ULL) {
639 return HV_STATUS_INVALID_PORT_ID;
640 }
641 /* remaining bits are reserved-zero */
642 if (param & ~HV_CONNECTION_ID_MASK) {
643 return HV_STATUS_INVALID_HYPERCALL_INPUT;
644 }
645
646 RCU_READ_LOCK_GUARD();
647 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
648 if (handler->conn_id == param) {
649 event_notifier_set(handler->notifier);
650 return 0;
651 }
652 }
653 return HV_STATUS_INVALID_CONNECTION_ID;
654 }