]> git.proxmox.com Git - mirror_qemu.git/blob - hw/hyperv/hyperv.c
Merge remote-tracking branch 'remotes/ehabkost/tags/x86-for-3.1-pull-request' into...
[mirror_qemu.git] / hw / hyperv / hyperv.c
1 /*
2 * Hyper-V guest/hypervisor interaction
3 *
4 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qapi/error.h"
13 #include "exec/address-spaces.h"
14 #include "sysemu/kvm.h"
15 #include "qemu/bitops.h"
16 #include "qemu/error-report.h"
17 #include "qemu/queue.h"
18 #include "qemu/rcu.h"
19 #include "qemu/rcu_queue.h"
20 #include "hw/hyperv/hyperv.h"
21
22 typedef struct SynICState {
23 DeviceState parent_obj;
24
25 CPUState *cs;
26
27 bool enabled;
28 hwaddr msg_page_addr;
29 hwaddr event_page_addr;
30 MemoryRegion msg_page_mr;
31 MemoryRegion event_page_mr;
32 struct hyperv_message_page *msg_page;
33 struct hyperv_event_flags_page *event_page;
34 } SynICState;
35
36 #define TYPE_SYNIC "hyperv-synic"
37 #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
38
39 static SynICState *get_synic(CPUState *cs)
40 {
41 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
42 }
43
44 static void synic_update(SynICState *synic, bool enable,
45 hwaddr msg_page_addr, hwaddr event_page_addr)
46 {
47
48 synic->enabled = enable;
49 if (synic->msg_page_addr != msg_page_addr) {
50 if (synic->msg_page_addr) {
51 memory_region_del_subregion(get_system_memory(),
52 &synic->msg_page_mr);
53 }
54 if (msg_page_addr) {
55 memory_region_add_subregion(get_system_memory(), msg_page_addr,
56 &synic->msg_page_mr);
57 }
58 synic->msg_page_addr = msg_page_addr;
59 }
60 if (synic->event_page_addr != event_page_addr) {
61 if (synic->event_page_addr) {
62 memory_region_del_subregion(get_system_memory(),
63 &synic->event_page_mr);
64 }
65 if (event_page_addr) {
66 memory_region_add_subregion(get_system_memory(), event_page_addr,
67 &synic->event_page_mr);
68 }
69 synic->event_page_addr = event_page_addr;
70 }
71 }
72
73 void hyperv_synic_update(CPUState *cs, bool enable,
74 hwaddr msg_page_addr, hwaddr event_page_addr)
75 {
76 SynICState *synic = get_synic(cs);
77
78 if (!synic) {
79 return;
80 }
81
82 synic_update(synic, enable, msg_page_addr, event_page_addr);
83 }
84
85 static void synic_realize(DeviceState *dev, Error **errp)
86 {
87 Object *obj = OBJECT(dev);
88 SynICState *synic = SYNIC(dev);
89 char *msgp_name, *eventp_name;
90 uint32_t vp_index;
91
92 /* memory region names have to be globally unique */
93 vp_index = hyperv_vp_index(synic->cs);
94 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
95 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
96
97 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
98 sizeof(*synic->msg_page), &error_abort);
99 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
100 sizeof(*synic->event_page), &error_abort);
101 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
102 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
103
104 g_free(msgp_name);
105 g_free(eventp_name);
106 }
107 static void synic_reset(DeviceState *dev)
108 {
109 SynICState *synic = SYNIC(dev);
110 memset(synic->msg_page, 0, sizeof(*synic->msg_page));
111 memset(synic->event_page, 0, sizeof(*synic->event_page));
112 synic_update(synic, false, 0, 0);
113 }
114
115 static void synic_class_init(ObjectClass *klass, void *data)
116 {
117 DeviceClass *dc = DEVICE_CLASS(klass);
118
119 dc->realize = synic_realize;
120 dc->reset = synic_reset;
121 dc->user_creatable = false;
122 }
123
124 void hyperv_synic_add(CPUState *cs)
125 {
126 Object *obj;
127 SynICState *synic;
128
129 obj = object_new(TYPE_SYNIC);
130 synic = SYNIC(obj);
131 synic->cs = cs;
132 object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
133 object_unref(obj);
134 object_property_set_bool(obj, true, "realized", &error_abort);
135 }
136
137 void hyperv_synic_reset(CPUState *cs)
138 {
139 SynICState *synic = get_synic(cs);
140
141 if (synic) {
142 device_reset(DEVICE(synic));
143 }
144 }
145
146 static const TypeInfo synic_type_info = {
147 .name = TYPE_SYNIC,
148 .parent = TYPE_DEVICE,
149 .instance_size = sizeof(SynICState),
150 .class_init = synic_class_init,
151 };
152
153 static void synic_register_types(void)
154 {
155 type_register_static(&synic_type_info);
156 }
157
158 type_init(synic_register_types)
159
160 /*
161 * KVM has its own message producers (SynIC timers). To guarantee
162 * serialization with both KVM vcpu and the guest cpu, the messages are first
163 * staged in an intermediate area and then posted to the SynIC message page in
164 * the vcpu thread.
165 */
166 typedef struct HvSintStagedMessage {
167 /* message content staged by hyperv_post_msg */
168 struct hyperv_message msg;
169 /* callback + data (r/o) to complete the processing in a BH */
170 HvSintMsgCb cb;
171 void *cb_data;
172 /* message posting status filled by cpu_post_msg */
173 int status;
174 /* passing the buck: */
175 enum {
176 /* initial state */
177 HV_STAGED_MSG_FREE,
178 /*
179 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
180 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
181 */
182 HV_STAGED_MSG_BUSY,
183 /*
184 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
185 * notify the guest, records the status, marks the posting done (BUSY
186 * -> POSTED), and schedules sint_msg_bh BH
187 */
188 HV_STAGED_MSG_POSTED,
189 /*
190 * sint_msg_bh (BH) verifies that the posting is done, runs the
191 * callback, and starts over (POSTED -> FREE)
192 */
193 } state;
194 } HvSintStagedMessage;
195
196 struct HvSintRoute {
197 uint32_t sint;
198 SynICState *synic;
199 int gsi;
200 EventNotifier sint_set_notifier;
201 EventNotifier sint_ack_notifier;
202
203 HvSintStagedMessage *staged_msg;
204
205 unsigned refcount;
206 };
207
208 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
209 {
210 CPUState *cs = qemu_get_cpu(vp_index);
211 assert(hyperv_vp_index(cs) == vp_index);
212 return cs;
213 }
214
215 /*
216 * BH to complete the processing of a staged message.
217 */
218 static void sint_msg_bh(void *opaque)
219 {
220 HvSintRoute *sint_route = opaque;
221 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
222
223 if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
224 /* status nor ready yet (spurious ack from guest?), ignore */
225 return;
226 }
227
228 staged_msg->cb(staged_msg->cb_data, staged_msg->status);
229 staged_msg->status = 0;
230
231 /* staged message processing finished, ready to start over */
232 atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
233 /* drop the reference taken in hyperv_post_msg */
234 hyperv_sint_route_unref(sint_route);
235 }
236
237 /*
238 * Worker to transfer the message from the staging area into the SynIC message
239 * page in vcpu context.
240 */
241 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
242 {
243 HvSintRoute *sint_route = data.host_ptr;
244 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
245 SynICState *synic = sint_route->synic;
246 struct hyperv_message *dst_msg;
247 bool wait_for_sint_ack = false;
248
249 assert(staged_msg->state == HV_STAGED_MSG_BUSY);
250
251 if (!synic->enabled || !synic->msg_page_addr) {
252 staged_msg->status = -ENXIO;
253 goto posted;
254 }
255
256 dst_msg = &synic->msg_page->slot[sint_route->sint];
257
258 if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
259 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
260 staged_msg->status = -EAGAIN;
261 wait_for_sint_ack = true;
262 } else {
263 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
264 staged_msg->status = hyperv_sint_route_set_sint(sint_route);
265 }
266
267 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
268
269 posted:
270 atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
271 /*
272 * Notify the msg originator of the progress made; if the slot was busy we
273 * set msg_pending flag in it so it will be the guest who will do EOM and
274 * trigger the notification from KVM via sint_ack_notifier
275 */
276 if (!wait_for_sint_ack) {
277 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
278 sint_route);
279 }
280 }
281
282 /*
283 * Post a Hyper-V message to the staging area, for delivery to guest in the
284 * vcpu thread.
285 */
286 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
287 {
288 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
289
290 assert(staged_msg);
291
292 /* grab the staging area */
293 if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
294 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
295 return -EAGAIN;
296 }
297
298 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
299
300 /* hold a reference on sint_route until the callback is finished */
301 hyperv_sint_route_ref(sint_route);
302
303 /* schedule message posting attempt in vcpu thread */
304 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
305 RUN_ON_CPU_HOST_PTR(sint_route));
306 return 0;
307 }
308
309 static void sint_ack_handler(EventNotifier *notifier)
310 {
311 HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
312 sint_ack_notifier);
313 event_notifier_test_and_clear(notifier);
314
315 /*
316 * the guest consumed the previous message so complete the current one with
317 * -EAGAIN and let the msg originator retry
318 */
319 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
320 }
321
322 /*
323 * Set given event flag for a given sint on a given vcpu, and signal the sint.
324 */
325 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
326 {
327 int ret;
328 SynICState *synic = sint_route->synic;
329 unsigned long *flags, set_mask;
330 unsigned set_idx;
331
332 if (eventno > HV_EVENT_FLAGS_COUNT) {
333 return -EINVAL;
334 }
335 if (!synic->enabled || !synic->event_page_addr) {
336 return -ENXIO;
337 }
338
339 set_idx = BIT_WORD(eventno);
340 set_mask = BIT_MASK(eventno);
341 flags = synic->event_page->slot[sint_route->sint].flags;
342
343 if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
344 memory_region_set_dirty(&synic->event_page_mr, 0,
345 sizeof(*synic->event_page));
346 ret = hyperv_sint_route_set_sint(sint_route);
347 } else {
348 ret = 0;
349 }
350 return ret;
351 }
352
353 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
354 HvSintMsgCb cb, void *cb_data)
355 {
356 HvSintRoute *sint_route;
357 EventNotifier *ack_notifier;
358 int r, gsi;
359 CPUState *cs;
360 SynICState *synic;
361
362 cs = hyperv_find_vcpu(vp_index);
363 if (!cs) {
364 return NULL;
365 }
366
367 synic = get_synic(cs);
368 if (!synic) {
369 return NULL;
370 }
371
372 sint_route = g_new0(HvSintRoute, 1);
373 r = event_notifier_init(&sint_route->sint_set_notifier, false);
374 if (r) {
375 goto err;
376 }
377
378
379 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
380 if (ack_notifier) {
381 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
382 sint_route->staged_msg->cb = cb;
383 sint_route->staged_msg->cb_data = cb_data;
384
385 r = event_notifier_init(ack_notifier, false);
386 if (r) {
387 goto err_sint_set_notifier;
388 }
389
390 event_notifier_set_handler(ack_notifier, sint_ack_handler);
391 }
392
393 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
394 if (gsi < 0) {
395 goto err_gsi;
396 }
397
398 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
399 &sint_route->sint_set_notifier,
400 ack_notifier, gsi);
401 if (r) {
402 goto err_irqfd;
403 }
404 sint_route->gsi = gsi;
405 sint_route->synic = synic;
406 sint_route->sint = sint;
407 sint_route->refcount = 1;
408
409 return sint_route;
410
411 err_irqfd:
412 kvm_irqchip_release_virq(kvm_state, gsi);
413 err_gsi:
414 if (ack_notifier) {
415 event_notifier_set_handler(ack_notifier, NULL);
416 event_notifier_cleanup(ack_notifier);
417 g_free(sint_route->staged_msg);
418 }
419 err_sint_set_notifier:
420 event_notifier_cleanup(&sint_route->sint_set_notifier);
421 err:
422 g_free(sint_route);
423
424 return NULL;
425 }
426
427 void hyperv_sint_route_ref(HvSintRoute *sint_route)
428 {
429 sint_route->refcount++;
430 }
431
432 void hyperv_sint_route_unref(HvSintRoute *sint_route)
433 {
434 if (!sint_route) {
435 return;
436 }
437
438 assert(sint_route->refcount > 0);
439
440 if (--sint_route->refcount) {
441 return;
442 }
443
444 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
445 &sint_route->sint_set_notifier,
446 sint_route->gsi);
447 kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
448 if (sint_route->staged_msg) {
449 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
450 event_notifier_cleanup(&sint_route->sint_ack_notifier);
451 g_free(sint_route->staged_msg);
452 }
453 event_notifier_cleanup(&sint_route->sint_set_notifier);
454 g_free(sint_route);
455 }
456
457 int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
458 {
459 return event_notifier_set(&sint_route->sint_set_notifier);
460 }
461
462 typedef struct MsgHandler {
463 struct rcu_head rcu;
464 QLIST_ENTRY(MsgHandler) link;
465 uint32_t conn_id;
466 HvMsgHandler handler;
467 void *data;
468 } MsgHandler;
469
470 typedef struct EventFlagHandler {
471 struct rcu_head rcu;
472 QLIST_ENTRY(EventFlagHandler) link;
473 uint32_t conn_id;
474 EventNotifier *notifier;
475 } EventFlagHandler;
476
477 static QLIST_HEAD(, MsgHandler) msg_handlers;
478 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
479 static QemuMutex handlers_mutex;
480
481 static void __attribute__((constructor)) hv_init(void)
482 {
483 QLIST_INIT(&msg_handlers);
484 QLIST_INIT(&event_flag_handlers);
485 qemu_mutex_init(&handlers_mutex);
486 }
487
488 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
489 {
490 int ret;
491 MsgHandler *mh;
492
493 qemu_mutex_lock(&handlers_mutex);
494 QLIST_FOREACH(mh, &msg_handlers, link) {
495 if (mh->conn_id == conn_id) {
496 if (handler) {
497 ret = -EEXIST;
498 } else {
499 QLIST_REMOVE_RCU(mh, link);
500 g_free_rcu(mh, rcu);
501 ret = 0;
502 }
503 goto unlock;
504 }
505 }
506
507 if (handler) {
508 mh = g_new(MsgHandler, 1);
509 mh->conn_id = conn_id;
510 mh->handler = handler;
511 mh->data = data;
512 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
513 ret = 0;
514 } else {
515 ret = -ENOENT;
516 }
517 unlock:
518 qemu_mutex_unlock(&handlers_mutex);
519 return ret;
520 }
521
522 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
523 {
524 uint16_t ret;
525 hwaddr len;
526 struct hyperv_post_message_input *msg;
527 MsgHandler *mh;
528
529 if (fast) {
530 return HV_STATUS_INVALID_HYPERCALL_CODE;
531 }
532 if (param & (__alignof__(*msg) - 1)) {
533 return HV_STATUS_INVALID_ALIGNMENT;
534 }
535
536 len = sizeof(*msg);
537 msg = cpu_physical_memory_map(param, &len, 0);
538 if (len < sizeof(*msg)) {
539 ret = HV_STATUS_INSUFFICIENT_MEMORY;
540 goto unmap;
541 }
542 if (msg->payload_size > sizeof(msg->payload)) {
543 ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
544 goto unmap;
545 }
546
547 ret = HV_STATUS_INVALID_CONNECTION_ID;
548 rcu_read_lock();
549 QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
550 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
551 ret = mh->handler(msg, mh->data);
552 break;
553 }
554 }
555 rcu_read_unlock();
556
557 unmap:
558 cpu_physical_memory_unmap(msg, len, 0, 0);
559 return ret;
560 }
561
562 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
563 {
564 int ret;
565 EventFlagHandler *handler;
566
567 qemu_mutex_lock(&handlers_mutex);
568 QLIST_FOREACH(handler, &event_flag_handlers, link) {
569 if (handler->conn_id == conn_id) {
570 if (notifier) {
571 ret = -EEXIST;
572 } else {
573 QLIST_REMOVE_RCU(handler, link);
574 g_free_rcu(handler, rcu);
575 ret = 0;
576 }
577 goto unlock;
578 }
579 }
580
581 if (notifier) {
582 handler = g_new(EventFlagHandler, 1);
583 handler->conn_id = conn_id;
584 handler->notifier = notifier;
585 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
586 ret = 0;
587 } else {
588 ret = -ENOENT;
589 }
590 unlock:
591 qemu_mutex_unlock(&handlers_mutex);
592 return ret;
593 }
594
595 static bool process_event_flags_userspace;
596
597 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
598 {
599 if (!process_event_flags_userspace &&
600 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
601 process_event_flags_userspace = true;
602
603 warn_report("Hyper-V event signaling is not supported by this kernel; "
604 "using slower userspace hypercall processing");
605 }
606
607 if (!process_event_flags_userspace) {
608 struct kvm_hyperv_eventfd hvevfd = {
609 .conn_id = conn_id,
610 .fd = notifier ? event_notifier_get_fd(notifier) : -1,
611 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
612 };
613
614 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
615 }
616 return set_event_flag_handler(conn_id, notifier);
617 }
618
619 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
620 {
621 uint16_t ret;
622 EventFlagHandler *handler;
623
624 if (unlikely(!fast)) {
625 hwaddr addr = param;
626
627 if (addr & (__alignof__(addr) - 1)) {
628 return HV_STATUS_INVALID_ALIGNMENT;
629 }
630
631 param = ldq_phys(&address_space_memory, addr);
632 }
633
634 /*
635 * Per spec, bits 32-47 contain the extra "flag number". However, we
636 * have no use for it, and in all known usecases it is zero, so just
637 * report lookup failure if it isn't.
638 */
639 if (param & 0xffff00000000ULL) {
640 return HV_STATUS_INVALID_PORT_ID;
641 }
642 /* remaining bits are reserved-zero */
643 if (param & ~HV_CONNECTION_ID_MASK) {
644 return HV_STATUS_INVALID_HYPERCALL_INPUT;
645 }
646
647 ret = HV_STATUS_INVALID_CONNECTION_ID;
648 rcu_read_lock();
649 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
650 if (handler->conn_id == param) {
651 event_notifier_set(handler->notifier);
652 ret = 0;
653 break;
654 }
655 }
656 rcu_read_unlock();
657 return ret;
658 }