]> git.proxmox.com Git - mirror_qemu.git/blame - hw/hyperv/hyperv.c
qemu/atomic.h: rename atomic_ to qatomic_
[mirror_qemu.git] / hw / hyperv / hyperv.c
CommitLineData
701189e3
RK
1/*
2 * Hyper-V guest/hypervisor interaction
3 *
4 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 */
9
10#include "qemu/osdep.h"
11#include "qemu/main-loop.h"
0b8fa32f 12#include "qemu/module.h"
606c34bf 13#include "qapi/error.h"
267e071b 14#include "exec/address-spaces.h"
701189e3 15#include "sysemu/kvm.h"
f5642f8b 16#include "qemu/bitops.h"
8d3bc0b7 17#include "qemu/error-report.h"
08b689aa 18#include "qemu/lockable.h"
e6ea9f45
RK
19#include "qemu/queue.h"
20#include "qemu/rcu.h"
21#include "qemu/rcu_queue.h"
701189e3 22#include "hw/hyperv/hyperv.h"
db1015e9 23#include "qom/object.h"
701189e3 24
db1015e9 25struct SynICState {
606c34bf
RK
26 DeviceState parent_obj;
27
28 CPUState *cs;
29
30 bool enabled;
31 hwaddr msg_page_addr;
32 hwaddr event_page_addr;
267e071b
RK
33 MemoryRegion msg_page_mr;
34 MemoryRegion event_page_mr;
35 struct hyperv_message_page *msg_page;
36 struct hyperv_event_flags_page *event_page;
db1015e9 37};
606c34bf
RK
38
39#define TYPE_SYNIC "hyperv-synic"
8063396b 40OBJECT_DECLARE_SIMPLE_TYPE(SynICState, SYNIC)
606c34bf 41
d42cd961
JD
/* Set to true by hyperv_synic_add(); nothing in this file clears it again */
static bool synic_enabled;

/*
 * Tell whether hyperv_synic_add() has been called at least once.
 */
bool hyperv_is_synic_enabled(void)
{
    return synic_enabled;
}
48
606c34bf
RK
49static SynICState *get_synic(CPUState *cs)
50{
51 return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
52}
53
54static void synic_update(SynICState *synic, bool enable,
55 hwaddr msg_page_addr, hwaddr event_page_addr)
56{
57
58 synic->enabled = enable;
267e071b
RK
59 if (synic->msg_page_addr != msg_page_addr) {
60 if (synic->msg_page_addr) {
61 memory_region_del_subregion(get_system_memory(),
62 &synic->msg_page_mr);
63 }
64 if (msg_page_addr) {
65 memory_region_add_subregion(get_system_memory(), msg_page_addr,
66 &synic->msg_page_mr);
67 }
68 synic->msg_page_addr = msg_page_addr;
69 }
70 if (synic->event_page_addr != event_page_addr) {
71 if (synic->event_page_addr) {
72 memory_region_del_subregion(get_system_memory(),
73 &synic->event_page_mr);
74 }
75 if (event_page_addr) {
76 memory_region_add_subregion(get_system_memory(), event_page_addr,
77 &synic->event_page_mr);
78 }
79 synic->event_page_addr = event_page_addr;
80 }
606c34bf
RK
81}
82
83void hyperv_synic_update(CPUState *cs, bool enable,
84 hwaddr msg_page_addr, hwaddr event_page_addr)
85{
86 SynICState *synic = get_synic(cs);
87
88 if (!synic) {
89 return;
90 }
91
92 synic_update(synic, enable, msg_page_addr, event_page_addr);
93}
94
95static void synic_realize(DeviceState *dev, Error **errp)
96{
267e071b
RK
97 Object *obj = OBJECT(dev);
98 SynICState *synic = SYNIC(dev);
99 char *msgp_name, *eventp_name;
100 uint32_t vp_index;
101
102 /* memory region names have to be globally unique */
103 vp_index = hyperv_vp_index(synic->cs);
104 msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
105 eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
106
107 memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
108 sizeof(*synic->msg_page), &error_abort);
109 memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
110 sizeof(*synic->event_page), &error_abort);
111 synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
112 synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
113
114 g_free(msgp_name);
115 g_free(eventp_name);
606c34bf 116}
606c34bf
RK
117static void synic_reset(DeviceState *dev)
118{
119 SynICState *synic = SYNIC(dev);
267e071b
RK
120 memset(synic->msg_page, 0, sizeof(*synic->msg_page));
121 memset(synic->event_page, 0, sizeof(*synic->event_page));
606c34bf
RK
122 synic_update(synic, false, 0, 0);
123}
124
125static void synic_class_init(ObjectClass *klass, void *data)
126{
127 DeviceClass *dc = DEVICE_CLASS(klass);
128
129 dc->realize = synic_realize;
130 dc->reset = synic_reset;
131 dc->user_creatable = false;
132}
133
134void hyperv_synic_add(CPUState *cs)
135{
136 Object *obj;
137 SynICState *synic;
138
139 obj = object_new(TYPE_SYNIC);
140 synic = SYNIC(obj);
141 synic->cs = cs;
d2623129 142 object_property_add_child(OBJECT(cs), "synic", obj);
606c34bf 143 object_unref(obj);
ce189ab2 144 qdev_realize(DEVICE(obj), NULL, &error_abort);
d42cd961 145 synic_enabled = true;
606c34bf
RK
146}
147
148void hyperv_synic_reset(CPUState *cs)
149{
30a759b6
RK
150 SynICState *synic = get_synic(cs);
151
152 if (synic) {
f703a04c 153 device_legacy_reset(DEVICE(synic));
30a759b6 154 }
606c34bf
RK
155}
156
157static const TypeInfo synic_type_info = {
158 .name = TYPE_SYNIC,
159 .parent = TYPE_DEVICE,
160 .instance_size = sizeof(SynICState),
161 .class_init = synic_class_init,
162};
163
164static void synic_register_types(void)
165{
166 type_register_static(&synic_type_info);
167}
168
169type_init(synic_register_types)
170
4cbaf3c1
RK
171/*
172 * KVM has its own message producers (SynIC timers). To guarantee
173 * serialization with both KVM vcpu and the guest cpu, the messages are first
174 * staged in an intermediate area and then posted to the SynIC message page in
175 * the vcpu thread.
176 */
177typedef struct HvSintStagedMessage {
178 /* message content staged by hyperv_post_msg */
179 struct hyperv_message msg;
180 /* callback + data (r/o) to complete the processing in a BH */
181 HvSintMsgCb cb;
182 void *cb_data;
183 /* message posting status filled by cpu_post_msg */
184 int status;
185 /* passing the buck: */
186 enum {
187 /* initial state */
188 HV_STAGED_MSG_FREE,
189 /*
190 * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
191 * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
192 */
193 HV_STAGED_MSG_BUSY,
194 /*
195 * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
196 * notify the guest, records the status, marks the posting done (BUSY
197 * -> POSTED), and schedules sint_msg_bh BH
198 */
199 HV_STAGED_MSG_POSTED,
200 /*
201 * sint_msg_bh (BH) verifies that the posting is done, runs the
202 * callback, and starts over (POSTED -> FREE)
203 */
204 } state;
205} HvSintStagedMessage;
206
701189e3
RK
207struct HvSintRoute {
208 uint32_t sint;
606c34bf 209 SynICState *synic;
701189e3
RK
210 int gsi;
211 EventNotifier sint_set_notifier;
212 EventNotifier sint_ack_notifier;
4cbaf3c1
RK
213
214 HvSintStagedMessage *staged_msg;
215
701189e3
RK
216 unsigned refcount;
217};
218
219static CPUState *hyperv_find_vcpu(uint32_t vp_index)
220{
221 CPUState *cs = qemu_get_cpu(vp_index);
222 assert(hyperv_vp_index(cs) == vp_index);
223 return cs;
224}
225
4cbaf3c1
RK
226/*
227 * BH to complete the processing of a staged message.
228 */
229static void sint_msg_bh(void *opaque)
230{
231 HvSintRoute *sint_route = opaque;
232 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
233
d73415a3 234 if (qatomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
4cbaf3c1
RK
235 /* status nor ready yet (spurious ack from guest?), ignore */
236 return;
237 }
238
239 staged_msg->cb(staged_msg->cb_data, staged_msg->status);
240 staged_msg->status = 0;
241
242 /* staged message processing finished, ready to start over */
d73415a3 243 qatomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
4cbaf3c1
RK
244 /* drop the reference taken in hyperv_post_msg */
245 hyperv_sint_route_unref(sint_route);
246}
247
248/*
249 * Worker to transfer the message from the staging area into the SynIC message
250 * page in vcpu context.
251 */
252static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
253{
254 HvSintRoute *sint_route = data.host_ptr;
255 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
256 SynICState *synic = sint_route->synic;
257 struct hyperv_message *dst_msg;
258 bool wait_for_sint_ack = false;
259
260 assert(staged_msg->state == HV_STAGED_MSG_BUSY);
261
262 if (!synic->enabled || !synic->msg_page_addr) {
263 staged_msg->status = -ENXIO;
264 goto posted;
265 }
266
267 dst_msg = &synic->msg_page->slot[sint_route->sint];
268
269 if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
270 dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
271 staged_msg->status = -EAGAIN;
272 wait_for_sint_ack = true;
273 } else {
274 memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
275 staged_msg->status = hyperv_sint_route_set_sint(sint_route);
276 }
277
278 memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
279
280posted:
d73415a3 281 qatomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
4cbaf3c1
RK
282 /*
283 * Notify the msg originator of the progress made; if the slot was busy we
284 * set msg_pending flag in it so it will be the guest who will do EOM and
285 * trigger the notification from KVM via sint_ack_notifier
286 */
287 if (!wait_for_sint_ack) {
288 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
289 sint_route);
290 }
291}
292
293/*
294 * Post a Hyper-V message to the staging area, for delivery to guest in the
295 * vcpu thread.
296 */
297int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
298{
299 HvSintStagedMessage *staged_msg = sint_route->staged_msg;
300
301 assert(staged_msg);
302
303 /* grab the staging area */
d73415a3 304 if (qatomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
4cbaf3c1
RK
305 HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
306 return -EAGAIN;
307 }
308
309 memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
310
311 /* hold a reference on sint_route until the callback is finished */
312 hyperv_sint_route_ref(sint_route);
313
314 /* schedule message posting attempt in vcpu thread */
315 async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
316 RUN_ON_CPU_HOST_PTR(sint_route));
317 return 0;
318}
319
320static void sint_ack_handler(EventNotifier *notifier)
701189e3
RK
321{
322 HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
323 sint_ack_notifier);
324 event_notifier_test_and_clear(notifier);
4cbaf3c1
RK
325
326 /*
327 * the guest consumed the previous message so complete the current one with
328 * -EAGAIN and let the msg originator retry
329 */
330 aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
701189e3
RK
331}
332
f5642f8b
RK
333/*
334 * Set given event flag for a given sint on a given vcpu, and signal the sint.
335 */
336int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
337{
338 int ret;
339 SynICState *synic = sint_route->synic;
340 unsigned long *flags, set_mask;
341 unsigned set_idx;
342
343 if (eventno > HV_EVENT_FLAGS_COUNT) {
344 return -EINVAL;
345 }
346 if (!synic->enabled || !synic->event_page_addr) {
347 return -ENXIO;
348 }
349
350 set_idx = BIT_WORD(eventno);
351 set_mask = BIT_MASK(eventno);
352 flags = synic->event_page->slot[sint_route->sint].flags;
353
d73415a3 354 if ((qatomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
f5642f8b
RK
355 memory_region_set_dirty(&synic->event_page_mr, 0,
356 sizeof(*synic->event_page));
357 ret = hyperv_sint_route_set_sint(sint_route);
358 } else {
359 ret = 0;
360 }
361 return ret;
362}
363
701189e3 364HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
4cbaf3c1 365 HvSintMsgCb cb, void *cb_data)
701189e3
RK
366{
367 HvSintRoute *sint_route;
368 EventNotifier *ack_notifier;
369 int r, gsi;
370 CPUState *cs;
606c34bf 371 SynICState *synic;
701189e3
RK
372
373 cs = hyperv_find_vcpu(vp_index);
374 if (!cs) {
375 return NULL;
376 }
377
606c34bf
RK
378 synic = get_synic(cs);
379 if (!synic) {
380 return NULL;
381 }
382
701189e3
RK
383 sint_route = g_new0(HvSintRoute, 1);
384 r = event_notifier_init(&sint_route->sint_set_notifier, false);
385 if (r) {
386 goto err;
387 }
388
4cbaf3c1
RK
389
390 ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
701189e3 391 if (ack_notifier) {
4cbaf3c1
RK
392 sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
393 sint_route->staged_msg->cb = cb;
394 sint_route->staged_msg->cb_data = cb_data;
395
701189e3
RK
396 r = event_notifier_init(ack_notifier, false);
397 if (r) {
398 goto err_sint_set_notifier;
399 }
400
4cbaf3c1 401 event_notifier_set_handler(ack_notifier, sint_ack_handler);
701189e3
RK
402 }
403
404 gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
405 if (gsi < 0) {
406 goto err_gsi;
407 }
408
409 r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
410 &sint_route->sint_set_notifier,
411 ack_notifier, gsi);
412 if (r) {
413 goto err_irqfd;
414 }
415 sint_route->gsi = gsi;
606c34bf 416 sint_route->synic = synic;
701189e3
RK
417 sint_route->sint = sint;
418 sint_route->refcount = 1;
419
420 return sint_route;
421
422err_irqfd:
423 kvm_irqchip_release_virq(kvm_state, gsi);
424err_gsi:
425 if (ack_notifier) {
426 event_notifier_set_handler(ack_notifier, NULL);
427 event_notifier_cleanup(ack_notifier);
4cbaf3c1 428 g_free(sint_route->staged_msg);
701189e3
RK
429 }
430err_sint_set_notifier:
431 event_notifier_cleanup(&sint_route->sint_set_notifier);
432err:
433 g_free(sint_route);
434
435 return NULL;
436}
437
438void hyperv_sint_route_ref(HvSintRoute *sint_route)
439{
440 sint_route->refcount++;
441}
442
443void hyperv_sint_route_unref(HvSintRoute *sint_route)
444{
445 if (!sint_route) {
446 return;
447 }
448
449 assert(sint_route->refcount > 0);
450
451 if (--sint_route->refcount) {
452 return;
453 }
454
455 kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
456 &sint_route->sint_set_notifier,
457 sint_route->gsi);
458 kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
4cbaf3c1 459 if (sint_route->staged_msg) {
701189e3
RK
460 event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
461 event_notifier_cleanup(&sint_route->sint_ack_notifier);
4cbaf3c1 462 g_free(sint_route->staged_msg);
701189e3
RK
463 }
464 event_notifier_cleanup(&sint_route->sint_set_notifier);
465 g_free(sint_route);
466}
467
468int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
469{
470 return event_notifier_set(&sint_route->sint_set_notifier);
471}
e6ea9f45 472
76036a5f
RK
473typedef struct MsgHandler {
474 struct rcu_head rcu;
475 QLIST_ENTRY(MsgHandler) link;
476 uint32_t conn_id;
477 HvMsgHandler handler;
478 void *data;
479} MsgHandler;
480
e6ea9f45
RK
481typedef struct EventFlagHandler {
482 struct rcu_head rcu;
483 QLIST_ENTRY(EventFlagHandler) link;
484 uint32_t conn_id;
485 EventNotifier *notifier;
486} EventFlagHandler;
487
76036a5f 488static QLIST_HEAD(, MsgHandler) msg_handlers;
e6ea9f45
RK
489static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
490static QemuMutex handlers_mutex;
491
492static void __attribute__((constructor)) hv_init(void)
493{
76036a5f 494 QLIST_INIT(&msg_handlers);
e6ea9f45
RK
495 QLIST_INIT(&event_flag_handlers);
496 qemu_mutex_init(&handlers_mutex);
497}
498
76036a5f
RK
499int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
500{
501 int ret;
502 MsgHandler *mh;
503
08b689aa 504 QEMU_LOCK_GUARD(&handlers_mutex);
76036a5f
RK
505 QLIST_FOREACH(mh, &msg_handlers, link) {
506 if (mh->conn_id == conn_id) {
507 if (handler) {
508 ret = -EEXIST;
509 } else {
510 QLIST_REMOVE_RCU(mh, link);
511 g_free_rcu(mh, rcu);
512 ret = 0;
513 }
08b689aa 514 return ret;
76036a5f
RK
515 }
516 }
517
518 if (handler) {
519 mh = g_new(MsgHandler, 1);
520 mh->conn_id = conn_id;
521 mh->handler = handler;
522 mh->data = data;
523 QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
524 ret = 0;
525 } else {
526 ret = -ENOENT;
527 }
08b689aa 528
76036a5f
RK
529 return ret;
530}
531
532uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
533{
534 uint16_t ret;
535 hwaddr len;
536 struct hyperv_post_message_input *msg;
537 MsgHandler *mh;
538
539 if (fast) {
540 return HV_STATUS_INVALID_HYPERCALL_CODE;
541 }
542 if (param & (__alignof__(*msg) - 1)) {
543 return HV_STATUS_INVALID_ALIGNMENT;
544 }
545
546 len = sizeof(*msg);
547 msg = cpu_physical_memory_map(param, &len, 0);
548 if (len < sizeof(*msg)) {
549 ret = HV_STATUS_INSUFFICIENT_MEMORY;
550 goto unmap;
551 }
552 if (msg->payload_size > sizeof(msg->payload)) {
553 ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
554 goto unmap;
555 }
556
557 ret = HV_STATUS_INVALID_CONNECTION_ID;
b66173af
DDAG
558 WITH_RCU_READ_LOCK_GUARD() {
559 QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
560 if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
561 ret = mh->handler(msg, mh->data);
562 break;
563 }
76036a5f
RK
564 }
565 }
76036a5f
RK
566
567unmap:
568 cpu_physical_memory_unmap(msg, len, 0, 0);
569 return ret;
570}
571
8d3bc0b7 572static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
e6ea9f45
RK
573{
574 int ret;
575 EventFlagHandler *handler;
576
08b689aa 577 QEMU_LOCK_GUARD(&handlers_mutex);
e6ea9f45
RK
578 QLIST_FOREACH(handler, &event_flag_handlers, link) {
579 if (handler->conn_id == conn_id) {
580 if (notifier) {
581 ret = -EEXIST;
582 } else {
583 QLIST_REMOVE_RCU(handler, link);
584 g_free_rcu(handler, rcu);
585 ret = 0;
586 }
08b689aa 587 return ret;
e6ea9f45
RK
588 }
589 }
590
591 if (notifier) {
592 handler = g_new(EventFlagHandler, 1);
593 handler->conn_id = conn_id;
594 handler->notifier = notifier;
595 QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
596 ret = 0;
597 } else {
598 ret = -ENOENT;
599 }
08b689aa 600
e6ea9f45
RK
601 return ret;
602}
603
8d3bc0b7
RK
604static bool process_event_flags_userspace;
605
606int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
607{
608 if (!process_event_flags_userspace &&
609 !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
610 process_event_flags_userspace = true;
611
612 warn_report("Hyper-V event signaling is not supported by this kernel; "
613 "using slower userspace hypercall processing");
614 }
615
616 if (!process_event_flags_userspace) {
617 struct kvm_hyperv_eventfd hvevfd = {
618 .conn_id = conn_id,
619 .fd = notifier ? event_notifier_get_fd(notifier) : -1,
620 .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
621 };
622
623 return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
624 }
625 return set_event_flag_handler(conn_id, notifier);
626}
627
e6ea9f45
RK
628uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
629{
e6ea9f45
RK
630 EventFlagHandler *handler;
631
632 if (unlikely(!fast)) {
633 hwaddr addr = param;
634
635 if (addr & (__alignof__(addr) - 1)) {
636 return HV_STATUS_INVALID_ALIGNMENT;
637 }
638
639 param = ldq_phys(&address_space_memory, addr);
640 }
641
642 /*
643 * Per spec, bits 32-47 contain the extra "flag number". However, we
644 * have no use for it, and in all known usecases it is zero, so just
645 * report lookup failure if it isn't.
646 */
647 if (param & 0xffff00000000ULL) {
648 return HV_STATUS_INVALID_PORT_ID;
649 }
650 /* remaining bits are reserved-zero */
651 if (param & ~HV_CONNECTION_ID_MASK) {
652 return HV_STATUS_INVALID_HYPERCALL_INPUT;
653 }
654
b66173af 655 RCU_READ_LOCK_GUARD();
e6ea9f45
RK
656 QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
657 if (handler->conn_id == param) {
658 event_notifier_set(handler->notifier);
b66173af 659 return 0;
e6ea9f45
RK
660 }
661 }
b66173af 662 return HV_STATUS_INVALID_CONNECTION_ID;
e6ea9f45 663}