]>
Commit | Line | Data |
---|---|---|
0d71f708 JD |
1 | /* |
2 | * QEMU Hyper-V VMBus | |
3 | * | |
4 | * Copyright (c) 2017-2018 Virtuozzo International GmbH. | |
5 | * | |
6 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
7 | * See the COPYING file in the top-level directory. | |
8 | */ | |
9 | ||
10 | #include "qemu/osdep.h" | |
11 | #include "qemu/error-report.h" | |
12 | #include "qemu/main-loop.h" | |
13 | #include "qapi/error.h" | |
14 | #include "migration/vmstate.h" | |
15 | #include "hw/qdev-properties.h" | |
ce35e229 | 16 | #include "hw/qdev-properties-system.h" |
0d71f708 JD |
17 | #include "hw/hyperv/hyperv.h" |
18 | #include "hw/hyperv/vmbus.h" | |
19 | #include "hw/hyperv/vmbus-bridge.h" | |
20 | #include "hw/sysbus.h" | |
21 | #include "cpu.h" | |
22 | #include "trace.h" | |
23 | ||
0d71f708 JD |
/*
 * GPADL lifecycle states.  The numeric values are spelled out because a
 * uint8_t state field of the GPADL is part of its migration description.
 */
enum {
    VMGPADL_INIT = 0,
    VMGPADL_ALIVE = 1,
    VMGPADL_TEARINGDOWN = 2,
    VMGPADL_TORNDOWN = 3,
};
30 | ||
31 | struct VMBusGpadl { | |
32 | /* GPADL id */ | |
33 | uint32_t id; | |
34 | /* associated channel id (rudimentary?) */ | |
35 | uint32_t child_relid; | |
36 | ||
37 | /* number of pages in the GPADL as declared in GPADL_HEADER message */ | |
38 | uint32_t num_gfns; | |
39 | /* | |
40 | * Due to limited message size, GPADL may not fit fully in a single | |
41 | * GPADL_HEADER message, and is further popluated using GPADL_BODY | |
42 | * messages. @seen_gfns is the number of pages seen so far; once it | |
43 | * reaches @num_gfns, the GPADL is ready to use. | |
44 | */ | |
45 | uint32_t seen_gfns; | |
46 | /* array of GFNs (of size @num_gfns once allocated) */ | |
47 | uint64_t *gfns; | |
48 | ||
49 | uint8_t state; | |
50 | ||
51 | QTAILQ_ENTRY(VMBusGpadl) link; | |
52 | VMBus *vmbus; | |
53 | unsigned refcount; | |
54 | }; | |
55 | ||
56 | /* | |
57 | * Wrap sequential read from / write to GPADL. | |
58 | */ | |
59 | typedef struct GpadlIter { | |
60 | VMBusGpadl *gpadl; | |
61 | AddressSpace *as; | |
62 | DMADirection dir; | |
63 | /* offset into GPADL where the next i/o will be performed */ | |
64 | uint32_t off; | |
65 | /* | |
66 | * Cached mapping of the currently accessed page, up to page boundary. | |
67 | * Updated lazily on i/o. | |
68 | * Note: MemoryRegionCache can not be used here because pages in the GPADL | |
69 | * are non-contiguous and may belong to different memory regions. | |
70 | */ | |
71 | void *map; | |
72 | /* offset after last i/o (i.e. not affected by seek) */ | |
73 | uint32_t last_off; | |
74 | /* | |
75 | * Indicator that the iterator is active and may have a cached mapping. | |
76 | * Allows to enforce bracketing of all i/o (which may create cached | |
77 | * mappings) and thus exclude mapping leaks. | |
78 | */ | |
79 | bool active; | |
80 | } GpadlIter; | |
81 | ||
82 | /* | |
83 | * Ring buffer. There are two of them, sitting in the same GPADL, for each | |
84 | * channel. | |
85 | * Each ring buffer consists of a set of pages, with the first page containing | |
86 | * the ring buffer header, and the remaining pages being for data packets. | |
87 | */ | |
88 | typedef struct VMBusRingBufCommon { | |
89 | AddressSpace *as; | |
90 | /* GPA of the ring buffer header */ | |
91 | dma_addr_t rb_addr; | |
92 | /* start and length of the ring buffer data area within GPADL */ | |
93 | uint32_t base; | |
94 | uint32_t len; | |
95 | ||
96 | GpadlIter iter; | |
97 | } VMBusRingBufCommon; | |
98 | ||
99 | typedef struct VMBusSendRingBuf { | |
100 | VMBusRingBufCommon common; | |
101 | /* current write index, to be committed at the end of send */ | |
102 | uint32_t wr_idx; | |
103 | /* write index at the start of send */ | |
104 | uint32_t last_wr_idx; | |
105 | /* space to be requested from the guest */ | |
106 | uint32_t wanted; | |
107 | /* space reserved for planned sends */ | |
108 | uint32_t reserved; | |
109 | /* last seen read index */ | |
110 | uint32_t last_seen_rd_idx; | |
111 | } VMBusSendRingBuf; | |
112 | ||
113 | typedef struct VMBusRecvRingBuf { | |
114 | VMBusRingBufCommon common; | |
115 | /* current read index, to be committed at the end of receive */ | |
116 | uint32_t rd_idx; | |
117 | /* read index at the start of receive */ | |
118 | uint32_t last_rd_idx; | |
119 | /* last seen write index */ | |
120 | uint32_t last_seen_wr_idx; | |
121 | } VMBusRecvRingBuf; | |
122 | ||
123 | ||
/* Channel offer states (VMOFFER_*), with their numeric values spelled out. */
enum {
    VMOFFER_INIT = 0,
    VMOFFER_SENDING = 1,
    VMOFFER_SENT = 2,
};
129 | ||
/* Channel open states (VMCHAN_*), with their numeric values spelled out. */
enum {
    VMCHAN_INIT = 0,
    VMCHAN_OPENING = 1,
    VMCHAN_OPEN = 2,
};
135 | ||
136 | struct VMBusChannel { | |
137 | VMBusDevice *dev; | |
138 | ||
139 | /* channel id */ | |
140 | uint32_t id; | |
141 | /* | |
142 | * subchannel index within the device; subchannel #0 is "primary" and | |
143 | * always exists | |
144 | */ | |
145 | uint16_t subchan_idx; | |
146 | uint32_t open_id; | |
147 | /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */ | |
148 | uint32_t target_vp; | |
149 | /* GPADL id to use for the ring buffers */ | |
150 | uint32_t ringbuf_gpadl; | |
151 | /* start (in pages) of the send ring buffer within @ringbuf_gpadl */ | |
152 | uint32_t ringbuf_send_offset; | |
153 | ||
154 | uint8_t offer_state; | |
155 | uint8_t state; | |
156 | bool is_open; | |
157 | ||
158 | /* main device worker; copied from the device class */ | |
159 | VMBusChannelNotifyCb notify_cb; | |
160 | /* | |
161 | * guest->host notifications, either sent directly or dispatched via | |
162 | * interrupt page (older VMBus) | |
163 | */ | |
164 | EventNotifier notifier; | |
165 | ||
166 | VMBus *vmbus; | |
167 | /* | |
168 | * SINT route to signal with host->guest notifications; may be shared with | |
169 | * the main VMBus SINT route | |
170 | */ | |
171 | HvSintRoute *notify_route; | |
172 | VMBusGpadl *gpadl; | |
173 | ||
174 | VMBusSendRingBuf send_ringbuf; | |
175 | VMBusRecvRingBuf recv_ringbuf; | |
176 | ||
177 | QTAILQ_ENTRY(VMBusChannel) link; | |
178 | }; | |
179 | ||
180 | /* | |
181 | * Hyper-V spec mandates that every message port has 16 buffers, which means | |
182 | * that the guest can post up to this many messages without blocking. | |
183 | * Therefore a queue for incoming messages has to be provided. | |
184 | * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just | |
185 | * doesn't transition to a new state until the message is known to have been | |
186 | * successfully delivered to the respective SynIC message slot. | |
187 | */ | |
#define HV_MSG_QUEUE_LEN 16

/* Channel #0 is never used by Hyper-V devices, so channel ids start at 1. */
#define VMBUS_FIRST_CHANID 1
/* A channel takes one bit of a single event page SINT slot. */
#define VMBUS_CHANID_COUNT (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
/* The first few connection numbers are left for other purposes. */
#define VMBUS_CHAN_CONNECTION_OFFSET 16
196 | ||
197 | /* | |
198 | * Since the success or failure of sending a message is reported | |
199 | * asynchronously, the VMBus state machine has effectively two entry points: | |
200 | * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest | |
201 | * message delivery status becomes known). Both are run as oneshot BHs on the | |
202 | * main aio context, ensuring serialization. | |
203 | */ | |
/* States of the VMBus state machine; VMBUS_STATE_MAX is the state count. */
enum {
    VMBUS_LISTEN = 0,
    VMBUS_HANDSHAKE = 1,
    VMBUS_OFFER = 2,
    VMBUS_CREATE_GPADL = 3,
    VMBUS_TEARDOWN_GPADL = 4,
    VMBUS_OPEN_CHANNEL = 5,
    VMBUS_UNLOAD = 6,
    VMBUS_STATE_MAX
};
214 | ||
215 | struct VMBus { | |
216 | BusState parent; | |
217 | ||
218 | uint8_t state; | |
219 | /* protection against recursive aio_poll (see vmbus_run) */ | |
220 | bool in_progress; | |
221 | /* whether there's a message being delivered to the guest */ | |
222 | bool msg_in_progress; | |
223 | uint32_t version; | |
224 | /* VP_INDEX of the vCPU to send messages and interrupts to */ | |
225 | uint32_t target_vp; | |
226 | HvSintRoute *sint_route; | |
227 | /* | |
228 | * interrupt page for older protocol versions; newer ones use SynIC event | |
229 | * flags directly | |
230 | */ | |
231 | hwaddr int_page_gpa; | |
232 | ||
233 | DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT); | |
234 | ||
235 | /* incoming message queue */ | |
236 | struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN]; | |
237 | uint8_t rx_queue_head; | |
238 | uint8_t rx_queue_size; | |
239 | QemuMutex rx_queue_lock; | |
240 | ||
241 | QTAILQ_HEAD(, VMBusGpadl) gpadl_list; | |
242 | QTAILQ_HEAD(, VMBusChannel) channel_list; | |
243 | ||
244 | /* | |
245 | * guest->host notifications for older VMBus, to be dispatched via | |
246 | * interrupt page | |
247 | */ | |
248 | EventNotifier notifier; | |
249 | }; | |
250 | ||
251 | static bool gpadl_full(VMBusGpadl *gpadl) | |
252 | { | |
253 | return gpadl->seen_gfns == gpadl->num_gfns; | |
254 | } | |
255 | ||
256 | static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id, | |
257 | uint32_t child_relid, uint32_t num_gfns) | |
258 | { | |
259 | VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1); | |
260 | ||
261 | gpadl->id = id; | |
262 | gpadl->child_relid = child_relid; | |
263 | gpadl->num_gfns = num_gfns; | |
264 | gpadl->gfns = g_new(uint64_t, num_gfns); | |
265 | QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link); | |
266 | gpadl->vmbus = vmbus; | |
267 | gpadl->refcount = 1; | |
268 | return gpadl; | |
269 | } | |
270 | ||
271 | static void free_gpadl(VMBusGpadl *gpadl) | |
272 | { | |
273 | QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link); | |
274 | g_free(gpadl->gfns); | |
275 | g_free(gpadl); | |
276 | } | |
277 | ||
278 | static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id) | |
279 | { | |
280 | VMBusGpadl *gpadl; | |
281 | QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { | |
282 | if (gpadl->id == gpadl_id) { | |
283 | return gpadl; | |
284 | } | |
285 | } | |
286 | return NULL; | |
287 | } | |
288 | ||
289 | VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id) | |
290 | { | |
291 | VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id); | |
292 | if (!gpadl || !gpadl_full(gpadl)) { | |
293 | return NULL; | |
294 | } | |
295 | gpadl->refcount++; | |
296 | return gpadl; | |
297 | } | |
298 | ||
299 | void vmbus_put_gpadl(VMBusGpadl *gpadl) | |
300 | { | |
301 | if (!gpadl) { | |
302 | return; | |
303 | } | |
304 | if (--gpadl->refcount) { | |
305 | return; | |
306 | } | |
307 | free_gpadl(gpadl); | |
308 | } | |
309 | ||
310 | uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl) | |
311 | { | |
312 | return gpadl->num_gfns * TARGET_PAGE_SIZE; | |
313 | } | |
314 | ||
315 | static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl, | |
316 | AddressSpace *as, DMADirection dir) | |
317 | { | |
318 | iter->gpadl = gpadl; | |
319 | iter->as = as; | |
320 | iter->dir = dir; | |
321 | iter->active = false; | |
322 | } | |
323 | ||
324 | static inline void gpadl_iter_cache_unmap(GpadlIter *iter) | |
325 | { | |
326 | uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK; | |
327 | uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1; | |
328 | ||
329 | /* mapping is only done to do non-zero amount of i/o */ | |
330 | assert(iter->last_off > 0); | |
331 | assert(map_start_in_page < io_end_in_page); | |
332 | ||
333 | dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page, | |
334 | iter->dir, io_end_in_page - map_start_in_page); | |
335 | } | |
336 | ||
337 | /* | |
338 | * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf. | |
339 | * The direction of the copy is determined by @iter->dir. | |
340 | * The caller must ensure the operation overflows neither @buf nor the GPADL | |
341 | * (there's an assert for the latter). | |
342 | * Reuse the currently mapped page in the GPADL if possible. | |
343 | */ | |
344 | static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len) | |
345 | { | |
346 | ssize_t ret = len; | |
347 | ||
348 | assert(iter->active); | |
349 | ||
350 | while (len) { | |
351 | uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK; | |
352 | uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page; | |
353 | uint32_t cplen = MIN(pgleft, len); | |
354 | void *p; | |
355 | ||
356 | /* try to reuse the cached mapping */ | |
357 | if (iter->map) { | |
358 | uint32_t map_start_in_page = | |
359 | (uintptr_t)iter->map & ~TARGET_PAGE_MASK; | |
360 | uint32_t off_base = iter->off & ~TARGET_PAGE_MASK; | |
361 | uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK; | |
362 | if (off_base != mapped_base || off_in_page < map_start_in_page) { | |
363 | gpadl_iter_cache_unmap(iter); | |
364 | iter->map = NULL; | |
365 | } | |
366 | } | |
367 | ||
368 | if (!iter->map) { | |
369 | dma_addr_t maddr; | |
370 | dma_addr_t mlen = pgleft; | |
371 | uint32_t idx = iter->off >> TARGET_PAGE_BITS; | |
372 | assert(idx < iter->gpadl->num_gfns); | |
373 | ||
374 | maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page; | |
375 | ||
a1d4b0a3 PMD |
376 | iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir, |
377 | MEMTXATTRS_UNSPECIFIED); | |
0d71f708 JD |
378 | if (mlen != pgleft) { |
379 | dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0); | |
380 | iter->map = NULL; | |
381 | return -EFAULT; | |
382 | } | |
383 | } | |
384 | ||
8b39aa90 JD |
385 | p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) | |
386 | off_in_page); | |
0d71f708 JD |
387 | if (iter->dir == DMA_DIRECTION_FROM_DEVICE) { |
388 | memcpy(p, buf, cplen); | |
389 | } else { | |
390 | memcpy(buf, p, cplen); | |
391 | } | |
392 | ||
393 | buf += cplen; | |
394 | len -= cplen; | |
395 | iter->off += cplen; | |
396 | iter->last_off = iter->off; | |
397 | } | |
398 | ||
399 | return ret; | |
400 | } | |
401 | ||
402 | /* | |
403 | * Position the iterator @iter at new offset @new_off. | |
404 | * The cached mapping, if any, is left alone here; the next i/o checks it | |
405 | * against the new offset and unmaps it if it is unusable. | |
406 | */ | |
407 | static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off) | |
408 | { | |
409 | assert(iter->active); | |
410 | iter->off = new_off; | |
411 | } | |
412 | ||
413 | /* | |
414 | * Start a series of i/o on the GPADL. | |
415 | * After this i/o and seek operations on @iter become legal. | |
416 | */ | |
417 | static inline void gpadl_iter_start_io(GpadlIter *iter) | |
418 | { | |
419 | assert(!iter->active); | |
420 | /* mapping is cached lazily on i/o */ | |
421 | iter->map = NULL; | |
422 | iter->active = true; | |
423 | } | |
424 | ||
425 | /* | |
426 | * End the earlier started series of i/o on the GPADL and release the cached | |
427 | * mapping if any. | |
428 | */ | |
429 | static inline void gpadl_iter_end_io(GpadlIter *iter) | |
430 | { | |
431 | assert(iter->active); | |
432 | ||
433 | if (iter->map) { | |
434 | gpadl_iter_cache_unmap(iter); | |
435 | } | |
436 | ||
437 | iter->active = false; | |
438 | } | |
439 | ||
440 | static void vmbus_resched(VMBus *vmbus); | |
441 | static void vmbus_msg_cb(void *data, int status); | |
442 | ||
443 | ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off, | |
444 | const struct iovec *iov, size_t iov_cnt) | |
445 | { | |
446 | GpadlIter iter; | |
447 | size_t i; | |
448 | ssize_t ret = 0; | |
449 | ||
450 | gpadl_iter_init(&iter, gpadl, chan->dev->dma_as, | |
451 | DMA_DIRECTION_FROM_DEVICE); | |
452 | gpadl_iter_start_io(&iter); | |
453 | gpadl_iter_seek(&iter, off); | |
454 | for (i = 0; i < iov_cnt; i++) { | |
455 | ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len); | |
456 | if (ret < 0) { | |
457 | goto out; | |
458 | } | |
459 | } | |
460 | out: | |
461 | gpadl_iter_end_io(&iter); | |
462 | return ret; | |
463 | } | |
464 | ||
465 | int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, | |
466 | unsigned iov_cnt, size_t len, size_t off) | |
467 | { | |
468 | int ret_cnt = 0, ret; | |
469 | unsigned i; | |
470 | QEMUSGList *sgl = &req->sgl; | |
471 | ScatterGatherEntry *sg = sgl->sg; | |
472 | ||
473 | for (i = 0; i < sgl->nsg; i++) { | |
474 | if (sg[i].len > off) { | |
475 | break; | |
476 | } | |
477 | off -= sg[i].len; | |
478 | } | |
479 | for (; len && i < sgl->nsg; i++) { | |
480 | dma_addr_t mlen = MIN(sg[i].len - off, len); | |
481 | dma_addr_t addr = sg[i].base + off; | |
482 | len -= mlen; | |
483 | off = 0; | |
484 | ||
485 | for (; mlen; ret_cnt++) { | |
486 | dma_addr_t l = mlen; | |
487 | dma_addr_t a = addr; | |
488 | ||
489 | if (ret_cnt == iov_cnt) { | |
490 | ret = -ENOBUFS; | |
491 | goto err; | |
492 | } | |
493 | ||
a1d4b0a3 PMD |
494 | iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir, |
495 | MEMTXATTRS_UNSPECIFIED); | |
0d71f708 JD |
496 | if (!l) { |
497 | ret = -EFAULT; | |
498 | goto err; | |
499 | } | |
500 | iov[ret_cnt].iov_len = l; | |
501 | addr += l; | |
502 | mlen -= l; | |
503 | } | |
504 | } | |
505 | ||
506 | return ret_cnt; | |
507 | err: | |
508 | vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0); | |
509 | return ret; | |
510 | } | |
511 | ||
512 | void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov, | |
513 | unsigned iov_cnt, size_t accessed) | |
514 | { | |
515 | QEMUSGList *sgl = &req->sgl; | |
516 | unsigned i; | |
517 | ||
518 | for (i = 0; i < iov_cnt; i++) { | |
519 | size_t acsd = MIN(accessed, iov[i].iov_len); | |
520 | dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd); | |
521 | accessed -= acsd; | |
522 | } | |
523 | } | |
524 | ||
525 | static const VMStateDescription vmstate_gpadl = { | |
526 | .name = "vmbus/gpadl", | |
527 | .version_id = 0, | |
528 | .minimum_version_id = 0, | |
529 | .fields = (VMStateField[]) { | |
530 | VMSTATE_UINT32(id, VMBusGpadl), | |
531 | VMSTATE_UINT32(child_relid, VMBusGpadl), | |
532 | VMSTATE_UINT32(num_gfns, VMBusGpadl), | |
533 | VMSTATE_UINT32(seen_gfns, VMBusGpadl), | |
534 | VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0, | |
535 | vmstate_info_uint64, uint64_t), | |
536 | VMSTATE_UINT8(state, VMBusGpadl), | |
537 | VMSTATE_END_OF_LIST() | |
538 | } | |
539 | }; | |
540 | ||
541 | /* | |
542 | * Wrap the index into a ring buffer of @len bytes. | |
543 | * @idx is assumed not to exceed twice the size of the ringbuffer, so only | |
544 | * single wraparound is considered. | |
545 | */ | |
/* Subtract @len from @idx once if @idx has reached or passed it. */
static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
{
    return idx >= len ? idx - len : idx;
}
553 | ||
554 | /* | |
555 | * Circular difference between two indices into a ring buffer of @len bytes. | |
556 | * @allow_catchup - whether @idx1 may catch up @idx2; e.g. read index may catch | |
557 | * up write index but not vice versa. | |
558 | */ | |
/*
 * Distance from @idx1 forward to @idx2, wrapped once into the ring; one
 * byte less when @allow_catchup is false.
 */
static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
                                    bool allow_catchup)
{
    uint32_t gap = idx2 + len - idx1;

    if (!allow_catchup) {
        gap -= 1;
    }
    return rb_idx_wrap(gap, len);
}
564 | ||
565 | static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf) | |
566 | { | |
567 | vmbus_ring_buffer *rb; | |
568 | dma_addr_t mlen = sizeof(*rb); | |
569 | ||
570 | rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen, | |
a1d4b0a3 | 571 | DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED); |
0d71f708 JD |
572 | if (mlen != sizeof(*rb)) { |
573 | dma_memory_unmap(ringbuf->as, rb, mlen, | |
574 | DMA_DIRECTION_FROM_DEVICE, 0); | |
575 | return NULL; | |
576 | } | |
577 | return rb; | |
578 | } | |
579 | ||
580 | static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf, | |
581 | vmbus_ring_buffer *rb, bool dirty) | |
582 | { | |
583 | assert(rb); | |
584 | ||
585 | dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE, | |
586 | dirty ? sizeof(*rb) : 0); | |
587 | } | |
588 | ||
589 | static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl, | |
590 | AddressSpace *as, DMADirection dir, | |
591 | uint32_t begin, uint32_t end) | |
592 | { | |
593 | ringbuf->as = as; | |
594 | ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS; | |
595 | ringbuf->base = (begin + 1) << TARGET_PAGE_BITS; | |
596 | ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS; | |
597 | gpadl_iter_init(&ringbuf->iter, gpadl, as, dir); | |
598 | } | |
599 | ||
600 | static int ringbufs_init(VMBusChannel *chan) | |
601 | { | |
602 | vmbus_ring_buffer *rb; | |
603 | VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf; | |
604 | VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf; | |
605 | ||
606 | if (chan->ringbuf_send_offset <= 1 || | |
607 | chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) { | |
608 | return -EINVAL; | |
609 | } | |
610 | ||
611 | ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as, | |
612 | DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset); | |
613 | ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as, | |
614 | DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset, | |
615 | chan->gpadl->num_gfns); | |
616 | send_ringbuf->wanted = 0; | |
617 | send_ringbuf->reserved = 0; | |
618 | ||
619 | rb = ringbuf_map_hdr(&recv_ringbuf->common); | |
620 | if (!rb) { | |
621 | return -EFAULT; | |
622 | } | |
623 | recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index; | |
624 | ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false); | |
625 | ||
626 | rb = ringbuf_map_hdr(&send_ringbuf->common); | |
627 | if (!rb) { | |
628 | return -EFAULT; | |
629 | } | |
630 | send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index; | |
631 | send_ringbuf->last_seen_rd_idx = rb->read_index; | |
632 | rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ; | |
633 | ringbuf_unmap_hdr(&send_ringbuf->common, rb, true); | |
634 | ||
635 | if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len || | |
636 | send_ringbuf->wr_idx >= send_ringbuf->common.len) { | |
637 | return -EOVERFLOW; | |
638 | } | |
639 | ||
640 | return 0; | |
641 | } | |
642 | ||
643 | /* | |
644 | * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping | |
645 | * around if needed. | |
646 | * @len is assumed not to exceed the size of the ringbuffer, so only single | |
647 | * wraparound is considered. | |
648 | */ | |
649 | static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len) | |
650 | { | |
651 | ssize_t ret1 = 0, ret2 = 0; | |
652 | uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off; | |
653 | ||
654 | if (len >= remain) { | |
655 | ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain); | |
656 | if (ret1 < 0) { | |
657 | return ret1; | |
658 | } | |
659 | gpadl_iter_seek(&ringbuf->iter, ringbuf->base); | |
660 | buf += remain; | |
661 | len -= remain; | |
662 | } | |
663 | ret2 = gpadl_iter_io(&ringbuf->iter, buf, len); | |
664 | if (ret2 < 0) { | |
665 | return ret2; | |
666 | } | |
667 | return ret1 + ret2; | |
668 | } | |
669 | ||
670 | /* | |
671 | * Position the circular iterator within @ringbuf to offset @new_off, wrapping | |
672 | * around if needed. | |
673 | * @new_off is assumed not to exceed twice the size of the ringbuffer, so only | |
674 | * single wraparound is considered. | |
675 | */ | |
676 | static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off) | |
677 | { | |
678 | gpadl_iter_seek(&ringbuf->iter, | |
679 | ringbuf->base + rb_idx_wrap(new_off, ringbuf->len)); | |
680 | } | |
681 | ||
682 | static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf) | |
683 | { | |
684 | return ringbuf->iter.off - ringbuf->base; | |
685 | } | |
686 | ||
687 | static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf) | |
688 | { | |
689 | gpadl_iter_start_io(&ringbuf->iter); | |
690 | } | |
691 | ||
692 | static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf) | |
693 | { | |
694 | gpadl_iter_end_io(&ringbuf->iter); | |
695 | } | |
696 | ||
697 | VMBusDevice *vmbus_channel_device(VMBusChannel *chan) | |
698 | { | |
699 | return chan->dev; | |
700 | } | |
701 | ||
702 | VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx) | |
703 | { | |
704 | if (chan_idx >= dev->num_channels) { | |
705 | return NULL; | |
706 | } | |
707 | return &dev->channels[chan_idx]; | |
708 | } | |
709 | ||
710 | uint32_t vmbus_channel_idx(VMBusChannel *chan) | |
711 | { | |
712 | return chan - chan->dev->channels; | |
713 | } | |
714 | ||
715 | void vmbus_channel_notify_host(VMBusChannel *chan) | |
716 | { | |
717 | event_notifier_set(&chan->notifier); | |
718 | } | |
719 | ||
720 | bool vmbus_channel_is_open(VMBusChannel *chan) | |
721 | { | |
722 | return chan->is_open; | |
723 | } | |
724 | ||
725 | /* | |
726 | * Notify the guest side about the data to work on in the channel ring buffer. | |
727 | * The notification is done by signaling a dedicated per-channel SynIC event | |
728 | * flag (more recent guests) or setting a bit in the interrupt page and firing | |
729 | * the VMBus SINT (older guests). | |
730 | */ | |
731 | static int vmbus_channel_notify_guest(VMBusChannel *chan) | |
732 | { | |
733 | int res = 0; | |
734 | unsigned long *int_map, mask; | |
735 | unsigned idx; | |
736 | hwaddr addr = chan->vmbus->int_page_gpa; | |
737 | hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0; | |
738 | ||
739 | trace_vmbus_channel_notify_guest(chan->id); | |
740 | ||
741 | if (!addr) { | |
742 | return hyperv_set_event_flag(chan->notify_route, chan->id); | |
743 | } | |
744 | ||
745 | int_map = cpu_physical_memory_map(addr, &len, 1); | |
746 | if (len != TARGET_PAGE_SIZE / 2) { | |
747 | res = -ENXIO; | |
748 | goto unmap; | |
749 | } | |
750 | ||
751 | idx = BIT_WORD(chan->id); | |
752 | mask = BIT_MASK(chan->id); | |
d73415a3 | 753 | if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) { |
0d71f708 JD |
754 | res = hyperv_sint_route_set_sint(chan->notify_route); |
755 | dirty = len; | |
756 | } | |
757 | ||
758 | unmap: | |
759 | cpu_physical_memory_unmap(int_map, len, 1, dirty); | |
760 | return res; | |
761 | } | |
762 | ||
763 | #define VMBUS_PKT_TRAILER sizeof(uint64_t) | |
764 | ||
765 | static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr, | |
766 | uint32_t desclen, uint32_t msglen) | |
767 | { | |
768 | hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) + | |
769 | DIV_ROUND_UP(desclen, sizeof(uint64_t)); | |
770 | hdr->len_qwords = hdr->offset_qwords + | |
771 | DIV_ROUND_UP(msglen, sizeof(uint64_t)); | |
772 | return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER; | |
773 | } | |
774 | ||
775 | /* | |
776 | * Simplified ring buffer operation with paired barriers annotations in the | |
777 | * producer and consumer loops: | |
778 | * | |
779 | * producer * consumer | |
780 | * ~~~~~~~~ * ~~~~~~~~ | |
781 | * write pending_send_sz * read write_index | |
782 | * smp_mb [A] * smp_mb [C] | |
783 | * read read_index * read packet | |
784 | * smp_mb [B] * read/write out-of-band data | |
785 | * read/write out-of-band data * smp_mb [B] | |
786 | * write packet * write read_index | |
787 | * smp_mb [C] * smp_mb [A] | |
788 | * write write_index * read pending_send_sz | |
789 | * smp_wmb [D] * smp_rmb [D] | |
790 | * write pending_send_sz * read write_index | |
791 | * ... * ... | |
792 | */ | |
793 | ||
794 | static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf) | |
795 | { | |
796 | /* don't trust guest data */ | |
797 | if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) { | |
798 | return 0; | |
799 | } | |
800 | return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx, | |
801 | ringbuf->common.len, false); | |
802 | } | |
803 | ||
/*
 * Publish what has been written to @chan's send ring since the last update:
 * store wr_idx into the write_index of the guest-visible header, give back
 * the written amount from the reservation, refresh last_seen_rd_idx, and
 * notify the guest unless it masked interrupts or has not yet consumed the
 * data that was there before this write.
 * Returns the number of bytes published (0 if nothing was written, in which
 * case guest memory is not touched), or -EFAULT if the header cannot be
 * mapped.
 */
804 | static ssize_t ringbuf_send_update_idx(VMBusChannel *chan) | |
805 | { | |
806 | VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; | |
807 | vmbus_ring_buffer *rb; | |
808 | uint32_t written; | |
809 | ||
810 | written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx, | |
811 | ringbuf->common.len, true); | |
812 | if (!written) { | |
813 | return 0; | |
814 | } | |
815 | ||
816 | rb = ringbuf_map_hdr(&ringbuf->common); | |
817 | if (!rb) { | |
818 | return -EFAULT; | |
819 | } | |
820 | ||
821 | ringbuf->reserved -= written; | |
822 | ||
823 | /* prevent reorder with the data operation and packet write */ | |
824 | smp_mb(); /* barrier pair [C] */ | |
825 | rb->write_index = ringbuf->wr_idx; | |
826 | ||
827 | /* | |
828 | * If the producer earlier indicated that it wants to be notified when the | |
829 | * consumer frees certain amount of space in the ring buffer, that amount | |
830 | * is reduced by the size of the completed write. | |
831 | */ | |
832 | if (ringbuf->wanted) { | |
833 | /* otherwise reservation would fail */ | |
/*
 * NOTE(review): the assert below demands wanted < written, yet the next
 * statement subtracts written from wanted, which would wrap around in that
 * case -- confirm the intended direction of the comparison.
 */
834 | assert(ringbuf->wanted < written); | |
835 | ringbuf->wanted -= written; | |
836 | /* prevent reorder with write_index write */ | |
837 | smp_wmb(); /* barrier pair [D] */ | |
838 | rb->pending_send_sz = ringbuf->wanted; | |
839 | } | |
840 | ||
841 | /* prevent reorder with write_index or pending_send_sz write */ | |
842 | smp_mb(); /* barrier pair [A] */ | |
843 | ringbuf->last_seen_rd_idx = rb->read_index; | |
844 | ||
845 | /* | |
846 | * The consumer may have missed the reduction of pending_send_sz and skip | |
847 | * notification, so re-check the blocking condition, and, if it's no longer | |
848 | * true, ensure processing another iteration by simulating consumer's | |
849 | * notification. | |
850 | */ | |
851 | if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) { | |
852 | vmbus_channel_notify_host(chan); | |
853 | } | |
854 | ||
855 | /* skip notification by consumer's request */ | |
856 | if (rb->interrupt_mask) { | |
857 | goto out; | |
858 | } | |
859 | ||
860 | /* | |
861 | * The consumer hasn't caught up with the producer's previous state so it's | |
862 | * not blocked. | |
863 | * (last_seen_rd_idx comes from the guest but it's safe to use w/o | |
864 | * validation here as it only affects notification.) | |
865 | */ | |
866 | if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx, | |
867 | ringbuf->common.len, true) > written) { | |
868 | goto out; | |
869 | } | |
870 | ||
/* the result of the guest notification is not checked */
871 | vmbus_channel_notify_guest(chan); | |
872 | out: | |
873 | ringbuf_unmap_hdr(&ringbuf->common, rb, true); | |
874 | ringbuf->last_wr_idx = ringbuf->wr_idx; | |
875 | return written; | |
876 | } | |
877 | ||
/*
 * Reserve room in @chan's send ring for one more packet with @desclen bytes
 * of descriptor and @msglen bytes of message, on top of what is reserved
 * already.
 * Returns 0 on success.  Returns -ENOSPC if the ring lacks the space; the
 * needed amount is then stored in pending_send_sz of the guest-visible
 * header and remembered in @wanted.  Returns -EFAULT if the header has to
 * be consulted but cannot be mapped.
 */
878 | int vmbus_channel_reserve(VMBusChannel *chan, | |
879 | uint32_t desclen, uint32_t msglen) | |
880 | { | |
881 | VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; | |
882 | vmbus_ring_buffer *rb = NULL; | |
883 | vmbus_packet_hdr hdr; | |
/* hdr is scratch only: it is filled in just to compute the packet size */
884 | uint32_t needed = ringbuf->reserved + | |
885 | vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen); | |
886 | ||
887 | /* avoid touching the guest memory if possible */ | |
888 | if (likely(needed <= ringbuf_send_avail(ringbuf))) { | |
889 | goto success; | |
890 | } | |
891 | ||
892 | rb = ringbuf_map_hdr(&ringbuf->common); | |
893 | if (!rb) { | |
894 | return -EFAULT; | |
895 | } | |
896 | ||
897 | /* fetch read index from guest memory and try again */ | |
898 | ringbuf->last_seen_rd_idx = rb->read_index; | |
899 | ||
900 | if (likely(needed <= ringbuf_send_avail(ringbuf))) { | |
901 | goto success; | |
902 | } | |
903 | ||
904 | rb->pending_send_sz = needed; | |
905 | ||
906 | /* | |
907 | * The consumer may have made progress and freed up some space before | |
908 | * seeing updated pending_send_sz, so re-read read_index (preventing | |
909 | * reorder with the pending_send_sz write) and try again. | |
910 | */ | |
911 | smp_mb(); /* barrier pair [A] */ | |
912 | ringbuf->last_seen_rd_idx = rb->read_index; | |
913 | ||
914 | if (needed > ringbuf_send_avail(ringbuf)) { | |
915 | goto out; | |
916 | } | |
917 | ||
918 | success: | |
919 | ringbuf->reserved = needed; | |
/* from here on needed == 0 stands for success (see the return below) */
920 | needed = 0; | |
921 | ||
922 | /* clear pending_send_sz if it was set */ | |
923 | if (ringbuf->wanted) { | |
924 | if (!rb) { | |
925 | rb = ringbuf_map_hdr(&ringbuf->common); | |
926 | if (!rb) { | |
927 | /* failure to clear pending_send_sz is non-fatal */ | |
928 | goto out; | |
929 | } | |
930 | } | |
931 | ||
932 | rb->pending_send_sz = 0; | |
933 | } | |
934 | ||
935 | /* prevent reorder of the following data operation with read_index read */ | |
936 | smp_mb(); /* barrier pair [B] */ | |
937 | ||
938 | out: | |
939 | if (rb) { | |
/*
 * NOTE(review): the header is reported dirty when wanted == needed, i.e.
 * when the remembered value is unchanged; one would expect the opposite
 * test -- confirm.
 */
940 | ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed); | |
941 | } | |
942 | ringbuf->wanted = needed; | |
943 | return needed ? -ENOSPC : 0; | |
944 | } | |
945 | ||
946 | ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type, | |
947 | void *desc, uint32_t desclen, | |
948 | void *msg, uint32_t msglen, | |
949 | bool need_comp, uint64_t transaction_id) | |
950 | { | |
951 | ssize_t ret = 0; | |
952 | vmbus_packet_hdr hdr; | |
953 | uint32_t totlen; | |
954 | VMBusSendRingBuf *ringbuf = &chan->send_ringbuf; | |
955 | ||
956 | if (!vmbus_channel_is_open(chan)) { | |
957 | return -EINVAL; | |
958 | } | |
959 | ||
960 | totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen); | |
961 | hdr.type = pkt_type; | |
962 | hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0; | |
963 | hdr.transaction_id = transaction_id; | |
964 | ||
965 | assert(totlen <= ringbuf->reserved); | |
966 | ||
967 | ringbuf_start_io(&ringbuf->common); | |
968 | ringbuf_seek(&ringbuf->common, ringbuf->wr_idx); | |
969 | ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)); | |
970 | if (ret < 0) { | |
971 | goto out; | |
972 | } | |
973 | if (desclen) { | |
974 | assert(desc); | |
975 | ret = ringbuf_io(&ringbuf->common, desc, desclen); | |
976 | if (ret < 0) { | |
977 | goto out; | |
978 | } | |
979 | ringbuf_seek(&ringbuf->common, | |
980 | ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t)); | |
981 | } | |
982 | ret = ringbuf_io(&ringbuf->common, msg, msglen); | |
983 | if (ret < 0) { | |
984 | goto out; | |
985 | } | |
986 | ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen); | |
987 | ringbuf->wr_idx = ringbuf_tell(&ringbuf->common); | |
988 | ret = 0; | |
989 | out: | |
990 | ringbuf_end_io(&ringbuf->common); | |
991 | if (ret) { | |
992 | return ret; | |
993 | } | |
994 | return ringbuf_send_update_idx(chan); | |
995 | } | |
996 | ||
997 | ssize_t vmbus_channel_send_completion(VMBusChanReq *req, | |
998 | void *msg, uint32_t msglen) | |
999 | { | |
1000 | assert(req->need_comp); | |
1001 | return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0, | |
1002 | msg, msglen, false, req->transaction_id); | |
1003 | } | |
1004 | ||
1005 | static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev, | |
1006 | VMBusRingBufCommon *ringbuf, uint32_t len) | |
1007 | { | |
1008 | int ret; | |
1009 | vmbus_pkt_gpa_direct hdr; | |
1010 | hwaddr curaddr = 0; | |
1011 | hwaddr curlen = 0; | |
1012 | int num; | |
1013 | ||
1014 | if (len < sizeof(hdr)) { | |
1015 | return -EIO; | |
1016 | } | |
1017 | ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr)); | |
1018 | if (ret < 0) { | |
1019 | return ret; | |
1020 | } | |
1021 | len -= sizeof(hdr); | |
1022 | ||
1023 | num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t); | |
1024 | if (num < 0) { | |
1025 | return -EIO; | |
1026 | } | |
1027 | qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as); | |
1028 | ||
1029 | for (; hdr.rangecount; hdr.rangecount--) { | |
1030 | vmbus_gpa_range range; | |
1031 | ||
1032 | if (len < sizeof(range)) { | |
1033 | goto eio; | |
1034 | } | |
1035 | ret = ringbuf_io(ringbuf, &range, sizeof(range)); | |
1036 | if (ret < 0) { | |
1037 | goto err; | |
1038 | } | |
1039 | len -= sizeof(range); | |
1040 | ||
1041 | if (range.byte_offset & TARGET_PAGE_MASK) { | |
1042 | goto eio; | |
1043 | } | |
1044 | ||
1045 | for (; range.byte_count; range.byte_offset = 0) { | |
1046 | uint64_t paddr; | |
1047 | uint32_t plen = MIN(range.byte_count, | |
1048 | TARGET_PAGE_SIZE - range.byte_offset); | |
1049 | ||
1050 | if (len < sizeof(uint64_t)) { | |
1051 | goto eio; | |
1052 | } | |
1053 | ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr)); | |
1054 | if (ret < 0) { | |
1055 | goto err; | |
1056 | } | |
1057 | len -= sizeof(uint64_t); | |
1058 | paddr <<= TARGET_PAGE_BITS; | |
1059 | paddr |= range.byte_offset; | |
1060 | range.byte_count -= plen; | |
1061 | ||
1062 | if (curaddr + curlen == paddr) { | |
1063 | /* consecutive fragments - join */ | |
1064 | curlen += plen; | |
1065 | } else { | |
1066 | if (curlen) { | |
1067 | qemu_sglist_add(sgl, curaddr, curlen); | |
1068 | } | |
1069 | ||
1070 | curaddr = paddr; | |
1071 | curlen = plen; | |
1072 | } | |
1073 | } | |
1074 | } | |
1075 | ||
1076 | if (curlen) { | |
1077 | qemu_sglist_add(sgl, curaddr, curlen); | |
1078 | } | |
1079 | ||
1080 | return 0; | |
1081 | eio: | |
1082 | ret = -EIO; | |
1083 | err: | |
1084 | qemu_sglist_destroy(sgl); | |
1085 | return ret; | |
1086 | } | |
1087 | ||
1088 | static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan, | |
1089 | uint32_t size, uint16_t pkt_type, | |
1090 | uint32_t msglen, uint64_t transaction_id, | |
1091 | bool need_comp) | |
1092 | { | |
1093 | VMBusChanReq *req; | |
1094 | uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg)); | |
1095 | uint32_t totlen = msgoff + msglen; | |
1096 | ||
1097 | req = g_malloc0(totlen); | |
1098 | req->chan = chan; | |
1099 | req->pkt_type = pkt_type; | |
1100 | req->msg = (void *)req + msgoff; | |
1101 | req->msglen = msglen; | |
1102 | req->transaction_id = transaction_id; | |
1103 | req->need_comp = need_comp; | |
1104 | return req; | |
1105 | } | |
1106 | ||
1107 | int vmbus_channel_recv_start(VMBusChannel *chan) | |
1108 | { | |
1109 | VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; | |
1110 | vmbus_ring_buffer *rb; | |
1111 | ||
1112 | rb = ringbuf_map_hdr(&ringbuf->common); | |
1113 | if (!rb) { | |
1114 | return -EFAULT; | |
1115 | } | |
1116 | ringbuf->last_seen_wr_idx = rb->write_index; | |
1117 | ringbuf_unmap_hdr(&ringbuf->common, rb, false); | |
1118 | ||
1119 | if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) { | |
1120 | return -EOVERFLOW; | |
1121 | } | |
1122 | ||
1123 | /* prevent reorder of the following data operation with write_index read */ | |
1124 | smp_mb(); /* barrier pair [C] */ | |
1125 | return 0; | |
1126 | } | |
1127 | ||
1128 | void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size) | |
1129 | { | |
1130 | VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; | |
1131 | vmbus_packet_hdr hdr = {}; | |
1132 | VMBusChanReq *req; | |
1133 | uint32_t avail; | |
1134 | uint32_t totlen, pktlen, msglen, msgoff, desclen; | |
1135 | ||
1136 | assert(size >= sizeof(*req)); | |
1137 | ||
1138 | /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */ | |
1139 | avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx, | |
1140 | ringbuf->common.len, true); | |
1141 | if (avail < sizeof(hdr)) { | |
1142 | return NULL; | |
1143 | } | |
1144 | ||
1145 | ringbuf_seek(&ringbuf->common, ringbuf->rd_idx); | |
1146 | if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) { | |
1147 | return NULL; | |
1148 | } | |
1149 | ||
1150 | pktlen = hdr.len_qwords * sizeof(uint64_t); | |
1151 | totlen = pktlen + VMBUS_PKT_TRAILER; | |
1152 | if (totlen > avail) { | |
1153 | return NULL; | |
1154 | } | |
1155 | ||
1156 | msgoff = hdr.offset_qwords * sizeof(uint64_t); | |
1157 | if (msgoff > pktlen || msgoff < sizeof(hdr)) { | |
1158 | error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen); | |
1159 | return NULL; | |
1160 | } | |
1161 | ||
1162 | msglen = pktlen - msgoff; | |
1163 | ||
1164 | req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id, | |
1165 | hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION); | |
1166 | ||
1167 | switch (hdr.type) { | |
1168 | case VMBUS_PACKET_DATA_USING_GPA_DIRECT: | |
1169 | desclen = msgoff - sizeof(hdr); | |
1170 | if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common, | |
1171 | desclen) < 0) { | |
1172 | error_report("%s: failed to convert GPA ranges to SGL", __func__); | |
1173 | goto free_req; | |
1174 | } | |
1175 | break; | |
1176 | case VMBUS_PACKET_DATA_INBAND: | |
1177 | case VMBUS_PACKET_COMP: | |
1178 | break; | |
1179 | default: | |
1180 | error_report("%s: unexpected msg type: %x", __func__, hdr.type); | |
1181 | goto free_req; | |
1182 | } | |
1183 | ||
1184 | ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff); | |
1185 | if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) { | |
1186 | goto free_req; | |
1187 | } | |
1188 | ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen); | |
1189 | ||
1190 | return req; | |
1191 | free_req: | |
1192 | vmbus_free_req(req); | |
1193 | return NULL; | |
1194 | } | |
1195 | ||
1196 | void vmbus_channel_recv_pop(VMBusChannel *chan) | |
1197 | { | |
1198 | VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; | |
1199 | ringbuf->rd_idx = ringbuf_tell(&ringbuf->common); | |
1200 | } | |
1201 | ||
1202 | ssize_t vmbus_channel_recv_done(VMBusChannel *chan) | |
1203 | { | |
1204 | VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; | |
1205 | vmbus_ring_buffer *rb; | |
1206 | uint32_t read; | |
1207 | ||
1208 | read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx, | |
1209 | ringbuf->common.len, true); | |
1210 | if (!read) { | |
1211 | return 0; | |
1212 | } | |
1213 | ||
1214 | rb = ringbuf_map_hdr(&ringbuf->common); | |
1215 | if (!rb) { | |
1216 | return -EFAULT; | |
1217 | } | |
1218 | ||
1219 | /* prevent reorder with the data operation and packet read */ | |
1220 | smp_mb(); /* barrier pair [B] */ | |
1221 | rb->read_index = ringbuf->rd_idx; | |
1222 | ||
1223 | /* prevent reorder of the following pending_send_sz read */ | |
1224 | smp_mb(); /* barrier pair [A] */ | |
1225 | ||
1226 | if (rb->interrupt_mask) { | |
1227 | goto out; | |
1228 | } | |
1229 | ||
1230 | if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) { | |
1231 | uint32_t wr_idx, wr_avail; | |
1232 | uint32_t wanted = rb->pending_send_sz; | |
1233 | ||
1234 | if (!wanted) { | |
1235 | goto out; | |
1236 | } | |
1237 | ||
1238 | /* prevent reorder with pending_send_sz read */ | |
1239 | smp_rmb(); /* barrier pair [D] */ | |
1240 | wr_idx = rb->write_index; | |
1241 | ||
1242 | wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len, | |
1243 | true); | |
1244 | ||
1245 | /* the producer wasn't blocked on the consumer state */ | |
1246 | if (wr_avail >= read + wanted) { | |
1247 | goto out; | |
1248 | } | |
1249 | /* there's not enough space for the producer to make progress */ | |
1250 | if (wr_avail < wanted) { | |
1251 | goto out; | |
1252 | } | |
1253 | } | |
1254 | ||
1255 | vmbus_channel_notify_guest(chan); | |
1256 | out: | |
1257 | ringbuf_unmap_hdr(&ringbuf->common, rb, true); | |
1258 | ringbuf->last_rd_idx = ringbuf->rd_idx; | |
1259 | return read; | |
1260 | } | |
1261 | ||
1262 | void vmbus_free_req(void *req) | |
1263 | { | |
1264 | VMBusChanReq *r = req; | |
1265 | ||
1266 | if (!req) { | |
1267 | return; | |
1268 | } | |
1269 | ||
1270 | if (r->sgl.dev) { | |
1271 | qemu_sglist_destroy(&r->sgl); | |
1272 | } | |
1273 | g_free(req); | |
1274 | } | |
1275 | ||
1276 | static void channel_event_cb(EventNotifier *e) | |
1277 | { | |
1278 | VMBusChannel *chan = container_of(e, VMBusChannel, notifier); | |
1279 | if (event_notifier_test_and_clear(e)) { | |
1280 | /* | |
1281 | * All receives are supposed to happen within the device worker, so | |
1282 | * bracket it with ringbuf_start/end_io on the receive ringbuffer, and | |
1283 | * potentially reuse the cached mapping throughout the worker. | |
1284 | * Can't do this for sends as they may happen outside the device | |
1285 | * worker. | |
1286 | */ | |
1287 | VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf; | |
1288 | ringbuf_start_io(&ringbuf->common); | |
1289 | chan->notify_cb(chan); | |
1290 | ringbuf_end_io(&ringbuf->common); | |
1291 | ||
1292 | } | |
1293 | } | |
1294 | ||
1295 | static int alloc_chan_id(VMBus *vmbus) | |
1296 | { | |
1297 | int ret; | |
1298 | ||
1299 | ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0); | |
1300 | if (ret == VMBUS_CHANID_COUNT) { | |
1301 | return -ENOMEM; | |
1302 | } | |
1303 | return ret + VMBUS_FIRST_CHANID; | |
1304 | } | |
1305 | ||
1306 | static int register_chan_id(VMBusChannel *chan) | |
1307 | { | |
1308 | return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID, | |
1309 | chan->vmbus->chanid_bitmap) ? -EEXIST : 0; | |
1310 | } | |
1311 | ||
1312 | static void unregister_chan_id(VMBusChannel *chan) | |
1313 | { | |
1314 | clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap); | |
1315 | } | |
1316 | ||
1317 | static uint32_t chan_connection_id(VMBusChannel *chan) | |
1318 | { | |
1319 | return VMBUS_CHAN_CONNECTION_OFFSET + chan->id; | |
1320 | } | |
1321 | ||
1322 | static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc, | |
1323 | VMBusChannel *chan, uint16_t idx, Error **errp) | |
1324 | { | |
1325 | int res; | |
1326 | ||
1327 | chan->dev = dev; | |
1328 | chan->notify_cb = vdc->chan_notify_cb; | |
1329 | chan->subchan_idx = idx; | |
1330 | chan->vmbus = vmbus; | |
1331 | ||
1332 | res = alloc_chan_id(vmbus); | |
1333 | if (res < 0) { | |
1334 | error_setg(errp, "no spare channel id"); | |
1335 | return; | |
1336 | } | |
1337 | chan->id = res; | |
1338 | register_chan_id(chan); | |
1339 | ||
1340 | /* | |
1341 | * The guest drivers depend on the device subchannels (idx #1+) to be | |
1342 | * offered after the primary channel (idx #0) of that device. To ensure | |
1343 | * that, record the channels on the channel list in the order they appear | |
1344 | * within the device. | |
1345 | */ | |
1346 | QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link); | |
1347 | } | |
1348 | ||
1349 | static void deinit_channel(VMBusChannel *chan) | |
1350 | { | |
1351 | assert(chan->state == VMCHAN_INIT); | |
1352 | QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link); | |
1353 | unregister_chan_id(chan); | |
1354 | } | |
1355 | ||
1356 | static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp) | |
1357 | { | |
1358 | uint16_t i; | |
1359 | VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev); | |
1360 | Error *err = NULL; | |
1361 | ||
1362 | dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1; | |
1363 | if (dev->num_channels < 1) { | |
dcfe4805 MA |
1364 | error_setg(errp, "invalid #channels: %u", dev->num_channels); |
1365 | return; | |
0d71f708 JD |
1366 | } |
1367 | ||
1368 | dev->channels = g_new0(VMBusChannel, dev->num_channels); | |
1369 | for (i = 0; i < dev->num_channels; i++) { | |
1370 | init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err); | |
1371 | if (err) { | |
1372 | goto err_init; | |
1373 | } | |
1374 | } | |
1375 | ||
1376 | return; | |
1377 | ||
1378 | err_init: | |
1379 | while (i--) { | |
1380 | deinit_channel(&dev->channels[i]); | |
1381 | } | |
0d71f708 JD |
1382 | error_propagate(errp, err); |
1383 | } | |
1384 | ||
1385 | static void free_channels(VMBusDevice *dev) | |
1386 | { | |
1387 | uint16_t i; | |
1388 | for (i = 0; i < dev->num_channels; i++) { | |
1389 | deinit_channel(&dev->channels[i]); | |
1390 | } | |
1391 | g_free(dev->channels); | |
1392 | } | |
1393 | ||
1394 | static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index) | |
1395 | { | |
1396 | VMBusChannel *chan; | |
1397 | ||
1398 | if (vp_index == vmbus->target_vp) { | |
1399 | hyperv_sint_route_ref(vmbus->sint_route); | |
1400 | return vmbus->sint_route; | |
1401 | } | |
1402 | ||
1403 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
1404 | if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) { | |
1405 | hyperv_sint_route_ref(chan->notify_route); | |
1406 | return chan->notify_route; | |
1407 | } | |
1408 | } | |
1409 | ||
1410 | return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL); | |
1411 | } | |
1412 | ||
1413 | static void open_channel(VMBusChannel *chan) | |
1414 | { | |
1415 | VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); | |
1416 | ||
1417 | chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl); | |
1418 | if (!chan->gpadl) { | |
1419 | return; | |
1420 | } | |
1421 | ||
1422 | if (ringbufs_init(chan)) { | |
1423 | goto put_gpadl; | |
1424 | } | |
1425 | ||
1426 | if (event_notifier_init(&chan->notifier, 0)) { | |
1427 | goto put_gpadl; | |
1428 | } | |
1429 | ||
1430 | event_notifier_set_handler(&chan->notifier, channel_event_cb); | |
1431 | ||
1432 | if (hyperv_set_event_flag_handler(chan_connection_id(chan), | |
1433 | &chan->notifier)) { | |
1434 | goto cleanup_notifier; | |
1435 | } | |
1436 | ||
1437 | chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp); | |
1438 | if (!chan->notify_route) { | |
1439 | goto clear_event_flag_handler; | |
1440 | } | |
1441 | ||
1442 | if (vdc->open_channel && vdc->open_channel(chan)) { | |
1443 | goto unref_sint_route; | |
1444 | } | |
1445 | ||
1446 | chan->is_open = true; | |
1447 | return; | |
1448 | ||
1449 | unref_sint_route: | |
1450 | hyperv_sint_route_unref(chan->notify_route); | |
1451 | clear_event_flag_handler: | |
1452 | hyperv_set_event_flag_handler(chan_connection_id(chan), NULL); | |
1453 | cleanup_notifier: | |
1454 | event_notifier_set_handler(&chan->notifier, NULL); | |
1455 | event_notifier_cleanup(&chan->notifier); | |
1456 | put_gpadl: | |
1457 | vmbus_put_gpadl(chan->gpadl); | |
1458 | } | |
1459 | ||
1460 | static void close_channel(VMBusChannel *chan) | |
1461 | { | |
1462 | VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); | |
1463 | ||
1464 | if (!chan->is_open) { | |
1465 | return; | |
1466 | } | |
1467 | ||
1468 | if (vdc->close_channel) { | |
1469 | vdc->close_channel(chan); | |
1470 | } | |
1471 | ||
1472 | hyperv_sint_route_unref(chan->notify_route); | |
1473 | hyperv_set_event_flag_handler(chan_connection_id(chan), NULL); | |
1474 | event_notifier_set_handler(&chan->notifier, NULL); | |
1475 | event_notifier_cleanup(&chan->notifier); | |
1476 | vmbus_put_gpadl(chan->gpadl); | |
1477 | chan->is_open = false; | |
1478 | } | |
1479 | ||
1480 | static int channel_post_load(void *opaque, int version_id) | |
1481 | { | |
1482 | VMBusChannel *chan = opaque; | |
1483 | ||
1484 | return register_chan_id(chan); | |
1485 | } | |
1486 | ||
1487 | static const VMStateDescription vmstate_channel = { | |
1488 | .name = "vmbus/channel", | |
1489 | .version_id = 0, | |
1490 | .minimum_version_id = 0, | |
1491 | .post_load = channel_post_load, | |
1492 | .fields = (VMStateField[]) { | |
1493 | VMSTATE_UINT32(id, VMBusChannel), | |
1494 | VMSTATE_UINT16(subchan_idx, VMBusChannel), | |
1495 | VMSTATE_UINT32(open_id, VMBusChannel), | |
1496 | VMSTATE_UINT32(target_vp, VMBusChannel), | |
1497 | VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel), | |
1498 | VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel), | |
1499 | VMSTATE_UINT8(offer_state, VMBusChannel), | |
1500 | VMSTATE_UINT8(state, VMBusChannel), | |
1501 | VMSTATE_END_OF_LIST() | |
1502 | } | |
1503 | }; | |
1504 | ||
1505 | static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id) | |
1506 | { | |
1507 | VMBusChannel *chan; | |
1508 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
1509 | if (chan->id == id) { | |
1510 | return chan; | |
1511 | } | |
1512 | } | |
1513 | return NULL; | |
1514 | } | |
1515 | ||
1516 | static int enqueue_incoming_message(VMBus *vmbus, | |
1517 | const struct hyperv_post_message_input *msg) | |
1518 | { | |
1519 | int ret = 0; | |
1520 | uint8_t idx, prev_size; | |
1521 | ||
1522 | qemu_mutex_lock(&vmbus->rx_queue_lock); | |
1523 | ||
1524 | if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) { | |
1525 | ret = -ENOBUFS; | |
1526 | goto out; | |
1527 | } | |
1528 | ||
1529 | prev_size = vmbus->rx_queue_size; | |
1530 | idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN; | |
1531 | memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg)); | |
1532 | vmbus->rx_queue_size++; | |
1533 | ||
1534 | /* only need to resched if the queue was empty before */ | |
1535 | if (!prev_size) { | |
1536 | vmbus_resched(vmbus); | |
1537 | } | |
1538 | out: | |
1539 | qemu_mutex_unlock(&vmbus->rx_queue_lock); | |
1540 | return ret; | |
1541 | } | |
1542 | ||
1543 | static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg, | |
1544 | void *data) | |
1545 | { | |
1546 | VMBus *vmbus = data; | |
1547 | struct vmbus_message_header *vmbus_msg; | |
1548 | ||
1549 | if (msg->message_type != HV_MESSAGE_VMBUS) { | |
1550 | return HV_STATUS_INVALID_HYPERCALL_INPUT; | |
1551 | } | |
1552 | ||
1553 | if (msg->payload_size < sizeof(struct vmbus_message_header)) { | |
1554 | return HV_STATUS_INVALID_HYPERCALL_INPUT; | |
1555 | } | |
1556 | ||
1557 | vmbus_msg = (struct vmbus_message_header *)msg->payload; | |
1558 | ||
1559 | trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size); | |
1560 | ||
1561 | if (vmbus_msg->message_type == VMBUS_MSG_INVALID || | |
1562 | vmbus_msg->message_type >= VMBUS_MSG_COUNT) { | |
1563 | error_report("vmbus: unknown message type %#x", | |
1564 | vmbus_msg->message_type); | |
1565 | return HV_STATUS_INVALID_HYPERCALL_INPUT; | |
1566 | } | |
1567 | ||
1568 | if (enqueue_incoming_message(vmbus, msg)) { | |
1569 | return HV_STATUS_INSUFFICIENT_BUFFERS; | |
1570 | } | |
1571 | return HV_STATUS_SUCCESS; | |
1572 | } | |
1573 | ||
1574 | static bool vmbus_initialized(VMBus *vmbus) | |
1575 | { | |
1576 | return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT; | |
1577 | } | |
1578 | ||
1579 | static void vmbus_reset_all(VMBus *vmbus) | |
1580 | { | |
8cadd251 | 1581 | bus_cold_reset(BUS(vmbus)); |
0d71f708 JD |
1582 | } |
1583 | ||
1584 | static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen) | |
1585 | { | |
1586 | int ret; | |
1587 | struct hyperv_message msg = { | |
1588 | .header.message_type = HV_MESSAGE_VMBUS, | |
1589 | }; | |
1590 | ||
1591 | assert(!vmbus->msg_in_progress); | |
1592 | assert(msglen <= sizeof(msg.payload)); | |
1593 | assert(msglen >= sizeof(struct vmbus_message_header)); | |
1594 | ||
1595 | vmbus->msg_in_progress = true; | |
1596 | ||
1597 | trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type, | |
1598 | msglen); | |
1599 | ||
1600 | memcpy(msg.payload, msgdata, msglen); | |
1601 | msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN); | |
1602 | ||
1603 | ret = hyperv_post_msg(vmbus->sint_route, &msg); | |
1604 | if (ret == 0 || ret == -EAGAIN) { | |
1605 | return; | |
1606 | } | |
1607 | ||
1608 | error_report("message delivery fatal failure: %d; aborting vmbus", ret); | |
1609 | vmbus_reset_all(vmbus); | |
1610 | } | |
1611 | ||
1612 | static int vmbus_init(VMBus *vmbus) | |
1613 | { | |
1614 | if (vmbus->target_vp != (uint32_t)-1) { | |
1615 | vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT, | |
1616 | vmbus_msg_cb, vmbus); | |
1617 | if (!vmbus->sint_route) { | |
1618 | error_report("failed to set up SINT route"); | |
1619 | return -ENOMEM; | |
1620 | } | |
1621 | } | |
1622 | return 0; | |
1623 | } | |
1624 | ||
1625 | static void vmbus_deinit(VMBus *vmbus) | |
1626 | { | |
1627 | VMBusGpadl *gpadl, *tmp_gpadl; | |
1628 | VMBusChannel *chan; | |
1629 | ||
1630 | QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) { | |
1631 | if (gpadl->state == VMGPADL_TORNDOWN) { | |
1632 | continue; | |
1633 | } | |
1634 | vmbus_put_gpadl(gpadl); | |
1635 | } | |
1636 | ||
1637 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
1638 | chan->offer_state = VMOFFER_INIT; | |
1639 | } | |
1640 | ||
1641 | hyperv_sint_route_unref(vmbus->sint_route); | |
1642 | vmbus->sint_route = NULL; | |
1643 | vmbus->int_page_gpa = 0; | |
1644 | vmbus->target_vp = (uint32_t)-1; | |
1645 | vmbus->version = 0; | |
1646 | vmbus->state = VMBUS_LISTEN; | |
1647 | vmbus->msg_in_progress = false; | |
1648 | } | |
1649 | ||
1650 | static void handle_initiate_contact(VMBus *vmbus, | |
1651 | vmbus_message_initiate_contact *msg, | |
1652 | uint32_t msglen) | |
1653 | { | |
1654 | if (msglen < sizeof(*msg)) { | |
1655 | return; | |
1656 | } | |
1657 | ||
1658 | trace_vmbus_initiate_contact(msg->version_requested >> 16, | |
1659 | msg->version_requested & 0xffff, | |
1660 | msg->target_vcpu, msg->monitor_page1, | |
1661 | msg->monitor_page2, msg->interrupt_page); | |
1662 | ||
1663 | /* | |
1664 | * Reset vmbus on INITIATE_CONTACT regardless of its previous state. | |
1665 | * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down | |
1666 | * before handing over to OS loader. | |
1667 | */ | |
1668 | vmbus_reset_all(vmbus); | |
1669 | ||
1670 | vmbus->target_vp = msg->target_vcpu; | |
1671 | vmbus->version = msg->version_requested; | |
1672 | if (vmbus->version < VMBUS_VERSION_WIN8) { | |
1673 | /* linux passes interrupt page even when it doesn't need it */ | |
1674 | vmbus->int_page_gpa = msg->interrupt_page; | |
1675 | } | |
1676 | vmbus->state = VMBUS_HANDSHAKE; | |
1677 | ||
1678 | if (vmbus_init(vmbus)) { | |
1679 | error_report("failed to init vmbus; aborting"); | |
1680 | vmbus_deinit(vmbus); | |
1681 | return; | |
1682 | } | |
1683 | } | |
1684 | ||
1685 | static void send_handshake(VMBus *vmbus) | |
1686 | { | |
1687 | struct vmbus_message_version_response msg = { | |
1688 | .header.message_type = VMBUS_MSG_VERSION_RESPONSE, | |
1689 | .version_supported = vmbus_initialized(vmbus), | |
1690 | }; | |
1691 | ||
1692 | post_msg(vmbus, &msg, sizeof(msg)); | |
1693 | } | |
1694 | ||
1695 | static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen) | |
1696 | { | |
1697 | VMBusChannel *chan; | |
1698 | ||
1699 | if (!vmbus_initialized(vmbus)) { | |
1700 | return; | |
1701 | } | |
1702 | ||
1703 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
1704 | if (chan->offer_state == VMOFFER_INIT) { | |
1705 | chan->offer_state = VMOFFER_SENDING; | |
1706 | break; | |
1707 | } | |
1708 | } | |
1709 | ||
1710 | vmbus->state = VMBUS_OFFER; | |
1711 | } | |
1712 | ||
1713 | static void send_offer(VMBus *vmbus) | |
1714 | { | |
1715 | VMBusChannel *chan; | |
1716 | struct vmbus_message_header alloffers_msg = { | |
1717 | .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED, | |
1718 | }; | |
1719 | ||
1720 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
1721 | if (chan->offer_state == VMOFFER_SENDING) { | |
1722 | VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev); | |
1723 | /* Hyper-V wants LE GUIDs */ | |
1724 | QemuUUID classid = qemu_uuid_bswap(vdc->classid); | |
1725 | QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid); | |
1726 | struct vmbus_message_offer_channel msg = { | |
1727 | .header.message_type = VMBUS_MSG_OFFERCHANNEL, | |
1728 | .child_relid = chan->id, | |
1729 | .connection_id = chan_connection_id(chan), | |
1730 | .channel_flags = vdc->channel_flags, | |
1731 | .mmio_size_mb = vdc->mmio_size_mb, | |
1732 | .sub_channel_index = vmbus_channel_idx(chan), | |
1733 | .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED, | |
1734 | }; | |
1735 | ||
1736 | memcpy(msg.type_uuid, &classid, sizeof(classid)); | |
1737 | memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid)); | |
1738 | ||
1739 | trace_vmbus_send_offer(chan->id, chan->dev); | |
1740 | ||
1741 | post_msg(vmbus, &msg, sizeof(msg)); | |
1742 | return; | |
1743 | } | |
1744 | } | |
1745 | ||
1746 | /* no more offers, send terminator message */ | |
1747 | trace_vmbus_terminate_offers(); | |
1748 | post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg)); | |
1749 | } | |
1750 | ||
1751 | static bool complete_offer(VMBus *vmbus) | |
1752 | { | |
1753 | VMBusChannel *chan; | |
1754 | ||
1755 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
1756 | if (chan->offer_state == VMOFFER_SENDING) { | |
1757 | chan->offer_state = VMOFFER_SENT; | |
1758 | goto next_offer; | |
1759 | } | |
1760 | } | |
1761 | /* | |
1762 | * no transitioning channels found so this is completing the terminator | |
1763 | * message, and vmbus can move to the next state | |
1764 | */ | |
1765 | return true; | |
1766 | ||
1767 | next_offer: | |
1768 | /* try to mark another channel for offering */ | |
1769 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
1770 | if (chan->offer_state == VMOFFER_INIT) { | |
1771 | chan->offer_state = VMOFFER_SENDING; | |
1772 | break; | |
1773 | } | |
1774 | } | |
1775 | /* | |
1776 | * if an offer has been sent there are more offers or the terminator yet to | |
1777 | * send, so no state transition for vmbus | |
1778 | */ | |
1779 | return false; | |
1780 | } | |
1781 | ||
1782 | ||
1783 | static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg, | |
1784 | uint32_t msglen) | |
1785 | { | |
1786 | VMBusGpadl *gpadl; | |
1787 | uint32_t num_gfns, i; | |
1788 | ||
1789 | /* must include at least one gpa range */ | |
1790 | if (msglen < sizeof(*msg) + sizeof(msg->range[0]) || | |
1791 | !vmbus_initialized(vmbus)) { | |
1792 | return; | |
1793 | } | |
1794 | ||
1795 | num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) / | |
1796 | sizeof(msg->range[0].pfn_array[0]); | |
1797 | ||
1798 | trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns); | |
1799 | ||
1800 | /* | |
1801 | * In theory the GPADL_HEADER message can define a GPADL with multiple GPA | |
1802 | * ranges each with arbitrary size and alignment. However in practice only | |
1803 | * single-range page-aligned GPADLs have been observed so just ignore | |
1804 | * anything else and simplify things greatly. | |
1805 | */ | |
1806 | if (msg->rangecount != 1 || msg->range[0].byte_offset || | |
1807 | (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) { | |
1808 | return; | |
1809 | } | |
1810 | ||
1811 | /* ignore requests to create already existing GPADLs */ | |
1812 | if (find_gpadl(vmbus, msg->gpadl_id)) { | |
1813 | return; | |
1814 | } | |
1815 | ||
1816 | gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns); | |
1817 | ||
1818 | for (i = 0; i < num_gfns && | |
1819 | (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen; | |
1820 | i++) { | |
1821 | gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i]; | |
1822 | } | |
1823 | ||
1824 | if (gpadl_full(gpadl)) { | |
1825 | vmbus->state = VMBUS_CREATE_GPADL; | |
1826 | } | |
1827 | } | |
1828 | ||
1829 | static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg, | |
1830 | uint32_t msglen) | |
1831 | { | |
1832 | VMBusGpadl *gpadl; | |
1833 | uint32_t num_gfns_left, i; | |
1834 | ||
1835 | if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { | |
1836 | return; | |
1837 | } | |
1838 | ||
1839 | trace_vmbus_gpadl_body(msg->gpadl_id); | |
1840 | ||
1841 | gpadl = find_gpadl(vmbus, msg->gpadl_id); | |
1842 | if (!gpadl) { | |
1843 | return; | |
1844 | } | |
1845 | ||
1846 | num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns; | |
1847 | assert(num_gfns_left); | |
1848 | ||
1849 | for (i = 0; i < num_gfns_left && | |
1850 | (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) { | |
1851 | gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i]; | |
1852 | } | |
1853 | ||
1854 | if (gpadl_full(gpadl)) { | |
1855 | vmbus->state = VMBUS_CREATE_GPADL; | |
1856 | } | |
1857 | } | |
1858 | ||
1859 | static void send_create_gpadl(VMBus *vmbus) | |
1860 | { | |
1861 | VMBusGpadl *gpadl; | |
1862 | ||
1863 | QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { | |
1864 | if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) { | |
1865 | struct vmbus_message_gpadl_created msg = { | |
1866 | .header.message_type = VMBUS_MSG_GPADL_CREATED, | |
1867 | .gpadl_id = gpadl->id, | |
1868 | .child_relid = gpadl->child_relid, | |
1869 | }; | |
1870 | ||
1871 | trace_vmbus_gpadl_created(gpadl->id); | |
1872 | post_msg(vmbus, &msg, sizeof(msg)); | |
1873 | return; | |
1874 | } | |
1875 | } | |
1876 | ||
1877 | assert(false); | |
1878 | } | |
1879 | ||
1880 | static bool complete_create_gpadl(VMBus *vmbus) | |
1881 | { | |
1882 | VMBusGpadl *gpadl; | |
1883 | ||
1884 | QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { | |
1885 | if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) { | |
1886 | gpadl->state = VMGPADL_ALIVE; | |
1887 | ||
1888 | return true; | |
1889 | } | |
1890 | } | |
1891 | ||
1892 | assert(false); | |
1893 | return false; | |
1894 | } | |
1895 | ||
1896 | static void handle_gpadl_teardown(VMBus *vmbus, | |
1897 | vmbus_message_gpadl_teardown *msg, | |
1898 | uint32_t msglen) | |
1899 | { | |
1900 | VMBusGpadl *gpadl; | |
1901 | ||
1902 | if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { | |
1903 | return; | |
1904 | } | |
1905 | ||
1906 | trace_vmbus_gpadl_teardown(msg->gpadl_id); | |
1907 | ||
1908 | gpadl = find_gpadl(vmbus, msg->gpadl_id); | |
1909 | if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) { | |
1910 | return; | |
1911 | } | |
1912 | ||
1913 | gpadl->state = VMGPADL_TEARINGDOWN; | |
1914 | vmbus->state = VMBUS_TEARDOWN_GPADL; | |
1915 | } | |
1916 | ||
1917 | static void send_teardown_gpadl(VMBus *vmbus) | |
1918 | { | |
1919 | VMBusGpadl *gpadl; | |
1920 | ||
1921 | QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { | |
1922 | if (gpadl->state == VMGPADL_TEARINGDOWN) { | |
1923 | struct vmbus_message_gpadl_torndown msg = { | |
1924 | .header.message_type = VMBUS_MSG_GPADL_TORNDOWN, | |
1925 | .gpadl_id = gpadl->id, | |
1926 | }; | |
1927 | ||
1928 | trace_vmbus_gpadl_torndown(gpadl->id); | |
1929 | post_msg(vmbus, &msg, sizeof(msg)); | |
1930 | return; | |
1931 | } | |
1932 | } | |
1933 | ||
1934 | assert(false); | |
1935 | } | |
1936 | ||
1937 | static bool complete_teardown_gpadl(VMBus *vmbus) | |
1938 | { | |
1939 | VMBusGpadl *gpadl; | |
1940 | ||
1941 | QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { | |
1942 | if (gpadl->state == VMGPADL_TEARINGDOWN) { | |
1943 | gpadl->state = VMGPADL_TORNDOWN; | |
1944 | vmbus_put_gpadl(gpadl); | |
1945 | return true; | |
1946 | } | |
1947 | } | |
1948 | ||
1949 | assert(false); | |
1950 | return false; | |
1951 | } | |
1952 | ||
1953 | static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg, | |
1954 | uint32_t msglen) | |
1955 | { | |
1956 | VMBusChannel *chan; | |
1957 | ||
1958 | if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { | |
1959 | return; | |
1960 | } | |
1961 | ||
1962 | trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id, | |
1963 | msg->target_vp); | |
1964 | chan = find_channel(vmbus, msg->child_relid); | |
1965 | if (!chan || chan->state != VMCHAN_INIT) { | |
1966 | return; | |
1967 | } | |
1968 | ||
1969 | chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id; | |
1970 | chan->ringbuf_send_offset = msg->ring_buffer_offset; | |
1971 | chan->target_vp = msg->target_vp; | |
1972 | chan->open_id = msg->open_id; | |
1973 | ||
1974 | open_channel(chan); | |
1975 | ||
1976 | chan->state = VMCHAN_OPENING; | |
1977 | vmbus->state = VMBUS_OPEN_CHANNEL; | |
1978 | } | |
1979 | ||
1980 | static void send_open_channel(VMBus *vmbus) | |
1981 | { | |
1982 | VMBusChannel *chan; | |
1983 | ||
1984 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
1985 | if (chan->state == VMCHAN_OPENING) { | |
1986 | struct vmbus_message_open_result msg = { | |
1987 | .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT, | |
1988 | .child_relid = chan->id, | |
1989 | .open_id = chan->open_id, | |
1990 | .status = !vmbus_channel_is_open(chan), | |
1991 | }; | |
1992 | ||
1993 | trace_vmbus_channel_open(chan->id, msg.status); | |
1994 | post_msg(vmbus, &msg, sizeof(msg)); | |
1995 | return; | |
1996 | } | |
1997 | } | |
1998 | ||
1999 | assert(false); | |
2000 | } | |
2001 | ||
2002 | static bool complete_open_channel(VMBus *vmbus) | |
2003 | { | |
2004 | VMBusChannel *chan; | |
2005 | ||
2006 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
2007 | if (chan->state == VMCHAN_OPENING) { | |
2008 | if (vmbus_channel_is_open(chan)) { | |
2009 | chan->state = VMCHAN_OPEN; | |
2010 | /* | |
2011 | * simulate guest notification of ringbuffer space made | |
2012 | * available, for the channel protocols where the host | |
2013 | * initiates the communication | |
2014 | */ | |
2015 | vmbus_channel_notify_host(chan); | |
2016 | } else { | |
2017 | chan->state = VMCHAN_INIT; | |
2018 | } | |
2019 | return true; | |
2020 | } | |
2021 | } | |
2022 | ||
2023 | assert(false); | |
2024 | return false; | |
2025 | } | |
2026 | ||
2027 | static void vdev_reset_on_close(VMBusDevice *vdev) | |
2028 | { | |
2029 | uint16_t i; | |
2030 | ||
2031 | for (i = 0; i < vdev->num_channels; i++) { | |
2032 | if (vmbus_channel_is_open(&vdev->channels[i])) { | |
2033 | return; | |
2034 | } | |
2035 | } | |
2036 | ||
2037 | /* all channels closed -- reset device */ | |
8cadd251 | 2038 | device_cold_reset(DEVICE(vdev)); |
0d71f708 JD |
2039 | } |
2040 | ||
2041 | static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg, | |
2042 | uint32_t msglen) | |
2043 | { | |
2044 | VMBusChannel *chan; | |
2045 | ||
2046 | if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) { | |
2047 | return; | |
2048 | } | |
2049 | ||
2050 | trace_vmbus_close_channel(msg->child_relid); | |
2051 | ||
2052 | chan = find_channel(vmbus, msg->child_relid); | |
2053 | if (!chan) { | |
2054 | return; | |
2055 | } | |
2056 | ||
2057 | close_channel(chan); | |
2058 | chan->state = VMCHAN_INIT; | |
2059 | ||
2060 | vdev_reset_on_close(chan->dev); | |
2061 | } | |
2062 | ||
2063 | static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen) | |
2064 | { | |
2065 | vmbus->state = VMBUS_UNLOAD; | |
2066 | } | |
2067 | ||
2068 | static void send_unload(VMBus *vmbus) | |
2069 | { | |
2070 | vmbus_message_header msg = { | |
2071 | .message_type = VMBUS_MSG_UNLOAD_RESPONSE, | |
2072 | }; | |
2073 | ||
2074 | qemu_mutex_lock(&vmbus->rx_queue_lock); | |
2075 | vmbus->rx_queue_size = 0; | |
2076 | qemu_mutex_unlock(&vmbus->rx_queue_lock); | |
2077 | ||
2078 | post_msg(vmbus, &msg, sizeof(msg)); | |
2079 | return; | |
2080 | } | |
2081 | ||
2082 | static bool complete_unload(VMBus *vmbus) | |
2083 | { | |
2084 | vmbus_reset_all(vmbus); | |
2085 | return true; | |
2086 | } | |
2087 | ||
2088 | static void process_message(VMBus *vmbus) | |
2089 | { | |
2090 | struct hyperv_post_message_input *hv_msg; | |
2091 | struct vmbus_message_header *msg; | |
2092 | void *msgdata; | |
2093 | uint32_t msglen; | |
2094 | ||
2095 | qemu_mutex_lock(&vmbus->rx_queue_lock); | |
2096 | ||
2097 | if (!vmbus->rx_queue_size) { | |
2098 | goto unlock; | |
2099 | } | |
2100 | ||
2101 | hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head]; | |
2102 | msglen = hv_msg->payload_size; | |
2103 | if (msglen < sizeof(*msg)) { | |
2104 | goto out; | |
2105 | } | |
2106 | msgdata = hv_msg->payload; | |
3d558330 | 2107 | msg = msgdata; |
0d71f708 JD |
2108 | |
2109 | trace_vmbus_process_incoming_message(msg->message_type); | |
2110 | ||
2111 | switch (msg->message_type) { | |
2112 | case VMBUS_MSG_INITIATE_CONTACT: | |
2113 | handle_initiate_contact(vmbus, msgdata, msglen); | |
2114 | break; | |
2115 | case VMBUS_MSG_REQUESTOFFERS: | |
2116 | handle_request_offers(vmbus, msgdata, msglen); | |
2117 | break; | |
2118 | case VMBUS_MSG_GPADL_HEADER: | |
2119 | handle_gpadl_header(vmbus, msgdata, msglen); | |
2120 | break; | |
2121 | case VMBUS_MSG_GPADL_BODY: | |
2122 | handle_gpadl_body(vmbus, msgdata, msglen); | |
2123 | break; | |
2124 | case VMBUS_MSG_GPADL_TEARDOWN: | |
2125 | handle_gpadl_teardown(vmbus, msgdata, msglen); | |
2126 | break; | |
2127 | case VMBUS_MSG_OPENCHANNEL: | |
2128 | handle_open_channel(vmbus, msgdata, msglen); | |
2129 | break; | |
2130 | case VMBUS_MSG_CLOSECHANNEL: | |
2131 | handle_close_channel(vmbus, msgdata, msglen); | |
2132 | break; | |
2133 | case VMBUS_MSG_UNLOAD: | |
2134 | handle_unload(vmbus, msgdata, msglen); | |
2135 | break; | |
2136 | default: | |
2137 | error_report("unknown message type %#x", msg->message_type); | |
2138 | break; | |
2139 | } | |
2140 | ||
2141 | out: | |
2142 | vmbus->rx_queue_size--; | |
2143 | vmbus->rx_queue_head++; | |
2144 | vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN; | |
2145 | ||
2146 | vmbus_resched(vmbus); | |
2147 | unlock: | |
2148 | qemu_mutex_unlock(&vmbus->rx_queue_lock); | |
2149 | } | |
2150 | ||
2151 | static const struct { | |
2152 | void (*run)(VMBus *vmbus); | |
2153 | bool (*complete)(VMBus *vmbus); | |
2154 | } state_runner[] = { | |
2155 | [VMBUS_LISTEN] = {process_message, NULL}, | |
2156 | [VMBUS_HANDSHAKE] = {send_handshake, NULL}, | |
2157 | [VMBUS_OFFER] = {send_offer, complete_offer}, | |
2158 | [VMBUS_CREATE_GPADL] = {send_create_gpadl, complete_create_gpadl}, | |
2159 | [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl}, | |
2160 | [VMBUS_OPEN_CHANNEL] = {send_open_channel, complete_open_channel}, | |
2161 | [VMBUS_UNLOAD] = {send_unload, complete_unload}, | |
2162 | }; | |
2163 | ||
2164 | static void vmbus_do_run(VMBus *vmbus) | |
2165 | { | |
2166 | if (vmbus->msg_in_progress) { | |
2167 | return; | |
2168 | } | |
2169 | ||
2170 | assert(vmbus->state < VMBUS_STATE_MAX); | |
2171 | assert(state_runner[vmbus->state].run); | |
2172 | state_runner[vmbus->state].run(vmbus); | |
2173 | } | |
2174 | ||
2175 | static void vmbus_run(void *opaque) | |
2176 | { | |
2177 | VMBus *vmbus = opaque; | |
2178 | ||
2179 | /* make sure no recursion happens (e.g. due to recursive aio_poll()) */ | |
2180 | if (vmbus->in_progress) { | |
2181 | return; | |
2182 | } | |
2183 | ||
2184 | vmbus->in_progress = true; | |
2185 | /* | |
2186 | * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it | |
2187 | * should go *after* the code that can result in aio_poll; otherwise | |
2188 | * reschedules can be missed. No idea how to enforce that. | |
2189 | */ | |
2190 | vmbus_do_run(vmbus); | |
2191 | vmbus->in_progress = false; | |
2192 | } | |
2193 | ||
2194 | static void vmbus_msg_cb(void *data, int status) | |
2195 | { | |
2196 | VMBus *vmbus = data; | |
2197 | bool (*complete)(VMBus *vmbus); | |
2198 | ||
2199 | assert(vmbus->msg_in_progress); | |
2200 | ||
2201 | trace_vmbus_msg_cb(status); | |
2202 | ||
2203 | if (status == -EAGAIN) { | |
2204 | goto out; | |
2205 | } | |
2206 | if (status) { | |
2207 | error_report("message delivery fatal failure: %d; aborting vmbus", | |
2208 | status); | |
2209 | vmbus_reset_all(vmbus); | |
2210 | return; | |
2211 | } | |
2212 | ||
2213 | assert(vmbus->state < VMBUS_STATE_MAX); | |
2214 | complete = state_runner[vmbus->state].complete; | |
2215 | if (!complete || complete(vmbus)) { | |
2216 | vmbus->state = VMBUS_LISTEN; | |
2217 | } | |
2218 | out: | |
2219 | vmbus->msg_in_progress = false; | |
2220 | vmbus_resched(vmbus); | |
2221 | } | |
2222 | ||
2223 | static void vmbus_resched(VMBus *vmbus) | |
2224 | { | |
2225 | aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus); | |
2226 | } | |
2227 | ||
2228 | static void vmbus_signal_event(EventNotifier *e) | |
2229 | { | |
2230 | VMBusChannel *chan; | |
2231 | VMBus *vmbus = container_of(e, VMBus, notifier); | |
2232 | unsigned long *int_map; | |
2233 | hwaddr addr, len; | |
2234 | bool is_dirty = false; | |
2235 | ||
2236 | if (!event_notifier_test_and_clear(e)) { | |
2237 | return; | |
2238 | } | |
2239 | ||
2240 | trace_vmbus_signal_event(); | |
2241 | ||
2242 | if (!vmbus->int_page_gpa) { | |
2243 | return; | |
2244 | } | |
2245 | ||
2246 | addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2; | |
2247 | len = TARGET_PAGE_SIZE / 2; | |
2248 | int_map = cpu_physical_memory_map(addr, &len, 1); | |
2249 | if (len != TARGET_PAGE_SIZE / 2) { | |
2250 | goto unmap; | |
2251 | } | |
2252 | ||
2253 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
2254 | if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) { | |
2255 | if (!vmbus_channel_is_open(chan)) { | |
2256 | continue; | |
2257 | } | |
2258 | vmbus_channel_notify_host(chan); | |
2259 | is_dirty = true; | |
2260 | } | |
2261 | } | |
2262 | ||
2263 | unmap: | |
2264 | cpu_physical_memory_unmap(int_map, len, 1, is_dirty); | |
2265 | } | |
2266 | ||
2267 | static void vmbus_dev_realize(DeviceState *dev, Error **errp) | |
2268 | { | |
2269 | VMBusDevice *vdev = VMBUS_DEVICE(dev); | |
2270 | VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); | |
2271 | VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev)); | |
2272 | BusChild *child; | |
2273 | Error *err = NULL; | |
2274 | char idstr[UUID_FMT_LEN + 1]; | |
2275 | ||
2276 | assert(!qemu_uuid_is_null(&vdev->instanceid)); | |
2277 | ||
80cc1a0d EH |
2278 | if (!qemu_uuid_is_null(&vdc->instanceid)) { |
2279 | /* Class wants to only have a single instance with a fixed UUID */ | |
2280 | if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) { | |
2281 | error_setg(&err, "instance id can't be changed"); | |
2282 | goto error_out; | |
2283 | } | |
2284 | } | |
2285 | ||
0d71f708 JD |
2286 | /* Check for instance id collision for this class id */ |
2287 | QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) { | |
2288 | VMBusDevice *child_dev = VMBUS_DEVICE(child->child); | |
2289 | ||
2290 | if (child_dev == vdev) { | |
2291 | continue; | |
2292 | } | |
2293 | ||
2294 | if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) { | |
2295 | qemu_uuid_unparse(&vdev->instanceid, idstr); | |
2296 | error_setg(&err, "duplicate vmbus device instance id %s", idstr); | |
2297 | goto error_out; | |
2298 | } | |
2299 | } | |
2300 | ||
2301 | vdev->dma_as = &address_space_memory; | |
2302 | ||
2303 | create_channels(vmbus, vdev, &err); | |
2304 | if (err) { | |
2305 | goto error_out; | |
2306 | } | |
2307 | ||
2308 | if (vdc->vmdev_realize) { | |
2309 | vdc->vmdev_realize(vdev, &err); | |
2310 | if (err) { | |
2311 | goto err_vdc_realize; | |
2312 | } | |
2313 | } | |
2314 | return; | |
2315 | ||
2316 | err_vdc_realize: | |
2317 | free_channels(vdev); | |
2318 | error_out: | |
2319 | error_propagate(errp, err); | |
2320 | } | |
2321 | ||
2322 | static void vmbus_dev_reset(DeviceState *dev) | |
2323 | { | |
2324 | uint16_t i; | |
2325 | VMBusDevice *vdev = VMBUS_DEVICE(dev); | |
2326 | VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); | |
2327 | ||
2328 | if (vdev->channels) { | |
2329 | for (i = 0; i < vdev->num_channels; i++) { | |
2330 | VMBusChannel *chan = &vdev->channels[i]; | |
2331 | close_channel(chan); | |
2332 | chan->state = VMCHAN_INIT; | |
2333 | } | |
2334 | } | |
2335 | ||
2336 | if (vdc->vmdev_reset) { | |
2337 | vdc->vmdev_reset(vdev); | |
2338 | } | |
2339 | } | |
2340 | ||
2341 | static void vmbus_dev_unrealize(DeviceState *dev) | |
2342 | { | |
2343 | VMBusDevice *vdev = VMBUS_DEVICE(dev); | |
2344 | VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); | |
2345 | ||
2346 | if (vdc->vmdev_unrealize) { | |
2347 | vdc->vmdev_unrealize(vdev); | |
2348 | } | |
2349 | free_channels(vdev); | |
2350 | } | |
2351 | ||
80cc1a0d EH |
2352 | static Property vmbus_dev_props[] = { |
2353 | DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid), | |
2354 | DEFINE_PROP_END_OF_LIST() | |
2355 | }; | |
2356 | ||
2357 | ||
0d71f708 JD |
2358 | static void vmbus_dev_class_init(ObjectClass *klass, void *data) |
2359 | { | |
2360 | DeviceClass *kdev = DEVICE_CLASS(klass); | |
80cc1a0d | 2361 | device_class_set_props(kdev, vmbus_dev_props); |
0d71f708 JD |
2362 | kdev->bus_type = TYPE_VMBUS; |
2363 | kdev->realize = vmbus_dev_realize; | |
2364 | kdev->unrealize = vmbus_dev_unrealize; | |
2365 | kdev->reset = vmbus_dev_reset; | |
2366 | } | |
2367 | ||
0d71f708 JD |
2368 | static void vmbus_dev_instance_init(Object *obj) |
2369 | { | |
2370 | VMBusDevice *vdev = VMBUS_DEVICE(obj); | |
2371 | VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev); | |
2372 | ||
2373 | if (!qemu_uuid_is_null(&vdc->instanceid)) { | |
2374 | /* Class wants to only have a single instance with a fixed UUID */ | |
2375 | vdev->instanceid = vdc->instanceid; | |
0d71f708 JD |
2376 | } |
2377 | } | |
2378 | ||
2379 | const VMStateDescription vmstate_vmbus_dev = { | |
2380 | .name = TYPE_VMBUS_DEVICE, | |
2381 | .version_id = 0, | |
2382 | .minimum_version_id = 0, | |
2383 | .fields = (VMStateField[]) { | |
2384 | VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16), | |
2385 | VMSTATE_UINT16(num_channels, VMBusDevice), | |
2386 | VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice, | |
2387 | num_channels, vmstate_channel, | |
2388 | VMBusChannel), | |
2389 | VMSTATE_END_OF_LIST() | |
2390 | } | |
2391 | }; | |
2392 | ||
2393 | /* vmbus generic device base */ | |
2394 | static const TypeInfo vmbus_dev_type_info = { | |
2395 | .name = TYPE_VMBUS_DEVICE, | |
2396 | .parent = TYPE_DEVICE, | |
2397 | .abstract = true, | |
2398 | .instance_size = sizeof(VMBusDevice), | |
2399 | .class_size = sizeof(VMBusDeviceClass), | |
2400 | .class_init = vmbus_dev_class_init, | |
2401 | .instance_init = vmbus_dev_instance_init, | |
2402 | }; | |
2403 | ||
2404 | static void vmbus_realize(BusState *bus, Error **errp) | |
2405 | { | |
2406 | int ret = 0; | |
0d71f708 JD |
2407 | VMBus *vmbus = VMBUS(bus); |
2408 | ||
2409 | qemu_mutex_init(&vmbus->rx_queue_lock); | |
2410 | ||
2411 | QTAILQ_INIT(&vmbus->gpadl_list); | |
2412 | QTAILQ_INIT(&vmbus->channel_list); | |
2413 | ||
2414 | ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, | |
2415 | vmbus_recv_message, vmbus); | |
2416 | if (ret != 0) { | |
6c37ebf3 | 2417 | error_setg(errp, "hyperv set message handler failed: %d", ret); |
0d71f708 JD |
2418 | goto error_out; |
2419 | } | |
2420 | ||
2421 | ret = event_notifier_init(&vmbus->notifier, 0); | |
2422 | if (ret != 0) { | |
6c37ebf3 | 2423 | error_setg(errp, "event notifier failed to init with %d", ret); |
0d71f708 JD |
2424 | goto remove_msg_handler; |
2425 | } | |
2426 | ||
2427 | event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event); | |
2428 | ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, | |
2429 | &vmbus->notifier); | |
2430 | if (ret != 0) { | |
6c37ebf3 | 2431 | error_setg(errp, "hyperv set event handler failed with %d", ret); |
0d71f708 JD |
2432 | goto clear_event_notifier; |
2433 | } | |
2434 | ||
2435 | return; | |
2436 | ||
2437 | clear_event_notifier: | |
2438 | event_notifier_cleanup(&vmbus->notifier); | |
2439 | remove_msg_handler: | |
2440 | hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL); | |
2441 | error_out: | |
2442 | qemu_mutex_destroy(&vmbus->rx_queue_lock); | |
0d71f708 JD |
2443 | } |
2444 | ||
2445 | static void vmbus_unrealize(BusState *bus) | |
2446 | { | |
2447 | VMBus *vmbus = VMBUS(bus); | |
2448 | ||
2449 | hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL); | |
2450 | hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL); | |
2451 | event_notifier_cleanup(&vmbus->notifier); | |
2452 | ||
2453 | qemu_mutex_destroy(&vmbus->rx_queue_lock); | |
2454 | } | |
2455 | ||
2456 | static void vmbus_reset(BusState *bus) | |
2457 | { | |
2458 | vmbus_deinit(VMBUS(bus)); | |
2459 | } | |
2460 | ||
2461 | static char *vmbus_get_dev_path(DeviceState *dev) | |
2462 | { | |
2463 | BusState *bus = qdev_get_parent_bus(dev); | |
2464 | return qdev_get_dev_path(bus->parent); | |
2465 | } | |
2466 | ||
2467 | static char *vmbus_get_fw_dev_path(DeviceState *dev) | |
2468 | { | |
2469 | VMBusDevice *vdev = VMBUS_DEVICE(dev); | |
2470 | char uuid[UUID_FMT_LEN + 1]; | |
2471 | ||
2472 | qemu_uuid_unparse(&vdev->instanceid, uuid); | |
2473 | return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid); | |
2474 | } | |
2475 | ||
2476 | static void vmbus_class_init(ObjectClass *klass, void *data) | |
2477 | { | |
2478 | BusClass *k = BUS_CLASS(klass); | |
2479 | ||
2480 | k->get_dev_path = vmbus_get_dev_path; | |
2481 | k->get_fw_dev_path = vmbus_get_fw_dev_path; | |
2482 | k->realize = vmbus_realize; | |
2483 | k->unrealize = vmbus_unrealize; | |
2484 | k->reset = vmbus_reset; | |
2485 | } | |
2486 | ||
2487 | static int vmbus_pre_load(void *opaque) | |
2488 | { | |
2489 | VMBusChannel *chan; | |
2490 | VMBus *vmbus = VMBUS(opaque); | |
2491 | ||
2492 | /* | |
2493 | * channel IDs allocated by the source will come in the migration stream | |
2494 | * for each channel, so clean up the ones allocated at realize | |
2495 | */ | |
2496 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
2497 | unregister_chan_id(chan); | |
2498 | } | |
2499 | ||
2500 | return 0; | |
2501 | } | |
2502 | static int vmbus_post_load(void *opaque, int version_id) | |
2503 | { | |
2504 | int ret; | |
2505 | VMBus *vmbus = VMBUS(opaque); | |
2506 | VMBusGpadl *gpadl; | |
2507 | VMBusChannel *chan; | |
2508 | ||
2509 | ret = vmbus_init(vmbus); | |
2510 | if (ret) { | |
2511 | return ret; | |
2512 | } | |
2513 | ||
2514 | QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) { | |
2515 | gpadl->vmbus = vmbus; | |
2516 | gpadl->refcount = 1; | |
2517 | } | |
2518 | ||
2519 | /* | |
2520 | * reopening channels depends on initialized vmbus so it's done here | |
2521 | * instead of channel_post_load() | |
2522 | */ | |
2523 | QTAILQ_FOREACH(chan, &vmbus->channel_list, link) { | |
2524 | ||
2525 | if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) { | |
2526 | open_channel(chan); | |
2527 | } | |
2528 | ||
2529 | if (chan->state != VMCHAN_OPEN) { | |
2530 | continue; | |
2531 | } | |
2532 | ||
2533 | if (!vmbus_channel_is_open(chan)) { | |
2534 | /* reopen failed, abort loading */ | |
2535 | return -1; | |
2536 | } | |
2537 | ||
2538 | /* resume processing on the guest side if it missed the notification */ | |
2539 | hyperv_sint_route_set_sint(chan->notify_route); | |
2540 | /* ditto on the host side */ | |
2541 | vmbus_channel_notify_host(chan); | |
2542 | } | |
2543 | ||
2544 | vmbus_resched(vmbus); | |
2545 | return 0; | |
2546 | } | |
2547 | ||
2548 | static const VMStateDescription vmstate_post_message_input = { | |
2549 | .name = "vmbus/hyperv_post_message_input", | |
2550 | .version_id = 0, | |
2551 | .minimum_version_id = 0, | |
2552 | .fields = (VMStateField[]) { | |
2553 | /* | |
2554 | * skip connection_id and message_type as they are validated before | |
2555 | * queueing and ignored on dequeueing | |
2556 | */ | |
2557 | VMSTATE_UINT32(payload_size, struct hyperv_post_message_input), | |
2558 | VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input, | |
2559 | HV_MESSAGE_PAYLOAD_SIZE), | |
2560 | VMSTATE_END_OF_LIST() | |
2561 | } | |
2562 | }; | |
2563 | ||
2564 | static bool vmbus_rx_queue_needed(void *opaque) | |
2565 | { | |
2566 | VMBus *vmbus = VMBUS(opaque); | |
2567 | return vmbus->rx_queue_size; | |
2568 | } | |
2569 | ||
2570 | static const VMStateDescription vmstate_rx_queue = { | |
2571 | .name = "vmbus/rx_queue", | |
2572 | .version_id = 0, | |
2573 | .minimum_version_id = 0, | |
2574 | .needed = vmbus_rx_queue_needed, | |
2575 | .fields = (VMStateField[]) { | |
2576 | VMSTATE_UINT8(rx_queue_head, VMBus), | |
2577 | VMSTATE_UINT8(rx_queue_size, VMBus), | |
2578 | VMSTATE_STRUCT_ARRAY(rx_queue, VMBus, | |
2579 | HV_MSG_QUEUE_LEN, 0, | |
2580 | vmstate_post_message_input, | |
2581 | struct hyperv_post_message_input), | |
2582 | VMSTATE_END_OF_LIST() | |
2583 | } | |
2584 | }; | |
2585 | ||
2586 | static const VMStateDescription vmstate_vmbus = { | |
2587 | .name = TYPE_VMBUS, | |
2588 | .version_id = 0, | |
2589 | .minimum_version_id = 0, | |
2590 | .pre_load = vmbus_pre_load, | |
2591 | .post_load = vmbus_post_load, | |
2592 | .fields = (VMStateField[]) { | |
2593 | VMSTATE_UINT8(state, VMBus), | |
2594 | VMSTATE_UINT32(version, VMBus), | |
2595 | VMSTATE_UINT32(target_vp, VMBus), | |
2596 | VMSTATE_UINT64(int_page_gpa, VMBus), | |
2597 | VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0, | |
2598 | vmstate_gpadl, VMBusGpadl, link), | |
2599 | VMSTATE_END_OF_LIST() | |
2600 | }, | |
2601 | .subsections = (const VMStateDescription * []) { | |
2602 | &vmstate_rx_queue, | |
2603 | NULL | |
2604 | } | |
2605 | }; | |
2606 | ||
2607 | static const TypeInfo vmbus_type_info = { | |
2608 | .name = TYPE_VMBUS, | |
2609 | .parent = TYPE_BUS, | |
2610 | .instance_size = sizeof(VMBus), | |
2611 | .class_init = vmbus_class_init, | |
2612 | }; | |
2613 | ||
2614 | static void vmbus_bridge_realize(DeviceState *dev, Error **errp) | |
2615 | { | |
2616 | VMBusBridge *bridge = VMBUS_BRIDGE(dev); | |
2617 | ||
2618 | /* | |
2619 | * here there's at least one vmbus bridge that is being realized, so | |
2620 | * vmbus_bridge_find can only return NULL if it's not unique | |
2621 | */ | |
2622 | if (!vmbus_bridge_find()) { | |
2623 | error_setg(errp, "there can be at most one %s in the system", | |
2624 | TYPE_VMBUS_BRIDGE); | |
2625 | return; | |
2626 | } | |
2627 | ||
2628 | if (!hyperv_is_synic_enabled()) { | |
2629 | error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX"); | |
2630 | return; | |
2631 | } | |
2632 | ||
9388d170 | 2633 | bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus")); |
0d71f708 JD |
2634 | } |
2635 | ||
2636 | static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev) | |
2637 | { | |
2638 | /* there can be only one VMBus */ | |
2639 | return g_strdup("0"); | |
2640 | } | |
2641 | ||
2642 | static const VMStateDescription vmstate_vmbus_bridge = { | |
2643 | .name = TYPE_VMBUS_BRIDGE, | |
2644 | .version_id = 0, | |
2645 | .minimum_version_id = 0, | |
2646 | .fields = (VMStateField[]) { | |
2647 | VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus), | |
2648 | VMSTATE_END_OF_LIST() | |
2649 | }, | |
2650 | }; | |
2651 | ||
6775d15d | 2652 | static Property vmbus_bridge_props[] = { |
8f06f22f | 2653 | DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7), |
6775d15d JD |
2654 | DEFINE_PROP_END_OF_LIST() |
2655 | }; | |
2656 | ||
0d71f708 JD |
2657 | static void vmbus_bridge_class_init(ObjectClass *klass, void *data) |
2658 | { | |
2659 | DeviceClass *k = DEVICE_CLASS(klass); | |
2660 | SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass); | |
2661 | ||
2662 | k->realize = vmbus_bridge_realize; | |
2663 | k->fw_name = "vmbus"; | |
2664 | sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address; | |
2665 | set_bit(DEVICE_CATEGORY_BRIDGE, k->categories); | |
2666 | k->vmsd = &vmstate_vmbus_bridge; | |
6775d15d | 2667 | device_class_set_props(k, vmbus_bridge_props); |
0d71f708 JD |
2668 | /* override SysBusDevice's default */ |
2669 | k->user_creatable = true; | |
2670 | } | |
2671 | ||
2672 | static const TypeInfo vmbus_bridge_type_info = { | |
2673 | .name = TYPE_VMBUS_BRIDGE, | |
2674 | .parent = TYPE_SYS_BUS_DEVICE, | |
2675 | .instance_size = sizeof(VMBusBridge), | |
2676 | .class_init = vmbus_bridge_class_init, | |
2677 | }; | |
2678 | ||
2679 | static void vmbus_register_types(void) | |
2680 | { | |
2681 | type_register_static(&vmbus_bridge_type_info); | |
2682 | type_register_static(&vmbus_dev_type_info); | |
2683 | type_register_static(&vmbus_type_info); | |
2684 | } | |
2685 | ||
2686 | type_init(vmbus_register_types) |