hw/virtio/virtio.c
1 /*
2 * Virtio Support
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qemu-common.h"
17 #include "cpu.h"
18 #include "trace.h"
19 #include "exec/address-spaces.h"
20 #include "qemu/error-report.h"
21 #include "hw/virtio/virtio.h"
22 #include "qemu/atomic.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "migration/migration.h"
25 #include "hw/virtio/virtio-access.h"
26 #include "sysemu/dma.h"
27
28 /*
29 * The alignment to use between consumer and producer parts of vring.
30 * x86 pagesize again. This is the default, used by transports like PCI
31 * which don't provide a means for the guest to tell the host the alignment.
32 */
33 #define VIRTIO_PCI_VRING_ALIGN 4096
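/*
 * Illustrative note (not in the original source): vring_align(), used below
 * when laying out the rings, rounds an address up to the next multiple of the
 * alignment, so with the default 4096-byte alignment:
 *
 *     vring_align(0x1234, 4096) == 0x2000
 *     vring_align(0x2000, 4096) == 0x2000
 */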
34
35 typedef struct VRingDesc
36 {
37 uint64_t addr;
38 uint32_t len;
39 uint16_t flags;
40 uint16_t next;
41 } VRingDesc;
42
43 typedef struct VRingAvail
44 {
45 uint16_t flags;
46 uint16_t idx;
47 uint16_t ring[0];
48 } VRingAvail;
49
50 typedef struct VRingUsedElem
51 {
52 uint32_t id;
53 uint32_t len;
54 } VRingUsedElem;
55
56 typedef struct VRingUsed
57 {
58 uint16_t flags;
59 uint16_t idx;
60 VRingUsedElem ring[0];
61 } VRingUsed;
62
63 typedef struct VRingMemoryRegionCaches {
64 struct rcu_head rcu;
65 MemoryRegionCache desc;
66 MemoryRegionCache avail;
67 MemoryRegionCache used;
68 } VRingMemoryRegionCaches;
69
70 typedef struct VRing
71 {
72 unsigned int num;
73 unsigned int num_default;
74 unsigned int align;
75 hwaddr desc;
76 hwaddr avail;
77 hwaddr used;
78 VRingMemoryRegionCaches *caches;
79 } VRing;
80
81 struct VirtQueue
82 {
83 VRing vring;
84
85 /* Next head to pop */
86 uint16_t last_avail_idx;
87
88 /* Last avail_idx read from VQ. */
89 uint16_t shadow_avail_idx;
90
91 uint16_t used_idx;
92
93 /* Last used index value we have signalled on */
94 uint16_t signalled_used;
95
96     /* Whether the value in signalled_used is valid */
97 bool signalled_used_valid;
98
99 /* Notification enabled? */
100 bool notification;
101
102 uint16_t queue_index;
103
104 unsigned int inuse;
105
106 uint16_t vector;
107 VirtIOHandleOutput handle_output;
108 VirtIOHandleAIOOutput handle_aio_output;
109 VirtIODevice *vdev;
110 EventNotifier guest_notifier;
111 EventNotifier host_notifier;
112 QLIST_ENTRY(VirtQueue) node;
113 };
114
115 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
116 {
117 if (!caches) {
118 return;
119 }
120
121 address_space_cache_destroy(&caches->desc);
122 address_space_cache_destroy(&caches->avail);
123 address_space_cache_destroy(&caches->used);
124 g_free(caches);
125 }
126
127 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
128 {
129 VirtQueue *vq = &vdev->vq[n];
130 VRingMemoryRegionCaches *old = vq->vring.caches;
131 VRingMemoryRegionCaches *new;
132 hwaddr addr, size;
133 int event_size;
134
135 event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
136
137 addr = vq->vring.desc;
138 if (!addr) {
139 return;
140 }
141 new = g_new0(VRingMemoryRegionCaches, 1);
142 size = virtio_queue_get_desc_size(vdev, n);
143 address_space_cache_init(&new->desc, vdev->dma_as,
144 addr, size, false);
145
146 size = virtio_queue_get_used_size(vdev, n) + event_size;
147 address_space_cache_init(&new->used, vdev->dma_as,
148 vq->vring.used, size, true);
149
150 size = virtio_queue_get_avail_size(vdev, n) + event_size;
151 address_space_cache_init(&new->avail, vdev->dma_as,
152 vq->vring.avail, size, false);
153
154 atomic_rcu_set(&vq->vring.caches, new);
155 if (old) {
156 call_rcu(old, virtio_free_region_cache, rcu);
157 }
158 }
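/*
 * Illustrative sketch (not part of this file): readers access the caches
 * installed above under RCU, which is the pattern the vring_*() helpers
 * below follow:
 *
 *     rcu_read_lock();
 *     caches = atomic_rcu_read(&vq->vring.caches);
 *     ... read or write through caches->desc / caches->avail / caches->used ...
 *     rcu_read_unlock();
 *
 * The old caches are only freed once all such readers have finished, via the
 * call_rcu() above.
 */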
159
160 /* virt queue functions */
161 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
162 {
163 VRing *vring = &vdev->vq[n].vring;
164
165 if (!vring->desc) {
166 /* not yet setup -> nothing to do */
167 return;
168 }
169 vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
170 vring->used = vring_align(vring->avail +
171 offsetof(VRingAvail, ring[vring->num]),
172 vring->align);
173 virtio_init_region_cache(vdev, n);
174 }
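/*
 * Layout implied by the math above (descriptive note, not new code): for a
 * legacy split ring of num entries, starting at vring.desc:
 *
 *     desc table: num * sizeof(VRingDesc)         = 16 * num bytes
 *     avail ring: offsetof(VRingAvail, ring[num]) =  4 + 2 * num bytes
 *     used ring : starts at the next vring.align boundary and needs
 *                 offsetof(VRingUsed, ring[num])  =  4 + 8 * num bytes
 *                 (plus 2 bytes for avail_event if VIRTIO_RING_F_EVENT_IDX)
 */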
175
176 /* Called within rcu_read_lock(). */
177 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
178 MemoryRegionCache *cache, int i)
179 {
180 address_space_read_cached(cache, i * sizeof(VRingDesc),
181 desc, sizeof(VRingDesc));
182 virtio_tswap64s(vdev, &desc->addr);
183 virtio_tswap32s(vdev, &desc->len);
184 virtio_tswap16s(vdev, &desc->flags);
185 virtio_tswap16s(vdev, &desc->next);
186 }
187
188 /* Called within rcu_read_lock(). */
189 static inline uint16_t vring_avail_flags(VirtQueue *vq)
190 {
191 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
192 hwaddr pa = offsetof(VRingAvail, flags);
193 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
194 }
195
196 /* Called within rcu_read_lock(). */
197 static inline uint16_t vring_avail_idx(VirtQueue *vq)
198 {
199 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
200 hwaddr pa = offsetof(VRingAvail, idx);
201 vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
202 return vq->shadow_avail_idx;
203 }
204
205 /* Called within rcu_read_lock(). */
206 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
207 {
208 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
209 hwaddr pa = offsetof(VRingAvail, ring[i]);
210 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
211 }
212
213 /* Called within rcu_read_lock(). */
214 static inline uint16_t vring_get_used_event(VirtQueue *vq)
215 {
216 return vring_avail_ring(vq, vq->vring.num);
217 }
218
219 /* Called within rcu_read_lock(). */
220 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
221 int i)
222 {
223 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
224 hwaddr pa = offsetof(VRingUsed, ring[i]);
225 virtio_tswap32s(vq->vdev, &uelem->id);
226 virtio_tswap32s(vq->vdev, &uelem->len);
227 address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
228 address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
229 }
230
231 /* Called within rcu_read_lock(). */
232 static uint16_t vring_used_idx(VirtQueue *vq)
233 {
234 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
235 hwaddr pa = offsetof(VRingUsed, idx);
236 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
237 }
238
239 /* Called within rcu_read_lock(). */
240 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
241 {
242 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
243 hwaddr pa = offsetof(VRingUsed, idx);
244 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
245 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
246 vq->used_idx = val;
247 }
248
249 /* Called within rcu_read_lock(). */
250 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
251 {
252 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
253 VirtIODevice *vdev = vq->vdev;
254 hwaddr pa = offsetof(VRingUsed, flags);
255 uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
256
257 virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
258 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
259 }
260
261 /* Called within rcu_read_lock(). */
262 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
263 {
264 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
265 VirtIODevice *vdev = vq->vdev;
266 hwaddr pa = offsetof(VRingUsed, flags);
267 uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
268
269 virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
270 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
271 }
272
273 /* Called within rcu_read_lock(). */
274 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
275 {
276 VRingMemoryRegionCaches *caches;
277 hwaddr pa;
278 if (!vq->notification) {
279 return;
280 }
281
282 caches = atomic_rcu_read(&vq->vring.caches);
283 pa = offsetof(VRingUsed, ring[vq->vring.num]);
284 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
285 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
286 }
287
288 void virtio_queue_set_notification(VirtQueue *vq, int enable)
289 {
290 vq->notification = enable;
291
292 if (!vq->vring.desc) {
293 return;
294 }
295
296 rcu_read_lock();
297 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
298 vring_set_avail_event(vq, vring_avail_idx(vq));
299 } else if (enable) {
300 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
301 } else {
302 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
303 }
304 if (enable) {
305 /* Expose avail event/used flags before caller checks the avail idx. */
306 smp_mb();
307 }
308 rcu_read_unlock();
309 }
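/*
 * Usage sketch (illustrative, not a requirement of this API): device code
 * that processes requests in a loop typically disables notifications while
 * draining the queue and re-enables them before going idle, e.g.:
 *
 *     virtio_queue_set_notification(vq, 0);
 *     while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *         ... handle elem, then virtqueue_push() and g_free(elem) ...
 *     }
 *     virtio_queue_set_notification(vq, 1);
 *
 * followed by a final emptiness check to close the race with the guest
 * adding buffers just before notifications were re-enabled.
 */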
310
311 int virtio_queue_ready(VirtQueue *vq)
312 {
313 return vq->vring.avail != 0;
314 }
315
316 /* Fetch avail_idx from VQ memory only when we really need to know if
317  * the guest has added some buffers.
318 * Called within rcu_read_lock(). */
319 static int virtio_queue_empty_rcu(VirtQueue *vq)
320 {
321 if (unlikely(!vq->vring.avail)) {
322 return 1;
323 }
324
325 if (vq->shadow_avail_idx != vq->last_avail_idx) {
326 return 0;
327 }
328
329 return vring_avail_idx(vq) == vq->last_avail_idx;
330 }
331
332 int virtio_queue_empty(VirtQueue *vq)
333 {
334 bool empty;
335
336 if (unlikely(!vq->vring.avail)) {
337 return 1;
338 }
339
340 if (vq->shadow_avail_idx != vq->last_avail_idx) {
341 return 0;
342 }
343
344 rcu_read_lock();
345 empty = vring_avail_idx(vq) == vq->last_avail_idx;
346 rcu_read_unlock();
347 return empty;
348 }
349
350 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
351 unsigned int len)
352 {
353 AddressSpace *dma_as = vq->vdev->dma_as;
354 unsigned int offset;
355 int i;
356
357 offset = 0;
358 for (i = 0; i < elem->in_num; i++) {
359 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
360
361 dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
362 elem->in_sg[i].iov_len,
363 DMA_DIRECTION_FROM_DEVICE, size);
364
365 offset += size;
366 }
367
368 for (i = 0; i < elem->out_num; i++)
369 dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
370 elem->out_sg[i].iov_len,
371 DMA_DIRECTION_TO_DEVICE,
372 elem->out_sg[i].iov_len);
373 }
374
375 /* virtqueue_detach_element:
376 * @vq: The #VirtQueue
377 * @elem: The #VirtQueueElement
378 * @len: number of bytes written
379 *
380 * Detach the element from the virtqueue. This function is suitable for device
381 * reset or other situations where a #VirtQueueElement is simply freed and will
382 * not be pushed or discarded.
383 */
384 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
385 unsigned int len)
386 {
387 vq->inuse--;
388 virtqueue_unmap_sg(vq, elem, len);
389 }
390
391 /* virtqueue_unpop:
392 * @vq: The #VirtQueue
393 * @elem: The #VirtQueueElement
394 * @len: number of bytes written
395 *
396 * Pretend the most recent element wasn't popped from the virtqueue. The next
397 * call to virtqueue_pop() will refetch the element.
398 */
399 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
400 unsigned int len)
401 {
402 vq->last_avail_idx--;
403 virtqueue_detach_element(vq, elem, len);
404 }
405
406 /* virtqueue_rewind:
407 * @vq: The #VirtQueue
408 * @num: Number of elements to push back
409 *
410 * Pretend that elements weren't popped from the virtqueue. The next
411 * virtqueue_pop() will refetch the oldest element.
412 *
413 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
414 *
415 * Returns: true on success, false if @num is greater than the number of in use
416 * elements.
417 */
418 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
419 {
420 if (num > vq->inuse) {
421 return false;
422 }
423 vq->last_avail_idx -= num;
424 vq->inuse -= num;
425 return true;
426 }
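/*
 * Illustrative example (not part of the original file): a device that popped
 * an element but cannot process it right now can either return that specific
 * element with virtqueue_unpop(vq, elem, 0), or, if it only tracks a count of
 * outstanding requests, back out several at once:
 *
 *     if (!virtqueue_rewind(vq, pending)) {
 *         ... pending exceeded vq->inuse, which indicates a device bug ...
 *     }
 */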
427
428 /* Called within rcu_read_lock(). */
429 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
430 unsigned int len, unsigned int idx)
431 {
432 VRingUsedElem uelem;
433
434 trace_virtqueue_fill(vq, elem, len, idx);
435
436 virtqueue_unmap_sg(vq, elem, len);
437
438 if (unlikely(vq->vdev->broken)) {
439 return;
440 }
441
442 if (unlikely(!vq->vring.used)) {
443 return;
444 }
445
446 idx = (idx + vq->used_idx) % vq->vring.num;
447
448 uelem.id = elem->index;
449 uelem.len = len;
450 vring_used_write(vq, &uelem, idx);
451 }
452
453 /* Called within rcu_read_lock(). */
454 void virtqueue_flush(VirtQueue *vq, unsigned int count)
455 {
456 uint16_t old, new;
457
458 if (unlikely(vq->vdev->broken)) {
459 vq->inuse -= count;
460 return;
461 }
462
463 if (unlikely(!vq->vring.used)) {
464 return;
465 }
466
467 /* Make sure buffer is written before we update index. */
468 smp_wmb();
469 trace_virtqueue_flush(vq, count);
470 old = vq->used_idx;
471 new = old + count;
472 vring_used_idx_set(vq, new);
473 vq->inuse -= count;
474 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
475 vq->signalled_used_valid = false;
476 }
477
478 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
479 unsigned int len)
480 {
481 rcu_read_lock();
482 virtqueue_fill(vq, elem, len, 0);
483 virtqueue_flush(vq, 1);
484 rcu_read_unlock();
485 }
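/*
 * Illustrative batching sketch (an assumption about typical use; elems[],
 * lens[] and n are placeholders): virtqueue_push() is fill + flush for a
 * single element, while a device completing several elements can amortize
 * the used->idx update and the guest notification:
 *
 *     rcu_read_lock();
 *     for (i = 0; i < n; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, n);
 *     rcu_read_unlock();
 *     virtio_notify(vdev, vq);
 */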
486
487 /* Called within rcu_read_lock(). */
488 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
489 {
490 uint16_t num_heads = vring_avail_idx(vq) - idx;
491
492 /* Check it isn't doing very strange things with descriptor numbers. */
493 if (num_heads > vq->vring.num) {
494 virtio_error(vq->vdev, "Guest moved used index from %u to %u",
495 idx, vq->shadow_avail_idx);
496 return -EINVAL;
497 }
498 /* On success, callers read a descriptor at vq->last_avail_idx.
499 * Make sure descriptor read does not bypass avail index read. */
500 if (num_heads) {
501 smp_rmb();
502 }
503
504 return num_heads;
505 }
506
507 /* Called within rcu_read_lock(). */
508 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
509 unsigned int *head)
510 {
511 /* Grab the next descriptor number they're advertising, and increment
512 * the index we've seen. */
513 *head = vring_avail_ring(vq, idx % vq->vring.num);
514
515 /* If their number is silly, that's a fatal mistake. */
516 if (*head >= vq->vring.num) {
517 virtio_error(vq->vdev, "Guest says index %u is available", *head);
518 return false;
519 }
520
521 return true;
522 }
523
524 enum {
525 VIRTQUEUE_READ_DESC_ERROR = -1,
526 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
527 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
528 };
529
530 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
531 MemoryRegionCache *desc_cache, unsigned int max,
532 unsigned int *next)
533 {
534 /* If this descriptor says it doesn't chain, we're done. */
535 if (!(desc->flags & VRING_DESC_F_NEXT)) {
536 return VIRTQUEUE_READ_DESC_DONE;
537 }
538
539     /* Check they're not leading us off the end of the descriptor table. */
540 *next = desc->next;
541 /* Make sure compiler knows to grab that: we don't want it changing! */
542 smp_wmb();
543
544 if (*next >= max) {
545 virtio_error(vdev, "Desc next is %u", *next);
546 return VIRTQUEUE_READ_DESC_ERROR;
547 }
548
549 vring_desc_read(vdev, desc, desc_cache, *next);
550 return VIRTQUEUE_READ_DESC_MORE;
551 }
552
553 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
554 unsigned int *out_bytes,
555 unsigned max_in_bytes, unsigned max_out_bytes)
556 {
557 VirtIODevice *vdev = vq->vdev;
558 unsigned int max, idx;
559 unsigned int total_bufs, in_total, out_total;
560 VRingMemoryRegionCaches *caches;
561 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
562 int64_t len = 0;
563 int rc;
564
565 if (unlikely(!vq->vring.desc)) {
566 if (in_bytes) {
567 *in_bytes = 0;
568 }
569 if (out_bytes) {
570 *out_bytes = 0;
571 }
572 return;
573 }
574
575 rcu_read_lock();
576 idx = vq->last_avail_idx;
577 total_bufs = in_total = out_total = 0;
578
579 max = vq->vring.num;
580 caches = atomic_rcu_read(&vq->vring.caches);
581 if (caches->desc.len < max * sizeof(VRingDesc)) {
582 virtio_error(vdev, "Cannot map descriptor ring");
583 goto err;
584 }
585
586 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
587 MemoryRegionCache *desc_cache = &caches->desc;
588 unsigned int num_bufs;
589 VRingDesc desc;
590 unsigned int i;
591
592 num_bufs = total_bufs;
593
594 if (!virtqueue_get_head(vq, idx++, &i)) {
595 goto err;
596 }
597
598 vring_desc_read(vdev, &desc, desc_cache, i);
599
600 if (desc.flags & VRING_DESC_F_INDIRECT) {
601 if (desc.len % sizeof(VRingDesc)) {
602 virtio_error(vdev, "Invalid size for indirect buffer table");
603 goto err;
604 }
605
606 /* If we've got too many, that implies a descriptor loop. */
607 if (num_bufs >= max) {
608 virtio_error(vdev, "Looped descriptor");
609 goto err;
610 }
611
612 /* loop over the indirect descriptor table */
613 len = address_space_cache_init(&indirect_desc_cache,
614 vdev->dma_as,
615 desc.addr, desc.len, false);
616 desc_cache = &indirect_desc_cache;
617 if (len < desc.len) {
618 virtio_error(vdev, "Cannot map indirect buffer");
619 goto err;
620 }
621
622 max = desc.len / sizeof(VRingDesc);
623 num_bufs = i = 0;
624 vring_desc_read(vdev, &desc, desc_cache, i);
625 }
626
627 do {
628 /* If we've got too many, that implies a descriptor loop. */
629 if (++num_bufs > max) {
630 virtio_error(vdev, "Looped descriptor");
631 goto err;
632 }
633
634 if (desc.flags & VRING_DESC_F_WRITE) {
635 in_total += desc.len;
636 } else {
637 out_total += desc.len;
638 }
639 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
640 goto done;
641 }
642
643 rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
644 } while (rc == VIRTQUEUE_READ_DESC_MORE);
645
646 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
647 goto err;
648 }
649
650 if (desc_cache == &indirect_desc_cache) {
651 address_space_cache_destroy(&indirect_desc_cache);
652 total_bufs++;
653 } else {
654 total_bufs = num_bufs;
655 }
656 }
657
658 if (rc < 0) {
659 goto err;
660 }
661
662 done:
663 address_space_cache_destroy(&indirect_desc_cache);
664 if (in_bytes) {
665 *in_bytes = in_total;
666 }
667 if (out_bytes) {
668 *out_bytes = out_total;
669 }
670 rcu_read_unlock();
671 return;
672
673 err:
674 in_total = out_total = 0;
675 goto done;
676 }
677
678 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
679 unsigned int out_bytes)
680 {
681 unsigned int in_total, out_total;
682
683 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
684 return in_bytes <= in_total && out_bytes <= out_total;
685 }
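/*
 * Usage sketch (illustrative; hdr_len and payload_len are hypothetical
 * names): a device that needs a known amount of device-writable space before
 * popping can check it up front:
 *
 *     if (!virtqueue_avail_bytes(vq, hdr_len + payload_len, 0)) {
 *         return;
 *     }
 *     elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 */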
686
687 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
688 hwaddr *addr, struct iovec *iov,
689 unsigned int max_num_sg, bool is_write,
690 hwaddr pa, size_t sz)
691 {
692 bool ok = false;
693 unsigned num_sg = *p_num_sg;
694 assert(num_sg <= max_num_sg);
695
696 if (!sz) {
697 virtio_error(vdev, "virtio: zero sized buffers are not allowed");
698 goto out;
699 }
700
701 while (sz) {
702 hwaddr len = sz;
703
704 if (num_sg == max_num_sg) {
705 virtio_error(vdev, "virtio: too many write descriptors in "
706 "indirect table");
707 goto out;
708 }
709
710 iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
711 is_write ?
712 DMA_DIRECTION_FROM_DEVICE :
713 DMA_DIRECTION_TO_DEVICE);
714 if (!iov[num_sg].iov_base) {
715 virtio_error(vdev, "virtio: bogus descriptor or out of resources");
716 goto out;
717 }
718
719 iov[num_sg].iov_len = len;
720 addr[num_sg] = pa;
721
722 sz -= len;
723 pa += len;
724 num_sg++;
725 }
726 ok = true;
727
728 out:
729 *p_num_sg = num_sg;
730 return ok;
731 }
732
733 /* Only used by error code paths before we have a VirtQueueElement (therefore
734 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
735 * yet.
736 */
737 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
738 struct iovec *iov)
739 {
740 unsigned int i;
741
742 for (i = 0; i < out_num + in_num; i++) {
743 int is_write = i >= out_num;
744
745 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
746 iov++;
747 }
748 }
749
750 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
751 hwaddr *addr, unsigned int *num_sg,
752 int is_write)
753 {
754 unsigned int i;
755 hwaddr len;
756
757 for (i = 0; i < *num_sg; i++) {
758 len = sg[i].iov_len;
759 sg[i].iov_base = dma_memory_map(vdev->dma_as,
760 addr[i], &len, is_write ?
761 DMA_DIRECTION_FROM_DEVICE :
762 DMA_DIRECTION_TO_DEVICE);
763 if (!sg[i].iov_base) {
764 error_report("virtio: error trying to map MMIO memory");
765 exit(1);
766 }
767 if (len != sg[i].iov_len) {
768 error_report("virtio: unexpected memory split");
769 exit(1);
770 }
771 }
772 }
773
774 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
775 {
776 virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num, 1);
777 virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num, 0);
778 }
779
780 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
781 {
782 VirtQueueElement *elem;
783 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
784 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
785 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
786 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
787 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
788 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
789
790 assert(sz >= sizeof(VirtQueueElement));
791 elem = g_malloc(out_sg_end);
792 elem->out_num = out_num;
793 elem->in_num = in_num;
794 elem->in_addr = (void *)elem + in_addr_ofs;
795 elem->out_addr = (void *)elem + out_addr_ofs;
796 elem->in_sg = (void *)elem + in_sg_ofs;
797 elem->out_sg = (void *)elem + out_sg_ofs;
798 return elem;
799 }
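/*
 * Memory layout produced above (descriptive only): a single allocation holds
 * the caller-specified element header followed by the variable-size arrays,
 * each aligned to its element type:
 *
 *     [VirtQueueElement (sz bytes)][in_addr[in_num]][out_addr[out_num]]
 *     [in_sg[in_num]][out_sg[out_num]]
 */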
800
801 void *virtqueue_pop(VirtQueue *vq, size_t sz)
802 {
803 unsigned int i, head, max;
804 VRingMemoryRegionCaches *caches;
805 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
806 MemoryRegionCache *desc_cache;
807 int64_t len;
808 VirtIODevice *vdev = vq->vdev;
809 VirtQueueElement *elem = NULL;
810 unsigned out_num, in_num;
811 hwaddr addr[VIRTQUEUE_MAX_SIZE];
812 struct iovec iov[VIRTQUEUE_MAX_SIZE];
813 VRingDesc desc;
814 int rc;
815
816 if (unlikely(vdev->broken)) {
817 return NULL;
818 }
819 rcu_read_lock();
820 if (virtio_queue_empty_rcu(vq)) {
821 goto done;
822 }
823 /* Needed after virtio_queue_empty(), see comment in
824 * virtqueue_num_heads(). */
825 smp_rmb();
826
827     /* When we start there are no input or output buffers. */
828 out_num = in_num = 0;
829
830 max = vq->vring.num;
831
832 if (vq->inuse >= vq->vring.num) {
833 virtio_error(vdev, "Virtqueue size exceeded");
834 goto done;
835 }
836
837 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
838 goto done;
839 }
840
841 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
842 vring_set_avail_event(vq, vq->last_avail_idx);
843 }
844
845 i = head;
846
847 caches = atomic_rcu_read(&vq->vring.caches);
848 if (caches->desc.len < max * sizeof(VRingDesc)) {
849 virtio_error(vdev, "Cannot map descriptor ring");
850 goto done;
851 }
852
853 desc_cache = &caches->desc;
854 vring_desc_read(vdev, &desc, desc_cache, i);
855 if (desc.flags & VRING_DESC_F_INDIRECT) {
856 if (desc.len % sizeof(VRingDesc)) {
857 virtio_error(vdev, "Invalid size for indirect buffer table");
858 goto done;
859 }
860
861 /* loop over the indirect descriptor table */
862 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
863 desc.addr, desc.len, false);
864 desc_cache = &indirect_desc_cache;
865 if (len < desc.len) {
866 virtio_error(vdev, "Cannot map indirect buffer");
867 goto done;
868 }
869
870 max = desc.len / sizeof(VRingDesc);
871 i = 0;
872 vring_desc_read(vdev, &desc, desc_cache, i);
873 }
874
875 /* Collect all the descriptors */
876 do {
877 bool map_ok;
878
879 if (desc.flags & VRING_DESC_F_WRITE) {
880 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
881 iov + out_num,
882 VIRTQUEUE_MAX_SIZE - out_num, true,
883 desc.addr, desc.len);
884 } else {
885 if (in_num) {
886 virtio_error(vdev, "Incorrect order for descriptors");
887 goto err_undo_map;
888 }
889 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
890 VIRTQUEUE_MAX_SIZE, false,
891 desc.addr, desc.len);
892 }
893 if (!map_ok) {
894 goto err_undo_map;
895 }
896
897 /* If we've got too many, that implies a descriptor loop. */
898 if ((in_num + out_num) > max) {
899 virtio_error(vdev, "Looped descriptor");
900 goto err_undo_map;
901 }
902
903 rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
904 } while (rc == VIRTQUEUE_READ_DESC_MORE);
905
906 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
907 goto err_undo_map;
908 }
909
910 /* Now copy what we have collected and mapped */
911 elem = virtqueue_alloc_element(sz, out_num, in_num);
912 elem->index = head;
913 for (i = 0; i < out_num; i++) {
914 elem->out_addr[i] = addr[i];
915 elem->out_sg[i] = iov[i];
916 }
917 for (i = 0; i < in_num; i++) {
918 elem->in_addr[i] = addr[out_num + i];
919 elem->in_sg[i] = iov[out_num + i];
920 }
921
922 vq->inuse++;
923
924 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
925 done:
926 address_space_cache_destroy(&indirect_desc_cache);
927 rcu_read_unlock();
928
929 return elem;
930
931 err_undo_map:
932 virtqueue_undo_map_desc(out_num, in_num, iov);
933 goto done;
934 }
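/*
 * Typical caller pattern (illustrative sketch; bytes_written is a
 * placeholder for however much the device wrote into the in_sg buffers):
 *
 *     VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (!elem) {
 *         return;
 *     }
 *     ... read the request from elem->out_sg, write the response into
 *         elem->in_sg ...
 *     virtqueue_push(vq, elem, bytes_written);
 *     virtio_notify(vdev, vq);
 *     g_free(elem);
 */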
935
936 /* virtqueue_drop_all:
937 * @vq: The #VirtQueue
938 * Drops all queued buffers and indicates them to the guest
939  * as if they are done. Useful when buffers cannot be
940 * processed but must be returned to the guest.
941 */
942 unsigned int virtqueue_drop_all(VirtQueue *vq)
943 {
944 unsigned int dropped = 0;
945 VirtQueueElement elem = {};
946 VirtIODevice *vdev = vq->vdev;
947 bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
948
949 if (unlikely(vdev->broken)) {
950 return 0;
951 }
952
953 while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
954         /* works similarly to virtqueue_pop but does not map buffers
955 * and does not allocate any memory */
956 smp_rmb();
957 if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
958 break;
959 }
960 vq->inuse++;
961 vq->last_avail_idx++;
962 if (fEventIdx) {
963 vring_set_avail_event(vq, vq->last_avail_idx);
964 }
965 /* immediately push the element, nothing to unmap
966 * as both in_num and out_num are set to 0 */
967 virtqueue_push(vq, &elem, 0);
968 dropped++;
969 }
970
971 return dropped;
972 }
973
974 /* Reading and writing a structure directly to QEMUFile is *awful*, but
975 * it is what QEMU has always done by mistake. We can change it sooner
976 * or later by bumping the version number of the affected vm states.
977  * In the meantime, since the in-memory layout of VirtQueueElement
978 * has changed, we need to marshal to and from the layout that was
979 * used before the change.
980 */
981 typedef struct VirtQueueElementOld {
982 unsigned int index;
983 unsigned int out_num;
984 unsigned int in_num;
985 hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
986 hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
987 struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
988 struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
989 } VirtQueueElementOld;
990
991 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
992 {
993 VirtQueueElement *elem;
994 VirtQueueElementOld data;
995 int i;
996
997 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
998
999 /* TODO: teach all callers that this can fail, and return failure instead
1000 * of asserting here.
1001 * When we do, we might be able to re-enable NDEBUG below.
1002 */
1003 #ifdef NDEBUG
1004 #error building with NDEBUG is not supported
1005 #endif
1006 assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1007 assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1008
1009 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1010 elem->index = data.index;
1011
1012 for (i = 0; i < elem->in_num; i++) {
1013 elem->in_addr[i] = data.in_addr[i];
1014 }
1015
1016 for (i = 0; i < elem->out_num; i++) {
1017 elem->out_addr[i] = data.out_addr[i];
1018 }
1019
1020 for (i = 0; i < elem->in_num; i++) {
1021 /* Base is overwritten by virtqueue_map. */
1022 elem->in_sg[i].iov_base = 0;
1023 elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1024 }
1025
1026 for (i = 0; i < elem->out_num; i++) {
1027 /* Base is overwritten by virtqueue_map. */
1028 elem->out_sg[i].iov_base = 0;
1029 elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1030 }
1031
1032 virtqueue_map(vdev, elem);
1033 return elem;
1034 }
1035
1036 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
1037 {
1038 VirtQueueElementOld data;
1039 int i;
1040
1041 memset(&data, 0, sizeof(data));
1042 data.index = elem->index;
1043 data.in_num = elem->in_num;
1044 data.out_num = elem->out_num;
1045
1046 for (i = 0; i < elem->in_num; i++) {
1047 data.in_addr[i] = elem->in_addr[i];
1048 }
1049
1050 for (i = 0; i < elem->out_num; i++) {
1051 data.out_addr[i] = elem->out_addr[i];
1052 }
1053
1054 for (i = 0; i < elem->in_num; i++) {
1055 /* Base is overwritten by virtqueue_map when loading. Do not
1056 * save it, as it would leak the QEMU address space layout. */
1057 data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1058 }
1059
1060 for (i = 0; i < elem->out_num; i++) {
1061 /* Do not save iov_base as above. */
1062 data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1063 }
1064 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1065 }
1066
1067 /* virtio device */
1068 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1069 {
1070 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1071 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1072
1073 if (unlikely(vdev->broken)) {
1074 return;
1075 }
1076
1077 if (k->notify) {
1078 k->notify(qbus->parent, vector);
1079 }
1080 }
1081
1082 void virtio_update_irq(VirtIODevice *vdev)
1083 {
1084 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1085 }
1086
1087 static int virtio_validate_features(VirtIODevice *vdev)
1088 {
1089 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1090
1091 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1092 !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1093 return -EFAULT;
1094 }
1095
1096 if (k->validate_features) {
1097 return k->validate_features(vdev);
1098 } else {
1099 return 0;
1100 }
1101 }
1102
1103 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1104 {
1105 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1106 trace_virtio_set_status(vdev, val);
1107
1108 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1109 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1110 val & VIRTIO_CONFIG_S_FEATURES_OK) {
1111 int ret = virtio_validate_features(vdev);
1112
1113 if (ret) {
1114 return ret;
1115 }
1116 }
1117 }
1118 if (k->set_status) {
1119 k->set_status(vdev, val);
1120 }
1121 vdev->status = val;
1122 return 0;
1123 }
1124
1125 bool target_words_bigendian(void);
1126 static enum virtio_device_endian virtio_default_endian(void)
1127 {
1128 if (target_words_bigendian()) {
1129 return VIRTIO_DEVICE_ENDIAN_BIG;
1130 } else {
1131 return VIRTIO_DEVICE_ENDIAN_LITTLE;
1132 }
1133 }
1134
1135 static enum virtio_device_endian virtio_current_cpu_endian(void)
1136 {
1137 CPUClass *cc = CPU_GET_CLASS(current_cpu);
1138
1139 if (cc->virtio_is_big_endian(current_cpu)) {
1140 return VIRTIO_DEVICE_ENDIAN_BIG;
1141 } else {
1142 return VIRTIO_DEVICE_ENDIAN_LITTLE;
1143 }
1144 }
1145
1146 void virtio_reset(void *opaque)
1147 {
1148 VirtIODevice *vdev = opaque;
1149 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1150 int i;
1151
1152 virtio_set_status(vdev, 0);
1153 if (current_cpu) {
1154 /* Guest initiated reset */
1155 vdev->device_endian = virtio_current_cpu_endian();
1156 } else {
1157 /* System reset */
1158 vdev->device_endian = virtio_default_endian();
1159 }
1160
1161 if (k->reset) {
1162 k->reset(vdev);
1163 }
1164
1165 vdev->broken = false;
1166 vdev->guest_features = 0;
1167 vdev->queue_sel = 0;
1168 vdev->status = 0;
1169 atomic_set(&vdev->isr, 0);
1170 vdev->config_vector = VIRTIO_NO_VECTOR;
1171 virtio_notify_vector(vdev, vdev->config_vector);
1172
1173 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1174 vdev->vq[i].vring.desc = 0;
1175 vdev->vq[i].vring.avail = 0;
1176 vdev->vq[i].vring.used = 0;
1177 vdev->vq[i].last_avail_idx = 0;
1178 vdev->vq[i].shadow_avail_idx = 0;
1179 vdev->vq[i].used_idx = 0;
1180 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
1181 vdev->vq[i].signalled_used = 0;
1182 vdev->vq[i].signalled_used_valid = false;
1183 vdev->vq[i].notification = true;
1184 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
1185 vdev->vq[i].inuse = 0;
1186 }
1187 }
1188
1189 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
1190 {
1191 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1192 uint8_t val;
1193
1194 if (addr + sizeof(val) > vdev->config_len) {
1195 return (uint32_t)-1;
1196 }
1197
1198 k->get_config(vdev, vdev->config);
1199
1200 val = ldub_p(vdev->config + addr);
1201 return val;
1202 }
1203
1204 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
1205 {
1206 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1207 uint16_t val;
1208
1209 if (addr + sizeof(val) > vdev->config_len) {
1210 return (uint32_t)-1;
1211 }
1212
1213 k->get_config(vdev, vdev->config);
1214
1215 val = lduw_p(vdev->config + addr);
1216 return val;
1217 }
1218
1219 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
1220 {
1221 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1222 uint32_t val;
1223
1224 if (addr + sizeof(val) > vdev->config_len) {
1225 return (uint32_t)-1;
1226 }
1227
1228 k->get_config(vdev, vdev->config);
1229
1230 val = ldl_p(vdev->config + addr);
1231 return val;
1232 }
1233
1234 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1235 {
1236 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1237 uint8_t val = data;
1238
1239 if (addr + sizeof(val) > vdev->config_len) {
1240 return;
1241 }
1242
1243 stb_p(vdev->config + addr, val);
1244
1245 if (k->set_config) {
1246 k->set_config(vdev, vdev->config);
1247 }
1248 }
1249
1250 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1251 {
1252 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1253 uint16_t val = data;
1254
1255 if (addr + sizeof(val) > vdev->config_len) {
1256 return;
1257 }
1258
1259 stw_p(vdev->config + addr, val);
1260
1261 if (k->set_config) {
1262 k->set_config(vdev, vdev->config);
1263 }
1264 }
1265
1266 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1267 {
1268 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1269 uint32_t val = data;
1270
1271 if (addr + sizeof(val) > vdev->config_len) {
1272 return;
1273 }
1274
1275 stl_p(vdev->config + addr, val);
1276
1277 if (k->set_config) {
1278 k->set_config(vdev, vdev->config);
1279 }
1280 }
1281
1282 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
1283 {
1284 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1285 uint8_t val;
1286
1287 if (addr + sizeof(val) > vdev->config_len) {
1288 return (uint32_t)-1;
1289 }
1290
1291 k->get_config(vdev, vdev->config);
1292
1293 val = ldub_p(vdev->config + addr);
1294 return val;
1295 }
1296
1297 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
1298 {
1299 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1300 uint16_t val;
1301
1302 if (addr + sizeof(val) > vdev->config_len) {
1303 return (uint32_t)-1;
1304 }
1305
1306 k->get_config(vdev, vdev->config);
1307
1308 val = lduw_le_p(vdev->config + addr);
1309 return val;
1310 }
1311
1312 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
1313 {
1314 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1315 uint32_t val;
1316
1317 if (addr + sizeof(val) > vdev->config_len) {
1318 return (uint32_t)-1;
1319 }
1320
1321 k->get_config(vdev, vdev->config);
1322
1323 val = ldl_le_p(vdev->config + addr);
1324 return val;
1325 }
1326
1327 void virtio_config_modern_writeb(VirtIODevice *vdev,
1328 uint32_t addr, uint32_t data)
1329 {
1330 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1331 uint8_t val = data;
1332
1333 if (addr + sizeof(val) > vdev->config_len) {
1334 return;
1335 }
1336
1337 stb_p(vdev->config + addr, val);
1338
1339 if (k->set_config) {
1340 k->set_config(vdev, vdev->config);
1341 }
1342 }
1343
1344 void virtio_config_modern_writew(VirtIODevice *vdev,
1345 uint32_t addr, uint32_t data)
1346 {
1347 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1348 uint16_t val = data;
1349
1350 if (addr + sizeof(val) > vdev->config_len) {
1351 return;
1352 }
1353
1354 stw_le_p(vdev->config + addr, val);
1355
1356 if (k->set_config) {
1357 k->set_config(vdev, vdev->config);
1358 }
1359 }
1360
1361 void virtio_config_modern_writel(VirtIODevice *vdev,
1362 uint32_t addr, uint32_t data)
1363 {
1364 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1365 uint32_t val = data;
1366
1367 if (addr + sizeof(val) > vdev->config_len) {
1368 return;
1369 }
1370
1371 stl_le_p(vdev->config + addr, val);
1372
1373 if (k->set_config) {
1374 k->set_config(vdev, vdev->config);
1375 }
1376 }
1377
1378 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
1379 {
1380 vdev->vq[n].vring.desc = addr;
1381 virtio_queue_update_rings(vdev, n);
1382 }
1383
1384 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
1385 {
1386 return vdev->vq[n].vring.desc;
1387 }
1388
1389 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
1390 hwaddr avail, hwaddr used)
1391 {
1392 vdev->vq[n].vring.desc = desc;
1393 vdev->vq[n].vring.avail = avail;
1394 vdev->vq[n].vring.used = used;
1395 virtio_init_region_cache(vdev, n);
1396 }
1397
1398 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
1399 {
1400 /* Don't allow guest to flip queue between existent and
1401 * nonexistent states, or to set it to an invalid size.
1402 */
1403 if (!!num != !!vdev->vq[n].vring.num ||
1404 num > VIRTQUEUE_MAX_SIZE ||
1405 num < 0) {
1406 return;
1407 }
1408 vdev->vq[n].vring.num = num;
1409 }
1410
1411 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
1412 {
1413 return QLIST_FIRST(&vdev->vector_queues[vector]);
1414 }
1415
1416 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
1417 {
1418 return QLIST_NEXT(vq, node);
1419 }
1420
1421 int virtio_queue_get_num(VirtIODevice *vdev, int n)
1422 {
1423 return vdev->vq[n].vring.num;
1424 }
1425
1426 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
1427 {
1428 return vdev->vq[n].vring.num_default;
1429 }
1430
1431 int virtio_get_num_queues(VirtIODevice *vdev)
1432 {
1433 int i;
1434
1435 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1436 if (!virtio_queue_get_num(vdev, i)) {
1437 break;
1438 }
1439 }
1440
1441 return i;
1442 }
1443
1444 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
1445 {
1446 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1447 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1448
1449 /* virtio-1 compliant devices cannot change the alignment */
1450 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1451 error_report("tried to modify queue alignment for virtio-1 device");
1452 return;
1453 }
1454 /* Check that the transport told us it was going to do this
1455 * (so a buggy transport will immediately assert rather than
1456 * silently failing to migrate this state)
1457 */
1458 assert(k->has_variable_vring_alignment);
1459
1460 vdev->vq[n].vring.align = align;
1461 virtio_queue_update_rings(vdev, n);
1462 }
1463
1464 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
1465 {
1466 if (vq->vring.desc && vq->handle_aio_output) {
1467 VirtIODevice *vdev = vq->vdev;
1468
1469 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1470 return vq->handle_aio_output(vdev, vq);
1471 }
1472
1473 return false;
1474 }
1475
1476 static void virtio_queue_notify_vq(VirtQueue *vq)
1477 {
1478 if (vq->vring.desc && vq->handle_output) {
1479 VirtIODevice *vdev = vq->vdev;
1480
1481 if (unlikely(vdev->broken)) {
1482 return;
1483 }
1484
1485 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1486 vq->handle_output(vdev, vq);
1487 }
1488 }
1489
1490 void virtio_queue_notify(VirtIODevice *vdev, int n)
1491 {
1492 virtio_queue_notify_vq(&vdev->vq[n]);
1493 }
1494
1495 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
1496 {
1497 return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
1498 VIRTIO_NO_VECTOR;
1499 }
1500
1501 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
1502 {
1503 VirtQueue *vq = &vdev->vq[n];
1504
1505 if (n < VIRTIO_QUEUE_MAX) {
1506 if (vdev->vector_queues &&
1507 vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
1508 QLIST_REMOVE(vq, node);
1509 }
1510 vdev->vq[n].vector = vector;
1511 if (vdev->vector_queues &&
1512 vector != VIRTIO_NO_VECTOR) {
1513 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
1514 }
1515 }
1516 }
1517
1518 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
1519 VirtIOHandleOutput handle_output)
1520 {
1521 int i;
1522
1523 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1524 if (vdev->vq[i].vring.num == 0)
1525 break;
1526 }
1527
1528 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
1529 abort();
1530
1531 vdev->vq[i].vring.num = queue_size;
1532 vdev->vq[i].vring.num_default = queue_size;
1533 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
1534 vdev->vq[i].handle_output = handle_output;
1535 vdev->vq[i].handle_aio_output = NULL;
1536
1537 return &vdev->vq[i];
1538 }
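/*
 * Usage sketch (illustrative; the field, queue size and handler name are
 * hypothetical): a device's realize function typically creates its queues
 * once, e.g.:
 *
 *     s->req_vq = virtio_add_queue(vdev, 128, my_device_handle_request);
 *
 * where my_device_handle_request has the VirtIOHandleOutput signature
 * void (*)(VirtIODevice *vdev, VirtQueue *vq).
 */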
1539
1540 void virtio_del_queue(VirtIODevice *vdev, int n)
1541 {
1542 if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
1543 abort();
1544 }
1545
1546 vdev->vq[n].vring.num = 0;
1547 vdev->vq[n].vring.num_default = 0;
1548 }
1549
1550 static void virtio_set_isr(VirtIODevice *vdev, int value)
1551 {
1552 uint8_t old = atomic_read(&vdev->isr);
1553
1554 /* Do not write ISR if it does not change, so that its cacheline remains
1555 * shared in the common case where the guest does not read it.
1556 */
1557 if ((old & value) != value) {
1558 atomic_or(&vdev->isr, value);
1559 }
1560 }
1561
1562 /* Called within rcu_read_lock(). */
1563 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
1564 {
1565 uint16_t old, new;
1566 bool v;
1567 /* We need to expose used array entries before checking used event. */
1568 smp_mb();
1569     /* Always notify when queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY was negotiated) */
1570 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1571 !vq->inuse && virtio_queue_empty(vq)) {
1572 return true;
1573 }
1574
1575 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1576 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1577 }
1578
1579 v = vq->signalled_used_valid;
1580 vq->signalled_used_valid = true;
1581 old = vq->signalled_used;
1582 new = vq->signalled_used = vq->used_idx;
1583 return !v || vring_need_event(vring_get_used_event(vq), new, old);
1584 }
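/*
 * For reference (from the standard virtio ring header, not defined in this
 * file): vring_need_event(event_idx, new, old) evaluates to
 *
 *     (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old)
 *
 * i.e. the guest is notified only if used_idx has crossed the used_event
 * index it published since the last notification.
 */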
1585
1586 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
1587 {
1588 bool should_notify;
1589 rcu_read_lock();
1590 should_notify = virtio_should_notify(vdev, vq);
1591 rcu_read_unlock();
1592
1593 if (!should_notify) {
1594 return;
1595 }
1596
1597 trace_virtio_notify_irqfd(vdev, vq);
1598
1599 /*
1600 * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
1601 * windows drivers included in virtio-win 1.8.0 (circa 2015) are
1602 * incorrectly polling this bit during crashdump and hibernation
1603 * in MSI mode, causing a hang if this bit is never updated.
1604 * Recent releases of Windows do not really shut down, but rather
1605 * log out and hibernate to make the next startup faster. Hence,
1606      * this manifested as a more serious hang during shutdown with these drivers.
1607      *
1608      * The next driver release, from 2016, fixed this problem, so working around it
1609 * is not a must, but it's easy to do so let's do it here.
1610 *
1611 * Note: it's safe to update ISR from any thread as it was switched
1612 * to an atomic operation.
1613 */
1614 virtio_set_isr(vq->vdev, 0x1);
1615 event_notifier_set(&vq->guest_notifier);
1616 }
1617
1618 static void virtio_irq(VirtQueue *vq)
1619 {
1620 virtio_set_isr(vq->vdev, 0x1);
1621 virtio_notify_vector(vq->vdev, vq->vector);
1622 }
1623
1624 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1625 {
1626 bool should_notify;
1627 rcu_read_lock();
1628 should_notify = virtio_should_notify(vdev, vq);
1629 rcu_read_unlock();
1630
1631 if (!should_notify) {
1632 return;
1633 }
1634
1635 trace_virtio_notify(vdev, vq);
1636 virtio_irq(vq);
1637 }
1638
1639 void virtio_notify_config(VirtIODevice *vdev)
1640 {
1641 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1642 return;
1643
1644 virtio_set_isr(vdev, 0x3);
1645 vdev->generation++;
1646 virtio_notify_vector(vdev, vdev->config_vector);
1647 }
1648
1649 static bool virtio_device_endian_needed(void *opaque)
1650 {
1651 VirtIODevice *vdev = opaque;
1652
1653 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1654 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1655 return vdev->device_endian != virtio_default_endian();
1656 }
1657 /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1658 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1659 }
1660
1661 static bool virtio_64bit_features_needed(void *opaque)
1662 {
1663 VirtIODevice *vdev = opaque;
1664
1665 return (vdev->host_features >> 32) != 0;
1666 }
1667
1668 static bool virtio_virtqueue_needed(void *opaque)
1669 {
1670 VirtIODevice *vdev = opaque;
1671
1672 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1673 }
1674
1675 static bool virtio_ringsize_needed(void *opaque)
1676 {
1677 VirtIODevice *vdev = opaque;
1678 int i;
1679
1680 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1681 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1682 return true;
1683 }
1684 }
1685 return false;
1686 }
1687
1688 static bool virtio_extra_state_needed(void *opaque)
1689 {
1690 VirtIODevice *vdev = opaque;
1691 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1692 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1693
1694 return k->has_extra_state &&
1695 k->has_extra_state(qbus->parent);
1696 }
1697
1698 static bool virtio_broken_needed(void *opaque)
1699 {
1700 VirtIODevice *vdev = opaque;
1701
1702 return vdev->broken;
1703 }
1704
1705 static const VMStateDescription vmstate_virtqueue = {
1706 .name = "virtqueue_state",
1707 .version_id = 1,
1708 .minimum_version_id = 1,
1709 .fields = (VMStateField[]) {
1710 VMSTATE_UINT64(vring.avail, struct VirtQueue),
1711 VMSTATE_UINT64(vring.used, struct VirtQueue),
1712 VMSTATE_END_OF_LIST()
1713 }
1714 };
1715
1716 static const VMStateDescription vmstate_virtio_virtqueues = {
1717 .name = "virtio/virtqueues",
1718 .version_id = 1,
1719 .minimum_version_id = 1,
1720 .needed = &virtio_virtqueue_needed,
1721 .fields = (VMStateField[]) {
1722 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1723 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1724 VMSTATE_END_OF_LIST()
1725 }
1726 };
1727
1728 static const VMStateDescription vmstate_ringsize = {
1729 .name = "ringsize_state",
1730 .version_id = 1,
1731 .minimum_version_id = 1,
1732 .fields = (VMStateField[]) {
1733 VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1734 VMSTATE_END_OF_LIST()
1735 }
1736 };
1737
1738 static const VMStateDescription vmstate_virtio_ringsize = {
1739 .name = "virtio/ringsize",
1740 .version_id = 1,
1741 .minimum_version_id = 1,
1742 .needed = &virtio_ringsize_needed,
1743 .fields = (VMStateField[]) {
1744 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1745 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1746 VMSTATE_END_OF_LIST()
1747 }
1748 };
1749
1750 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
1751 VMStateField *field)
1752 {
1753 VirtIODevice *vdev = pv;
1754 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1755 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1756
1757 if (!k->load_extra_state) {
1758 return -1;
1759 } else {
1760 return k->load_extra_state(qbus->parent, f);
1761 }
1762 }
1763
1764 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
1765 VMStateField *field, QJSON *vmdesc)
1766 {
1767 VirtIODevice *vdev = pv;
1768 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1769 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1770
1771 k->save_extra_state(qbus->parent, f);
1772 return 0;
1773 }
1774
1775 static const VMStateInfo vmstate_info_extra_state = {
1776 .name = "virtqueue_extra_state",
1777 .get = get_extra_state,
1778 .put = put_extra_state,
1779 };
1780
1781 static const VMStateDescription vmstate_virtio_extra_state = {
1782 .name = "virtio/extra_state",
1783 .version_id = 1,
1784 .minimum_version_id = 1,
1785 .needed = &virtio_extra_state_needed,
1786 .fields = (VMStateField[]) {
1787 {
1788 .name = "extra_state",
1789 .version_id = 0,
1790 .field_exists = NULL,
1791 .size = 0,
1792 .info = &vmstate_info_extra_state,
1793 .flags = VMS_SINGLE,
1794 .offset = 0,
1795 },
1796 VMSTATE_END_OF_LIST()
1797 }
1798 };
1799
1800 static const VMStateDescription vmstate_virtio_device_endian = {
1801 .name = "virtio/device_endian",
1802 .version_id = 1,
1803 .minimum_version_id = 1,
1804 .needed = &virtio_device_endian_needed,
1805 .fields = (VMStateField[]) {
1806 VMSTATE_UINT8(device_endian, VirtIODevice),
1807 VMSTATE_END_OF_LIST()
1808 }
1809 };
1810
1811 static const VMStateDescription vmstate_virtio_64bit_features = {
1812 .name = "virtio/64bit_features",
1813 .version_id = 1,
1814 .minimum_version_id = 1,
1815 .needed = &virtio_64bit_features_needed,
1816 .fields = (VMStateField[]) {
1817 VMSTATE_UINT64(guest_features, VirtIODevice),
1818 VMSTATE_END_OF_LIST()
1819 }
1820 };
1821
1822 static const VMStateDescription vmstate_virtio_broken = {
1823 .name = "virtio/broken",
1824 .version_id = 1,
1825 .minimum_version_id = 1,
1826 .needed = &virtio_broken_needed,
1827 .fields = (VMStateField[]) {
1828 VMSTATE_BOOL(broken, VirtIODevice),
1829 VMSTATE_END_OF_LIST()
1830 }
1831 };
1832
1833 static const VMStateDescription vmstate_virtio = {
1834 .name = "virtio",
1835 .version_id = 1,
1836 .minimum_version_id = 1,
1837 .minimum_version_id_old = 1,
1838 .fields = (VMStateField[]) {
1839 VMSTATE_END_OF_LIST()
1840 },
1841 .subsections = (const VMStateDescription*[]) {
1842 &vmstate_virtio_device_endian,
1843 &vmstate_virtio_64bit_features,
1844 &vmstate_virtio_virtqueues,
1845 &vmstate_virtio_ringsize,
1846 &vmstate_virtio_broken,
1847 &vmstate_virtio_extra_state,
1848 NULL
1849 }
1850 };
1851
1852 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1853 {
1854 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1855 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1856 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1857 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1858 int i;
1859
1860 if (k->save_config) {
1861 k->save_config(qbus->parent, f);
1862 }
1863
1864 qemu_put_8s(f, &vdev->status);
1865 qemu_put_8s(f, &vdev->isr);
1866 qemu_put_be16s(f, &vdev->queue_sel);
1867 qemu_put_be32s(f, &guest_features_lo);
1868 qemu_put_be32(f, vdev->config_len);
1869 qemu_put_buffer(f, vdev->config, vdev->config_len);
1870
1871 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1872 if (vdev->vq[i].vring.num == 0)
1873 break;
1874 }
1875
1876 qemu_put_be32(f, i);
1877
1878 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1879 if (vdev->vq[i].vring.num == 0)
1880 break;
1881
1882 qemu_put_be32(f, vdev->vq[i].vring.num);
1883 if (k->has_variable_vring_alignment) {
1884 qemu_put_be32(f, vdev->vq[i].vring.align);
1885 }
1886 /*
1887 * Save desc now, the rest of the ring addresses are saved in
1888 * subsections for VIRTIO-1 devices.
1889 */
1890 qemu_put_be64(f, vdev->vq[i].vring.desc);
1891 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1892 if (k->save_queue) {
1893 k->save_queue(qbus->parent, i, f);
1894 }
1895 }
1896
1897 if (vdc->save != NULL) {
1898 vdc->save(vdev, f);
1899 }
1900
1901 if (vdc->vmsd) {
1902 vmstate_save_state(f, vdc->vmsd, vdev, NULL);
1903 }
1904
1905 /* Subsections */
1906 vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1907 }
1908
1909 /* A wrapper for use as a VMState .put function */
1910 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
1911 VMStateField *field, QJSON *vmdesc)
1912 {
1913 virtio_save(VIRTIO_DEVICE(opaque), f);
1914
1915 return 0;
1916 }
1917
1918 /* A wrapper for use as a VMState .get function */
1919 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
1920 VMStateField *field)
1921 {
1922 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
1923 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
1924
1925 return virtio_load(vdev, f, dc->vmsd->version_id);
1926 }
1927
1928 const VMStateInfo virtio_vmstate_info = {
1929 .name = "virtio",
1930 .get = virtio_device_get,
1931 .put = virtio_device_put,
1932 };
1933
1934 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1935 {
1936 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1937 bool bad = (val & ~(vdev->host_features)) != 0;
1938
1939 val &= vdev->host_features;
1940 if (k->set_features) {
1941 k->set_features(vdev, val);
1942 }
1943 vdev->guest_features = val;
1944 return bad ? -1 : 0;
1945 }
1946
1947 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1948 {
1949 /*
1950 * The driver must not attempt to set features after feature negotiation
1951 * has finished.
1952 */
1953 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1954 return -EINVAL;
1955 }
1956 return virtio_set_features_nocheck(vdev, val);
1957 }
1958
1959 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1960 {
1961 int i, ret;
1962 int32_t config_len;
1963 uint32_t num;
1964 uint32_t features;
1965 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1966 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1967 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1968
1969 /*
1970 * We poison the endianness to ensure it does not get used before
1971 * subsections have been loaded.
1972 */
1973 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1974
1975 if (k->load_config) {
1976 ret = k->load_config(qbus->parent, f);
1977 if (ret)
1978 return ret;
1979 }
1980
1981 qemu_get_8s(f, &vdev->status);
1982 qemu_get_8s(f, &vdev->isr);
1983 qemu_get_be16s(f, &vdev->queue_sel);
1984 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1985 return -1;
1986 }
1987 qemu_get_be32s(f, &features);
1988
1989 /*
1990 * Temporarily set guest_features low bits - needed by
1991 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
1992 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
1993 *
1994 * Note: devices should always test host features in future - don't create
1995 * new dependencies like this.
1996 */
1997 vdev->guest_features = features;
1998
1999 config_len = qemu_get_be32(f);
2000
2001 /*
2002 * There are cases where the incoming config can be bigger or smaller
2003 * than what we have; so load what we have space for, and skip
2004 * any excess that's in the stream.
2005 */
2006 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
2007
2008 while (config_len > vdev->config_len) {
2009 qemu_get_byte(f);
2010 config_len--;
2011 }
2012
2013 num = qemu_get_be32(f);
2014
2015 if (num > VIRTIO_QUEUE_MAX) {
2016 error_report("Invalid number of virtqueues: 0x%x", num);
2017 return -1;
2018 }
2019
2020 for (i = 0; i < num; i++) {
2021 vdev->vq[i].vring.num = qemu_get_be32(f);
2022 if (k->has_variable_vring_alignment) {
2023 vdev->vq[i].vring.align = qemu_get_be32(f);
2024 }
2025 vdev->vq[i].vring.desc = qemu_get_be64(f);
2026 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
2027 vdev->vq[i].signalled_used_valid = false;
2028 vdev->vq[i].notification = true;
2029
2030 if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
2031 error_report("VQ %d address 0x0 "
2032 "inconsistent with Host index 0x%x",
2033 i, vdev->vq[i].last_avail_idx);
2034 return -1;
2035 }
2036 if (k->load_queue) {
2037 ret = k->load_queue(qbus->parent, i, f);
2038 if (ret)
2039 return ret;
2040 }
2041 }
2042
2043 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
2044
2045 if (vdc->load != NULL) {
2046 ret = vdc->load(vdev, f, version_id);
2047 if (ret) {
2048 return ret;
2049 }
2050 }
2051
2052 if (vdc->vmsd) {
2053 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
2054 if (ret) {
2055 return ret;
2056 }
2057 }
2058
2059 /* Subsections */
2060 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
2061 if (ret) {
2062 return ret;
2063 }
2064
2065 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
2066 vdev->device_endian = virtio_default_endian();
2067 }
2068
2069 if (virtio_64bit_features_needed(vdev)) {
2070 /*
2071 * Subsection load filled vdev->guest_features. Run them
2072 * through virtio_set_features to sanity-check them against
2073 * host_features.
2074 */
2075 uint64_t features64 = vdev->guest_features;
2076 if (virtio_set_features_nocheck(vdev, features64) < 0) {
2077 error_report("Features 0x%" PRIx64 " unsupported. "
2078 "Allowed features: 0x%" PRIx64,
2079 features64, vdev->host_features);
2080 return -1;
2081 }
2082 } else {
2083 if (virtio_set_features_nocheck(vdev, features) < 0) {
2084 error_report("Features 0x%x unsupported. "
2085 "Allowed features: 0x%" PRIx64,
2086 features, vdev->host_features);
2087 return -1;
2088 }
2089 }
2090
2091 rcu_read_lock();
2092 for (i = 0; i < num; i++) {
2093 if (vdev->vq[i].vring.desc) {
2094 uint16_t nheads;
2095
2096 /*
2097 * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
2098 * only the region cache needs to be set up. Legacy devices need
2099 * to calculate used and avail ring addresses based on the desc
2100 * address.
2101 */
2102 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2103 virtio_init_region_cache(vdev, i);
2104 } else {
2105 virtio_queue_update_rings(vdev, i);
2106 }
2107
2108 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
2109 /* Check the guest isn't doing strange things with descriptor numbers. */
2110 if (nheads > vdev->vq[i].vring.num) {
2111 error_report("VQ %d size 0x%x Guest index 0x%x "
2112 "inconsistent with Host index 0x%x: delta 0x%x",
2113 i, vdev->vq[i].vring.num,
2114 vring_avail_idx(&vdev->vq[i]),
2115 vdev->vq[i].last_avail_idx, nheads);
2116 return -1;
2117 }
2118 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
2119 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
2120
2121 /*
2122 * Some devices migrate VirtQueueElements that have been popped
2123 * from the avail ring but not yet returned to the used ring.
2124 * Since max ring size < UINT16_MAX it's safe to use modulo
2125 * UINT16_MAX + 1 subtraction.
2126 */
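/*
 * Worked example of the wrap-around case handled below: with
 * last_avail_idx = 0x0002 and used_idx = 0xfffe the uint16_t
 * subtraction gives 0x0002 - 0xfffe = 0x0004, i.e. four elements
 * still in flight, even though the 16-bit counter has wrapped.
 */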
2127 vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
2128 vdev->vq[i].used_idx);
2129 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
2130 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
2131 "used_idx 0x%x",
2132 i, vdev->vq[i].vring.num,
2133 vdev->vq[i].last_avail_idx,
2134 vdev->vq[i].used_idx);
2135 return -1;
2136 }
2137 }
2138 }
2139 rcu_read_unlock();
2140
2141 return 0;
2142 }
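
/*
 * Illustrative sketch: a device that still uses the legacy vdc->load hook
 * checked above (rather than vdc->vmsd) would provide something along
 * these lines; the device type and field are hypothetical:
 *
 *     static int example_device_load(VirtIODevice *vdev, QEMUFile *f,
 *                                    int version_id)
 *     {
 *         ExampleDevice *s = EXAMPLE_DEVICE(vdev);
 *
 *         if (version_id > 1) {
 *             return -EINVAL;
 *         }
 *         s->example_field = qemu_get_be32(f);
 *         return 0;
 *     }
 *
 * virtio_load() invokes it via vdc->load once the common device and
 * per-queue fields have been read; virtio_device_realize() asserts that
 * vmsd and the legacy load/save hooks are not mixed.
 */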
2143
2144 void virtio_cleanup(VirtIODevice *vdev)
2145 {
2146 qemu_del_vm_change_state_handler(vdev->vmstate);
2147 }
2148
2149 static void virtio_vmstate_change(void *opaque, int running, RunState state)
2150 {
2151 VirtIODevice *vdev = opaque;
2152 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2153 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2154 bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
2155 vdev->vm_running = running;
2156
2157 if (backend_run) {
2158 virtio_set_status(vdev, vdev->status);
2159 }
2160
2161 if (k->vmstate_change) {
2162 k->vmstate_change(qbus->parent, backend_run);
2163 }
2164
2165 if (!backend_run) {
2166 virtio_set_status(vdev, vdev->status);
2167 }
2168 }
2169
2170 void virtio_instance_init_common(Object *proxy_obj, void *data,
2171 size_t vdev_size, const char *vdev_name)
2172 {
2173 DeviceState *vdev = data;
2174
2175 object_initialize(vdev, vdev_size, vdev_name);
2176 object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
2177 object_unref(OBJECT(vdev));
2178 qdev_alias_all_properties(vdev, proxy_obj);
2179 }
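
/*
 * Illustrative sketch: transport proxies typically call
 * virtio_instance_init_common() from their instance_init so that the
 * backend VirtIODevice is embedded in the proxy state.  The proxy and
 * backend type names below are hypothetical:
 *
 *     static void example_proxy_instance_init(Object *obj)
 *     {
 *         ExampleProxy *proxy = EXAMPLE_PROXY(obj);
 *
 *         virtio_instance_init_common(obj, &proxy->vdev,
 *                                     sizeof(proxy->vdev),
 *                                     TYPE_EXAMPLE_BACKEND);
 *     }
 */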
2180
2181 void virtio_init(VirtIODevice *vdev, const char *name,
2182 uint16_t device_id, size_t config_size)
2183 {
2184 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2185 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2186 int i;
2187 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
2188
2189 if (nvectors) {
2190 vdev->vector_queues =
2191 g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
2192 }
2193
2194 vdev->device_id = device_id;
2195 vdev->status = 0;
2196 atomic_set(&vdev->isr, 0);
2197 vdev->queue_sel = 0;
2198 vdev->config_vector = VIRTIO_NO_VECTOR;
2199 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
2200 vdev->vm_running = runstate_is_running();
2201 vdev->broken = false;
2202 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2203 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
2204 vdev->vq[i].vdev = vdev;
2205 vdev->vq[i].queue_index = i;
2206 }
2207
2208 vdev->name = name;
2209 vdev->config_len = config_size;
2210 if (vdev->config_len) {
2211 vdev->config = g_malloc0(config_size);
2212 } else {
2213 vdev->config = NULL;
2214 }
2215 vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
2216 vdev);
2217 vdev->device_endian = virtio_default_endian();
2218 vdev->use_guest_notifier_mask = true;
2219 }
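
/*
 * Illustrative sketch: a device realize function normally pairs
 * virtio_init() with virtio_add_queue() (both declared in
 * hw/virtio/virtio.h).  Everything except those two helpers is a
 * hypothetical placeholder:
 *
 *     static void example_device_realize(DeviceState *dev, Error **errp)
 *     {
 *         VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 *         ExampleDevice *s = EXAMPLE_DEVICE(dev);
 *
 *         virtio_init(vdev, "example", EXAMPLE_DEVICE_ID,
 *                     sizeof(struct example_config));
 *         s->vq = virtio_add_queue(vdev, 128, example_handle_output);
 *     }
 */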
2220
2221 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
2222 {
2223 return vdev->vq[n].vring.desc;
2224 }
2225
2226 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
2227 {
2228 return vdev->vq[n].vring.avail;
2229 }
2230
2231 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
2232 {
2233 return vdev->vq[n].vring.used;
2234 }
2235
2236 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
2237 {
2238 return sizeof(VRingDesc) * vdev->vq[n].vring.num;
2239 }
2240
2241 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
2242 {
2243 return offsetof(VRingAvail, ring) +
2244 sizeof(uint16_t) * vdev->vq[n].vring.num;
2245 }
2246
2247 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
2248 {
2249 return offsetof(VRingUsed, ring) +
2250 sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
2251 }
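
/*
 * Worked example of the three size helpers above for a 256-entry queue:
 *
 *     desc:  sizeof(VRingDesc)         * 256 = 16 * 256 = 4096 bytes
 *     avail: 4 + sizeof(uint16_t)      * 256 = 4 + 512  =  516 bytes
 *     used:  4 + sizeof(VRingUsedElem) * 256 = 4 + 2048 = 2052 bytes
 *
 * When VIRTIO_RING_F_EVENT_IDX is negotiated the callers account for the
 * extra 2-byte event field separately (see event_size in
 * virtio_init_region_cache()).
 */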
2252
2253 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
2254 {
2255 return vdev->vq[n].last_avail_idx;
2256 }
2257
2258 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
2259 {
2260 vdev->vq[n].last_avail_idx = idx;
2261 vdev->vq[n].shadow_avail_idx = idx;
2262 }
2263
2264 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
2265 {
2266 rcu_read_lock();
2267 if (vdev->vq[n].vring.desc) {
2268 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
2269 }
2270 rcu_read_unlock();
2271 }
2272
2273 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
2274 {
2275 vdev->vq[n].signalled_used_valid = false;
2276 }
2277
2278 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
2279 {
2280 return vdev->vq + n;
2281 }
2282
2283 uint16_t virtio_get_queue_index(VirtQueue *vq)
2284 {
2285 return vq->queue_index;
2286 }
2287
2288 static void virtio_queue_guest_notifier_read(EventNotifier *n)
2289 {
2290 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
2291 if (event_notifier_test_and_clear(n)) {
2292 virtio_irq(vq);
2293 }
2294 }
2295
2296 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
2297 bool with_irqfd)
2298 {
2299 if (assign && !with_irqfd) {
2300 event_notifier_set_handler(&vq->guest_notifier,
2301 virtio_queue_guest_notifier_read);
2302 } else {
2303 event_notifier_set_handler(&vq->guest_notifier, NULL);
2304 }
2305 if (!assign) {
2306 /* Test and clear notifier before closing it,
2307 * in case poll callback didn't have time to run. */
2308 virtio_queue_guest_notifier_read(&vq->guest_notifier);
2309 }
2310 }
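
/*
 * Illustrative sketch: a transport's set_guest_notifiers() implementation
 * typically initialises the per-queue notifier and then installs or
 * removes the fd handler, depending on whether an irqfd consumes the
 * eventfd directly or QEMU has to poll it.  The helper name is
 * hypothetical:
 *
 *     static int example_set_guest_notifier(VirtIODevice *vdev, int n,
 *                                           bool assign, bool with_irqfd)
 *     {
 *         VirtQueue *vq = virtio_get_queue(vdev, n);
 *         EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
 *
 *         if (assign) {
 *             int r = event_notifier_init(notifier, 0);
 *             if (r < 0) {
 *                 return r;
 *             }
 *             virtio_queue_set_guest_notifier_fd_handler(vq, true,
 *                                                        with_irqfd);
 *         } else {
 *             virtio_queue_set_guest_notifier_fd_handler(vq, false,
 *                                                        with_irqfd);
 *             event_notifier_cleanup(notifier);
 *         }
 *         return 0;
 *     }
 */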
2311
2312 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
2313 {
2314 return &vq->guest_notifier;
2315 }
2316
2317 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
2318 {
2319 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2320 if (event_notifier_test_and_clear(n)) {
2321 virtio_queue_notify_aio_vq(vq);
2322 }
2323 }
2324
2325 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
2326 {
2327 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2328
2329 virtio_queue_set_notification(vq, 0);
2330 }
2331
2332 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
2333 {
2334 EventNotifier *n = opaque;
2335 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2336 bool progress;
2337
2338 if (!vq->vring.desc || virtio_queue_empty(vq)) {
2339 return false;
2340 }
2341
2342 progress = virtio_queue_notify_aio_vq(vq);
2343
2344 /* In case the handler function re-enabled notifications */
2345 virtio_queue_set_notification(vq, 0);
2346 return progress;
2347 }
2348
2349 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
2350 {
2351 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2352
2353 /* Caller polls once more after this to catch requests that race with us */
2354 virtio_queue_set_notification(vq, 1);
2355 }
2356
2357 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
2358 VirtIOHandleAIOOutput handle_output)
2359 {
2360 if (handle_output) {
2361 vq->handle_aio_output = handle_output;
2362 aio_set_event_notifier(ctx, &vq->host_notifier, true,
2363 virtio_queue_host_notifier_aio_read,
2364 virtio_queue_host_notifier_aio_poll);
2365 aio_set_event_notifier_poll(ctx, &vq->host_notifier,
2366 virtio_queue_host_notifier_aio_poll_begin,
2367 virtio_queue_host_notifier_aio_poll_end);
2368 } else {
2369 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
2370 /* Test and clear notifier after disabling event,
2371 * in case poll callback didn't have time to run. */
2372 virtio_queue_host_notifier_aio_read(&vq->host_notifier);
2373 vq->handle_aio_output = NULL;
2374 }
2375 }
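
/*
 * Illustrative sketch: dataplane-style users attach a queue to an
 * IOThread's AioContext roughly as follows, and detach again by passing a
 * NULL handler before the IOThread goes away.  The handler name is
 * hypothetical; ctx would come from e.g. iothread_get_aio_context():
 *
 *     aio_context_acquire(ctx);
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx,
 *                                                example_handle_output);
 *     aio_context_release(ctx);
 *
 *     ...
 *
 *     aio_context_acquire(ctx);
 *     virtio_queue_aio_set_host_notifier_handler(vq, ctx, NULL);
 *     aio_context_release(ctx);
 */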
2376
2377 void virtio_queue_host_notifier_read(EventNotifier *n)
2378 {
2379 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2380 if (event_notifier_test_and_clear(n)) {
2381 virtio_queue_notify_vq(vq);
2382 }
2383 }
2384
2385 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
2386 {
2387 return &vq->host_notifier;
2388 }
2389
2390 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
2391 {
2392 g_free(vdev->bus_name);
2393 vdev->bus_name = g_strdup(bus_name);
2394 }
2395
2396 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
2397 {
2398 va_list ap;
2399
2400 va_start(ap, fmt);
2401 error_vreport(fmt, ap);
2402 va_end(ap);
2403
2404 vdev->broken = true;
2405
2406 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2407 virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
2408 virtio_notify_config(vdev);
2409 }
2410 }
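
/*
 * Illustrative sketch: device code reports fatal, guest-triggered
 * conditions through virtio_error() instead of asserting, e.g. when a
 * descriptor chain is malformed (message text is only an example):
 *
 *     if (desc_len > max_len) {
 *         virtio_error(vdev, "descriptor length %u exceeds %u",
 *                      desc_len, max_len);
 *         return;
 *     }
 *
 * For VIRTIO 1.0 devices this sets NEEDS_RESET and raises a config
 * interrupt; legacy devices are only marked broken, which stops further
 * ring processing.
 */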
2411
2412 static void virtio_memory_listener_commit(MemoryListener *listener)
2413 {
2414 VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
2415 int i;
2416
2417 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2418 if (vdev->vq[i].vring.num == 0) {
2419 break;
2420 }
2421 virtio_init_region_cache(vdev, i);
2422 }
2423 }
2424
2425 static void virtio_device_realize(DeviceState *dev, Error **errp)
2426 {
2427 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2428 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2429 Error *err = NULL;
2430
2431 /* Devices should either use vmsd or the load/save methods */
2432 assert(!vdc->vmsd || !vdc->load);
2433
2434 if (vdc->realize != NULL) {
2435 vdc->realize(dev, &err);
2436 if (err != NULL) {
2437 error_propagate(errp, err);
2438 return;
2439 }
2440 }
2441
2442 virtio_bus_device_plugged(vdev, &err);
2443 if (err != NULL) {
2444 error_propagate(errp, err);
2445 return;
2446 }
2447
2448 vdev->listener.commit = virtio_memory_listener_commit;
2449 memory_listener_register(&vdev->listener, vdev->dma_as);
2450 }
2451
2452 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
2453 {
2454 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2455 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2456 Error *err = NULL;
2457
2458 virtio_bus_device_unplugged(vdev);
2459
2460 if (vdc->unrealize != NULL) {
2461 vdc->unrealize(dev, &err);
2462 if (err != NULL) {
2463 error_propagate(errp, err);
2464 return;
2465 }
2466 }
2467
2468 g_free(vdev->bus_name);
2469 vdev->bus_name = NULL;
2470 }
2471
2472 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
2473 {
2474 int i;
2475 if (!vdev->vq) {
2476 return;
2477 }
2478
2479 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2480 VRingMemoryRegionCaches *caches;
2481 if (vdev->vq[i].vring.num == 0) {
2482 break;
2483 }
2484 caches = atomic_read(&vdev->vq[i].vring.caches);
2485 atomic_set(&vdev->vq[i].vring.caches, NULL);
2486 virtio_free_region_cache(caches);
2487 }
2488 g_free(vdev->vq);
2489 }
2490
2491 static void virtio_device_instance_finalize(Object *obj)
2492 {
2493 VirtIODevice *vdev = VIRTIO_DEVICE(obj);
2494
2495 memory_listener_unregister(&vdev->listener);
2496 virtio_device_free_virtqueues(vdev);
2497
2498 g_free(vdev->config);
2499 g_free(vdev->vector_queues);
2500 }
2501
2502 static Property virtio_properties[] = {
2503 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
2504 DEFINE_PROP_END_OF_LIST(),
2505 };
2506
2507 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
2508 {
2509 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2510 int n, r, err;
2511
2512 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2513 VirtQueue *vq = &vdev->vq[n];
2514 if (!virtio_queue_get_num(vdev, n)) {
2515 continue;
2516 }
2517 r = virtio_bus_set_host_notifier(qbus, n, true);
2518 if (r < 0) {
2519 err = r;
2520 goto assign_error;
2521 }
2522 event_notifier_set_handler(&vq->host_notifier,
2523 virtio_queue_host_notifier_read);
2524 }
2525
2526 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2527 /* Kick right away to begin processing requests already in vring */
2528 VirtQueue *vq = &vdev->vq[n];
2529 if (!vq->vring.num) {
2530 continue;
2531 }
2532 event_notifier_set(&vq->host_notifier);
2533 }
2534 return 0;
2535
2536 assign_error:
2537 while (--n >= 0) {
2538 VirtQueue *vq = &vdev->vq[n];
2539 if (!virtio_queue_get_num(vdev, n)) {
2540 continue;
2541 }
2542
2543 event_notifier_set_handler(&vq->host_notifier, NULL);
2544 r = virtio_bus_set_host_notifier(qbus, n, false);
2545 assert(r >= 0);
2546 }
2547 return err;
2548 }
2549
2550 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
2551 {
2552 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2553 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2554
2555 return virtio_bus_start_ioeventfd(vbus);
2556 }
2557
2558 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
2559 {
2560 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2561 int n, r;
2562
2563 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2564 VirtQueue *vq = &vdev->vq[n];
2565
2566 if (!virtio_queue_get_num(vdev, n)) {
2567 continue;
2568 }
2569 event_notifier_set_handler(&vq->host_notifier, NULL);
2570 r = virtio_bus_set_host_notifier(qbus, n, false);
2571 assert(r >= 0);
2572 }
2573 }
2574
2575 void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
2576 {
2577 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2578 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2579
2580 virtio_bus_stop_ioeventfd(vbus);
2581 }
2582
2583 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
2584 {
2585 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2586 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2587
2588 return virtio_bus_grab_ioeventfd(vbus);
2589 }
2590
2591 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
2592 {
2593 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2594 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2595
2596 virtio_bus_release_ioeventfd(vbus);
2597 }
2598
2599 static void virtio_device_class_init(ObjectClass *klass, void *data)
2600 {
2601 /* Set the defaults common to all virtio devices here. */
2602 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2603 DeviceClass *dc = DEVICE_CLASS(klass);
2604
2605 dc->realize = virtio_device_realize;
2606 dc->unrealize = virtio_device_unrealize;
2607 dc->bus_type = TYPE_VIRTIO_BUS;
2608 dc->props = virtio_properties;
2609 vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
2610 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
2611
2612 vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
2613 }
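
/*
 * Illustrative sketch: a concrete device's class_init builds on the
 * defaults installed above; the example_* names are hypothetical:
 *
 *     static void example_device_class_init(ObjectClass *klass, void *data)
 *     {
 *         DeviceClass *dc = DEVICE_CLASS(klass);
 *         VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
 *
 *         dc->props = example_properties;
 *         vdc->realize = example_device_realize;
 *         vdc->unrealize = example_device_unrealize;
 *         vdc->get_config = example_get_config;
 *         vdc->get_features = example_get_features;
 *     }
 */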
2614
2615 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
2616 {
2617 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2618 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2619
2620 return virtio_bus_ioeventfd_enabled(vbus);
2621 }
2622
2623 static const TypeInfo virtio_device_info = {
2624 .name = TYPE_VIRTIO_DEVICE,
2625 .parent = TYPE_DEVICE,
2626 .instance_size = sizeof(VirtIODevice),
2627 .class_init = virtio_device_class_init,
2628 .instance_finalize = virtio_device_instance_finalize,
2629 .abstract = true,
2630 .class_size = sizeof(VirtioDeviceClass),
2631 };
2632
2633 static void virtio_register_types(void)
2634 {
2635 type_register_static(&virtio_device_info);
2636 }
2637
2638 type_init(virtio_register_types)