hw/virtio/virtio.c
1 /*
2 * Virtio Support
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "cpu.h"
17 #include "trace.h"
18 #include "qemu/error-report.h"
19 #include "qemu/log.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/module.h"
22 #include "hw/virtio/virtio.h"
23 #include "migration/qemu-file-types.h"
24 #include "qemu/atomic.h"
25 #include "hw/virtio/virtio-bus.h"
26 #include "hw/qdev-properties.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "sysemu/dma.h"
29 #include "sysemu/runstate.h"
30 #include "standard-headers/linux/virtio_ids.h"
31
32 /*
33 * The alignment to use between consumer and producer parts of vring.
34 * x86 pagesize again. This is the default, used by transports like PCI
35 * which don't provide a means for the guest to tell the host the alignment.
36 */
37 #define VIRTIO_PCI_VRING_ALIGN 4096
38
39 typedef struct VRingDesc
40 {
41 uint64_t addr;
42 uint32_t len;
43 uint16_t flags;
44 uint16_t next;
45 } VRingDesc;
46
47 typedef struct VRingPackedDesc {
48 uint64_t addr;
49 uint32_t len;
50 uint16_t id;
51 uint16_t flags;
52 } VRingPackedDesc;
53
54 typedef struct VRingAvail
55 {
56 uint16_t flags;
57 uint16_t idx;
58 uint16_t ring[];
59 } VRingAvail;
60
61 typedef struct VRingUsedElem
62 {
63 uint32_t id;
64 uint32_t len;
65 } VRingUsedElem;
66
67 typedef struct VRingUsed
68 {
69 uint16_t flags;
70 uint16_t idx;
71 VRingUsedElem ring[];
72 } VRingUsed;
73
74 typedef struct VRingMemoryRegionCaches {
75 struct rcu_head rcu;
76 MemoryRegionCache desc;
77 MemoryRegionCache avail;
78 MemoryRegionCache used;
79 } VRingMemoryRegionCaches;
80
81 typedef struct VRing
82 {
83 unsigned int num;
84 unsigned int num_default;
85 unsigned int align;
86 hwaddr desc;
87 hwaddr avail;
88 hwaddr used;
89 VRingMemoryRegionCaches *caches;
90 } VRing;
91
92 typedef struct VRingPackedDescEvent {
93 uint16_t off_wrap;
94 uint16_t flags;
95 } VRingPackedDescEvent;
96
97 struct VirtQueue
98 {
99 VRing vring;
100 VirtQueueElement *used_elems;
101
102 /* Next head to pop */
103 uint16_t last_avail_idx;
104 bool last_avail_wrap_counter;
105
106 /* Last avail_idx read from VQ. */
107 uint16_t shadow_avail_idx;
108 bool shadow_avail_wrap_counter;
109
110 uint16_t used_idx;
111 bool used_wrap_counter;
112
113 /* Last used index value we have signalled on */
114 uint16_t signalled_used;
115
116 /* Whether signalled_used is valid */
117 bool signalled_used_valid;
118
119 /* Notification enabled? */
120 bool notification;
121
122 uint16_t queue_index;
123
124 unsigned int inuse;
125
126 uint16_t vector;
127 VirtIOHandleOutput handle_output;
128 VirtIOHandleAIOOutput handle_aio_output;
129 VirtIODevice *vdev;
130 EventNotifier guest_notifier;
131 EventNotifier host_notifier;
132 bool host_notifier_enabled;
133 QLIST_ENTRY(VirtQueue) node;
134 };
135
136 /* Called within call_rcu(). */
137 static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
138 {
139 assert(caches != NULL);
140 address_space_cache_destroy(&caches->desc);
141 address_space_cache_destroy(&caches->avail);
142 address_space_cache_destroy(&caches->used);
143 g_free(caches);
144 }
145
146 static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
147 {
148 VRingMemoryRegionCaches *caches;
149
150 caches = qatomic_read(&vq->vring.caches);
151 qatomic_rcu_set(&vq->vring.caches, NULL);
152 if (caches) {
153 call_rcu(caches, virtio_free_region_cache, rcu);
154 }
155 }
156
157 static void virtio_init_region_cache(VirtIODevice *vdev, int n)
158 {
159 VirtQueue *vq = &vdev->vq[n];
160 VRingMemoryRegionCaches *old = vq->vring.caches;
161 VRingMemoryRegionCaches *new = NULL;
162 hwaddr addr, size;
163 int64_t len;
164 bool packed;
165
166
167 addr = vq->vring.desc;
168 if (!addr) {
169 goto out_no_cache;
170 }
171 new = g_new0(VRingMemoryRegionCaches, 1);
172 size = virtio_queue_get_desc_size(vdev, n);
173 packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
175 len = address_space_cache_init(&new->desc, vdev->dma_as,
176 addr, size, packed);
177 if (len < size) {
178 virtio_error(vdev, "Cannot map desc");
179 goto err_desc;
180 }
181
182 size = virtio_queue_get_used_size(vdev, n);
183 len = address_space_cache_init(&new->used, vdev->dma_as,
184 vq->vring.used, size, true);
185 if (len < size) {
186 virtio_error(vdev, "Cannot map used");
187 goto err_used;
188 }
189
190 size = virtio_queue_get_avail_size(vdev, n);
191 len = address_space_cache_init(&new->avail, vdev->dma_as,
192 vq->vring.avail, size, false);
193 if (len < size) {
194 virtio_error(vdev, "Cannot map avail");
195 goto err_avail;
196 }
197
198 qatomic_rcu_set(&vq->vring.caches, new);
199 if (old) {
200 call_rcu(old, virtio_free_region_cache, rcu);
201 }
202 return;
203
204 err_avail:
205 address_space_cache_destroy(&new->avail);
206 err_used:
207 address_space_cache_destroy(&new->used);
208 err_desc:
209 address_space_cache_destroy(&new->desc);
210 out_no_cache:
211 g_free(new);
212 virtio_virtqueue_reset_region_cache(vq);
213 }
214
215 /* virt queue functions */
216 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
217 {
218 VRing *vring = &vdev->vq[n].vring;
219
220 if (!vring->num || !vring->desc || !vring->align) {
221 /* not yet setup -> nothing to do */
222 return;
223 }
224 vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
225 vring->used = vring_align(vring->avail +
226 offsetof(VRingAvail, ring[vring->num]),
227 vring->align);
228 virtio_init_region_cache(vdev, n);
229 }
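/*
 * Editor's note, not part of the original file: a worked example of the
 * layout computed above, assuming a 256-entry split ring and the default
 * VIRTIO_PCI_VRING_ALIGN of 4096 bytes.
 *
 *     num   = 256
 *     desc  = base
 *     avail = desc + 256 * sizeof(VRingDesc)          = base + 0x1000
 *     used  = vring_align(avail + 4 + 2 * 256, 4096)  = base + 0x2000
 *
 * i.e. the descriptor table fills the first page, the avail ring occupies
 * the first 516 bytes of the second page, and the used ring starts on the
 * next 4096-byte boundary.
 */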
230
231 /* Called within rcu_read_lock(). */
232 static void vring_split_desc_read(VirtIODevice *vdev, VRingDesc *desc,
233 MemoryRegionCache *cache, int i)
234 {
235 address_space_read_cached(cache, i * sizeof(VRingDesc),
236 desc, sizeof(VRingDesc));
237 virtio_tswap64s(vdev, &desc->addr);
238 virtio_tswap32s(vdev, &desc->len);
239 virtio_tswap16s(vdev, &desc->flags);
240 virtio_tswap16s(vdev, &desc->next);
241 }
242
243 static void vring_packed_event_read(VirtIODevice *vdev,
244 MemoryRegionCache *cache,
245 VRingPackedDescEvent *e)
246 {
247 hwaddr off_off = offsetof(VRingPackedDescEvent, off_wrap);
248 hwaddr off_flags = offsetof(VRingPackedDescEvent, flags);
249
250 e->flags = virtio_lduw_phys_cached(vdev, cache, off_flags);
251 /* Make sure flags is seen before off_wrap */
252 smp_rmb();
253 e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off);
254 virtio_tswap16s(vdev, &e->flags);
255 }
256
257 static void vring_packed_off_wrap_write(VirtIODevice *vdev,
258 MemoryRegionCache *cache,
259 uint16_t off_wrap)
260 {
261 hwaddr off = offsetof(VRingPackedDescEvent, off_wrap);
262
263 virtio_stw_phys_cached(vdev, cache, off, off_wrap);
264 address_space_cache_invalidate(cache, off, sizeof(off_wrap));
265 }
266
267 static void vring_packed_flags_write(VirtIODevice *vdev,
268 MemoryRegionCache *cache, uint16_t flags)
269 {
270 hwaddr off = offsetof(VRingPackedDescEvent, flags);
271
272 virtio_stw_phys_cached(vdev, cache, off, flags);
273 address_space_cache_invalidate(cache, off, sizeof(flags));
274 }
275
276 /* Called within rcu_read_lock(). */
277 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
278 {
279 return qatomic_rcu_read(&vq->vring.caches);
280 }
281
282 /* Called within rcu_read_lock(). */
283 static inline uint16_t vring_avail_flags(VirtQueue *vq)
284 {
285 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
286 hwaddr pa = offsetof(VRingAvail, flags);
287
288 if (!caches) {
289 return 0;
290 }
291
292 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
293 }
294
295 /* Called within rcu_read_lock(). */
296 static inline uint16_t vring_avail_idx(VirtQueue *vq)
297 {
298 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
299 hwaddr pa = offsetof(VRingAvail, idx);
300
301 if (!caches) {
302 return 0;
303 }
304
305 vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
306 return vq->shadow_avail_idx;
307 }
308
309 /* Called within rcu_read_lock(). */
310 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
311 {
312 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
313 hwaddr pa = offsetof(VRingAvail, ring[i]);
314
315 if (!caches) {
316 return 0;
317 }
318
319 return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
320 }
321
322 /* Called within rcu_read_lock(). */
323 static inline uint16_t vring_get_used_event(VirtQueue *vq)
324 {
325 return vring_avail_ring(vq, vq->vring.num);
326 }
327
328 /* Called within rcu_read_lock(). */
329 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
330 int i)
331 {
332 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
333 hwaddr pa = offsetof(VRingUsed, ring[i]);
334
335 if (!caches) {
336 return;
337 }
338
339 virtio_tswap32s(vq->vdev, &uelem->id);
340 virtio_tswap32s(vq->vdev, &uelem->len);
341 address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
342 address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
343 }
344
345 /* Called within rcu_read_lock(). */
346 static uint16_t vring_used_idx(VirtQueue *vq)
347 {
348 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
349 hwaddr pa = offsetof(VRingUsed, idx);
350
351 if (!caches) {
352 return 0;
353 }
354
355 return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
356 }
357
358 /* Called within rcu_read_lock(). */
359 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
360 {
361 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
362 hwaddr pa = offsetof(VRingUsed, idx);
363
364 if (caches) {
365 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
366 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
367 }
368
369 vq->used_idx = val;
370 }
371
372 /* Called within rcu_read_lock(). */
373 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
374 {
375 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
376 VirtIODevice *vdev = vq->vdev;
377 hwaddr pa = offsetof(VRingUsed, flags);
378 uint16_t flags;
379
380 if (!caches) {
381 return;
382 }
383
384 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
385 virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
386 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
387 }
388
389 /* Called within rcu_read_lock(). */
390 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
391 {
392 VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
393 VirtIODevice *vdev = vq->vdev;
394 hwaddr pa = offsetof(VRingUsed, flags);
395 uint16_t flags;
396
397 if (!caches) {
398 return;
399 }
400
401 flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
402 virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
403 address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
404 }
405
406 /* Called within rcu_read_lock(). */
407 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
408 {
409 VRingMemoryRegionCaches *caches;
410 hwaddr pa;
411 if (!vq->notification) {
412 return;
413 }
414
415 caches = vring_get_region_caches(vq);
416 if (!caches) {
417 return;
418 }
419
420 pa = offsetof(VRingUsed, ring[vq->vring.num]);
421 virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
422 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
423 }
424
425 static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
426 {
427 RCU_READ_LOCK_GUARD();
428
429 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
430 vring_set_avail_event(vq, vring_avail_idx(vq));
431 } else if (enable) {
432 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
433 } else {
434 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
435 }
436 if (enable) {
437 /* Expose avail event/used flags before caller checks the avail idx. */
438 smp_mb();
439 }
440 }
441
442 static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
443 {
444 uint16_t off_wrap;
445 VRingPackedDescEvent e;
446 VRingMemoryRegionCaches *caches;
447
448 RCU_READ_LOCK_GUARD();
449 caches = vring_get_region_caches(vq);
450 if (!caches) {
451 return;
452 }
453
454 vring_packed_event_read(vq->vdev, &caches->used, &e);
455
456 if (!enable) {
457 e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
458 } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
459 off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
460 vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
461 /* Make sure off_wrap is written before flags */
462 smp_wmb();
463 e.flags = VRING_PACKED_EVENT_FLAG_DESC;
464 } else {
465 e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
466 }
467
468 vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
469 if (enable) {
470 /* Expose avail event/used flags before caller checks the avail idx. */
471 smp_mb();
472 }
473 }
474
475 bool virtio_queue_get_notification(VirtQueue *vq)
476 {
477 return vq->notification;
478 }
479
480 void virtio_queue_set_notification(VirtQueue *vq, int enable)
481 {
482 vq->notification = enable;
483
484 if (!vq->vring.desc) {
485 return;
486 }
487
488 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
489 virtio_queue_packed_set_notification(vq, enable);
490 } else {
491 virtio_queue_split_set_notification(vq, enable);
492 }
493 }
494
495 int virtio_queue_ready(VirtQueue *vq)
496 {
497 return vq->vring.avail != 0;
498 }
499
500 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
501 uint16_t *flags,
502 MemoryRegionCache *cache,
503 int i)
504 {
505 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
506
507 *flags = virtio_lduw_phys_cached(vdev, cache, off);
508 }
509
510 static void vring_packed_desc_read(VirtIODevice *vdev,
511 VRingPackedDesc *desc,
512 MemoryRegionCache *cache,
513 int i, bool strict_order)
514 {
515 hwaddr off = i * sizeof(VRingPackedDesc);
516
517 vring_packed_desc_read_flags(vdev, &desc->flags, cache, i);
518
519 if (strict_order) {
520 /* Make sure flags is read before the rest of the fields. */
521 smp_rmb();
522 }
523
524 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, addr),
525 &desc->addr, sizeof(desc->addr));
526 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, id),
527 &desc->id, sizeof(desc->id));
528 address_space_read_cached(cache, off + offsetof(VRingPackedDesc, len),
529 &desc->len, sizeof(desc->len));
530 virtio_tswap64s(vdev, &desc->addr);
531 virtio_tswap16s(vdev, &desc->id);
532 virtio_tswap32s(vdev, &desc->len);
533 }
534
535 static void vring_packed_desc_write_data(VirtIODevice *vdev,
536 VRingPackedDesc *desc,
537 MemoryRegionCache *cache,
538 int i)
539 {
540 hwaddr off_id = i * sizeof(VRingPackedDesc) +
541 offsetof(VRingPackedDesc, id);
542 hwaddr off_len = i * sizeof(VRingPackedDesc) +
543 offsetof(VRingPackedDesc, len);
544
545 virtio_tswap32s(vdev, &desc->len);
546 virtio_tswap16s(vdev, &desc->id);
547 address_space_write_cached(cache, off_id, &desc->id, sizeof(desc->id));
548 address_space_cache_invalidate(cache, off_id, sizeof(desc->id));
549 address_space_write_cached(cache, off_len, &desc->len, sizeof(desc->len));
550 address_space_cache_invalidate(cache, off_len, sizeof(desc->len));
551 }
552
553 static void vring_packed_desc_write_flags(VirtIODevice *vdev,
554 VRingPackedDesc *desc,
555 MemoryRegionCache *cache,
556 int i)
557 {
558 hwaddr off = i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags);
559
560 virtio_stw_phys_cached(vdev, cache, off, desc->flags);
561 address_space_cache_invalidate(cache, off, sizeof(desc->flags));
562 }
563
564 static void vring_packed_desc_write(VirtIODevice *vdev,
565 VRingPackedDesc *desc,
566 MemoryRegionCache *cache,
567 int i, bool strict_order)
568 {
569 vring_packed_desc_write_data(vdev, desc, cache, i);
570 if (strict_order) {
571 /* Make sure data is written before flags. */
572 smp_wmb();
573 }
574 vring_packed_desc_write_flags(vdev, desc, cache, i);
575 }
576
577 static inline bool is_desc_avail(uint16_t flags, bool wrap_counter)
578 {
579 bool avail, used;
580
581 avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
582 used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
583 return (avail != used) && (avail == wrap_counter);
584 }
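/*
 * Editor's note, not part of the original file: a short illustration of the
 * check above. A packed descriptor is available to the device when its AVAIL
 * bit differs from its USED bit and the AVAIL bit matches the current wrap
 * counter, e.g.:
 *
 *     wrap_counter = true,  AVAIL = 1, USED = 0  ->  available
 *     wrap_counter = true,  AVAIL = 1, USED = 1  ->  not available (already used)
 *     wrap_counter = false, AVAIL = 1, USED = 0  ->  not available
 *                                                    (left over from the
 *                                                     previous ring pass)
 */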
585
586 /* Fetch avail_idx from VQ memory only when we really need to know if
587 * the guest has added some buffers.
588 * Called within rcu_read_lock(). */
589 static int virtio_queue_empty_rcu(VirtQueue *vq)
590 {
591 if (virtio_device_disabled(vq->vdev)) {
592 return 1;
593 }
594
595 if (unlikely(!vq->vring.avail)) {
596 return 1;
597 }
598
599 if (vq->shadow_avail_idx != vq->last_avail_idx) {
600 return 0;
601 }
602
603 return vring_avail_idx(vq) == vq->last_avail_idx;
604 }
605
606 static int virtio_queue_split_empty(VirtQueue *vq)
607 {
608 bool empty;
609
610 if (virtio_device_disabled(vq->vdev)) {
611 return 1;
612 }
613
614 if (unlikely(!vq->vring.avail)) {
615 return 1;
616 }
617
618 if (vq->shadow_avail_idx != vq->last_avail_idx) {
619 return 0;
620 }
621
622 RCU_READ_LOCK_GUARD();
623 empty = vring_avail_idx(vq) == vq->last_avail_idx;
624 return empty;
625 }
626
627 /* Called within rcu_read_lock(). */
628 static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
629 {
630 struct VRingPackedDesc desc;
631 VRingMemoryRegionCaches *cache;
632
633 if (unlikely(!vq->vring.desc)) {
634 return 1;
635 }
636
637 cache = vring_get_region_caches(vq);
638 if (!cache) {
639 return 1;
640 }
641
642 vring_packed_desc_read_flags(vq->vdev, &desc.flags, &cache->desc,
643 vq->last_avail_idx);
644
645 return !is_desc_avail(desc.flags, vq->last_avail_wrap_counter);
646 }
647
648 static int virtio_queue_packed_empty(VirtQueue *vq)
649 {
650 RCU_READ_LOCK_GUARD();
651 return virtio_queue_packed_empty_rcu(vq);
652 }
653
654 int virtio_queue_empty(VirtQueue *vq)
655 {
656 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
657 return virtio_queue_packed_empty(vq);
658 } else {
659 return virtio_queue_split_empty(vq);
660 }
661 }
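/*
 * Editor's note, not part of the original file: a minimal sketch of the
 * kick-suppression loop that devices typically build from
 * virtio_queue_set_notification() and virtio_queue_empty(); the function
 * name and the per-element processing are illustrative assumptions, not
 * code from this file.
 *
 *     static void my_drain_queue(VirtIODevice *vdev, VirtQueue *vq)
 *     {
 *         VirtQueueElement *elem;
 *
 *         do {
 *             virtio_queue_set_notification(vq, 0);
 *             while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
 *                 // ... process elem->out_sg / elem->in_sg here ...
 *                 virtqueue_push(vq, elem, 0);
 *                 g_free(elem);
 *             }
 *             virtio_queue_set_notification(vq, 1);
 *         } while (!virtio_queue_empty(vq));
 *         virtio_notify(vdev, vq);
 *     }
 */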
662
663 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
664 unsigned int len)
665 {
666 AddressSpace *dma_as = vq->vdev->dma_as;
667 unsigned int offset;
668 int i;
669
670 offset = 0;
671 for (i = 0; i < elem->in_num; i++) {
672 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
673
674 dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
675 elem->in_sg[i].iov_len,
676 DMA_DIRECTION_FROM_DEVICE, size);
677
678 offset += size;
679 }
680
681 for (i = 0; i < elem->out_num; i++)
682 dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
683 elem->out_sg[i].iov_len,
684 DMA_DIRECTION_TO_DEVICE,
685 elem->out_sg[i].iov_len);
686 }
687
688 /* virtqueue_detach_element:
689 * @vq: The #VirtQueue
690 * @elem: The #VirtQueueElement
691 * @len: number of bytes written
692 *
693 * Detach the element from the virtqueue. This function is suitable for device
694 * reset or other situations where a #VirtQueueElement is simply freed and will
695 * not be pushed or discarded.
696 */
697 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
698 unsigned int len)
699 {
700 vq->inuse -= elem->ndescs;
701 virtqueue_unmap_sg(vq, elem, len);
702 }
703
704 static void virtqueue_split_rewind(VirtQueue *vq, unsigned int num)
705 {
706 vq->last_avail_idx -= num;
707 }
708
709 static void virtqueue_packed_rewind(VirtQueue *vq, unsigned int num)
710 {
711 if (vq->last_avail_idx < num) {
712 vq->last_avail_idx = vq->vring.num + vq->last_avail_idx - num;
713 vq->last_avail_wrap_counter ^= 1;
714 } else {
715 vq->last_avail_idx -= num;
716 }
717 }
718
719 /* virtqueue_unpop:
720 * @vq: The #VirtQueue
721 * @elem: The #VirtQueueElement
722 * @len: number of bytes written
723 *
724 * Pretend the most recent element wasn't popped from the virtqueue. The next
725 * call to virtqueue_pop() will refetch the element.
726 */
727 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
728 unsigned int len)
729 {
730
731 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
732 virtqueue_packed_rewind(vq, 1);
733 } else {
734 virtqueue_split_rewind(vq, 1);
735 }
736
737 virtqueue_detach_element(vq, elem, len);
738 }
739
740 /* virtqueue_rewind:
741 * @vq: The #VirtQueue
742 * @num: Number of elements to push back
743 *
744 * Pretend that elements weren't popped from the virtqueue. The next
745 * virtqueue_pop() will refetch the oldest element.
746 *
747 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
748 *
749 * Returns: true on success, false if @num is greater than the number of in use
750 * elements.
751 */
752 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
753 {
754 if (num > vq->inuse) {
755 return false;
756 }
757
758 vq->inuse -= num;
759 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
760 virtqueue_packed_rewind(vq, num);
761 } else {
762 virtqueue_split_rewind(vq, num);
763 }
764 return true;
765 }
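/*
 * Editor's note, not part of the original file: a sketch of when a device
 * might use the rewind helpers, e.g. after popping an element it cannot
 * process yet; my_can_process() is an illustrative assumption.
 *
 *     elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (elem && !my_can_process(elem)) {
 *         virtqueue_unpop(vq, elem, 0);   // hand it back; repopped later
 *         g_free(elem);
 *         return;
 *     }
 */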
766
767 static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
768 unsigned int len, unsigned int idx)
769 {
770 VRingUsedElem uelem;
771
772 if (unlikely(!vq->vring.used)) {
773 return;
774 }
775
776 idx = (idx + vq->used_idx) % vq->vring.num;
777
778 uelem.id = elem->index;
779 uelem.len = len;
780 vring_used_write(vq, &uelem, idx);
781 }
782
783 static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
784 unsigned int len, unsigned int idx)
785 {
786 vq->used_elems[idx].index = elem->index;
787 vq->used_elems[idx].len = len;
788 vq->used_elems[idx].ndescs = elem->ndescs;
789 }
790
791 static void virtqueue_packed_fill_desc(VirtQueue *vq,
792 const VirtQueueElement *elem,
793 unsigned int idx,
794 bool strict_order)
795 {
796 uint16_t head;
797 VRingMemoryRegionCaches *caches;
798 VRingPackedDesc desc = {
799 .id = elem->index,
800 .len = elem->len,
801 };
802 bool wrap_counter = vq->used_wrap_counter;
803
804 if (unlikely(!vq->vring.desc)) {
805 return;
806 }
807
808 head = vq->used_idx + idx;
809 if (head >= vq->vring.num) {
810 head -= vq->vring.num;
811 wrap_counter ^= 1;
812 }
813 if (wrap_counter) {
814 desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
815 desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
816 } else {
817 desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
818 desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
819 }
820
821 caches = vring_get_region_caches(vq);
822 if (!caches) {
823 return;
824 }
825
826 vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head, strict_order);
827 }
828
829 /* Called within rcu_read_lock(). */
830 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
831 unsigned int len, unsigned int idx)
832 {
833 trace_virtqueue_fill(vq, elem, len, idx);
834
835 virtqueue_unmap_sg(vq, elem, len);
836
837 if (virtio_device_disabled(vq->vdev)) {
838 return;
839 }
840
841 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
842 virtqueue_packed_fill(vq, elem, len, idx);
843 } else {
844 virtqueue_split_fill(vq, elem, len, idx);
845 }
846 }
847
848 /* Called within rcu_read_lock(). */
849 static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
850 {
851 uint16_t old, new;
852
853 if (unlikely(!vq->vring.used)) {
854 return;
855 }
856
857 /* Make sure buffer is written before we update index. */
858 smp_wmb();
859 trace_virtqueue_flush(vq, count);
860 old = vq->used_idx;
861 new = old + count;
862 vring_used_idx_set(vq, new);
863 vq->inuse -= count;
864 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
865 vq->signalled_used_valid = false;
866 }
867
868 static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
869 {
870 unsigned int i, ndescs = 0;
871
872 if (unlikely(!vq->vring.desc)) {
873 return;
874 }
875
876 for (i = 1; i < count; i++) {
877 virtqueue_packed_fill_desc(vq, &vq->used_elems[i], i, false);
878 ndescs += vq->used_elems[i].ndescs;
879 }
880 virtqueue_packed_fill_desc(vq, &vq->used_elems[0], 0, true);
881 ndescs += vq->used_elems[0].ndescs;
882
883 vq->inuse -= ndescs;
884 vq->used_idx += ndescs;
885 if (vq->used_idx >= vq->vring.num) {
886 vq->used_idx -= vq->vring.num;
887 vq->used_wrap_counter ^= 1;
888 }
889 }
890
891 void virtqueue_flush(VirtQueue *vq, unsigned int count)
892 {
893 if (virtio_device_disabled(vq->vdev)) {
894 vq->inuse -= count;
895 return;
896 }
897
898 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
899 virtqueue_packed_flush(vq, count);
900 } else {
901 virtqueue_split_flush(vq, count);
902 }
903 }
904
905 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
906 unsigned int len)
907 {
908 RCU_READ_LOCK_GUARD();
909 virtqueue_fill(vq, elem, len, 0);
910 virtqueue_flush(vq, 1);
911 }
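/*
 * Editor's note, not part of the original file: virtqueue_push() above is the
 * single-element shortcut for virtqueue_fill() + virtqueue_flush(). A device
 * completing several elements at once can batch them (under RCU, as in
 * virtqueue_push); the elems/lens arrays and the completed count are
 * illustrative assumptions.
 *
 *     RCU_READ_LOCK_GUARD();
 *     for (i = 0; i < completed; i++) {
 *         virtqueue_fill(vq, elems[i], lens[i], i);
 *     }
 *     virtqueue_flush(vq, completed);
 */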
912
913 /* Called within rcu_read_lock(). */
914 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
915 {
916 uint16_t num_heads = vring_avail_idx(vq) - idx;
917
918 /* Check it isn't doing very strange things with descriptor numbers. */
919 if (num_heads > vq->vring.num) {
920 virtio_error(vq->vdev, "Guest moved used index from %u to %u",
921 idx, vq->shadow_avail_idx);
922 return -EINVAL;
923 }
924 /* On success, callers read a descriptor at vq->last_avail_idx.
925 * Make sure descriptor read does not bypass avail index read. */
926 if (num_heads) {
927 smp_rmb();
928 }
929
930 return num_heads;
931 }
932
933 /* Called within rcu_read_lock(). */
934 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
935 unsigned int *head)
936 {
937 /* Grab the next descriptor number they're advertising, and increment
938 * the index we've seen. */
939 *head = vring_avail_ring(vq, idx % vq->vring.num);
940
941 /* If their number is silly, that's a fatal mistake. */
942 if (*head >= vq->vring.num) {
943 virtio_error(vq->vdev, "Guest says index %u is available", *head);
944 return false;
945 }
946
947 return true;
948 }
949
950 enum {
951 VIRTQUEUE_READ_DESC_ERROR = -1,
952 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
953 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
954 };
955
956 static int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
957 MemoryRegionCache *desc_cache,
958 unsigned int max, unsigned int *next)
959 {
960 /* If this descriptor says it doesn't chain, we're done. */
961 if (!(desc->flags & VRING_DESC_F_NEXT)) {
962 return VIRTQUEUE_READ_DESC_DONE;
963 }
964
965 /* Check they're not leading us off the end of the descriptor table. */
966 *next = desc->next;
967 /* Make sure compiler knows to grab that: we don't want it changing! */
968 smp_wmb();
969
970 if (*next >= max) {
971 virtio_error(vdev, "Desc next is %u", *next);
972 return VIRTQUEUE_READ_DESC_ERROR;
973 }
974
975 vring_split_desc_read(vdev, desc, desc_cache, *next);
976 return VIRTQUEUE_READ_DESC_MORE;
977 }
978
979 /* Called within rcu_read_lock(). */
980 static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
981 unsigned int *in_bytes, unsigned int *out_bytes,
982 unsigned max_in_bytes, unsigned max_out_bytes,
983 VRingMemoryRegionCaches *caches)
984 {
985 VirtIODevice *vdev = vq->vdev;
986 unsigned int max, idx;
987 unsigned int total_bufs, in_total, out_total;
988 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
989 int64_t len = 0;
990 int rc;
991
992 idx = vq->last_avail_idx;
993 total_bufs = in_total = out_total = 0;
994
995 max = vq->vring.num;
996
997 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
998 MemoryRegionCache *desc_cache = &caches->desc;
999 unsigned int num_bufs;
1000 VRingDesc desc;
1001 unsigned int i;
1002
1003 num_bufs = total_bufs;
1004
1005 if (!virtqueue_get_head(vq, idx++, &i)) {
1006 goto err;
1007 }
1008
1009 vring_split_desc_read(vdev, &desc, desc_cache, i);
1010
1011 if (desc.flags & VRING_DESC_F_INDIRECT) {
1012 if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1013 virtio_error(vdev, "Invalid size for indirect buffer table");
1014 goto err;
1015 }
1016
1017 /* If we've got too many, that implies a descriptor loop. */
1018 if (num_bufs >= max) {
1019 virtio_error(vdev, "Looped descriptor");
1020 goto err;
1021 }
1022
1023 /* loop over the indirect descriptor table */
1024 len = address_space_cache_init(&indirect_desc_cache,
1025 vdev->dma_as,
1026 desc.addr, desc.len, false);
1027 desc_cache = &indirect_desc_cache;
1028 if (len < desc.len) {
1029 virtio_error(vdev, "Cannot map indirect buffer");
1030 goto err;
1031 }
1032
1033 max = desc.len / sizeof(VRingDesc);
1034 num_bufs = i = 0;
1035 vring_split_desc_read(vdev, &desc, desc_cache, i);
1036 }
1037
1038 do {
1039 /* If we've got too many, that implies a descriptor loop. */
1040 if (++num_bufs > max) {
1041 virtio_error(vdev, "Looped descriptor");
1042 goto err;
1043 }
1044
1045 if (desc.flags & VRING_DESC_F_WRITE) {
1046 in_total += desc.len;
1047 } else {
1048 out_total += desc.len;
1049 }
1050 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1051 goto done;
1052 }
1053
1054 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1055 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1056
1057 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1058 goto err;
1059 }
1060
1061 if (desc_cache == &indirect_desc_cache) {
1062 address_space_cache_destroy(&indirect_desc_cache);
1063 total_bufs++;
1064 } else {
1065 total_bufs = num_bufs;
1066 }
1067 }
1068
1069 if (rc < 0) {
1070 goto err;
1071 }
1072
1073 done:
1074 address_space_cache_destroy(&indirect_desc_cache);
1075 if (in_bytes) {
1076 *in_bytes = in_total;
1077 }
1078 if (out_bytes) {
1079 *out_bytes = out_total;
1080 }
1081 return;
1082
1083 err:
1084 in_total = out_total = 0;
1085 goto done;
1086 }
1087
1088 static int virtqueue_packed_read_next_desc(VirtQueue *vq,
1089 VRingPackedDesc *desc,
1090 MemoryRegionCache
1091 *desc_cache,
1092 unsigned int max,
1093 unsigned int *next,
1094 bool indirect)
1095 {
1096 /* If this descriptor says it doesn't chain, we're done. */
1097 if (!indirect && !(desc->flags & VRING_DESC_F_NEXT)) {
1098 return VIRTQUEUE_READ_DESC_DONE;
1099 }
1100
1101 ++*next;
1102 if (*next == max) {
1103 if (indirect) {
1104 return VIRTQUEUE_READ_DESC_DONE;
1105 } else {
1106 (*next) -= vq->vring.num;
1107 }
1108 }
1109
1110 vring_packed_desc_read(vq->vdev, desc, desc_cache, *next, false);
1111 return VIRTQUEUE_READ_DESC_MORE;
1112 }
1113
1114 /* Called within rcu_read_lock(). */
1115 static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
1116 unsigned int *in_bytes,
1117 unsigned int *out_bytes,
1118 unsigned max_in_bytes,
1119 unsigned max_out_bytes,
1120 VRingMemoryRegionCaches *caches)
1121 {
1122 VirtIODevice *vdev = vq->vdev;
1123 unsigned int max, idx;
1124 unsigned int total_bufs, in_total, out_total;
1125 MemoryRegionCache *desc_cache;
1126 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1127 int64_t len = 0;
1128 VRingPackedDesc desc;
1129 bool wrap_counter;
1130
1131 idx = vq->last_avail_idx;
1132 wrap_counter = vq->last_avail_wrap_counter;
1133 total_bufs = in_total = out_total = 0;
1134
1135 max = vq->vring.num;
1136
1137 for (;;) {
1138 unsigned int num_bufs = total_bufs;
1139 unsigned int i = idx;
1140 int rc;
1141
1142 desc_cache = &caches->desc;
1143 vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
1144 if (!is_desc_avail(desc.flags, wrap_counter)) {
1145 break;
1146 }
1147
1148 if (desc.flags & VRING_DESC_F_INDIRECT) {
1149 if (desc.len % sizeof(VRingPackedDesc)) {
1150 virtio_error(vdev, "Invalid size for indirect buffer table");
1151 goto err;
1152 }
1153
1154 /* If we've got too many, that implies a descriptor loop. */
1155 if (num_bufs >= max) {
1156 virtio_error(vdev, "Looped descriptor");
1157 goto err;
1158 }
1159
1160 /* loop over the indirect descriptor table */
1161 len = address_space_cache_init(&indirect_desc_cache,
1162 vdev->dma_as,
1163 desc.addr, desc.len, false);
1164 desc_cache = &indirect_desc_cache;
1165 if (len < desc.len) {
1166 virtio_error(vdev, "Cannot map indirect buffer");
1167 goto err;
1168 }
1169
1170 max = desc.len / sizeof(VRingPackedDesc);
1171 num_bufs = i = 0;
1172 vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1173 }
1174
1175 do {
1176 /* If we've got too many, that implies a descriptor loop. */
1177 if (++num_bufs > max) {
1178 virtio_error(vdev, "Looped descriptor");
1179 goto err;
1180 }
1181
1182 if (desc.flags & VRING_DESC_F_WRITE) {
1183 in_total += desc.len;
1184 } else {
1185 out_total += desc.len;
1186 }
1187 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
1188 goto done;
1189 }
1190
1191 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max,
1192 &i, desc_cache ==
1193 &indirect_desc_cache);
1194 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1195
1196 if (desc_cache == &indirect_desc_cache) {
1197 address_space_cache_destroy(&indirect_desc_cache);
1198 total_bufs++;
1199 idx++;
1200 } else {
1201 idx += num_bufs - total_bufs;
1202 total_bufs = num_bufs;
1203 }
1204
1205 if (idx >= vq->vring.num) {
1206 idx -= vq->vring.num;
1207 wrap_counter ^= 1;
1208 }
1209 }
1210
1211 /* Record the index and wrap counter for a kick we want */
1212 vq->shadow_avail_idx = idx;
1213 vq->shadow_avail_wrap_counter = wrap_counter;
1214 done:
1215 address_space_cache_destroy(&indirect_desc_cache);
1216 if (in_bytes) {
1217 *in_bytes = in_total;
1218 }
1219 if (out_bytes) {
1220 *out_bytes = out_total;
1221 }
1222 return;
1223
1224 err:
1225 in_total = out_total = 0;
1226 goto done;
1227 }
1228
1229 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
1230 unsigned int *out_bytes,
1231 unsigned max_in_bytes, unsigned max_out_bytes)
1232 {
1233 uint16_t desc_size;
1234 VRingMemoryRegionCaches *caches;
1235
1236 RCU_READ_LOCK_GUARD();
1237
1238 if (unlikely(!vq->vring.desc)) {
1239 goto err;
1240 }
1241
1242 caches = vring_get_region_caches(vq);
1243 if (!caches) {
1244 goto err;
1245 }
1246
1247 desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
1248 sizeof(VRingPackedDesc) : sizeof(VRingDesc);
1249 if (caches->desc.len < vq->vring.num * desc_size) {
1250 virtio_error(vq->vdev, "Cannot map descriptor ring");
1251 goto err;
1252 }
1253
1254 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1255 virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
1256 max_in_bytes, max_out_bytes,
1257 caches);
1258 } else {
1259 virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
1260 max_in_bytes, max_out_bytes,
1261 caches);
1262 }
1263
1264 return;
1265 err:
1266 if (in_bytes) {
1267 *in_bytes = 0;
1268 }
1269 if (out_bytes) {
1270 *out_bytes = 0;
1271 }
1272 }
1273
1274 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
1275 unsigned int out_bytes)
1276 {
1277 unsigned int in_total, out_total;
1278
1279 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
1280 return in_bytes <= in_total && out_bytes <= out_total;
1281 }
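/*
 * Editor's note, not part of the original file: a sketch of how a device can
 * use virtqueue_avail_bytes() to wait until the guest has queued enough
 * device-writable space for a pending reply; the 128-byte reply size is an
 * illustrative assumption.
 *
 *     if (!virtqueue_avail_bytes(vq, 128, 0)) {
 *         return;   // not enough device-writable space queued yet
 *     }
 *     elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 */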
1282
1283 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
1284 hwaddr *addr, struct iovec *iov,
1285 unsigned int max_num_sg, bool is_write,
1286 hwaddr pa, size_t sz)
1287 {
1288 bool ok = false;
1289 unsigned num_sg = *p_num_sg;
1290 assert(num_sg <= max_num_sg);
1291
1292 if (!sz) {
1293 virtio_error(vdev, "virtio: zero sized buffers are not allowed");
1294 goto out;
1295 }
1296
1297 while (sz) {
1298 hwaddr len = sz;
1299
1300 if (num_sg == max_num_sg) {
1301 virtio_error(vdev, "virtio: too many write descriptors in "
1302 "indirect table");
1303 goto out;
1304 }
1305
1306 iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
1307 is_write ?
1308 DMA_DIRECTION_FROM_DEVICE :
1309 DMA_DIRECTION_TO_DEVICE,
1310 MEMTXATTRS_UNSPECIFIED);
1311 if (!iov[num_sg].iov_base) {
1312 virtio_error(vdev, "virtio: bogus descriptor or out of resources");
1313 goto out;
1314 }
1315
1316 iov[num_sg].iov_len = len;
1317 addr[num_sg] = pa;
1318
1319 sz -= len;
1320 pa += len;
1321 num_sg++;
1322 }
1323 ok = true;
1324
1325 out:
1326 *p_num_sg = num_sg;
1327 return ok;
1328 }
1329
1330 /* Only used by error code paths before we have a VirtQueueElement (therefore
1331 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
1332 * yet.
1333 */
1334 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
1335 struct iovec *iov)
1336 {
1337 unsigned int i;
1338
1339 for (i = 0; i < out_num + in_num; i++) {
1340 int is_write = i >= out_num;
1341
1342 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
1343 iov++;
1344 }
1345 }
1346
1347 static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
1348 hwaddr *addr, unsigned int num_sg,
1349 bool is_write)
1350 {
1351 unsigned int i;
1352 hwaddr len;
1353
1354 for (i = 0; i < num_sg; i++) {
1355 len = sg[i].iov_len;
1356 sg[i].iov_base = dma_memory_map(vdev->dma_as,
1357 addr[i], &len, is_write ?
1358 DMA_DIRECTION_FROM_DEVICE :
1359 DMA_DIRECTION_TO_DEVICE,
1360 MEMTXATTRS_UNSPECIFIED);
1361 if (!sg[i].iov_base) {
1362 error_report("virtio: error trying to map MMIO memory");
1363 exit(1);
1364 }
1365 if (len != sg[i].iov_len) {
1366 error_report("virtio: unexpected memory split");
1367 exit(1);
1368 }
1369 }
1370 }
1371
1372 void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
1373 {
1374 virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, true);
1375 virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num,
1376 false);
1377 }
1378
1379 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
1380 {
1381 VirtQueueElement *elem;
1382 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
1383 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
1384 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
1385 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
1386 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
1387 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
1388
1389 assert(sz >= sizeof(VirtQueueElement));
1390 elem = g_malloc(out_sg_end);
1391 trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
1392 elem->out_num = out_num;
1393 elem->in_num = in_num;
1394 elem->in_addr = (void *)elem + in_addr_ofs;
1395 elem->out_addr = (void *)elem + out_addr_ofs;
1396 elem->in_sg = (void *)elem + in_sg_ofs;
1397 elem->out_sg = (void *)elem + out_sg_ofs;
1398 return elem;
1399 }
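/*
 * Editor's note, not part of the original file: the sz parameter lets a
 * device over-allocate so that VirtQueueElement can be embedded as the first
 * member of a device-specific request structure (the pattern used by
 * virtio-blk and others); the MyRequest type below is an illustrative
 * assumption.
 *
 *     typedef struct MyRequest {
 *         VirtQueueElement elem;   // must be the first field
 *         uint32_t status;
 *     } MyRequest;
 *
 *     MyRequest *req = virtqueue_pop(vq, sizeof(MyRequest));
 *     if (req) {
 *         // fill req->elem.in_sg with the reply, set req->status, then:
 *         virtqueue_push(vq, &req->elem, sizeof(req->status));
 *         g_free(req);
 *     }
 */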
1400
1401 static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
1402 {
1403 unsigned int i, head, max;
1404 VRingMemoryRegionCaches *caches;
1405 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1406 MemoryRegionCache *desc_cache;
1407 int64_t len;
1408 VirtIODevice *vdev = vq->vdev;
1409 VirtQueueElement *elem = NULL;
1410 unsigned out_num, in_num, elem_entries;
1411 hwaddr addr[VIRTQUEUE_MAX_SIZE];
1412 struct iovec iov[VIRTQUEUE_MAX_SIZE];
1413 VRingDesc desc;
1414 int rc;
1415
1416 RCU_READ_LOCK_GUARD();
1417 if (virtio_queue_empty_rcu(vq)) {
1418 goto done;
1419 }
1420 /* Needed after virtio_queue_empty(), see comment in
1421 * virtqueue_num_heads(). */
1422 smp_rmb();
1423
1424 /* When we start there are neither input nor output buffers. */
1425 out_num = in_num = elem_entries = 0;
1426
1427 max = vq->vring.num;
1428
1429 if (vq->inuse >= vq->vring.num) {
1430 virtio_error(vdev, "Virtqueue size exceeded");
1431 goto done;
1432 }
1433
1434 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
1435 goto done;
1436 }
1437
1438 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1439 vring_set_avail_event(vq, vq->last_avail_idx);
1440 }
1441
1442 i = head;
1443
1444 caches = vring_get_region_caches(vq);
1445 if (!caches) {
1446 virtio_error(vdev, "Region caches not initialized");
1447 goto done;
1448 }
1449
1450 if (caches->desc.len < max * sizeof(VRingDesc)) {
1451 virtio_error(vdev, "Cannot map descriptor ring");
1452 goto done;
1453 }
1454
1455 desc_cache = &caches->desc;
1456 vring_split_desc_read(vdev, &desc, desc_cache, i);
1457 if (desc.flags & VRING_DESC_F_INDIRECT) {
1458 if (!desc.len || (desc.len % sizeof(VRingDesc))) {
1459 virtio_error(vdev, "Invalid size for indirect buffer table");
1460 goto done;
1461 }
1462
1463 /* loop over the indirect descriptor table */
1464 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1465 desc.addr, desc.len, false);
1466 desc_cache = &indirect_desc_cache;
1467 if (len < desc.len) {
1468 virtio_error(vdev, "Cannot map indirect buffer");
1469 goto done;
1470 }
1471
1472 max = desc.len / sizeof(VRingDesc);
1473 i = 0;
1474 vring_split_desc_read(vdev, &desc, desc_cache, i);
1475 }
1476
1477 /* Collect all the descriptors */
1478 do {
1479 bool map_ok;
1480
1481 if (desc.flags & VRING_DESC_F_WRITE) {
1482 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1483 iov + out_num,
1484 VIRTQUEUE_MAX_SIZE - out_num, true,
1485 desc.addr, desc.len);
1486 } else {
1487 if (in_num) {
1488 virtio_error(vdev, "Incorrect order for descriptors");
1489 goto err_undo_map;
1490 }
1491 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1492 VIRTQUEUE_MAX_SIZE, false,
1493 desc.addr, desc.len);
1494 }
1495 if (!map_ok) {
1496 goto err_undo_map;
1497 }
1498
1499 /* If we've got too many, that implies a descriptor loop. */
1500 if (++elem_entries > max) {
1501 virtio_error(vdev, "Looped descriptor");
1502 goto err_undo_map;
1503 }
1504
1505 rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
1506 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1507
1508 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
1509 goto err_undo_map;
1510 }
1511
1512 /* Now copy what we have collected and mapped */
1513 elem = virtqueue_alloc_element(sz, out_num, in_num);
1514 elem->index = head;
1515 elem->ndescs = 1;
1516 for (i = 0; i < out_num; i++) {
1517 elem->out_addr[i] = addr[i];
1518 elem->out_sg[i] = iov[i];
1519 }
1520 for (i = 0; i < in_num; i++) {
1521 elem->in_addr[i] = addr[out_num + i];
1522 elem->in_sg[i] = iov[out_num + i];
1523 }
1524
1525 vq->inuse++;
1526
1527 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1528 done:
1529 address_space_cache_destroy(&indirect_desc_cache);
1530
1531 return elem;
1532
1533 err_undo_map:
1534 virtqueue_undo_map_desc(out_num, in_num, iov);
1535 goto done;
1536 }
1537
1538 static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
1539 {
1540 unsigned int i, max;
1541 VRingMemoryRegionCaches *caches;
1542 MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
1543 MemoryRegionCache *desc_cache;
1544 int64_t len;
1545 VirtIODevice *vdev = vq->vdev;
1546 VirtQueueElement *elem = NULL;
1547 unsigned out_num, in_num, elem_entries;
1548 hwaddr addr[VIRTQUEUE_MAX_SIZE];
1549 struct iovec iov[VIRTQUEUE_MAX_SIZE];
1550 VRingPackedDesc desc;
1551 uint16_t id;
1552 int rc;
1553
1554 RCU_READ_LOCK_GUARD();
1555 if (virtio_queue_packed_empty_rcu(vq)) {
1556 goto done;
1557 }
1558
1559 /* When we start there are neither input nor output buffers. */
1560 out_num = in_num = elem_entries = 0;
1561
1562 max = vq->vring.num;
1563
1564 if (vq->inuse >= vq->vring.num) {
1565 virtio_error(vdev, "Virtqueue size exceeded");
1566 goto done;
1567 }
1568
1569 i = vq->last_avail_idx;
1570
1571 caches = vring_get_region_caches(vq);
1572 if (!caches) {
1573 virtio_error(vdev, "Region caches not initialized");
1574 goto done;
1575 }
1576
1577 if (caches->desc.len < max * sizeof(VRingDesc)) {
1578 virtio_error(vdev, "Cannot map descriptor ring");
1579 goto done;
1580 }
1581
1582 desc_cache = &caches->desc;
1583 vring_packed_desc_read(vdev, &desc, desc_cache, i, true);
1584 id = desc.id;
1585 if (desc.flags & VRING_DESC_F_INDIRECT) {
1586 if (desc.len % sizeof(VRingPackedDesc)) {
1587 virtio_error(vdev, "Invalid size for indirect buffer table");
1588 goto done;
1589 }
1590
1591 /* loop over the indirect descriptor table */
1592 len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
1593 desc.addr, desc.len, false);
1594 desc_cache = &indirect_desc_cache;
1595 if (len < desc.len) {
1596 virtio_error(vdev, "Cannot map indirect buffer");
1597 goto done;
1598 }
1599
1600 max = desc.len / sizeof(VRingPackedDesc);
1601 i = 0;
1602 vring_packed_desc_read(vdev, &desc, desc_cache, i, false);
1603 }
1604
1605 /* Collect all the descriptors */
1606 do {
1607 bool map_ok;
1608
1609 if (desc.flags & VRING_DESC_F_WRITE) {
1610 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
1611 iov + out_num,
1612 VIRTQUEUE_MAX_SIZE - out_num, true,
1613 desc.addr, desc.len);
1614 } else {
1615 if (in_num) {
1616 virtio_error(vdev, "Incorrect order for descriptors");
1617 goto err_undo_map;
1618 }
1619 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
1620 VIRTQUEUE_MAX_SIZE, false,
1621 desc.addr, desc.len);
1622 }
1623 if (!map_ok) {
1624 goto err_undo_map;
1625 }
1626
1627 /* If we've got too many, that implies a descriptor loop. */
1628 if (++elem_entries > max) {
1629 virtio_error(vdev, "Looped descriptor");
1630 goto err_undo_map;
1631 }
1632
1633 rc = virtqueue_packed_read_next_desc(vq, &desc, desc_cache, max, &i,
1634 desc_cache ==
1635 &indirect_desc_cache);
1636 } while (rc == VIRTQUEUE_READ_DESC_MORE);
1637
1638 /* Now copy what we have collected and mapped */
1639 elem = virtqueue_alloc_element(sz, out_num, in_num);
1640 for (i = 0; i < out_num; i++) {
1641 elem->out_addr[i] = addr[i];
1642 elem->out_sg[i] = iov[i];
1643 }
1644 for (i = 0; i < in_num; i++) {
1645 elem->in_addr[i] = addr[out_num + i];
1646 elem->in_sg[i] = iov[out_num + i];
1647 }
1648
1649 elem->index = id;
1650 elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries;
1651 vq->last_avail_idx += elem->ndescs;
1652 vq->inuse += elem->ndescs;
1653
1654 if (vq->last_avail_idx >= vq->vring.num) {
1655 vq->last_avail_idx -= vq->vring.num;
1656 vq->last_avail_wrap_counter ^= 1;
1657 }
1658
1659 vq->shadow_avail_idx = vq->last_avail_idx;
1660 vq->shadow_avail_wrap_counter = vq->last_avail_wrap_counter;
1661
1662 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
1663 done:
1664 address_space_cache_destroy(&indirect_desc_cache);
1665
1666 return elem;
1667
1668 err_undo_map:
1669 virtqueue_undo_map_desc(out_num, in_num, iov);
1670 goto done;
1671 }
1672
1673 void *virtqueue_pop(VirtQueue *vq, size_t sz)
1674 {
1675 if (virtio_device_disabled(vq->vdev)) {
1676 return NULL;
1677 }
1678
1679 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
1680 return virtqueue_packed_pop(vq, sz);
1681 } else {
1682 return virtqueue_split_pop(vq, sz);
1683 }
1684 }
1685
1686 static unsigned int virtqueue_packed_drop_all(VirtQueue *vq)
1687 {
1688 VRingMemoryRegionCaches *caches;
1689 MemoryRegionCache *desc_cache;
1690 unsigned int dropped = 0;
1691 VirtQueueElement elem = {};
1692 VirtIODevice *vdev = vq->vdev;
1693 VRingPackedDesc desc;
1694
1695 RCU_READ_LOCK_GUARD();
1696
1697 caches = vring_get_region_caches(vq);
1698 if (!caches) {
1699 return 0;
1700 }
1701
1702 desc_cache = &caches->desc;
1703
1704 virtio_queue_set_notification(vq, 0);
1705
1706 while (vq->inuse < vq->vring.num) {
1707 unsigned int idx = vq->last_avail_idx;
1708 /*
1709 * works similarly to virtqueue_pop but does not map buffers
1710 * and does not allocate any memory.
1711 */
1712 vring_packed_desc_read(vdev, &desc, desc_cache,
1713 vq->last_avail_idx, true);
1714 if (!is_desc_avail(desc.flags, vq->last_avail_wrap_counter)) {
1715 break;
1716 }
1717 elem.index = desc.id;
1718 elem.ndescs = 1;
1719 while (virtqueue_packed_read_next_desc(vq, &desc, desc_cache,
1720 vq->vring.num, &idx, false)) {
1721 ++elem.ndescs;
1722 }
1723 /*
1724 * immediately push the element, nothing to unmap
1725 * as both in_num and out_num are set to 0.
1726 */
1727 virtqueue_push(vq, &elem, 0);
1728 dropped++;
1729 vq->last_avail_idx += elem.ndescs;
1730 if (vq->last_avail_idx >= vq->vring.num) {
1731 vq->last_avail_idx -= vq->vring.num;
1732 vq->last_avail_wrap_counter ^= 1;
1733 }
1734 }
1735
1736 return dropped;
1737 }
1738
1739 static unsigned int virtqueue_split_drop_all(VirtQueue *vq)
1740 {
1741 unsigned int dropped = 0;
1742 VirtQueueElement elem = {};
1743 VirtIODevice *vdev = vq->vdev;
1744 bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1745
1746 while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
1747 /* works similarly to virtqueue_pop but does not map buffers
1748 * and does not allocate any memory */
1749 smp_rmb();
1750 if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
1751 break;
1752 }
1753 vq->inuse++;
1754 vq->last_avail_idx++;
1755 if (fEventIdx) {
1756 vring_set_avail_event(vq, vq->last_avail_idx);
1757 }
1758 /* immediately push the element, nothing to unmap
1759 * as both in_num and out_num are set to 0 */
1760 virtqueue_push(vq, &elem, 0);
1761 dropped++;
1762 }
1763
1764 return dropped;
1765 }
1766
1767 /* virtqueue_drop_all:
1768 * @vq: The #VirtQueue
1769 * Drops all queued buffers and indicates them to the guest
1770 * as if they are done. Useful when buffers cannot be
1771 * processed but must be returned to the guest.
1772 */
1773 unsigned int virtqueue_drop_all(VirtQueue *vq)
1774 {
1775 struct VirtIODevice *vdev = vq->vdev;
1776
1777 if (virtio_device_disabled(vq->vdev)) {
1778 return 0;
1779 }
1780
1781 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1782 return virtqueue_packed_drop_all(vq);
1783 } else {
1784 return virtqueue_split_drop_all(vq);
1785 }
1786 }
1787
1788 /* Reading and writing a structure directly to QEMUFile is *awful*, but
1789 * it is what QEMU has always done by mistake. We can change it sooner
1790 * or later by bumping the version number of the affected vm states.
1791 * In the meantime, since the in-memory layout of VirtQueueElement
1792 * has changed, we need to marshal to and from the layout that was
1793 * used before the change.
1794 */
1795 typedef struct VirtQueueElementOld {
1796 unsigned int index;
1797 unsigned int out_num;
1798 unsigned int in_num;
1799 hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
1800 hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
1801 struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
1802 struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
1803 } VirtQueueElementOld;
1804
1805 void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
1806 {
1807 VirtQueueElement *elem;
1808 VirtQueueElementOld data;
1809 int i;
1810
1811 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1812
1813 /* TODO: teach all callers that this can fail, and return failure instead
1814 * of asserting here.
1815 * This is just one thing (there are probably more) that must be
1816 * fixed before we can allow NDEBUG compilation.
1817 */
1818 assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
1819 assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
1820
1821 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
1822 elem->index = data.index;
1823
1824 for (i = 0; i < elem->in_num; i++) {
1825 elem->in_addr[i] = data.in_addr[i];
1826 }
1827
1828 for (i = 0; i < elem->out_num; i++) {
1829 elem->out_addr[i] = data.out_addr[i];
1830 }
1831
1832 for (i = 0; i < elem->in_num; i++) {
1833 /* Base is overwritten by virtqueue_map. */
1834 elem->in_sg[i].iov_base = 0;
1835 elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
1836 }
1837
1838 for (i = 0; i < elem->out_num; i++) {
1839 /* Base is overwritten by virtqueue_map. */
1840 elem->out_sg[i].iov_base = 0;
1841 elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
1842 }
1843
1844 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1845 qemu_get_be32s(f, &elem->ndescs);
1846 }
1847
1848 virtqueue_map(vdev, elem);
1849 return elem;
1850 }
1851
1852 void qemu_put_virtqueue_element(VirtIODevice *vdev, QEMUFile *f,
1853 VirtQueueElement *elem)
1854 {
1855 VirtQueueElementOld data;
1856 int i;
1857
1858 memset(&data, 0, sizeof(data));
1859 data.index = elem->index;
1860 data.in_num = elem->in_num;
1861 data.out_num = elem->out_num;
1862
1863 for (i = 0; i < elem->in_num; i++) {
1864 data.in_addr[i] = elem->in_addr[i];
1865 }
1866
1867 for (i = 0; i < elem->out_num; i++) {
1868 data.out_addr[i] = elem->out_addr[i];
1869 }
1870
1871 for (i = 0; i < elem->in_num; i++) {
1872 /* Base is overwritten by virtqueue_map when loading. Do not
1873 * save it, as it would leak the QEMU address space layout. */
1874 data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
1875 }
1876
1877 for (i = 0; i < elem->out_num; i++) {
1878 /* Do not save iov_base as above. */
1879 data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
1880 }
1881
1882 if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
1883 qemu_put_be32s(f, &elem->ndescs);
1884 }
1885
1886 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
1887 }
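/*
 * Editor's note, not part of the original file: a sketch of how a device's
 * save/load handlers typically use the pair above for in-flight requests;
 * the surrounding request list and the MyRequest type are illustrative
 * assumptions.
 *
 *     // save side, once per in-flight element:
 *     qemu_put_virtqueue_element(vdev, f, &req->elem);
 *
 *     // load side, reallocating the over-sized element:
 *     req = qemu_get_virtqueue_element(vdev, f, sizeof(MyRequest));
 */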
1888
1889 /* virtio device */
1890 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
1891 {
1892 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1893 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1894
1895 if (virtio_device_disabled(vdev)) {
1896 return;
1897 }
1898
1899 if (k->notify) {
1900 k->notify(qbus->parent, vector);
1901 }
1902 }
1903
1904 void virtio_update_irq(VirtIODevice *vdev)
1905 {
1906 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1907 }
1908
1909 static int virtio_validate_features(VirtIODevice *vdev)
1910 {
1911 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1912
1913 if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
1914 !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
1915 return -EFAULT;
1916 }
1917
1918 if (k->validate_features) {
1919 return k->validate_features(vdev);
1920 } else {
1921 return 0;
1922 }
1923 }
1924
1925 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
1926 {
1927 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1928 trace_virtio_set_status(vdev, val);
1929
1930 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1931 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
1932 val & VIRTIO_CONFIG_S_FEATURES_OK) {
1933 int ret = virtio_validate_features(vdev);
1934
1935 if (ret) {
1936 return ret;
1937 }
1938 }
1939 }
1940
1941 if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
1942 (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
1943 virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
1944 }
1945
1946 if (k->set_status) {
1947 k->set_status(vdev, val);
1948 }
1949 vdev->status = val;
1950
1951 return 0;
1952 }
1953
1954 static enum virtio_device_endian virtio_default_endian(void)
1955 {
1956 if (target_words_bigendian()) {
1957 return VIRTIO_DEVICE_ENDIAN_BIG;
1958 } else {
1959 return VIRTIO_DEVICE_ENDIAN_LITTLE;
1960 }
1961 }
1962
1963 static enum virtio_device_endian virtio_current_cpu_endian(void)
1964 {
1965 if (cpu_virtio_is_big_endian(current_cpu)) {
1966 return VIRTIO_DEVICE_ENDIAN_BIG;
1967 } else {
1968 return VIRTIO_DEVICE_ENDIAN_LITTLE;
1969 }
1970 }
1971
1972 void virtio_reset(void *opaque)
1973 {
1974 VirtIODevice *vdev = opaque;
1975 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1976 int i;
1977
1978 virtio_set_status(vdev, 0);
1979 if (current_cpu) {
1980 /* Guest initiated reset */
1981 vdev->device_endian = virtio_current_cpu_endian();
1982 } else {
1983 /* System reset */
1984 vdev->device_endian = virtio_default_endian();
1985 }
1986
1987 if (k->reset) {
1988 k->reset(vdev);
1989 }
1990
1991 vdev->start_on_kick = false;
1992 vdev->started = false;
1993 vdev->broken = false;
1994 vdev->guest_features = 0;
1995 vdev->queue_sel = 0;
1996 vdev->status = 0;
1997 vdev->disabled = false;
1998 qatomic_set(&vdev->isr, 0);
1999 vdev->config_vector = VIRTIO_NO_VECTOR;
2000 virtio_notify_vector(vdev, vdev->config_vector);
2001
2002 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2003 vdev->vq[i].vring.desc = 0;
2004 vdev->vq[i].vring.avail = 0;
2005 vdev->vq[i].vring.used = 0;
2006 vdev->vq[i].last_avail_idx = 0;
2007 vdev->vq[i].shadow_avail_idx = 0;
2008 vdev->vq[i].used_idx = 0;
2009 vdev->vq[i].last_avail_wrap_counter = true;
2010 vdev->vq[i].shadow_avail_wrap_counter = true;
2011 vdev->vq[i].used_wrap_counter = true;
2012 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
2013 vdev->vq[i].signalled_used = 0;
2014 vdev->vq[i].signalled_used_valid = false;
2015 vdev->vq[i].notification = true;
2016 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
2017 vdev->vq[i].inuse = 0;
2018 virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
2019 }
2020 }
2021
2022 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
2023 {
2024 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2025 uint8_t val;
2026
2027 if (addr + sizeof(val) > vdev->config_len) {
2028 return (uint32_t)-1;
2029 }
2030
2031 k->get_config(vdev, vdev->config);
2032
2033 val = ldub_p(vdev->config + addr);
2034 return val;
2035 }
2036
2037 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
2038 {
2039 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2040 uint16_t val;
2041
2042 if (addr + sizeof(val) > vdev->config_len) {
2043 return (uint32_t)-1;
2044 }
2045
2046 k->get_config(vdev, vdev->config);
2047
2048 val = lduw_p(vdev->config + addr);
2049 return val;
2050 }
2051
2052 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
2053 {
2054 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2055 uint32_t val;
2056
2057 if (addr + sizeof(val) > vdev->config_len) {
2058 return (uint32_t)-1;
2059 }
2060
2061 k->get_config(vdev, vdev->config);
2062
2063 val = ldl_p(vdev->config + addr);
2064 return val;
2065 }
2066
2067 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2068 {
2069 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2070 uint8_t val = data;
2071
2072 if (addr + sizeof(val) > vdev->config_len) {
2073 return;
2074 }
2075
2076 stb_p(vdev->config + addr, val);
2077
2078 if (k->set_config) {
2079 k->set_config(vdev, vdev->config);
2080 }
2081 }
2082
2083 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2084 {
2085 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2086 uint16_t val = data;
2087
2088 if (addr + sizeof(val) > vdev->config_len) {
2089 return;
2090 }
2091
2092 stw_p(vdev->config + addr, val);
2093
2094 if (k->set_config) {
2095 k->set_config(vdev, vdev->config);
2096 }
2097 }
2098
2099 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
2100 {
2101 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2102 uint32_t val = data;
2103
2104 if (addr + sizeof(val) > vdev->config_len) {
2105 return;
2106 }
2107
2108 stl_p(vdev->config + addr, val);
2109
2110 if (k->set_config) {
2111 k->set_config(vdev, vdev->config);
2112 }
2113 }
2114
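/*
 * Modern (VIRTIO 1.0) config space accessors: the config space is always
 * little-endian, independent of the target byte order.
 */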
2115 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
2116 {
2117 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2118 uint8_t val;
2119
2120 if (addr + sizeof(val) > vdev->config_len) {
2121 return (uint32_t)-1;
2122 }
2123
2124 k->get_config(vdev, vdev->config);
2125
2126 val = ldub_p(vdev->config + addr);
2127 return val;
2128 }
2129
2130 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
2131 {
2132 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2133 uint16_t val;
2134
2135 if (addr + sizeof(val) > vdev->config_len) {
2136 return (uint32_t)-1;
2137 }
2138
2139 k->get_config(vdev, vdev->config);
2140
2141 val = lduw_le_p(vdev->config + addr);
2142 return val;
2143 }
2144
2145 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
2146 {
2147 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2148 uint32_t val;
2149
2150 if (addr + sizeof(val) > vdev->config_len) {
2151 return (uint32_t)-1;
2152 }
2153
2154 k->get_config(vdev, vdev->config);
2155
2156 val = ldl_le_p(vdev->config + addr);
2157 return val;
2158 }
2159
2160 void virtio_config_modern_writeb(VirtIODevice *vdev,
2161 uint32_t addr, uint32_t data)
2162 {
2163 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2164 uint8_t val = data;
2165
2166 if (addr + sizeof(val) > vdev->config_len) {
2167 return;
2168 }
2169
2170 stb_p(vdev->config + addr, val);
2171
2172 if (k->set_config) {
2173 k->set_config(vdev, vdev->config);
2174 }
2175 }
2176
2177 void virtio_config_modern_writew(VirtIODevice *vdev,
2178 uint32_t addr, uint32_t data)
2179 {
2180 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2181 uint16_t val = data;
2182
2183 if (addr + sizeof(val) > vdev->config_len) {
2184 return;
2185 }
2186
2187 stw_le_p(vdev->config + addr, val);
2188
2189 if (k->set_config) {
2190 k->set_config(vdev, vdev->config);
2191 }
2192 }
2193
2194 void virtio_config_modern_writel(VirtIODevice *vdev,
2195 uint32_t addr, uint32_t data)
2196 {
2197 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2198 uint32_t val = data;
2199
2200 if (addr + sizeof(val) > vdev->config_len) {
2201 return;
2202 }
2203
2204 stl_le_p(vdev->config + addr, val);
2205
2206 if (k->set_config) {
2207 k->set_config(vdev, vdev->config);
2208 }
2209 }
2210
2211 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
2212 {
2213 if (!vdev->vq[n].vring.num) {
2214 return;
2215 }
2216 vdev->vq[n].vring.desc = addr;
2217 virtio_queue_update_rings(vdev, n);
2218 }
2219
2220 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
2221 {
2222 return vdev->vq[n].vring.desc;
2223 }
2224
2225 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
2226 hwaddr avail, hwaddr used)
2227 {
2228 if (!vdev->vq[n].vring.num) {
2229 return;
2230 }
2231 vdev->vq[n].vring.desc = desc;
2232 vdev->vq[n].vring.avail = avail;
2233 vdev->vq[n].vring.used = used;
2234 virtio_init_region_cache(vdev, n);
2235 }
2236
2237 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
2238 {
2239 /* Don't allow guest to flip queue between existent and
2240 * nonexistent states, or to set it to an invalid size.
2241 */
2242 if (!!num != !!vdev->vq[n].vring.num ||
2243 num > VIRTQUEUE_MAX_SIZE ||
2244 num < 0) {
2245 return;
2246 }
2247 vdev->vq[n].vring.num = num;
2248 }
2249
2250 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
2251 {
2252 return QLIST_FIRST(&vdev->vector_queues[vector]);
2253 }
2254
2255 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
2256 {
2257 return QLIST_NEXT(vq, node);
2258 }
2259
2260 int virtio_queue_get_num(VirtIODevice *vdev, int n)
2261 {
2262 return vdev->vq[n].vring.num;
2263 }
2264
2265 int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
2266 {
2267 return vdev->vq[n].vring.num_default;
2268 }
2269
2270 int virtio_get_num_queues(VirtIODevice *vdev)
2271 {
2272 int i;
2273
2274 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2275 if (!virtio_queue_get_num(vdev, i)) {
2276 break;
2277 }
2278 }
2279
2280 return i;
2281 }
2282
2283 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
2284 {
2285 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2286 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2287
2288 /* virtio-1 compliant devices cannot change the alignment */
2289 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2290 error_report("tried to modify queue alignment for virtio-1 device");
2291 return;
2292 }
2293 /* Check that the transport told us it was going to do this
2294 * (so a buggy transport will immediately assert rather than
2295 * silently failing to migrate this state)
2296 */
2297 assert(k->has_variable_vring_alignment);
2298
2299 if (align) {
2300 vdev->vq[n].vring.align = align;
2301 virtio_queue_update_rings(vdev, n);
2302 }
2303 }
2304
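/*
 * Kick handling for queues processed in an AioContext (ioeventfd path).
 * The device's aio handler returns whether it made progress; that value is
 * used as the poll result in virtio_queue_host_notifier_aio_poll().
 */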
2305 static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
2306 {
2307 bool ret = false;
2308
2309 if (vq->vring.desc && vq->handle_aio_output) {
2310 VirtIODevice *vdev = vq->vdev;
2311
2312 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2313 ret = vq->handle_aio_output(vdev, vq);
2314
2315 if (unlikely(vdev->start_on_kick)) {
2316 virtio_set_started(vdev, true);
2317 }
2318 }
2319
2320 return ret;
2321 }
2322
2323 static void virtio_queue_notify_vq(VirtQueue *vq)
2324 {
2325 if (vq->vring.desc && vq->handle_output) {
2326 VirtIODevice *vdev = vq->vdev;
2327
2328 if (unlikely(vdev->broken)) {
2329 return;
2330 }
2331
2332 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2333 vq->handle_output(vdev, vq);
2334
2335 if (unlikely(vdev->start_on_kick)) {
2336 virtio_set_started(vdev, true);
2337 }
2338 }
2339 }
2340
2341 void virtio_queue_notify(VirtIODevice *vdev, int n)
2342 {
2343 VirtQueue *vq = &vdev->vq[n];
2344
2345 if (unlikely(!vq->vring.desc || vdev->broken)) {
2346 return;
2347 }
2348
2349 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
2350 if (vq->host_notifier_enabled) {
2351 event_notifier_set(&vq->host_notifier);
2352 } else if (vq->handle_output) {
2353 vq->handle_output(vdev, vq);
2354
2355 if (unlikely(vdev->start_on_kick)) {
2356 virtio_set_started(vdev, true);
2357 }
2358 }
2359 }
2360
2361 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
2362 {
2363 return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
2364 VIRTIO_NO_VECTOR;
2365 }
2366
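/*
 * Bind queue n to an interrupt vector. When the transport provides
 * vector_queues, the queue is also moved between the per-vector lists that
 * virtio_vector_first_queue()/virtio_vector_next_queue() iterate.
 */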
2367 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
2368 {
2369 VirtQueue *vq = &vdev->vq[n];
2370
2371 if (n < VIRTIO_QUEUE_MAX) {
2372 if (vdev->vector_queues &&
2373 vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
2374 QLIST_REMOVE(vq, node);
2375 }
2376 vdev->vq[n].vector = vector;
2377 if (vdev->vector_queues &&
2378 vector != VIRTIO_NO_VECTOR) {
2379 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
2380 }
2381 }
2382 }
2383
2384 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
2385 VirtIOHandleOutput handle_output)
2386 {
2387 int i;
2388
2389 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2390 if (vdev->vq[i].vring.num == 0)
2391 break;
2392 }
2393
2394 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
2395 abort();
2396
2397 vdev->vq[i].vring.num = queue_size;
2398 vdev->vq[i].vring.num_default = queue_size;
2399 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
2400 vdev->vq[i].handle_output = handle_output;
2401 vdev->vq[i].handle_aio_output = NULL;
2402 vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
2403 queue_size);
2404
2405 return &vdev->vq[i];
2406 }
2407
2408 void virtio_delete_queue(VirtQueue *vq)
2409 {
2410 vq->vring.num = 0;
2411 vq->vring.num_default = 0;
2412 vq->handle_output = NULL;
2413 vq->handle_aio_output = NULL;
2414 g_free(vq->used_elems);
2415 vq->used_elems = NULL;
2416 virtio_virtqueue_reset_region_cache(vq);
2417 }
2418
2419 void virtio_del_queue(VirtIODevice *vdev, int n)
2420 {
2421 if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
2422 abort();
2423 }
2424
2425 virtio_delete_queue(&vdev->vq[n]);
2426 }
2427
2428 static void virtio_set_isr(VirtIODevice *vdev, int value)
2429 {
2430 uint8_t old = qatomic_read(&vdev->isr);
2431
2432 /* Do not write ISR if it does not change, so that its cacheline remains
2433 * shared in the common case where the guest does not read it.
2434 */
2435 if ((old & value) != value) {
2436 qatomic_or(&vdev->isr, value);
2437 }
2438 }
2439
2440 /* Called within rcu_read_lock(). */
2441 static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2442 {
2443 uint16_t old, new;
2444 bool v;
2445 /* We need to expose used array entries before checking used event. */
2446 smp_mb();
2447 /* Always notify when the queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY is acknowledged) */
2448 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
2449 !vq->inuse && virtio_queue_empty(vq)) {
2450 return true;
2451 }
2452
2453 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2454 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
2455 }
2456
2457 v = vq->signalled_used_valid;
2458 vq->signalled_used_valid = true;
2459 old = vq->signalled_used;
2460 new = vq->signalled_used = vq->used_idx;
2461 return !v || vring_need_event(vring_get_used_event(vq), new, old);
2462 }
2463
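/*
 * Packed-ring event suppression: bit 15 of off_wrap holds the driver's wrap
 * counter and bits 0..14 the event offset. If that wrap counter differs from
 * ours, the offset is adjusted by the ring size before the usual
 * vring_need_event() range check.
 */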
2464 static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
2465 uint16_t off_wrap, uint16_t new,
2466 uint16_t old)
2467 {
2468 int off = off_wrap & ~(1 << 15);
2469
2470 if (wrap != off_wrap >> 15) {
2471 off -= vq->vring.num;
2472 }
2473
2474 return vring_need_event(off, new, old);
2475 }
2476
2477 /* Called within rcu_read_lock(). */
2478 static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2479 {
2480 VRingPackedDescEvent e;
2481 uint16_t old, new;
2482 bool v;
2483 VRingMemoryRegionCaches *caches;
2484
2485 caches = vring_get_region_caches(vq);
2486 if (!caches) {
2487 return false;
2488 }
2489
2490 vring_packed_event_read(vdev, &caches->avail, &e);
2491
2492 old = vq->signalled_used;
2493 new = vq->signalled_used = vq->used_idx;
2494 v = vq->signalled_used_valid;
2495 vq->signalled_used_valid = true;
2496
2497 if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
2498 return false;
2499 } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
2500 return true;
2501 }
2502
2503 return !v || vring_packed_need_event(vq, vq->used_wrap_counter,
2504 e.off_wrap, new, old);
2505 }
2506
2507 /* Called within rcu_read_lock(). */
2508 static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
2509 {
2510 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
2511 return virtio_packed_should_notify(vdev, vq);
2512 } else {
2513 return virtio_split_should_notify(vdev, vq);
2514 }
2515 }
2516
2517 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
2518 {
2519 WITH_RCU_READ_LOCK_GUARD() {
2520 if (!virtio_should_notify(vdev, vq)) {
2521 return;
2522 }
2523 }
2524
2525 trace_virtio_notify_irqfd(vdev, vq);
2526
2527 /*
2528 * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
2529 * Windows drivers included in virtio-win 1.8.0 (circa 2015) are
2530 * incorrectly polling this bit during crashdump and hibernation
2531 * in MSI mode, causing a hang if this bit is never updated.
2532 * Recent releases of Windows do not really shut down, but rather
2533 * log out and hibernate to make the next startup faster. Hence,
2534 * this manifested as a more serious hang during shutdown with MSI enabled.
2535 *
2536 * The next driver release, from 2016, fixed this problem, so working
2537 * around it is not a must, but it's easy to do, so let's do it here.
2538 *
2539 * Note: it's safe to update ISR from any thread as it was switched
2540 * to an atomic operation.
2541 */
2542 virtio_set_isr(vq->vdev, 0x1);
2543 event_notifier_set(&vq->guest_notifier);
2544 }
2545
2546 static void virtio_irq(VirtQueue *vq)
2547 {
2548 virtio_set_isr(vq->vdev, 0x1);
2549 virtio_notify_vector(vq->vdev, vq->vector);
2550 }
2551
2552 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
2553 {
2554 WITH_RCU_READ_LOCK_GUARD() {
2555 if (!virtio_should_notify(vdev, vq)) {
2556 return;
2557 }
2558 }
2559
2560 trace_virtio_notify(vdev, vq);
2561 virtio_irq(vq);
2562 }
2563
2564 void virtio_notify_config(VirtIODevice *vdev)
2565 {
2566 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2567 return;
2568
2569 virtio_set_isr(vdev, 0x3);
2570 vdev->generation++;
2571 virtio_notify_vector(vdev, vdev->config_vector);
2572 }
2573
2574 static bool virtio_device_endian_needed(void *opaque)
2575 {
2576 VirtIODevice *vdev = opaque;
2577
2578 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
2579 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2580 return vdev->device_endian != virtio_default_endian();
2581 }
2582 /* Devices conforming to VIRTIO 1.0 or later are always LE. */
2583 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
2584 }
2585
2586 static bool virtio_64bit_features_needed(void *opaque)
2587 {
2588 VirtIODevice *vdev = opaque;
2589
2590 return (vdev->host_features >> 32) != 0;
2591 }
2592
2593 static bool virtio_virtqueue_needed(void *opaque)
2594 {
2595 VirtIODevice *vdev = opaque;
2596
2597 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
2598 }
2599
2600 static bool virtio_packed_virtqueue_needed(void *opaque)
2601 {
2602 VirtIODevice *vdev = opaque;
2603
2604 return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
2605 }
2606
2607 static bool virtio_ringsize_needed(void *opaque)
2608 {
2609 VirtIODevice *vdev = opaque;
2610 int i;
2611
2612 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2613 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
2614 return true;
2615 }
2616 }
2617 return false;
2618 }
2619
2620 static bool virtio_extra_state_needed(void *opaque)
2621 {
2622 VirtIODevice *vdev = opaque;
2623 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2624 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2625
2626 return k->has_extra_state &&
2627 k->has_extra_state(qbus->parent);
2628 }
2629
2630 static bool virtio_broken_needed(void *opaque)
2631 {
2632 VirtIODevice *vdev = opaque;
2633
2634 return vdev->broken;
2635 }
2636
2637 static bool virtio_started_needed(void *opaque)
2638 {
2639 VirtIODevice *vdev = opaque;
2640
2641 return vdev->started;
2642 }
2643
2644 static bool virtio_disabled_needed(void *opaque)
2645 {
2646 VirtIODevice *vdev = opaque;
2647
2648 return vdev->disabled;
2649 }
2650
2651 static const VMStateDescription vmstate_virtqueue = {
2652 .name = "virtqueue_state",
2653 .version_id = 1,
2654 .minimum_version_id = 1,
2655 .fields = (VMStateField[]) {
2656 VMSTATE_UINT64(vring.avail, struct VirtQueue),
2657 VMSTATE_UINT64(vring.used, struct VirtQueue),
2658 VMSTATE_END_OF_LIST()
2659 }
2660 };
2661
2662 static const VMStateDescription vmstate_packed_virtqueue = {
2663 .name = "packed_virtqueue_state",
2664 .version_id = 1,
2665 .minimum_version_id = 1,
2666 .fields = (VMStateField[]) {
2667 VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
2668 VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
2669 VMSTATE_UINT16(used_idx, struct VirtQueue),
2670 VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
2671 VMSTATE_UINT32(inuse, struct VirtQueue),
2672 VMSTATE_END_OF_LIST()
2673 }
2674 };
2675
2676 static const VMStateDescription vmstate_virtio_virtqueues = {
2677 .name = "virtio/virtqueues",
2678 .version_id = 1,
2679 .minimum_version_id = 1,
2680 .needed = &virtio_virtqueue_needed,
2681 .fields = (VMStateField[]) {
2682 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2683 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
2684 VMSTATE_END_OF_LIST()
2685 }
2686 };
2687
2688 static const VMStateDescription vmstate_virtio_packed_virtqueues = {
2689 .name = "virtio/packed_virtqueues",
2690 .version_id = 1,
2691 .minimum_version_id = 1,
2692 .needed = &virtio_packed_virtqueue_needed,
2693 .fields = (VMStateField[]) {
2694 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2695 VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
2696 VMSTATE_END_OF_LIST()
2697 }
2698 };
2699
2700 static const VMStateDescription vmstate_ringsize = {
2701 .name = "ringsize_state",
2702 .version_id = 1,
2703 .minimum_version_id = 1,
2704 .fields = (VMStateField[]) {
2705 VMSTATE_UINT32(vring.num_default, struct VirtQueue),
2706 VMSTATE_END_OF_LIST()
2707 }
2708 };
2709
2710 static const VMStateDescription vmstate_virtio_ringsize = {
2711 .name = "virtio/ringsize",
2712 .version_id = 1,
2713 .minimum_version_id = 1,
2714 .needed = &virtio_ringsize_needed,
2715 .fields = (VMStateField[]) {
2716 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
2717 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
2718 VMSTATE_END_OF_LIST()
2719 }
2720 };
2721
2722 static int get_extra_state(QEMUFile *f, void *pv, size_t size,
2723 const VMStateField *field)
2724 {
2725 VirtIODevice *vdev = pv;
2726 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2727 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2728
2729 if (!k->load_extra_state) {
2730 return -1;
2731 } else {
2732 return k->load_extra_state(qbus->parent, f);
2733 }
2734 }
2735
2736 static int put_extra_state(QEMUFile *f, void *pv, size_t size,
2737 const VMStateField *field, JSONWriter *vmdesc)
2738 {
2739 VirtIODevice *vdev = pv;
2740 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2741 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2742
2743 k->save_extra_state(qbus->parent, f);
2744 return 0;
2745 }
2746
2747 static const VMStateInfo vmstate_info_extra_state = {
2748 .name = "virtqueue_extra_state",
2749 .get = get_extra_state,
2750 .put = put_extra_state,
2751 };
2752
2753 static const VMStateDescription vmstate_virtio_extra_state = {
2754 .name = "virtio/extra_state",
2755 .version_id = 1,
2756 .minimum_version_id = 1,
2757 .needed = &virtio_extra_state_needed,
2758 .fields = (VMStateField[]) {
2759 {
2760 .name = "extra_state",
2761 .version_id = 0,
2762 .field_exists = NULL,
2763 .size = 0,
2764 .info = &vmstate_info_extra_state,
2765 .flags = VMS_SINGLE,
2766 .offset = 0,
2767 },
2768 VMSTATE_END_OF_LIST()
2769 }
2770 };
2771
2772 static const VMStateDescription vmstate_virtio_device_endian = {
2773 .name = "virtio/device_endian",
2774 .version_id = 1,
2775 .minimum_version_id = 1,
2776 .needed = &virtio_device_endian_needed,
2777 .fields = (VMStateField[]) {
2778 VMSTATE_UINT8(device_endian, VirtIODevice),
2779 VMSTATE_END_OF_LIST()
2780 }
2781 };
2782
2783 static const VMStateDescription vmstate_virtio_64bit_features = {
2784 .name = "virtio/64bit_features",
2785 .version_id = 1,
2786 .minimum_version_id = 1,
2787 .needed = &virtio_64bit_features_needed,
2788 .fields = (VMStateField[]) {
2789 VMSTATE_UINT64(guest_features, VirtIODevice),
2790 VMSTATE_END_OF_LIST()
2791 }
2792 };
2793
2794 static const VMStateDescription vmstate_virtio_broken = {
2795 .name = "virtio/broken",
2796 .version_id = 1,
2797 .minimum_version_id = 1,
2798 .needed = &virtio_broken_needed,
2799 .fields = (VMStateField[]) {
2800 VMSTATE_BOOL(broken, VirtIODevice),
2801 VMSTATE_END_OF_LIST()
2802 }
2803 };
2804
2805 static const VMStateDescription vmstate_virtio_started = {
2806 .name = "virtio/started",
2807 .version_id = 1,
2808 .minimum_version_id = 1,
2809 .needed = &virtio_started_needed,
2810 .fields = (VMStateField[]) {
2811 VMSTATE_BOOL(started, VirtIODevice),
2812 VMSTATE_END_OF_LIST()
2813 }
2814 };
2815
2816 static const VMStateDescription vmstate_virtio_disabled = {
2817 .name = "virtio/disabled",
2818 .version_id = 1,
2819 .minimum_version_id = 1,
2820 .needed = &virtio_disabled_needed,
2821 .fields = (VMStateField[]) {
2822 VMSTATE_BOOL(disabled, VirtIODevice),
2823 VMSTATE_END_OF_LIST()
2824 }
2825 };
2826
2827 static const VMStateDescription vmstate_virtio = {
2828 .name = "virtio",
2829 .version_id = 1,
2830 .minimum_version_id = 1,
2831 .minimum_version_id_old = 1,
2832 .fields = (VMStateField[]) {
2833 VMSTATE_END_OF_LIST()
2834 },
2835 .subsections = (const VMStateDescription*[]) {
2836 &vmstate_virtio_device_endian,
2837 &vmstate_virtio_64bit_features,
2838 &vmstate_virtio_virtqueues,
2839 &vmstate_virtio_ringsize,
2840 &vmstate_virtio_broken,
2841 &vmstate_virtio_extra_state,
2842 &vmstate_virtio_started,
2843 &vmstate_virtio_packed_virtqueues,
2844 &vmstate_virtio_disabled,
2845 NULL
2846 }
2847 };
2848
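/*
 * Legacy save format, in order: transport config (save_config), status, ISR,
 * queue_sel, the low 32 feature bits, config length and contents, the number
 * of in-use queues, then per queue: size, optional vring alignment, desc
 * address, last_avail_idx and transport per-queue state (save_queue).
 * Device-specific state follows (vdc->save and/or vdc->vmsd), and everything
 * newer (64-bit features, VIRTIO-1 ring addresses, packed-ring indices, ...)
 * travels in the vmstate_virtio subsections appended at the end.
 */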
2849 int virtio_save(VirtIODevice *vdev, QEMUFile *f)
2850 {
2851 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2852 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2853 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2854 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
2855 int i;
2856
2857 if (k->save_config) {
2858 k->save_config(qbus->parent, f);
2859 }
2860
2861 qemu_put_8s(f, &vdev->status);
2862 qemu_put_8s(f, &vdev->isr);
2863 qemu_put_be16s(f, &vdev->queue_sel);
2864 qemu_put_be32s(f, &guest_features_lo);
2865 qemu_put_be32(f, vdev->config_len);
2866 qemu_put_buffer(f, vdev->config, vdev->config_len);
2867
2868 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2869 if (vdev->vq[i].vring.num == 0)
2870 break;
2871 }
2872
2873 qemu_put_be32(f, i);
2874
2875 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2876 if (vdev->vq[i].vring.num == 0)
2877 break;
2878
2879 qemu_put_be32(f, vdev->vq[i].vring.num);
2880 if (k->has_variable_vring_alignment) {
2881 qemu_put_be32(f, vdev->vq[i].vring.align);
2882 }
2883 /*
2884 * Save desc now, the rest of the ring addresses are saved in
2885 * subsections for VIRTIO-1 devices.
2886 */
2887 qemu_put_be64(f, vdev->vq[i].vring.desc);
2888 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
2889 if (k->save_queue) {
2890 k->save_queue(qbus->parent, i, f);
2891 }
2892 }
2893
2894 if (vdc->save != NULL) {
2895 vdc->save(vdev, f);
2896 }
2897
2898 if (vdc->vmsd) {
2899 int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
2900 if (ret) {
2901 return ret;
2902 }
2903 }
2904
2905 /* Subsections */
2906 return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
2907 }
2908
2909 /* A wrapper for use as a VMState .put function */
2910 static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
2911 const VMStateField *field, JSONWriter *vmdesc)
2912 {
2913 return virtio_save(VIRTIO_DEVICE(opaque), f);
2914 }
2915
2916 /* A wrapper for use as a VMState .get function */
2917 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
2918 const VMStateField *field)
2919 {
2920 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
2921 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
2922
2923 return virtio_load(vdev, f, dc->vmsd->version_id);
2924 }
2925
2926 const VMStateInfo virtio_vmstate_info = {
2927 .name = "virtio",
2928 .get = virtio_device_get,
2929 .put = virtio_device_put,
2930 };
2931
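/*
 * Install the feature bits without the FEATURES_OK ordering check: requested
 * bits not offered in host_features are masked out before being handed to
 * the device class, and -1 is returned to flag that the driver asked for
 * something the device does not support.
 */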
2932 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
2933 {
2934 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
2935 bool bad = (val & ~(vdev->host_features)) != 0;
2936
2937 val &= vdev->host_features;
2938 if (k->set_features) {
2939 k->set_features(vdev, val);
2940 }
2941 vdev->guest_features = val;
2942 return bad ? -1 : 0;
2943 }
2944
2945 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
2946 {
2947 int ret;
2948 /*
2949 * The driver must not attempt to set features after feature negotiation
2950 * has finished.
2951 */
2952 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
2953 return -EINVAL;
2954 }
2955 ret = virtio_set_features_nocheck(vdev, val);
2956 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
2957 /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
2958 int i;
2959 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2960 if (vdev->vq[i].vring.num != 0) {
2961 virtio_init_region_cache(vdev, i);
2962 }
2963 }
2964 }
2965 if (!ret) {
2966 if (!virtio_device_started(vdev, vdev->status) &&
2967 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2968 vdev->start_on_kick = true;
2969 }
2970 }
2971 return ret;
2972 }
2973
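/*
 * Compute the config space size implied by the offered features: for each
 * entry of feature_sizes (terminated by flags == 0) whose bits are present
 * in host_features, the config space must extend at least to ->end.
 */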
2974 size_t virtio_feature_get_config_size(const VirtIOFeature *feature_sizes,
2975 uint64_t host_features)
2976 {
2977 size_t config_size = 0;
2978 int i;
2979
2980 for (i = 0; feature_sizes[i].flags != 0; i++) {
2981 if (host_features & feature_sizes[i].flags) {
2982 config_size = MAX(feature_sizes[i].end, config_size);
2983 }
2984 }
2985
2986 return config_size;
2987 }
2988
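/*
 * Counterpart of virtio_save(): the legacy fields are read back in the same
 * order, followed by device-specific state and the vmstate_virtio
 * subsections; finally the ring state (region caches, used/avail indices,
 * inuse count) is reconstructed and sanity-checked against the migrated
 * last_avail_idx values.
 */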
2989 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
2990 {
2991 int i, ret;
2992 int32_t config_len;
2993 uint32_t num;
2994 uint32_t features;
2995 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2996 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2997 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
2998
2999 /*
3000 * We poison the endianness to ensure it does not get used before
3001 * subsections have been loaded.
3002 */
3003 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
3004
3005 if (k->load_config) {
3006 ret = k->load_config(qbus->parent, f);
3007 if (ret)
3008 return ret;
3009 }
3010
3011 qemu_get_8s(f, &vdev->status);
3012 qemu_get_8s(f, &vdev->isr);
3013 qemu_get_be16s(f, &vdev->queue_sel);
3014 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
3015 return -1;
3016 }
3017 qemu_get_be32s(f, &features);
3018
3019 /*
3020 * Temporarily set guest_features low bits - needed by
3021 * the virtio-net load code, which tests for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
3022 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
3023 *
3024 * Note: devices should always test host features in the future - don't
3025 * create new dependencies like this.
3026 */
3027 vdev->guest_features = features;
3028
3029 config_len = qemu_get_be32(f);
3030
3031 /*
3032 * There are cases where the incoming config can be bigger or smaller
3033 * than what we have; so load what we have space for, and skip
3034 * any excess that's in the stream.
3035 */
3036 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
3037
3038 while (config_len > vdev->config_len) {
3039 qemu_get_byte(f);
3040 config_len--;
3041 }
3042
3043 num = qemu_get_be32(f);
3044
3045 if (num > VIRTIO_QUEUE_MAX) {
3046 error_report("Invalid number of virtqueues: 0x%x", num);
3047 return -1;
3048 }
3049
3050 for (i = 0; i < num; i++) {
3051 vdev->vq[i].vring.num = qemu_get_be32(f);
3052 if (k->has_variable_vring_alignment) {
3053 vdev->vq[i].vring.align = qemu_get_be32(f);
3054 }
3055 vdev->vq[i].vring.desc = qemu_get_be64(f);
3056 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
3057 vdev->vq[i].signalled_used_valid = false;
3058 vdev->vq[i].notification = true;
3059
3060 if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
3061 error_report("VQ %d address 0x0 "
3062 "inconsistent with Host index 0x%x",
3063 i, vdev->vq[i].last_avail_idx);
3064 return -1;
3065 }
3066 if (k->load_queue) {
3067 ret = k->load_queue(qbus->parent, i, f);
3068 if (ret)
3069 return ret;
3070 }
3071 }
3072
3073 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
3074
3075 if (vdc->load != NULL) {
3076 ret = vdc->load(vdev, f, version_id);
3077 if (ret) {
3078 return ret;
3079 }
3080 }
3081
3082 if (vdc->vmsd) {
3083 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
3084 if (ret) {
3085 return ret;
3086 }
3087 }
3088
3089 /* Subsections */
3090 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
3091 if (ret) {
3092 return ret;
3093 }
3094
3095 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
3096 vdev->device_endian = virtio_default_endian();
3097 }
3098
3099 if (virtio_64bit_features_needed(vdev)) {
3100 /*
3101 * Subsection load filled vdev->guest_features. Run them
3102 * through virtio_set_features_nocheck() to sanity-check them against
3103 * host_features.
3104 */
3105 uint64_t features64 = vdev->guest_features;
3106 if (virtio_set_features_nocheck(vdev, features64) < 0) {
3107 error_report("Features 0x%" PRIx64 " unsupported. "
3108 "Allowed features: 0x%" PRIx64,
3109 features64, vdev->host_features);
3110 return -1;
3111 }
3112 } else {
3113 if (virtio_set_features_nocheck(vdev, features) < 0) {
3114 error_report("Features 0x%x unsupported. "
3115 "Allowed features: 0x%" PRIx64,
3116 features, vdev->host_features);
3117 return -1;
3118 }
3119 }
3120
3121 if (!virtio_device_started(vdev, vdev->status) &&
3122 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3123 vdev->start_on_kick = true;
3124 }
3125
3126 RCU_READ_LOCK_GUARD();
3127 for (i = 0; i < num; i++) {
3128 if (vdev->vq[i].vring.desc) {
3129 uint16_t nheads;
3130
3131 /*
3132 * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
3133 * only the region cache needs to be set up. Legacy devices need
3134 * to calculate used and avail ring addresses based on the desc
3135 * address.
3136 */
3137 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3138 virtio_init_region_cache(vdev, i);
3139 } else {
3140 virtio_queue_update_rings(vdev, i);
3141 }
3142
3143 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3144 vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
3145 vdev->vq[i].shadow_avail_wrap_counter =
3146 vdev->vq[i].last_avail_wrap_counter;
3147 continue;
3148 }
3149
3150 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
3151 /* Check it isn't doing strange things with descriptor numbers. */
3152 if (nheads > vdev->vq[i].vring.num) {
3153 virtio_error(vdev, "VQ %d size 0x%x Guest index 0x%x "
3154 "inconsistent with Host index 0x%x: delta 0x%x",
3155 i, vdev->vq[i].vring.num,
3156 vring_avail_idx(&vdev->vq[i]),
3157 vdev->vq[i].last_avail_idx, nheads);
3158 vdev->vq[i].used_idx = 0;
3159 vdev->vq[i].shadow_avail_idx = 0;
3160 vdev->vq[i].inuse = 0;
3161 continue;
3162 }
3163 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
3164 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
3165
3166 /*
3167 * Some devices migrate VirtQueueElements that have been popped
3168 * from the avail ring but not yet returned to the used ring.
3169 * Since max ring size < UINT16_MAX it's safe to use modulo
3170 * UINT16_MAX + 1 subtraction.
3171 */
3172 vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
3173 vdev->vq[i].used_idx);
3174 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
3175 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
3176 "used_idx 0x%x",
3177 i, vdev->vq[i].vring.num,
3178 vdev->vq[i].last_avail_idx,
3179 vdev->vq[i].used_idx);
3180 return -1;
3181 }
3182 }
3183 }
3184
3185 if (vdc->post_load) {
3186 ret = vdc->post_load(vdev);
3187 if (ret) {
3188 return ret;
3189 }
3190 }
3191
3192 return 0;
3193 }
3194
3195 void virtio_cleanup(VirtIODevice *vdev)
3196 {
3197 qemu_del_vm_change_state_handler(vdev->vmstate);
3198 }
3199
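/*
 * VM run-state hook: vdev->status is re-applied around the transport's
 * vmstate_change callback - before it when the backend is starting, after
 * it when the backend is stopping.
 */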
3200 static void virtio_vmstate_change(void *opaque, bool running, RunState state)
3201 {
3202 VirtIODevice *vdev = opaque;
3203 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3204 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3205 bool backend_run = running && virtio_device_started(vdev, vdev->status);
3206 vdev->vm_running = running;
3207
3208 if (backend_run) {
3209 virtio_set_status(vdev, vdev->status);
3210 }
3211
3212 if (k->vmstate_change) {
3213 k->vmstate_change(qbus->parent, backend_run);
3214 }
3215
3216 if (!backend_run) {
3217 virtio_set_status(vdev, vdev->status);
3218 }
3219 }
3220
3221 void virtio_instance_init_common(Object *proxy_obj, void *data,
3222 size_t vdev_size, const char *vdev_name)
3223 {
3224 DeviceState *vdev = data;
3225
3226 object_initialize_child_with_props(proxy_obj, "virtio-backend", vdev,
3227 vdev_size, vdev_name, &error_abort,
3228 NULL);
3229 qdev_alias_all_properties(vdev, proxy_obj);
3230 }
3231
3232 void virtio_init(VirtIODevice *vdev, const char *name,
3233 uint16_t device_id, size_t config_size)
3234 {
3235 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3236 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3237 int i;
3238 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
3239
3240 if (nvectors) {
3241 vdev->vector_queues =
3242 g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
3243 }
3244
3245 vdev->start_on_kick = false;
3246 vdev->started = false;
3247 vdev->device_id = device_id;
3248 vdev->status = 0;
3249 qatomic_set(&vdev->isr, 0);
3250 vdev->queue_sel = 0;
3251 vdev->config_vector = VIRTIO_NO_VECTOR;
3252 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
3253 vdev->vm_running = runstate_is_running();
3254 vdev->broken = false;
3255 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3256 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
3257 vdev->vq[i].vdev = vdev;
3258 vdev->vq[i].queue_index = i;
3259 vdev->vq[i].host_notifier_enabled = false;
3260 }
3261
3262 vdev->name = name;
3263 vdev->config_len = config_size;
3264 if (vdev->config_len) {
3265 vdev->config = g_malloc0(config_size);
3266 } else {
3267 vdev->config = NULL;
3268 }
3269 vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
3270 virtio_vmstate_change, vdev);
3271 vdev->device_endian = virtio_default_endian();
3272 vdev->use_guest_notifier_mask = true;
3273 }
3274
3275 /*
3276 * Only devices that have already been around prior to defining the virtio
3277 * standard support legacy mode; this includes devices not specified in the
3278 * standard. All newer devices conform to the virtio standard only.
3279 */
3280 bool virtio_legacy_allowed(VirtIODevice *vdev)
3281 {
3282 switch (vdev->device_id) {
3283 case VIRTIO_ID_NET:
3284 case VIRTIO_ID_BLOCK:
3285 case VIRTIO_ID_CONSOLE:
3286 case VIRTIO_ID_RNG:
3287 case VIRTIO_ID_BALLOON:
3288 case VIRTIO_ID_RPMSG:
3289 case VIRTIO_ID_SCSI:
3290 case VIRTIO_ID_9P:
3291 case VIRTIO_ID_RPROC_SERIAL:
3292 case VIRTIO_ID_CAIF:
3293 return true;
3294 default:
3295 return false;
3296 }
3297 }
3298
3299 bool virtio_legacy_check_disabled(VirtIODevice *vdev)
3300 {
3301 return vdev->disable_legacy_check;
3302 }
3303
3304 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
3305 {
3306 return vdev->vq[n].vring.desc;
3307 }
3308
3309 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n)
3310 {
3311 return virtio_queue_get_desc_addr(vdev, n) != 0;
3312 }
3313
3314 bool virtio_queue_enabled(VirtIODevice *vdev, int n)
3315 {
3316 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3317 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3318
3319 if (k->queue_enabled) {
3320 return k->queue_enabled(qbus->parent, n);
3321 }
3322 return virtio_queue_enabled_legacy(vdev, n);
3323 }
3324
3325 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
3326 {
3327 return vdev->vq[n].vring.avail;
3328 }
3329
3330 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
3331 {
3332 return vdev->vq[n].vring.used;
3333 }
3334
3335 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
3336 {
3337 return sizeof(VRingDesc) * vdev->vq[n].vring.num;
3338 }
3339
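/*
 * Ring sizes as seen by the transport. For split rings:
 *
 *   avail: 2 (flags) + 2 (idx) + 2 * num [+ 2 for used_event]
 *   used:  2 (flags) + 2 (idx) + 8 * num [+ 2 for avail_event]
 *
 * where the trailing event field is only present with
 * VIRTIO_RING_F_EVENT_IDX. Packed rings only expose the driver/device event
 * suppression structure here.
 */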
3340 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
3341 {
3342 int s;
3343
3344 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3345 return sizeof(struct VRingPackedDescEvent);
3346 }
3347
3348 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3349 return offsetof(VRingAvail, ring) +
3350 sizeof(uint16_t) * vdev->vq[n].vring.num + s;
3351 }
3352
3353 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
3354 {
3355 int s;
3356
3357 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3358 return sizeof(struct VRingPackedDescEvent);
3359 }
3360
3361 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
3362 return offsetof(VRingUsed, ring) +
3363 sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
3364 }
3365
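/*
 * For packed rings the "last_avail_idx" exchanged with the transport is
 * really four values packed into 32 bits:
 *
 *   bits  0..14  last_avail_idx     bit 15  last_avail_wrap_counter
 *   bits 16..30  used_idx           bit 31  used_wrap_counter
 *
 * virtio_queue_packed_set_last_avail_idx() below unpacks the same layout.
 */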
3366 static unsigned int virtio_queue_packed_get_last_avail_idx(VirtIODevice *vdev,
3367 int n)
3368 {
3369 unsigned int avail, used;
3370
3371 avail = vdev->vq[n].last_avail_idx;
3372 avail |= ((uint16_t)vdev->vq[n].last_avail_wrap_counter) << 15;
3373
3374 used = vdev->vq[n].used_idx;
3375 used |= ((uint16_t)vdev->vq[n].used_wrap_counter) << 15;
3376
3377 return avail | used << 16;
3378 }
3379
3380 static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev,
3381 int n)
3382 {
3383 return vdev->vq[n].last_avail_idx;
3384 }
3385
3386 unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
3387 {
3388 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3389 return virtio_queue_packed_get_last_avail_idx(vdev, n);
3390 } else {
3391 return virtio_queue_split_get_last_avail_idx(vdev, n);
3392 }
3393 }
3394
3395 static void virtio_queue_packed_set_last_avail_idx(VirtIODevice *vdev,
3396 int n, unsigned int idx)
3397 {
3398 struct VirtQueue *vq = &vdev->vq[n];
3399
3400 vq->last_avail_idx = vq->shadow_avail_idx = idx & 0x7fff;
3401 vq->last_avail_wrap_counter =
3402 vq->shadow_avail_wrap_counter = !!(idx & 0x8000);
3403 idx >>= 16;
3404 vq->used_idx = idx & 0x7fff;
3405 vq->used_wrap_counter = !!(idx & 0x8000);
3406 }
3407
3408 static void virtio_queue_split_set_last_avail_idx(VirtIODevice *vdev,
3409 int n, unsigned int idx)
3410 {
3411 vdev->vq[n].last_avail_idx = idx;
3412 vdev->vq[n].shadow_avail_idx = idx;
3413 }
3414
3415 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n,
3416 unsigned int idx)
3417 {
3418 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3419 virtio_queue_packed_set_last_avail_idx(vdev, n, idx);
3420 } else {
3421 virtio_queue_split_set_last_avail_idx(vdev, n, idx);
3422 }
3423 }
3424
3425 static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
3426 int n)
3427 {
3428 /* There is no index in shared memory (such as the split ring's used idx) to restore from */
3429 return;
3430 }
3431
3432 static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
3433 int n)
3434 {
3435 RCU_READ_LOCK_GUARD();
3436 if (vdev->vq[n].vring.desc) {
3437 vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
3438 vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
3439 }
3440 }
3441
3442 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
3443 {
3444 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3445 virtio_queue_packed_restore_last_avail_idx(vdev, n);
3446 } else {
3447 virtio_queue_split_restore_last_avail_idx(vdev, n);
3448 }
3449 }
3450
3451 static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
3452 {
3453 /* used idx was updated through set_last_avail_idx() */
3454 return;
3455 }
3456
3457 static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n)
3458 {
3459 RCU_READ_LOCK_GUARD();
3460 if (vdev->vq[n].vring.desc) {
3461 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
3462 }
3463 }
3464
3465 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
3466 {
3467 if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
3468 return virtio_queue_packed_update_used_idx(vdev, n);
3469 } else {
3470 return virtio_queue_split_update_used_idx(vdev, n);
3471 }
3472 }
3473
3474 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
3475 {
3476 vdev->vq[n].signalled_used_valid = false;
3477 }
3478
3479 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
3480 {
3481 return vdev->vq + n;
3482 }
3483
3484 uint16_t virtio_get_queue_index(VirtQueue *vq)
3485 {
3486 return vq->queue_index;
3487 }
3488
3489 static void virtio_queue_guest_notifier_read(EventNotifier *n)
3490 {
3491 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
3492 if (event_notifier_test_and_clear(n)) {
3493 virtio_irq(vq);
3494 }
3495 }
3496
3497 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
3498 bool with_irqfd)
3499 {
3500 if (assign && !with_irqfd) {
3501 event_notifier_set_handler(&vq->guest_notifier,
3502 virtio_queue_guest_notifier_read);
3503 } else {
3504 event_notifier_set_handler(&vq->guest_notifier, NULL);
3505 }
3506 if (!assign) {
3507 /* Test and clear notifier before closing it,
3508 * in case poll callback didn't have time to run. */
3509 virtio_queue_guest_notifier_read(&vq->guest_notifier);
3510 }
3511 }
3512
3513 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
3514 {
3515 return &vq->guest_notifier;
3516 }
3517
3518 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
3519 {
3520 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3521 if (event_notifier_test_and_clear(n)) {
3522 virtio_queue_notify_aio_vq(vq);
3523 }
3524 }
3525
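/*
 * Adaptive polling hooks: while the AioContext busy-polls the host notifier,
 * guest notifications are suppressed (poll_begin) and re-enabled once
 * polling stops (poll_end).
 */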
3526 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
3527 {
3528 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3529
3530 virtio_queue_set_notification(vq, 0);
3531 }
3532
3533 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
3534 {
3535 EventNotifier *n = opaque;
3536 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3537
3538 if (!vq->vring.desc || virtio_queue_empty(vq)) {
3539 return false;
3540 }
3541
3542 return virtio_queue_notify_aio_vq(vq);
3543 }
3544
3545 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
3546 {
3547 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3548
3549 /* Caller polls once more after this to catch requests that race with us */
3550 virtio_queue_set_notification(vq, 1);
3551 }
3552
3553 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
3554 VirtIOHandleAIOOutput handle_output)
3555 {
3556 if (handle_output) {
3557 vq->handle_aio_output = handle_output;
3558 aio_set_event_notifier(ctx, &vq->host_notifier, true,
3559 virtio_queue_host_notifier_aio_read,
3560 virtio_queue_host_notifier_aio_poll);
3561 aio_set_event_notifier_poll(ctx, &vq->host_notifier,
3562 virtio_queue_host_notifier_aio_poll_begin,
3563 virtio_queue_host_notifier_aio_poll_end);
3564 } else {
3565 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
3566 /* Test and clear notifier after disabling event,
3567 * in case poll callback didn't have time to run. */
3568 virtio_queue_host_notifier_aio_read(&vq->host_notifier);
3569 vq->handle_aio_output = NULL;
3570 }
3571 }
3572
3573 void virtio_queue_host_notifier_read(EventNotifier *n)
3574 {
3575 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
3576 if (event_notifier_test_and_clear(n)) {
3577 virtio_queue_notify_vq(vq);
3578 }
3579 }
3580
3581 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
3582 {
3583 return &vq->host_notifier;
3584 }
3585
3586 void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
3587 {
3588 vq->host_notifier_enabled = enabled;
3589 }
3590
3591 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
3592 MemoryRegion *mr, bool assign)
3593 {
3594 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3595 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
3596
3597 if (k->set_host_notifier_mr) {
3598 return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
3599 }
3600
3601 return -1;
3602 }
3603
3604 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
3605 {
3606 g_free(vdev->bus_name);
3607 vdev->bus_name = g_strdup(bus_name);
3608 }
3609
3610 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
3611 {
3612 va_list ap;
3613
3614 va_start(ap, fmt);
3615 error_vreport(fmt, ap);
3616 va_end(ap);
3617
3618 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
3619 vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
3620 virtio_notify_config(vdev);
3621 }
3622
3623 vdev->broken = true;
3624 }
3625
3626 static void virtio_memory_listener_commit(MemoryListener *listener)
3627 {
3628 VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
3629 int i;
3630
3631 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3632 if (vdev->vq[i].vring.num == 0) {
3633 break;
3634 }
3635 virtio_init_region_cache(vdev, i);
3636 }
3637 }
3638
3639 static void virtio_device_realize(DeviceState *dev, Error **errp)
3640 {
3641 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3642 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3643 Error *err = NULL;
3644
3645 /* Devices should either use vmsd or the load/save methods */
3646 assert(!vdc->vmsd || !vdc->load);
3647
3648 if (vdc->realize != NULL) {
3649 vdc->realize(dev, &err);
3650 if (err != NULL) {
3651 error_propagate(errp, err);
3652 return;
3653 }
3654 }
3655
3656 virtio_bus_device_plugged(vdev, &err);
3657 if (err != NULL) {
3658 error_propagate(errp, err);
3659 vdc->unrealize(dev);
3660 return;
3661 }
3662
3663 vdev->listener.commit = virtio_memory_listener_commit;
3664 vdev->listener.name = "virtio";
3665 memory_listener_register(&vdev->listener, vdev->dma_as);
3666 }
3667
3668 static void virtio_device_unrealize(DeviceState *dev)
3669 {
3670 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3671 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3672
3673 memory_listener_unregister(&vdev->listener);
3674 virtio_bus_device_unplugged(vdev);
3675
3676 if (vdc->unrealize != NULL) {
3677 vdc->unrealize(dev);
3678 }
3679
3680 g_free(vdev->bus_name);
3681 vdev->bus_name = NULL;
3682 }
3683
3684 static void virtio_device_free_virtqueues(VirtIODevice *vdev)
3685 {
3686 int i;
3687 if (!vdev->vq) {
3688 return;
3689 }
3690
3691 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
3692 if (vdev->vq[i].vring.num == 0) {
3693 break;
3694 }
3695 virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
3696 }
3697 g_free(vdev->vq);
3698 }
3699
3700 static void virtio_device_instance_finalize(Object *obj)
3701 {
3702 VirtIODevice *vdev = VIRTIO_DEVICE(obj);
3703
3704 virtio_device_free_virtqueues(vdev);
3705
3706 g_free(vdev->config);
3707 g_free(vdev->vector_queues);
3708 }
3709
3710 static Property virtio_properties[] = {
3711 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
3712 DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
3713 DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true),
3714 DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice,
3715 disable_legacy_check, false),
3716 DEFINE_PROP_END_OF_LIST(),
3717 };
3718
3719 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
3720 {
3721 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3722 int i, n, r, err;
3723
3724 /*
3725 * Batch all the host notifiers in a single transaction to avoid
3726 * quadratic time complexity in address_space_update_ioeventfds().
3727 */
3728 memory_region_transaction_begin();
3729 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3730 VirtQueue *vq = &vdev->vq[n];
3731 if (!virtio_queue_get_num(vdev, n)) {
3732 continue;
3733 }
3734 r = virtio_bus_set_host_notifier(qbus, n, true);
3735 if (r < 0) {
3736 err = r;
3737 goto assign_error;
3738 }
3739 event_notifier_set_handler(&vq->host_notifier,
3740 virtio_queue_host_notifier_read);
3741 }
3742
3743 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3744 /* Kick right away to begin processing requests already in vring */
3745 VirtQueue *vq = &vdev->vq[n];
3746 if (!vq->vring.num) {
3747 continue;
3748 }
3749 event_notifier_set(&vq->host_notifier);
3750 }
3751 memory_region_transaction_commit();
3752 return 0;
3753
3754 assign_error:
3755 i = n; /* save n for a second iteration after transaction is committed. */
3756 while (--n >= 0) {
3757 VirtQueue *vq = &vdev->vq[n];
3758 if (!virtio_queue_get_num(vdev, n)) {
3759 continue;
3760 }
3761
3762 event_notifier_set_handler(&vq->host_notifier, NULL);
3763 r = virtio_bus_set_host_notifier(qbus, n, false);
3764 assert(r >= 0);
3765 }
3766 /*
3767 * The transaction expects the ioeventfds to be open when it
3768 * commits. Do it now, before the cleanup loop.
3769 */
3770 memory_region_transaction_commit();
3771
3772 while (--i >= 0) {
3773 if (!virtio_queue_get_num(vdev, i)) {
3774 continue;
3775 }
3776 virtio_bus_cleanup_host_notifier(qbus, i);
3777 }
3778 return err;
3779 }
3780
3781 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
3782 {
3783 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3784 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3785
3786 return virtio_bus_start_ioeventfd(vbus);
3787 }
3788
3789 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
3790 {
3791 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
3792 int n, r;
3793
3794 /*
3795 * Batch all the host notifiers in a single transaction to avoid
3796 * quadratic time complexity in address_space_update_ioeventfds().
3797 */
3798 memory_region_transaction_begin();
3799 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3800 VirtQueue *vq = &vdev->vq[n];
3801
3802 if (!virtio_queue_get_num(vdev, n)) {
3803 continue;
3804 }
3805 event_notifier_set_handler(&vq->host_notifier, NULL);
3806 r = virtio_bus_set_host_notifier(qbus, n, false);
3807 assert(r >= 0);
3808 }
3809 /*
3810 * The transaction expects the ioeventfds to be open when it
3811 * commits. Do it now, before the cleanup loop.
3812 */
3813 memory_region_transaction_commit();
3814
3815 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
3816 if (!virtio_queue_get_num(vdev, n)) {
3817 continue;
3818 }
3819 virtio_bus_cleanup_host_notifier(qbus, n);
3820 }
3821 }
3822
3823 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
3824 {
3825 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3826 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3827
3828 return virtio_bus_grab_ioeventfd(vbus);
3829 }
3830
3831 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
3832 {
3833 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3834 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3835
3836 virtio_bus_release_ioeventfd(vbus);
3837 }
3838
3839 static void virtio_device_class_init(ObjectClass *klass, void *data)
3840 {
3841 /* Set the default value here. */
3842 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3843 DeviceClass *dc = DEVICE_CLASS(klass);
3844
3845 dc->realize = virtio_device_realize;
3846 dc->unrealize = virtio_device_unrealize;
3847 dc->bus_type = TYPE_VIRTIO_BUS;
3848 device_class_set_props(dc, virtio_properties);
3849 vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
3850 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
3851
3852 vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
3853 }
3854
3855 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
3856 {
3857 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
3858 VirtioBusState *vbus = VIRTIO_BUS(qbus);
3859
3860 return virtio_bus_ioeventfd_enabled(vbus);
3861 }
3862
3863 static const TypeInfo virtio_device_info = {
3864 .name = TYPE_VIRTIO_DEVICE,
3865 .parent = TYPE_DEVICE,
3866 .instance_size = sizeof(VirtIODevice),
3867 .class_init = virtio_device_class_init,
3868 .instance_finalize = virtio_device_instance_finalize,
3869 .abstract = true,
3870 .class_size = sizeof(VirtioDeviceClass),
3871 };
3872
3873 static void virtio_register_types(void)
3874 {
3875 type_register_static(&virtio_device_info);
3876 }
3877
3878 type_init(virtio_register_types)