vhost: improve region filtering
[qemu.git] / hw / vhost.c
1 /*
2 * vhost support
3 *
4 * Copyright Red Hat, Inc. 2010
5 *
6 * Authors:
7 * Michael S. Tsirkin <mst@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 */
12
13 #include <sys/ioctl.h>
14 #include "vhost.h"
15 #include "hw/hw.h"
16 #include "range.h"
17 #include <linux/vhost.h>
18 #include "exec-memory.h"
19
20 static void vhost_dev_sync_region(struct vhost_dev *dev,
21 MemoryRegionSection *section,
22 uint64_t mfirst, uint64_t mlast,
23 uint64_t rfirst, uint64_t rlast)
24 {
25 uint64_t start = MAX(mfirst, rfirst);
26 uint64_t end = MIN(mlast, rlast);
27 vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
28 vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
29 uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
30
31 assert(end / VHOST_LOG_CHUNK < dev->log_size);
32 assert(start / VHOST_LOG_CHUNK < dev->log_size);
33 if (end < start) {
34 return;
35 }
36 for (;from < to; ++from) {
37 vhost_log_chunk_t log;
38 int bit;
39 /* We first check with a non-atomic read: much cheaper,
40 * and we expect non-dirty to be the common case. */
41 if (!*from) {
42 addr += VHOST_LOG_CHUNK;
43 continue;
44 }
45 /* Data must be read atomically. We don't really
46 * need the barrier semantics of the __sync
47 * builtins, but it's easier to use them than to
48 * roll our own. */
49 log = __sync_fetch_and_and(from, 0);
50 while ((bit = sizeof(log) > sizeof(int) ?
51 ffsll(log) : ffs(log))) {
52 ram_addr_t ram_addr;
53 bit -= 1;
54 ram_addr = section->offset_within_region + bit * VHOST_LOG_PAGE;
55 memory_region_set_dirty(section->mr, ram_addr);
56 log &= ~(0x1ull << bit);
57 }
58 addr += VHOST_LOG_CHUNK;
59 }
60 }
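/* A minimal sketch of the chunk/bit arithmetic used above, assuming the
 * usual definitions in vhost.h (VHOST_LOG_PAGE == 0x1000 and a 64-bit
 * vhost_log_chunk_t, so one chunk covers 64 pages, i.e. 256KB of guest
 * memory).  The helper name and the use of __builtin_ctzll are
 * illustrative only; the block is not compiled in. */
#if 0
static void vhost_sketch_scan_chunk(vhost_log_chunk_t chunk,
                                    uint64_t chunk_base)
{
    while (chunk) {
        int bit = __builtin_ctzll(chunk);  /* index of the lowest set bit */
        uint64_t dirty_page = chunk_base + (uint64_t)bit * VHOST_LOG_PAGE;
        /* the real loop calls memory_region_set_dirty() at this point */
        (void)dirty_page;
        chunk &= chunk - 1;                /* clear the bit just handled */
    }
}
#endif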
61
62 static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
63 MemoryRegionSection *section,
64 target_phys_addr_t start_addr,
65 target_phys_addr_t end_addr)
66 {
67 int i;
68
69 if (!dev->log_enabled || !dev->started) {
70 return 0;
71 }
72 for (i = 0; i < dev->mem->nregions; ++i) {
73 struct vhost_memory_region *reg = dev->mem->regions + i;
74 vhost_dev_sync_region(dev, section, start_addr, end_addr,
75 reg->guest_phys_addr,
76 range_get_last(reg->guest_phys_addr,
77 reg->memory_size));
78 }
79 for (i = 0; i < dev->nvqs; ++i) {
80 struct vhost_virtqueue *vq = dev->vqs + i;
81 vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
82 range_get_last(vq->used_phys, vq->used_size));
83 }
84 return 0;
85 }
86
87 static void vhost_log_sync(MemoryListener *listener,
88 MemoryRegionSection *section)
89 {
90 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
91 memory_listener);
92 target_phys_addr_t start_addr = section->offset_within_address_space;
93 target_phys_addr_t end_addr = start_addr + section->size;
94
95 vhost_sync_dirty_bitmap(dev, section, start_addr, end_addr);
96 }
97
98 /* Assign/unassign. Keep an unsorted array of non-overlapping
99 * memory regions in dev->mem. */
100 static void vhost_dev_unassign_memory(struct vhost_dev *dev,
101 uint64_t start_addr,
102 uint64_t size)
103 {
104 int from, to, n = dev->mem->nregions;
105 /* Track overlapping/split regions for sanity checking. */
106 int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;
107
108 for (from = 0, to = 0; from < n; ++from, ++to) {
109 struct vhost_memory_region *reg = dev->mem->regions + to;
110 uint64_t reglast;
111 uint64_t memlast;
112 uint64_t change;
113
114 /* clone old region */
115 if (to != from) {
116 memcpy(reg, dev->mem->regions + from, sizeof *reg);
117 }
118
119 /* No overlap is simple */
120 if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
121 start_addr, size)) {
122 continue;
123 }
124
125 /* Split only happens if supplied region
126 * is in the middle of an existing one. Thus it can not
127 * overlap with any other existing region. */
128 assert(!split);
129
130 reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
131 memlast = range_get_last(start_addr, size);
132
133 /* Remove whole region */
134 if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
135 --dev->mem->nregions;
136 --to;
137 ++overlap_middle;
138 continue;
139 }
140
141 /* Shrink region */
142 if (memlast >= reglast) {
143 reg->memory_size = start_addr - reg->guest_phys_addr;
144 assert(reg->memory_size);
145 assert(!overlap_end);
146 ++overlap_end;
147 continue;
148 }
149
150 /* Shift region */
151 if (start_addr <= reg->guest_phys_addr) {
152 change = memlast + 1 - reg->guest_phys_addr;
153 reg->memory_size -= change;
154 reg->guest_phys_addr += change;
155 reg->userspace_addr += change;
156 assert(reg->memory_size);
157 assert(!overlap_start);
158 ++overlap_start;
159 continue;
160 }
161
162 /* This only happens if supplied region
163 * is in the middle of an existing one. Thus it can not
164 * overlap with any other existing region. */
165 assert(!overlap_start);
166 assert(!overlap_end);
167 assert(!overlap_middle);
168 /* Split region: shrink first part, shift second part. */
169 memcpy(dev->mem->regions + n, reg, sizeof *reg);
170 reg->memory_size = start_addr - reg->guest_phys_addr;
171 assert(reg->memory_size);
172 change = memlast + 1 - reg->guest_phys_addr;
173 reg = dev->mem->regions + n;
174 reg->memory_size -= change;
175 assert(reg->memory_size);
176 reg->guest_phys_addr += change;
177 reg->userspace_addr += change;
178 /* Never add more than 1 region */
179 assert(dev->mem->nregions == n);
180 ++dev->mem->nregions;
181 ++split;
182 }
183 }
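/* Worked example for the split case above (addresses are made up for
 * illustration): with a single region covering guest-physical
 * [0x0000, 0xffff], unassigning [0x4000, 0x7fff] shrinks the original
 * entry to [0x0000, 0x3fff] and appends a new entry covering
 * [0x8000, 0xffff], with its userspace_addr advanced by the same 0x8000.
 * The other three cases (remove whole, trim the tail, trim the head)
 * each touch at most one existing entry, which is what the overlap and
 * split counters assert. */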
184
185 /* Called after unassign, so no regions overlap the given range. */
186 static void vhost_dev_assign_memory(struct vhost_dev *dev,
187 uint64_t start_addr,
188 uint64_t size,
189 uint64_t uaddr)
190 {
191 int from, to;
192 struct vhost_memory_region *merged = NULL;
193 for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
194 struct vhost_memory_region *reg = dev->mem->regions + to;
195 uint64_t prlast, urlast;
196 uint64_t pmlast, umlast;
197 uint64_t s, e, u;
198
199 /* clone old region */
200 if (to != from) {
201 memcpy(reg, dev->mem->regions + from, sizeof *reg);
202 }
203 prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
204 pmlast = range_get_last(start_addr, size);
205 urlast = range_get_last(reg->userspace_addr, reg->memory_size);
206 umlast = range_get_last(uaddr, size);
207
208 /* check for overlapping regions: should never happen. */
209 assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
210 /* Not an adjacent or overlapping region - do not merge. */
211 if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
212 (pmlast + 1 != reg->guest_phys_addr ||
213 umlast + 1 != reg->userspace_addr)) {
214 continue;
215 }
216
217 if (merged) {
218 --to;
219 assert(to >= 0);
220 } else {
221 merged = reg;
222 }
223 u = MIN(uaddr, reg->userspace_addr);
224 s = MIN(start_addr, reg->guest_phys_addr);
225 e = MAX(pmlast, prlast);
226 uaddr = merged->userspace_addr = u;
227 start_addr = merged->guest_phys_addr = s;
228 size = merged->memory_size = e - s + 1;
229 assert(merged->memory_size);
230 }
231
232 if (!merged) {
233 struct vhost_memory_region *reg = dev->mem->regions + to;
234 memset(reg, 0, sizeof *reg);
235 reg->memory_size = size;
236 assert(reg->memory_size);
237 reg->guest_phys_addr = start_addr;
238 reg->userspace_addr = uaddr;
239 ++to;
240 }
241 assert(to <= dev->mem->nregions + 1);
242 dev->mem->nregions = to;
243 }
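/* Merge example (illustrative numbers): a new mapping with
 * guest_phys_addr 0x8000, size 0x8000 and userspace_addr 0x700008000
 * merges with an existing region [0x0000, 0x7fff] at userspace_addr
 * 0x700000000 only because both the guest-physical range and the
 * userspace range are contiguous; a gap in either keeps the regions
 * separate.  Folding adjacent neighbours like this keeps the table
 * later passed to VHOST_SET_MEM_TABLE small. */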
244
245 static uint64_t vhost_get_log_size(struct vhost_dev *dev)
246 {
247 uint64_t log_size = 0;
248 int i;
249 for (i = 0; i < dev->mem->nregions; ++i) {
250 struct vhost_memory_region *reg = dev->mem->regions + i;
251 uint64_t last = range_get_last(reg->guest_phys_addr,
252 reg->memory_size);
253 log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
254 }
255 for (i = 0; i < dev->nvqs; ++i) {
256 struct vhost_virtqueue *vq = dev->vqs + i;
257 uint64_t last = vq->used_phys + vq->used_size - 1;
258 log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
259 }
260 return log_size;
261 }
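/* Sizing example, again assuming 4K pages and 64-bit log chunks (one
 * chunk covers 256KB of guest memory): a guest with 1GB of RAM starting
 * at guest-physical 0 has last address 0x3fffffff, giving
 * 0x3fffffff / 0x40000 + 1 = 4096 chunks, i.e. a 32KB log buffer.  The
 * size is driven by the highest address covered, so RAM placed high in
 * the address space costs proportionally more log space. */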
262
263 static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
264 {
265 vhost_log_chunk_t *log;
266 uint64_t log_base;
267 int r, i;
268 if (size) {
269 log = g_malloc0(size * sizeof *log);
270 } else {
271 log = NULL;
272 }
273 log_base = (uint64_t)(unsigned long)log;
274 r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
275 assert(r >= 0);
276 for (i = 0; i < dev->n_mem_sections; ++i) {
277 vhost_sync_dirty_bitmap(dev, &dev->mem_sections[i],
278 0, (target_phys_addr_t)~0x0ull);
279 }
280 if (dev->log) {
281 g_free(dev->log);
282 }
283 dev->log = log;
284 dev->log_size = size;
285 }
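/* Note on ordering in the function above: the kernel is pointed at the
 * new log first, then the per-section sync runs while dev->log still
 * refers to the old buffer, so dirty bits the kernel set there before
 * the switch are flushed into QEMU's bitmap instead of being lost when
 * the old buffer is freed. */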
286
287 static int vhost_verify_ring_mappings(struct vhost_dev *dev,
288 uint64_t start_addr,
289 uint64_t size)
290 {
291 int i;
292 for (i = 0; i < dev->nvqs; ++i) {
293 struct vhost_virtqueue *vq = dev->vqs + i;
294 target_phys_addr_t l;
295 void *p;
296
297 if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
298 continue;
299 }
300 l = vq->ring_size;
301 p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
302 if (!p || l != vq->ring_size) {
303 fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
304 return -ENOMEM;
305 }
306 if (p != vq->ring) {
307 fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
308 return -EBUSY;
309 }
310 cpu_physical_memory_unmap(p, l, 0, 0);
311 }
312 return 0;
313 }
314
315 static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
316 uint64_t start_addr,
317 uint64_t size)
318 {
319 int i, n = dev->mem->nregions;
320 for (i = 0; i < n; ++i) {
321 struct vhost_memory_region *reg = dev->mem->regions + i;
322 if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
323 start_addr, size)) {
324 return reg;
325 }
326 }
327 return NULL;
328 }
329
330 static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
331 uint64_t start_addr,
332 uint64_t size,
333 uint64_t uaddr)
334 {
335 struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
336 uint64_t reglast;
337 uint64_t memlast;
338
339 if (!reg) {
340 return true;
341 }
342
343 reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
344 memlast = range_get_last(start_addr, size);
345
346 /* Need to extend region? */
347 if (start_addr < reg->guest_phys_addr || memlast > reglast) {
348 return true;
349 }
350 /* userspace_addr changed? */
351 return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
352 }
353
354 static void vhost_set_memory(MemoryListener *listener,
355 MemoryRegionSection *section,
356 bool add)
357 {
358 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
359 memory_listener);
360 target_phys_addr_t start_addr = section->offset_within_address_space;
361 ram_addr_t size = section->size;
362 bool log_dirty = memory_region_is_logging(section->mr);
363 int s = offsetof(struct vhost_memory, regions) +
364 (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
365 uint64_t log_size;
366 int r;
367 void *ram;
368
369 dev->mem = g_realloc(dev->mem, s);
370
371 if (log_dirty) {
372 add = false;
373 }
374
375 assert(size);
376
377 /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
378 ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
379 if (add) {
380 if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
381 /* Region exists with same address. Nothing to do. */
382 return;
383 }
384 } else {
385 if (!vhost_dev_find_reg(dev, start_addr, size)) {
386 /* Removing region that we don't access. Nothing to do. */
387 return;
388 }
389 }
390
391 vhost_dev_unassign_memory(dev, start_addr, size);
392 if (add) {
393 /* Add given mapping, merging adjacent regions if any */
394 vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
395 } else {
396 /* Remove old mapping for this memory, if any. */
397 vhost_dev_unassign_memory(dev, start_addr, size);
398 }
399
400 if (!dev->started) {
401 return;
402 }
403
404 /* dev->started is known to be true past the early return above,
405 * so just re-verify that the rings are still mapped in place. */
406 r = vhost_verify_ring_mappings(dev, start_addr, size);
407 assert(r >= 0);
408
409 if (!dev->log_enabled) {
410 r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
411 assert(r >= 0);
412 return;
413 }
414 log_size = vhost_get_log_size(dev);
415 /* We allocate an extra 4K bytes to the log,
416 * to reduce the number of reallocations. */
417 #define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
418 /* To log more, must increase log size before table update. */
419 if (dev->log_size < log_size) {
420 vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
421 }
422 r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
423 assert(r >= 0);
424 /* To log less, can only decrease log size after table update. */
425 if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
426 vhost_dev_log_resize(dev, log_size);
427 }
428 }
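/* Concrete example of the grow-before/shrink-after rule above: if a new
 * RAM region raises the required log size from 4096 to 8192 chunks, the
 * log is resized to 8192 + VHOST_LOG_BUFFER chunks and handed to the
 * kernel before VHOST_SET_MEM_TABLE, so nothing the kernel may dirty
 * ever falls outside the log; when a region goes away, the table is
 * updated first and the log shrunk only afterwards, for the same reason.
 * VHOST_LOG_BUFFER (512 chunks with 64-bit chunks, i.e. 4KB of log) is
 * slack to avoid a reallocation on every small layout change. */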
429
430 static bool vhost_section(MemoryRegionSection *section)
431 {
432 return section->address_space == get_system_memory()
433 && memory_region_is_ram(section->mr);
434 }
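/* Only RAM-backed sections of the system memory address space pass this
 * filter; MMIO regions and sections from other address spaces (such as
 * the I/O space) are skipped by the listener callbacks below, since the
 * in-kernel vhost worker can only reach guest memory through the
 * userspace mappings described in its memory table. */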
435
436 static void vhost_region_add(MemoryListener *listener,
437 MemoryRegionSection *section)
438 {
439 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
440 memory_listener);
441
442 if (!vhost_section(section)) {
443 return;
444 }
445
446 ++dev->n_mem_sections;
447 dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
448 dev->n_mem_sections);
449 dev->mem_sections[dev->n_mem_sections - 1] = *section;
450 vhost_set_memory(listener, section, true);
451 }
452
453 static void vhost_region_del(MemoryListener *listener,
454 MemoryRegionSection *section)
455 {
456 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
457 memory_listener);
458 int i;
459
460 if (!vhost_section(section)) {
461 return;
462 }
463
464 vhost_set_memory(listener, section, false);
465 for (i = 0; i < dev->n_mem_sections; ++i) {
466 if (dev->mem_sections[i].offset_within_address_space
467 == section->offset_within_address_space) {
468 --dev->n_mem_sections;
469 memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
470 (dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
471 break;
472 }
473 }
474 }
475
476 static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
477 struct vhost_virtqueue *vq,
478 unsigned idx, bool enable_log)
479 {
480 struct vhost_vring_addr addr = {
481 .index = idx,
482 .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
483 .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
484 .used_user_addr = (uint64_t)(unsigned long)vq->used,
485 .log_guest_addr = vq->used_phys,
486 .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
487 };
488 int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
489 if (r < 0) {
490 return -errno;
491 }
492 return 0;
493 }
494
495 static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
496 {
497 uint64_t features = dev->acked_features;
498 int r;
499 if (enable_log) {
500 features |= 0x1 << VHOST_F_LOG_ALL;
501 }
502 r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
503 return r < 0 ? -errno : 0;
504 }
505
506 static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
507 {
508 int r, t, i;
509 r = vhost_dev_set_features(dev, enable_log);
510 if (r < 0) {
511 goto err_features;
512 }
513 for (i = 0; i < dev->nvqs; ++i) {
514 r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
515 enable_log);
516 if (r < 0) {
517 goto err_vq;
518 }
519 }
520 return 0;
521 err_vq:
522 for (; i >= 0; --i) {
523 t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
524 dev->log_enabled);
525 assert(t >= 0);
526 }
527 t = vhost_dev_set_features(dev, dev->log_enabled);
528 assert(t >= 0);
529 err_features:
530 return r;
531 }
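/* Note on the two-step toggle above: VHOST_F_LOG_ALL is negotiated first
 * so the kernel starts logging writes, and only then is each vring
 * re-programmed with the VHOST_VRING_F_LOG flag; if a ring update fails,
 * the rollback loop restores the already-updated rings and the feature
 * bits to the previous log_enabled state, so the device is never left
 * half-configured. */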
532
533 static int vhost_migration_log(MemoryListener *listener, int enable)
534 {
535 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
536 memory_listener);
537 int r;
538 if (!!enable == dev->log_enabled) {
539 return 0;
540 }
541 if (!dev->started) {
542 dev->log_enabled = enable;
543 return 0;
544 }
545 if (!enable) {
546 r = vhost_dev_set_log(dev, false);
547 if (r < 0) {
548 return r;
549 }
550 if (dev->log) {
551 g_free(dev->log);
552 }
553 dev->log = NULL;
554 dev->log_size = 0;
555 } else {
556 vhost_dev_log_resize(dev, vhost_get_log_size(dev));
557 r = vhost_dev_set_log(dev, true);
558 if (r < 0) {
559 return r;
560 }
561 }
562 dev->log_enabled = enable;
563 return 0;
564 }
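/* Rough call flow, assuming the usual migration path: when RAM migration
 * begins, the memory core runs every listener's log_global_start hook,
 * which lands in vhost_log_global_start() below and enables dirty
 * logging via vhost_migration_log(..., true); log_global_stop undoes it
 * when migration completes or fails.  If the device is not started yet,
 * only log_enabled is recorded here and the kernel side is programmed
 * later by vhost_dev_start(). */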
565
566 static void vhost_log_global_start(MemoryListener *listener)
567 {
568 int r;
569
570 r = vhost_migration_log(listener, true);
571 if (r < 0) {
572 abort();
573 }
574 }
575
576 static void vhost_log_global_stop(MemoryListener *listener)
577 {
578 int r;
579
580 r = vhost_migration_log(listener, false);
581 if (r < 0) {
582 abort();
583 }
584 }
585
586 static void vhost_log_start(MemoryListener *listener,
587 MemoryRegionSection *section)
588 {
589 /* FIXME: implement */
590 }
591
592 static void vhost_log_stop(MemoryListener *listener,
593 MemoryRegionSection *section)
594 {
595 /* FIXME: implement */
596 }
597
598 static int vhost_virtqueue_init(struct vhost_dev *dev,
599 struct VirtIODevice *vdev,
600 struct vhost_virtqueue *vq,
601 unsigned idx)
602 {
603 target_phys_addr_t s, l, a;
604 int r;
605 struct vhost_vring_file file = {
606 .index = idx,
607 };
608 struct vhost_vring_state state = {
609 .index = idx,
610 };
611 struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
612
613 vq->num = state.num = virtio_queue_get_num(vdev, idx);
614 r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
615 if (r) {
616 return -errno;
617 }
618
619 state.num = virtio_queue_get_last_avail_idx(vdev, idx);
620 r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
621 if (r) {
622 return -errno;
623 }
624
625 s = l = virtio_queue_get_desc_size(vdev, idx);
626 a = virtio_queue_get_desc_addr(vdev, idx);
627 vq->desc = cpu_physical_memory_map(a, &l, 0);
628 if (!vq->desc || l != s) {
629 r = -ENOMEM;
630 goto fail_alloc_desc;
631 }
632 s = l = virtio_queue_get_avail_size(vdev, idx);
633 a = virtio_queue_get_avail_addr(vdev, idx);
634 vq->avail = cpu_physical_memory_map(a, &l, 0);
635 if (!vq->avail || l != s) {
636 r = -ENOMEM;
637 goto fail_alloc_avail;
638 }
639 vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
640 vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
641 vq->used = cpu_physical_memory_map(a, &l, 1);
642 if (!vq->used || l != s) {
643 r = -ENOMEM;
644 goto fail_alloc_used;
645 }
646
647 vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
648 vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
649 vq->ring = cpu_physical_memory_map(a, &l, 1);
650 if (!vq->ring || l != s) {
651 r = -ENOMEM;
652 goto fail_alloc_ring;
653 }
654
655 r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
656 if (r < 0) {
657 r = -errno;
658 goto fail_alloc;
659 }
660 file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
661 r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
662 if (r) {
663 r = -errno;
664 goto fail_kick;
665 }
666
667 file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
668 r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
669 if (r) {
670 r = -errno;
671 goto fail_call;
672 }
673
674 return 0;
675
676 fail_call:
677 fail_kick:
678 fail_alloc:
679 cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
680 0, 0);
681 fail_alloc_ring:
682 cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
683 0, 0);
684 fail_alloc_used:
685 cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
686 0, 0);
687 fail_alloc_avail:
688 cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
689 0, 0);
690 fail_alloc_desc:
691 return r;
692 }
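/* Size example for the mappings above, for a virtio ring with 256
 * entries (the exact values come from the virtio_queue_get_*_size()
 * helpers, so treat these numbers as illustrative): the descriptor table
 * is 256 * 16 = 4096 bytes, the avail ring is 4 + 2 * 256 = 516 bytes
 * (plus 2 when VIRTIO_RING_F_EVENT_IDX is negotiated) and the used ring
 * is 4 + 8 * 256 = 2052 bytes (again plus 2 with event-idx).  Each area
 * must map contiguously in one piece, otherwise the function fails with
 * -ENOMEM. */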
693
694 static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
695 struct VirtIODevice *vdev,
696 struct vhost_virtqueue *vq,
697 unsigned idx)
698 {
699 struct vhost_vring_state state = {
700 .index = idx,
701 };
702 int r;
703 r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
704 if (r < 0) {
705 fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
706 fflush(stderr);
707 }
708 virtio_queue_set_last_avail_idx(vdev, idx, state.num);
709 assert (r >= 0);
710 cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
711 0, virtio_queue_get_ring_size(vdev, idx));
712 cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
713 1, virtio_queue_get_used_size(vdev, idx));
714 cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
715 0, virtio_queue_get_avail_size(vdev, idx));
716 cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
717 0, virtio_queue_get_desc_size(vdev, idx));
718 }
719
720 int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
721 {
722 uint64_t features;
723 int r;
724 if (devfd >= 0) {
725 hdev->control = devfd;
726 } else {
727 hdev->control = open("/dev/vhost-net", O_RDWR);
728 if (hdev->control < 0) {
729 return -errno;
730 }
731 }
732 r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
733 if (r < 0) {
734 goto fail;
735 }
736
737 r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
738 if (r < 0) {
739 goto fail;
740 }
741 hdev->features = features;
742
743 hdev->memory_listener = (MemoryListener) {
744 .region_add = vhost_region_add,
745 .region_del = vhost_region_del,
746 .log_start = vhost_log_start,
747 .log_stop = vhost_log_stop,
748 .log_sync = vhost_log_sync,
749 .log_global_start = vhost_log_global_start,
750 .log_global_stop = vhost_log_global_stop,
751 };
752 hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
753 hdev->n_mem_sections = 0;
754 hdev->mem_sections = NULL;
755 hdev->log = NULL;
756 hdev->log_size = 0;
757 hdev->log_enabled = false;
758 hdev->started = false;
759 memory_listener_register(&hdev->memory_listener);
760 hdev->force = force;
761 return 0;
762 fail:
763 r = -errno;
764 close(hdev->control);
765 return r;
766 }
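/* A minimal usage sketch, modelled on how vhost_net drives this API.
 * The vqs/nvqs fields exist in struct vhost_dev, but the helper name is
 * hypothetical, error handling is abbreviated and the block is not
 * compiled in. */
#if 0
static int vhost_sketch_bringup(struct vhost_dev *hdev, VirtIODevice *vdev,
                                struct vhost_virtqueue *vqs, int nvqs)
{
    int r = vhost_dev_init(hdev, -1 /* let it open /dev/vhost-net */, false);
    if (r < 0) {
        return r;
    }
    hdev->vqs = vqs;
    hdev->nvqs = nvqs;
    r = vhost_dev_enable_notifiers(hdev, vdev);   /* ioeventfd -> vhost */
    if (r < 0) {
        goto fail_cleanup;
    }
    r = vhost_dev_start(hdev, vdev);              /* program the kernel */
    if (r < 0) {
        goto fail_notifiers;
    }
    return 0;
fail_notifiers:
    vhost_dev_disable_notifiers(hdev, vdev);
fail_cleanup:
    vhost_dev_cleanup(hdev);
    return r;
}
#endif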
767
768 void vhost_dev_cleanup(struct vhost_dev *hdev)
769 {
770 memory_listener_unregister(&hdev->memory_listener);
771 g_free(hdev->mem);
772 g_free(hdev->mem_sections);
773 close(hdev->control);
774 }
775
776 bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
777 {
778 return !vdev->binding->query_guest_notifiers ||
779 vdev->binding->query_guest_notifiers(vdev->binding_opaque) ||
780 hdev->force;
781 }
782
783 /* Stop processing guest IO notifications in qemu.
784 * Start processing them in vhost in the kernel.
785 */
786 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
787 {
788 int i, r;
789 if (!vdev->binding->set_host_notifier) {
790 fprintf(stderr, "binding does not support host notifiers\n");
791 r = -ENOSYS;
792 goto fail;
793 }
794
795 for (i = 0; i < hdev->nvqs; ++i) {
796 r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
797 if (r < 0) {
798 fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
799 goto fail_vq;
800 }
801 }
802
803 return 0;
804 fail_vq:
805 while (--i >= 0) {
806 r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
807 if (r < 0) {
808 fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
809 fflush(stderr);
810 }
811 assert (r >= 0);
812 }
813 fail:
814 return r;
815 }
816
817 /* Stop processing guest IO notifications in vhost.
818 * Start processing them in qemu.
819 * This might actually run the qemu handlers right away,
820 * so virtio in qemu must be completely set up when this is called.
821 */
822 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
823 {
824 int i, r;
825
826 for (i = 0; i < hdev->nvqs; ++i) {
827 r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
828 if (r < 0) {
829 fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
830 fflush(stderr);
831 }
832 assert (r >= 0);
833 }
834 }
835
836 /* Host notifiers must be enabled at this point. */
837 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
838 {
839 int i, r;
840 if (!vdev->binding->set_guest_notifiers) {
841 fprintf(stderr, "binding does not support guest notifiers\n");
842 r = -ENOSYS;
843 goto fail;
844 }
845
846 r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
847 if (r < 0) {
848 fprintf(stderr, "Error binding guest notifier: %d\n", -r);
849 goto fail_notifiers;
850 }
851
852 r = vhost_dev_set_features(hdev, hdev->log_enabled);
853 if (r < 0) {
854 goto fail_features;
855 }
856 r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
857 if (r < 0) {
858 r = -errno;
859 goto fail_mem;
860 }
861 for (i = 0; i < hdev->nvqs; ++i) {
862 r = vhost_virtqueue_init(hdev,
863 vdev,
864 hdev->vqs + i,
865 i);
866 if (r < 0) {
867 goto fail_vq;
868 }
869 }
870
871 if (hdev->log_enabled) {
872 hdev->log_size = vhost_get_log_size(hdev);
873 hdev->log = hdev->log_size ?
874 g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
875 r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
876 (uint64_t)(unsigned long)hdev->log);
877 if (r < 0) {
878 r = -errno;
879 goto fail_log;
880 }
881 }
882
883 hdev->started = true;
884
885 return 0;
886 fail_log:
887 fail_vq:
888 while (--i >= 0) {
889 vhost_virtqueue_cleanup(hdev,
890 vdev,
891 hdev->vqs + i,
892 i);
893 }
894 fail_mem:
895 fail_features:
896 vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
897 fail_notifiers:
898 fail:
899 return r;
900 }
901
902 /* Host notifiers must be enabled at this point. */
903 void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
904 {
905 int i, r;
906
907 for (i = 0; i < hdev->nvqs; ++i) {
908 vhost_virtqueue_cleanup(hdev,
909 vdev,
910 hdev->vqs + i,
911 i);
912 }
913 for (i = 0; i < hdev->n_mem_sections; ++i) {
914 vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
915 0, (target_phys_addr_t)~0x0ull);
916 }
917 r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
918 if (r < 0) {
919 fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
920 fflush(stderr);
921 }
922 assert (r >= 0);
923
924 hdev->started = false;
925 g_free(hdev->log);
926 hdev->log = NULL;
927 hdev->log_size = 0;
928 }