git.proxmox.com Git - mirror_qemu.git/blame - hw/virtio/vhost.c
vhost: Clean out old vhost_set_memory and friends
d5970055
MT
1/*
2 * vhost support
3 *
4 * Copyright Red Hat, Inc. 2010
5 *
6 * Authors:
7 * Michael S. Tsirkin <mst@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
6b620ca3
PB
11 *
12 * Contributions after 2012-01-13 are licensed under the terms of the
13 * GNU GPL, version 2 or (at your option) any later version.
d5970055
MT
14 */
15
9b8bfe21 16#include "qemu/osdep.h"
da34e65c 17#include "qapi/error.h"
0d09e41a 18#include "hw/virtio/vhost.h"
d5970055 19#include "hw/hw.h"
5444e768 20#include "qemu/atomic.h"
1de7afc9 21#include "qemu/range.h"
04b7a152 22#include "qemu/error-report.h"
15324404 23#include "qemu/memfd.h"
11078ae3 24#include <linux/vhost.h>
022c62cb 25#include "exec/address-spaces.h"
1c819449 26#include "hw/virtio/virtio-bus.h"
04b7a152 27#include "hw/virtio/virtio-access.h"
795c40b8 28#include "migration/blocker.h"
c471ad0e 29#include "sysemu/dma.h"
d5970055 30
162bba7f
MAL
31/* enabled until disconnected backend stabilizes */
32#define _VHOST_DEBUG 1
33
34#ifdef _VHOST_DEBUG
35#define VHOST_OPS_DEBUG(fmt, ...) \
36 do { error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
37 strerror(errno), errno); } while (0)
38#else
39#define VHOST_OPS_DEBUG(fmt, ...) \
40 do { } while (0)
41#endif
42
309750fa 43static struct vhost_log *vhost_log;
15324404 44static struct vhost_log *vhost_log_shm;
309750fa 45
2ce68e4c
IM
46static unsigned int used_memslots;
47static QLIST_HEAD(, vhost_dev) vhost_devices =
48 QLIST_HEAD_INITIALIZER(vhost_devices);
49
50bool vhost_has_free_slot(void)
51{
52 unsigned int slots_limit = ~0U;
53 struct vhost_dev *hdev;
54
55 QLIST_FOREACH(hdev, &vhost_devices, entry) {
56 unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
57 slots_limit = MIN(slots_limit, r);
58 }
59 return slots_limit > used_memslots;
60}
61
d5970055 62static void vhost_dev_sync_region(struct vhost_dev *dev,
2817b260 63 MemoryRegionSection *section,
d5970055
MT
64 uint64_t mfirst, uint64_t mlast,
65 uint64_t rfirst, uint64_t rlast)
66{
309750fa
JW
67 vhost_log_chunk_t *log = dev->log->log;
68
d5970055
MT
69 uint64_t start = MAX(mfirst, rfirst);
70 uint64_t end = MIN(mlast, rlast);
309750fa
JW
71 vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK;
72 vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1;
33c5793b 73 uint64_t addr = QEMU_ALIGN_DOWN(start, VHOST_LOG_CHUNK);
d5970055 74
d5970055
MT
75 if (end < start) {
76 return;
77 }
e314672a 78 assert(end / VHOST_LOG_CHUNK < dev->log_size);
fbbaf9ae 79 assert(start / VHOST_LOG_CHUNK < dev->log_size);
e314672a 80
d5970055
MT
81 for (;from < to; ++from) {
82 vhost_log_chunk_t log;
d5970055
MT
83 /* We first check with non-atomic: much cheaper,
84 * and we expect non-dirty to be the common case. */
85 if (!*from) {
0c600ce2 86 addr += VHOST_LOG_CHUNK;
d5970055
MT
87 continue;
88 }
5444e768
PB
89 /* Data must be read atomically. We don't really need barrier semantics
90 * but it's easier to use atomic_* than roll our own. */
91 log = atomic_xchg(from, 0);
747eb78b
NC
92 while (log) {
93 int bit = ctzl(log);
6b37a23d
MT
94 hwaddr page_addr;
95 hwaddr section_offset;
96 hwaddr mr_offset;
6b37a23d
MT
97 page_addr = addr + bit * VHOST_LOG_PAGE;
98 section_offset = page_addr - section->offset_within_address_space;
99 mr_offset = section_offset + section->offset_within_region;
100 memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
d5970055
MT
101 log &= ~(0x1ull << bit);
102 }
103 addr += VHOST_LOG_CHUNK;
104 }
105}
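/* Illustrative note (not part of the original file): with 64-bit log
 * chunks, each vhost_log_chunk_t covers VHOST_LOG_CHUNK =
 * 64 * VHOST_LOG_PAGE bytes of guest memory.  A dirty page at guest
 * physical address G lives in chunk G / VHOST_LOG_CHUNK, at bit
 * (G % VHOST_LOG_CHUNK) / VHOST_LOG_PAGE -- the bit recovered above with
 * ctzl() after the atomic_xchg().
 */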
106
04097f7c 107static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
2817b260 108 MemoryRegionSection *section,
6b37a23d
MT
109 hwaddr first,
110 hwaddr last)
d5970055 111{
d5970055 112 int i;
6b37a23d
MT
113 hwaddr start_addr;
114 hwaddr end_addr;
04097f7c 115
d5970055
MT
116 if (!dev->log_enabled || !dev->started) {
117 return 0;
118 }
6b37a23d 119 start_addr = section->offset_within_address_space;
052e87b0 120 end_addr = range_get_last(start_addr, int128_get64(section->size));
6b37a23d
MT
121 start_addr = MAX(first, start_addr);
122 end_addr = MIN(last, end_addr);
123
d5970055
MT
124 for (i = 0; i < dev->mem->nregions; ++i) {
125 struct vhost_memory_region *reg = dev->mem->regions + i;
2817b260 126 vhost_dev_sync_region(dev, section, start_addr, end_addr,
d5970055
MT
127 reg->guest_phys_addr,
128 range_get_last(reg->guest_phys_addr,
129 reg->memory_size));
130 }
131 for (i = 0; i < dev->nvqs; ++i) {
132 struct vhost_virtqueue *vq = dev->vqs + i;
2817b260 133 vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
d5970055
MT
134 range_get_last(vq->used_phys, vq->used_size));
135 }
136 return 0;
137}
138
04097f7c
AK
139static void vhost_log_sync(MemoryListener *listener,
140 MemoryRegionSection *section)
141{
142 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
143 memory_listener);
6b37a23d
MT
144 vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
145}
04097f7c 146
6b37a23d
MT
147static void vhost_log_sync_range(struct vhost_dev *dev,
148 hwaddr first, hwaddr last)
149{
150 int i;
151 /* FIXME: this is N^2 in number of sections */
152 for (i = 0; i < dev->n_mem_sections; ++i) {
153 MemoryRegionSection *section = &dev->mem_sections[i];
154 vhost_sync_dirty_bitmap(dev, section, first, last);
155 }
04097f7c
AK
156}
157
d5970055
MT
158static uint64_t vhost_get_log_size(struct vhost_dev *dev)
159{
160 uint64_t log_size = 0;
161 int i;
162 for (i = 0; i < dev->mem->nregions; ++i) {
163 struct vhost_memory_region *reg = dev->mem->regions + i;
164 uint64_t last = range_get_last(reg->guest_phys_addr,
165 reg->memory_size);
166 log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
167 }
168 for (i = 0; i < dev->nvqs; ++i) {
169 struct vhost_virtqueue *vq = dev->vqs + i;
170 uint64_t last = vq->used_phys + vq->used_size - 1;
171 log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
172 }
173 return log_size;
174}
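/* Worked example (hypothetical numbers, not from this file): a single
 * region at guest physical 0 spanning 1 GiB gives last = 0x3fffffff, so
 * log_size = 0x3fffffff / VHOST_LOG_CHUNK + 1.  With 4 KiB log pages and
 * 64-bit chunks (VHOST_LOG_CHUNK = 0x40000) that is 4096 chunks, i.e. a
 * 32 KiB dirty bitmap.
 */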
15324404
MAL
175
176static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
309750fa 177{
0f2956f9 178 Error *err = NULL;
15324404
MAL
179 struct vhost_log *log;
180 uint64_t logsize = size * sizeof(*(log->log));
181 int fd = -1;
182
183 log = g_new0(struct vhost_log, 1);
184 if (share) {
185 log->log = qemu_memfd_alloc("vhost-log", logsize,
186 F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
0f2956f9
MAL
187 &fd, &err);
188 if (err) {
189 error_report_err(err);
190 g_free(log);
191 return NULL;
192 }
15324404
MAL
193 memset(log->log, 0, logsize);
194 } else {
195 log->log = g_malloc0(logsize);
196 }
309750fa
JW
197
198 log->size = size;
199 log->refcnt = 1;
15324404 200 log->fd = fd;
309750fa
JW
201
202 return log;
203}
204
15324404 205static struct vhost_log *vhost_log_get(uint64_t size, bool share)
309750fa 206{
15324404
MAL
207 struct vhost_log *log = share ? vhost_log_shm : vhost_log;
208
209 if (!log || log->size != size) {
210 log = vhost_log_alloc(size, share);
211 if (share) {
212 vhost_log_shm = log;
213 } else {
214 vhost_log = log;
215 }
309750fa 216 } else {
15324404 217 ++log->refcnt;
309750fa
JW
218 }
219
15324404 220 return log;
309750fa
JW
221}
222
223static void vhost_log_put(struct vhost_dev *dev, bool sync)
224{
225 struct vhost_log *log = dev->log;
226
227 if (!log) {
228 return;
229 }
230
231 --log->refcnt;
232 if (log->refcnt == 0) {
233 /* Sync only the range covered by the old log */
234 if (dev->log_size && sync) {
235 vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
236 }
15324404 237
309750fa 238 if (vhost_log == log) {
15324404 239 g_free(log->log);
309750fa 240 vhost_log = NULL;
15324404
MAL
241 } else if (vhost_log_shm == log) {
242 qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
243 log->fd);
244 vhost_log_shm = NULL;
309750fa 245 }
15324404 246
309750fa
JW
247 g_free(log);
248 }
5c0ba1be
FF
249
250 dev->log = NULL;
251 dev->log_size = 0;
309750fa 252}
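/* Illustrative note: logs are shared across devices through the global
 * vhost_log / vhost_log_shm pointers.  vhost_log_get() reuses the cached
 * log when the requested size matches (bumping refcnt) and allocates a
 * fresh one otherwise; vhost_log_put() drops the device's reference and
 * releases the buffer once the last user is gone.
 */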
d5970055 253
15324404
MAL
254static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
255{
256 return dev->vhost_ops->vhost_requires_shm_log &&
257 dev->vhost_ops->vhost_requires_shm_log(dev);
258}
259
260static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
d5970055 261{
15324404 262 struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
309750fa 263 uint64_t log_base = (uintptr_t)log->log;
6b37a23d 264 int r;
6528499f 265
636f4ddd
MAL
266 /* inform backend of log switching, this must be done before
267 releasing the current log, to ensure no logging is lost */
9a78a5dd 268 r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
162bba7f
MAL
269 if (r < 0) {
270 VHOST_OPS_DEBUG("vhost_set_log_base failed");
271 }
272
309750fa 273 vhost_log_put(dev, true);
d5970055
MT
274 dev->log = log;
275 dev->log_size = size;
276}
277
c471ad0e
JW
278static int vhost_dev_has_iommu(struct vhost_dev *dev)
279{
280 VirtIODevice *vdev = dev->vdev;
c471ad0e 281
375f74f4 282 return virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
c471ad0e
JW
283}
284
285static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
286 hwaddr *plen, int is_write)
287{
288 if (!vhost_dev_has_iommu(dev)) {
289 return cpu_physical_memory_map(addr, plen, is_write);
290 } else {
291 return (void *)(uintptr_t)addr;
292 }
293}
294
295static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer,
296 hwaddr len, int is_write,
297 hwaddr access_len)
298{
299 if (!vhost_dev_has_iommu(dev)) {
300 cpu_physical_memory_unmap(buffer, len, is_write, access_len);
301 }
302}
f1f9e6c5 303
0ca1fd2d
DDAG
304static int vhost_verify_ring_part_mapping(void *ring_hva,
305 uint64_t ring_gpa,
306 uint64_t ring_size,
307 void *reg_hva,
308 uint64_t reg_gpa,
309 uint64_t reg_size)
f1f9e6c5 310{
0ca1fd2d
DDAG
311 uint64_t hva_ring_offset;
312 uint64_t ring_last = range_get_last(ring_gpa, ring_size);
313 uint64_t reg_last = range_get_last(reg_gpa, reg_size);
f1f9e6c5 314
0ca1fd2d 315 if (ring_last < reg_gpa || ring_gpa > reg_last) {
f1f9e6c5
GK
316 return 0;
317 }
0ca1fd2d
DDAG
318 /* check that the whole ring is mapped */
319 if (ring_last > reg_last) {
320 return -ENOMEM;
f1f9e6c5 321 }
0ca1fd2d
DDAG
322 /* check that ring's MemoryRegion wasn't replaced */
323 hva_ring_offset = ring_gpa - reg_gpa;
324 if (ring_hva != reg_hva + hva_ring_offset) {
325 return -EBUSY;
f1f9e6c5 326 }
0ca1fd2d
DDAG
327
328 return 0;
f1f9e6c5
GK
329}
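/* Illustrative example (hypothetical values): for a ring at GPA 0x1000 of
 * size 0x1000 inside a region covering GPA 0x0..0xffff mapped at reg_hva,
 * the ring passes only if ring_hva == reg_hva + 0x1000.  A ring running
 * past the region end returns -ENOMEM ("Unable to map ..."), a ring whose
 * host mapping moved returns -EBUSY ("... relocated").
 */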
330
d5970055 331static int vhost_verify_ring_mappings(struct vhost_dev *dev,
0ca1fd2d
DDAG
332 void *reg_hva,
333 uint64_t reg_gpa,
334 uint64_t reg_size)
d5970055 335{
f1f9e6c5 336 int i, j;
8617343f 337 int r = 0;
f1f9e6c5
GK
338 const char *part_name[] = {
339 "descriptor table",
340 "available ring",
341 "used ring"
342 };
8617343f 343
f1f9e6c5 344 for (i = 0; i < dev->nvqs; ++i) {
d5970055 345 struct vhost_virtqueue *vq = dev->vqs + i;
d5970055 346
f1f9e6c5 347 j = 0;
0ca1fd2d
DDAG
348 r = vhost_verify_ring_part_mapping(
349 vq->desc, vq->desc_phys, vq->desc_size,
350 reg_hva, reg_gpa, reg_size);
2fe45ec3 351 if (r) {
f1f9e6c5 352 break;
d5970055 353 }
f1f9e6c5
GK
354
355 j++;
0ca1fd2d
DDAG
356 r = vhost_verify_ring_part_mapping(
357 vq->avail, vq->avail_phys, vq->avail_size,
358 reg_hva, reg_gpa, reg_size);
2fe45ec3 359 if (r) {
f1f9e6c5 360 break;
d5970055 361 }
f1f9e6c5
GK
362
363 j++;
0ca1fd2d
DDAG
364 r = vhost_verify_ring_part_mapping(
365 vq->used, vq->used_phys, vq->used_size,
366 reg_hva, reg_gpa, reg_size);
2fe45ec3 367 if (r) {
f1f9e6c5 368 break;
d5970055 369 }
f1f9e6c5
GK
370 }
371
372 if (r == -ENOMEM) {
373 error_report("Unable to map %s for ring %d", part_name[j], i);
374 } else if (r == -EBUSY) {
375 error_report("%s relocated for ring %d", part_name[j], i);
d5970055 376 }
8617343f 377 return r;
d5970055
MT
378}
379
af603142
NB
380static bool vhost_section(MemoryRegionSection *section)
381{
d56ec1e9
MT
382 return memory_region_is_ram(section->mr) &&
383 !memory_region_is_rom(section->mr);
af603142
NB
384}
385
386static void vhost_begin(MemoryListener *listener)
387{
388 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
389 memory_listener);
c44317ef
DDAG
390 dev->tmp_sections = NULL;
391 dev->n_tmp_sections = 0;
af603142 392}
d5970055 393
af603142
NB
394static void vhost_commit(MemoryListener *listener)
395{
396 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
397 memory_listener);
c44317ef
DDAG
398 MemoryRegionSection *old_sections;
399 int n_old_sections;
af603142 400 uint64_t log_size;
ade6d081 401 size_t regions_size;
af603142 402 int r;
0ca1fd2d 403 int i;
ade6d081 404 bool changed = false;
af603142 405
ade6d081
DDAG
406 /* Note we can be called before the device is started, but then
407 * starting the device calls set_mem_table, so we need to have
408 * built the data structures.
409 */
c44317ef
DDAG
410 old_sections = dev->mem_sections;
411 n_old_sections = dev->n_mem_sections;
412 dev->mem_sections = dev->tmp_sections;
413 dev->n_mem_sections = dev->n_tmp_sections;
414
ade6d081
DDAG
415 if (dev->n_mem_sections != n_old_sections) {
416 changed = true;
417 } else {
418 /* Same size, lets check the contents */
419 changed = n_old_sections && memcmp(dev->mem_sections, old_sections,
420 n_old_sections * sizeof(old_sections[0])) != 0;
af603142 421 }
ade6d081
DDAG
422
423 trace_vhost_commit(dev->started, changed);
424 if (!changed) {
c44317ef 425 goto out;
d5970055 426 }
ade6d081
DDAG
427
428 /* Rebuild the regions list from the new sections list */
429 regions_size = offsetof(struct vhost_memory, regions) +
430 dev->n_mem_sections * sizeof dev->mem->regions[0];
431 dev->mem = g_realloc(dev->mem, regions_size);
432 dev->mem->nregions = dev->n_mem_sections;
433 used_memslots = dev->mem->nregions;
434 for (i = 0; i < dev->n_mem_sections; i++) {
435 struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
436 struct MemoryRegionSection *mrs = dev->mem_sections + i;
437
438 cur_vmr->guest_phys_addr = mrs->offset_within_address_space;
439 cur_vmr->memory_size = int128_get64(mrs->size);
440 cur_vmr->userspace_addr =
441 (uintptr_t)memory_region_get_ram_ptr(mrs->mr) +
442 mrs->offset_within_region;
443 cur_vmr->flags_padding = 0;
444 }
445
446 if (!dev->started) {
c44317ef 447 goto out;
af603142 448 }
d5970055 449
0ca1fd2d
DDAG
450 for (i = 0; i < dev->mem->nregions; i++) {
451 if (vhost_verify_ring_mappings(dev,
452 (void *)(uintptr_t)dev->mem->regions[i].userspace_addr,
453 dev->mem->regions[i].guest_phys_addr,
454 dev->mem->regions[i].memory_size)) {
455 error_report("Verify ring failure on region %d", i);
456 abort();
457 }
d5970055
MT
458 }
459
460 if (!dev->log_enabled) {
21e70425 461 r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
162bba7f
MAL
462 if (r < 0) {
463 VHOST_OPS_DEBUG("vhost_set_mem_table failed");
464 }
c44317ef 465 goto out;
d5970055
MT
466 }
467 log_size = vhost_get_log_size(dev);
468 /* We allocate an extra 4K bytes to log,
469 * to reduce the number of reallocations. */
470#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
471 /* To log more, must increase log size before table update. */
472 if (dev->log_size < log_size) {
473 vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
474 }
21e70425 475 r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
162bba7f
MAL
476 if (r < 0) {
477 VHOST_OPS_DEBUG("vhost_set_mem_table failed");
478 }
d5970055
MT
479 /* To log less, can only decrease log size after table update. */
480 if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
481 vhost_dev_log_resize(dev, log_size);
482 }
c44317ef
DDAG
483
484out:
485 /* Deref the old list of sections, this must happen _after_ the
486 * vhost_set_mem_table to ensure the client isn't still using the
487 * section we're about to unref.
488 */
489 while (n_old_sections--) {
490 memory_region_unref(old_sections[n_old_sections].mr);
491 }
492 g_free(old_sections);
493 return;
494}
495
48d7c975
DDAG
496/* Adds the section data to the tmp_sections list.
497 * It relies on the listener calling us in memory address order
498 * and for each region (via the _add and _nop methods) to
499 * join neighbours.
500 */
501static void vhost_region_add_section(struct vhost_dev *dev,
502 MemoryRegionSection *section)
c44317ef 503{
48d7c975
DDAG
504 bool need_add = true;
505 uint64_t mrs_size = int128_get64(section->size);
506 uint64_t mrs_gpa = section->offset_within_address_space;
507 uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
508 section->offset_within_region;
509
510 trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
511 mrs_host);
512
513 bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &
514 ~(1 << DIRTY_MEMORY_MIGRATION);
515 if (log_dirty) {
516 return;
517 }
518
519 if (dev->n_tmp_sections) {
520 /* Since we already have at least one section, lets see if
521 * this extends it; since we're scanning in order, we only
522 * have to look at the last one, and the FlatView that calls
523 * us shouldn't have overlaps.
524 */
525 MemoryRegionSection *prev_sec = dev->tmp_sections +
526 (dev->n_tmp_sections - 1);
527 uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
528 uint64_t prev_size = int128_get64(prev_sec->size);
529 uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
530 uint64_t prev_host_start =
531 (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
532 prev_sec->offset_within_region;
533 uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
534
535 if (prev_gpa_end + 1 == mrs_gpa &&
536 prev_host_end + 1 == mrs_host &&
537 section->mr == prev_sec->mr &&
538 (!dev->vhost_ops->vhost_backend_can_merge ||
539 dev->vhost_ops->vhost_backend_can_merge(dev,
540 mrs_host, mrs_size,
541 prev_host_start, prev_size))) {
542 /* The two sections abut */
543 need_add = false;
544 prev_sec->size = int128_add(prev_sec->size, section->size);
545 trace_vhost_region_add_section_abut(section->mr->name,
546 mrs_size + prev_size);
547 }
548 }
549
550 if (need_add) {
551 ++dev->n_tmp_sections;
552 dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
553 dev->n_tmp_sections);
554 dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
555 /* The flatview isn't stable and we don't use it, making it NULL
556 * means we can memcmp the list.
557 */
558 dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
559 memory_region_ref(section->mr);
560 }
50c1e149
AK
561}
562
04097f7c
AK
563static void vhost_region_add(MemoryListener *listener,
564 MemoryRegionSection *section)
565{
2817b260
AK
566 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
567 memory_listener);
568
c49450b9
AK
569 if (!vhost_section(section)) {
570 return;
571 }
48d7c975 572 vhost_region_add_section(dev, section);
04097f7c
AK
573}
574
48d7c975 575/* Called on regions that have not changed */
c44317ef 576static void vhost_region_nop(MemoryListener *listener,
04097f7c
AK
577 MemoryRegionSection *section)
578{
2817b260
AK
579 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
580 memory_listener);
2817b260 581
c49450b9
AK
582 if (!vhost_section(section)) {
583 return;
584 }
585
48d7c975 586 vhost_region_add_section(dev, section);
c44317ef
DDAG
587}
588
589static void vhost_region_del(MemoryListener *listener,
590 MemoryRegionSection *section)
591{
592 if (!vhost_section(section)) {
593 return;
2817b260 594 }
c44317ef 595
04097f7c
AK
596}
597
375f74f4
JW
598static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
599{
600 struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
601 struct vhost_dev *hdev = iommu->hdev;
602 hwaddr iova = iotlb->iova + iommu->iommu_offset;
603
020e571b
MC
604 if (vhost_backend_invalidate_device_iotlb(hdev, iova,
605 iotlb->addr_mask + 1)) {
375f74f4
JW
606 error_report("Fail to invalidate device iotlb");
607 }
608}
609
610static void vhost_iommu_region_add(MemoryListener *listener,
611 MemoryRegionSection *section)
612{
613 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
614 iommu_listener);
615 struct vhost_iommu *iommu;
698feb5e 616 Int128 end;
375f74f4
JW
617
618 if (!memory_region_is_iommu(section->mr)) {
619 return;
620 }
621
622 iommu = g_malloc0(sizeof(*iommu));
698feb5e
PX
623 end = int128_add(int128_make64(section->offset_within_region),
624 section->size);
625 end = int128_sub(end, int128_one());
626 iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
627 IOMMU_NOTIFIER_UNMAP,
628 section->offset_within_region,
629 int128_get64(end));
375f74f4
JW
630 iommu->mr = section->mr;
631 iommu->iommu_offset = section->offset_within_address_space -
632 section->offset_within_region;
633 iommu->hdev = dev;
634 memory_region_register_iommu_notifier(section->mr, &iommu->n);
635 QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next);
636 /* TODO: can replay help performance here? */
637}
638
639static void vhost_iommu_region_del(MemoryListener *listener,
640 MemoryRegionSection *section)
641{
642 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
643 iommu_listener);
644 struct vhost_iommu *iommu;
645
646 if (!memory_region_is_iommu(section->mr)) {
647 return;
648 }
649
650 QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
698feb5e
PX
651 if (iommu->mr == section->mr &&
652 iommu->n.start == section->offset_within_region) {
375f74f4
JW
653 memory_region_unregister_iommu_notifier(iommu->mr,
654 &iommu->n);
655 QLIST_REMOVE(iommu, iommu_next);
656 g_free(iommu);
657 break;
658 }
659 }
660}
661
d5970055
MT
662static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
663 struct vhost_virtqueue *vq,
664 unsigned idx, bool enable_log)
665{
666 struct vhost_vring_addr addr = {
667 .index = idx,
2b3af999
SW
668 .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
669 .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
670 .used_user_addr = (uint64_t)(unsigned long)vq->used,
d5970055
MT
671 .log_guest_addr = vq->used_phys,
672 .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
673 };
21e70425 674 int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
d5970055 675 if (r < 0) {
c6409692 676 VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
d5970055
MT
677 return -errno;
678 }
679 return 0;
680}
681
c471ad0e
JW
682static int vhost_dev_set_features(struct vhost_dev *dev,
683 bool enable_log)
d5970055
MT
684{
685 uint64_t features = dev->acked_features;
686 int r;
687 if (enable_log) {
9a2ba823 688 features |= 0x1ULL << VHOST_F_LOG_ALL;
d5970055 689 }
21e70425 690 r = dev->vhost_ops->vhost_set_features(dev, features);
c6409692
MAL
691 if (r < 0) {
692 VHOST_OPS_DEBUG("vhost_set_features failed");
693 }
d5970055
MT
694 return r < 0 ? -errno : 0;
695}
696
697static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
698{
162bba7f 699 int r, i, idx;
d5970055
MT
700 r = vhost_dev_set_features(dev, enable_log);
701 if (r < 0) {
702 goto err_features;
703 }
704 for (i = 0; i < dev->nvqs; ++i) {
25a2a920
TC
705 idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
706 r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
d5970055
MT
707 enable_log);
708 if (r < 0) {
709 goto err_vq;
710 }
711 }
712 return 0;
713err_vq:
714 for (; i >= 0; --i) {
25a2a920 715 idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
162bba7f
MAL
716 vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
717 dev->log_enabled);
d5970055 718 }
162bba7f 719 vhost_dev_set_features(dev, dev->log_enabled);
d5970055
MT
720err_features:
721 return r;
722}
723
04097f7c 724static int vhost_migration_log(MemoryListener *listener, int enable)
d5970055 725{
04097f7c
AK
726 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
727 memory_listener);
d5970055
MT
728 int r;
729 if (!!enable == dev->log_enabled) {
730 return 0;
731 }
732 if (!dev->started) {
733 dev->log_enabled = enable;
734 return 0;
735 }
736 if (!enable) {
737 r = vhost_dev_set_log(dev, false);
738 if (r < 0) {
739 return r;
740 }
309750fa 741 vhost_log_put(dev, false);
d5970055
MT
742 } else {
743 vhost_dev_log_resize(dev, vhost_get_log_size(dev));
744 r = vhost_dev_set_log(dev, true);
745 if (r < 0) {
746 return r;
747 }
748 }
749 dev->log_enabled = enable;
750 return 0;
751}
752
04097f7c
AK
753static void vhost_log_global_start(MemoryListener *listener)
754{
755 int r;
756
757 r = vhost_migration_log(listener, true);
758 if (r < 0) {
759 abort();
760 }
761}
762
763static void vhost_log_global_stop(MemoryListener *listener)
764{
765 int r;
766
767 r = vhost_migration_log(listener, false);
768 if (r < 0) {
769 abort();
770 }
771}
772
773static void vhost_log_start(MemoryListener *listener,
b2dfd71c
PB
774 MemoryRegionSection *section,
775 int old, int new)
04097f7c
AK
776{
777 /* FIXME: implement */
778}
779
780static void vhost_log_stop(MemoryListener *listener,
b2dfd71c
PB
781 MemoryRegionSection *section,
782 int old, int new)
04097f7c
AK
783{
784 /* FIXME: implement */
785}
786
46f70ff1
GK
787/* The vhost driver natively knows how to handle the vrings of non
788 * cross-endian legacy devices and modern devices. Only legacy devices
789 * exposed to a bi-endian guest may require the vhost driver to use a
790 * specific endianness.
791 */
a122ab24
GK
792static inline bool vhost_needs_vring_endian(VirtIODevice *vdev)
793{
e5848123
GK
794 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
795 return false;
796 }
a122ab24 797#ifdef HOST_WORDS_BIGENDIAN
46f70ff1 798 return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE;
a122ab24 799#else
46f70ff1 800 return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
a122ab24 801#endif
a122ab24
GK
802}
803
04b7a152
GK
804static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
805 bool is_big_endian,
806 int vhost_vq_index)
807{
808 struct vhost_vring_state s = {
809 .index = vhost_vq_index,
810 .num = is_big_endian
811 };
812
21e70425 813 if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) {
04b7a152
GK
814 return 0;
815 }
816
c6409692 817 VHOST_OPS_DEBUG("vhost_set_vring_endian failed");
04b7a152
GK
818 if (errno == ENOTTY) {
819 error_report("vhost does not support cross-endian");
820 return -ENOSYS;
821 }
822
823 return -errno;
824}
825
c471ad0e
JW
826static int vhost_memory_region_lookup(struct vhost_dev *hdev,
827 uint64_t gpa, uint64_t *uaddr,
828 uint64_t *len)
829{
830 int i;
831
832 for (i = 0; i < hdev->mem->nregions; i++) {
833 struct vhost_memory_region *reg = hdev->mem->regions + i;
834
835 if (gpa >= reg->guest_phys_addr &&
836 reg->guest_phys_addr + reg->memory_size > gpa) {
837 *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr;
838 *len = reg->guest_phys_addr + reg->memory_size - gpa;
839 return 0;
840 }
841 }
842
843 return -EFAULT;
844}
845
fc58bd0d 846int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
c471ad0e
JW
847{
848 IOMMUTLBEntry iotlb;
849 uint64_t uaddr, len;
fc58bd0d 850 int ret = -EFAULT;
c471ad0e
JW
851
852 rcu_read_lock();
853
854 iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
855 iova, write);
856 if (iotlb.target_as != NULL) {
fc58bd0d
MC
857 ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
858 &uaddr, &len);
859 if (ret) {
c471ad0e
JW
860 error_report("Fail to lookup the translated address "
861 "%"PRIx64, iotlb.translated_addr);
862 goto out;
863 }
864
865 len = MIN(iotlb.addr_mask + 1, len);
866 iova = iova & ~iotlb.addr_mask;
867
020e571b
MC
868 ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
869 len, iotlb.perm);
fc58bd0d 870 if (ret) {
c471ad0e
JW
871 error_report("Fail to update device iotlb");
872 goto out;
873 }
874 }
875out:
876 rcu_read_unlock();
fc58bd0d
MC
877
878 return ret;
c471ad0e
JW
879}
880
f56a1247 881static int vhost_virtqueue_start(struct vhost_dev *dev,
d5970055
MT
882 struct VirtIODevice *vdev,
883 struct vhost_virtqueue *vq,
884 unsigned idx)
885{
96a3d98d
JW
886 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
887 VirtioBusState *vbus = VIRTIO_BUS(qbus);
888 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
a8170e5e 889 hwaddr s, l, a;
d5970055 890 int r;
21e70425 891 int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
d5970055 892 struct vhost_vring_file file = {
a9f98bb5 893 .index = vhost_vq_index
d5970055
MT
894 };
895 struct vhost_vring_state state = {
a9f98bb5 896 .index = vhost_vq_index
d5970055
MT
897 };
898 struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
899
a9f98bb5 900
d5970055 901 vq->num = state.num = virtio_queue_get_num(vdev, idx);
21e70425 902 r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
d5970055 903 if (r) {
c6409692 904 VHOST_OPS_DEBUG("vhost_set_vring_num failed");
d5970055
MT
905 return -errno;
906 }
907
908 state.num = virtio_queue_get_last_avail_idx(vdev, idx);
21e70425 909 r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
d5970055 910 if (r) {
c6409692 911 VHOST_OPS_DEBUG("vhost_set_vring_base failed");
d5970055
MT
912 return -errno;
913 }
914
e5848123 915 if (vhost_needs_vring_endian(vdev)) {
04b7a152
GK
916 r = vhost_virtqueue_set_vring_endian_legacy(dev,
917 virtio_is_big_endian(vdev),
918 vhost_vq_index);
919 if (r) {
920 return -errno;
921 }
922 }
923
f1f9e6c5
GK
924 vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
925 vq->desc_phys = a = virtio_queue_get_desc_addr(vdev, idx);
c471ad0e 926 vq->desc = vhost_memory_map(dev, a, &l, 0);
d5970055
MT
927 if (!vq->desc || l != s) {
928 r = -ENOMEM;
929 goto fail_alloc_desc;
930 }
f1f9e6c5
GK
931 vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
932 vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
c471ad0e 933 vq->avail = vhost_memory_map(dev, a, &l, 0);
d5970055
MT
934 if (!vq->avail || l != s) {
935 r = -ENOMEM;
936 goto fail_alloc_avail;
937 }
938 vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
939 vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
c471ad0e 940 vq->used = vhost_memory_map(dev, a, &l, 1);
d5970055
MT
941 if (!vq->used || l != s) {
942 r = -ENOMEM;
943 goto fail_alloc_used;
944 }
945
a9f98bb5 946 r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
d5970055
MT
947 if (r < 0) {
948 r = -errno;
949 goto fail_alloc;
950 }
a9f98bb5 951
d5970055 952 file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
21e70425 953 r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
d5970055 954 if (r) {
c6409692 955 VHOST_OPS_DEBUG("vhost_set_vring_kick failed");
c8852121 956 r = -errno;
d5970055
MT
957 goto fail_kick;
958 }
959
f56a1247
MT
960 /* Clear and discard previous events if any. */
961 event_notifier_test_and_clear(&vq->masked_notifier);
d5970055 962
5669655a
VK
963 /* Init vring in unmasked state, unless guest_notifier_mask
964 * will do it later.
965 */
966 if (!vdev->use_guest_notifier_mask) {
967 /* TODO: check and handle errors. */
968 vhost_virtqueue_mask(dev, vdev, idx, false);
969 }
970
96a3d98d
JW
971 if (k->query_guest_notifiers &&
972 k->query_guest_notifiers(qbus->parent) &&
973 virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) {
974 file.fd = -1;
975 r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
976 if (r) {
977 goto fail_vector;
978 }
979 }
980
d5970055
MT
981 return 0;
982
96a3d98d 983fail_vector:
d5970055 984fail_kick:
d5970055 985fail_alloc:
c471ad0e
JW
986 vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
987 0, 0);
d5970055 988fail_alloc_used:
c471ad0e
JW
989 vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
990 0, 0);
d5970055 991fail_alloc_avail:
c471ad0e
JW
992 vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
993 0, 0);
d5970055
MT
994fail_alloc_desc:
995 return r;
996}
997
f56a1247 998static void vhost_virtqueue_stop(struct vhost_dev *dev,
d5970055
MT
999 struct VirtIODevice *vdev,
1000 struct vhost_virtqueue *vq,
1001 unsigned idx)
1002{
21e70425 1003 int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
d5970055 1004 struct vhost_vring_state state = {
04b7a152 1005 .index = vhost_vq_index,
d5970055
MT
1006 };
1007 int r;
fc57fd99 1008
21e70425 1009 r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
d5970055 1010 if (r < 0) {
c6409692 1011 VHOST_OPS_DEBUG("vhost VQ %d ring restore failed: %d", idx, r);
2ae39a11
MC
1012 /* Connection to the backend is broken, so let's sync internal
1013 * last avail idx to the device used idx.
1014 */
1015 virtio_queue_restore_last_avail_idx(vdev, idx);
499c5579
MAL
1016 } else {
1017 virtio_queue_set_last_avail_idx(vdev, idx, state.num);
d5970055 1018 }
3561ba14 1019 virtio_queue_invalidate_signalled_used(vdev, idx);
aa94d521 1020 virtio_queue_update_used_idx(vdev, idx);
04b7a152
GK
1021
1022 /* In the cross-endian case, we need to reset the vring endianness to
1023 * native as legacy devices expect by default.
1024 */
e5848123 1025 if (vhost_needs_vring_endian(vdev)) {
162bba7f
MAL
1026 vhost_virtqueue_set_vring_endian_legacy(dev,
1027 !virtio_is_big_endian(vdev),
1028 vhost_vq_index);
04b7a152
GK
1029 }
1030
c471ad0e
JW
1031 vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
1032 1, virtio_queue_get_used_size(vdev, idx));
1033 vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
1034 0, virtio_queue_get_avail_size(vdev, idx));
1035 vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
1036 0, virtio_queue_get_desc_size(vdev, idx));
d5970055
MT
1037}
1038
80a1ea37
AK
1039static void vhost_eventfd_add(MemoryListener *listener,
1040 MemoryRegionSection *section,
753d5e14 1041 bool match_data, uint64_t data, EventNotifier *e)
80a1ea37
AK
1042{
1043}
1044
1045static void vhost_eventfd_del(MemoryListener *listener,
1046 MemoryRegionSection *section,
753d5e14 1047 bool match_data, uint64_t data, EventNotifier *e)
80a1ea37
AK
1048{
1049}
1050
69e87b32
JW
1051static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
1052 int n, uint32_t timeout)
1053{
1054 int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
1055 struct vhost_vring_state state = {
1056 .index = vhost_vq_index,
1057 .num = timeout,
1058 };
1059 int r;
1060
1061 if (!dev->vhost_ops->vhost_set_vring_busyloop_timeout) {
1062 return -EINVAL;
1063 }
1064
1065 r = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &state);
1066 if (r) {
c6409692 1067 VHOST_OPS_DEBUG("vhost_set_vring_busyloop_timeout failed");
69e87b32
JW
1068 return r;
1069 }
1070
1071 return 0;
1072}
1073
f56a1247
MT
1074static int vhost_virtqueue_init(struct vhost_dev *dev,
1075 struct vhost_virtqueue *vq, int n)
1076{
21e70425 1077 int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
f56a1247 1078 struct vhost_vring_file file = {
b931bfbf 1079 .index = vhost_vq_index,
f56a1247
MT
1080 };
1081 int r = event_notifier_init(&vq->masked_notifier, 0);
1082 if (r < 0) {
1083 return r;
1084 }
1085
1086 file.fd = event_notifier_get_fd(&vq->masked_notifier);
21e70425 1087 r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
f56a1247 1088 if (r) {
c6409692 1089 VHOST_OPS_DEBUG("vhost_set_vring_call failed");
f56a1247
MT
1090 r = -errno;
1091 goto fail_call;
1092 }
c471ad0e
JW
1093
1094 vq->dev = dev;
1095
f56a1247
MT
1096 return 0;
1097fail_call:
1098 event_notifier_cleanup(&vq->masked_notifier);
1099 return r;
1100}
1101
1102static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
1103{
1104 event_notifier_cleanup(&vq->masked_notifier);
1105}
1106
81647a65 1107int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
69e87b32 1108 VhostBackendType backend_type, uint32_t busyloop_timeout)
d5970055
MT
1109{
1110 uint64_t features;
a06db3ec 1111 int i, r, n_initialized_vqs = 0;
fe44dc91 1112 Error *local_err = NULL;
81647a65 1113
c471ad0e 1114 hdev->vdev = NULL;
d2fc4402
MAL
1115 hdev->migration_blocker = NULL;
1116
7cb8a9b9
MAL
1117 r = vhost_set_backend_type(hdev, backend_type);
1118 assert(r >= 0);
1a1bfac9 1119
7cb8a9b9
MAL
1120 r = hdev->vhost_ops->vhost_backend_init(hdev, opaque);
1121 if (r < 0) {
1122 goto fail;
24d1eb33
NN
1123 }
1124
aebf8168 1125 if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
4afba631
MAL
1126 error_report("vhost backend memory slots limit is less"
1127 " than current number of present memory slots");
7cb8a9b9
MAL
1128 r = -1;
1129 goto fail;
aebf8168 1130 }
2ce68e4c 1131
21e70425 1132 r = hdev->vhost_ops->vhost_set_owner(hdev);
d5970055 1133 if (r < 0) {
c6409692 1134 VHOST_OPS_DEBUG("vhost_set_owner failed");
d5970055
MT
1135 goto fail;
1136 }
1137
21e70425 1138 r = hdev->vhost_ops->vhost_get_features(hdev, &features);
d5970055 1139 if (r < 0) {
c6409692 1140 VHOST_OPS_DEBUG("vhost_get_features failed");
d5970055
MT
1141 goto fail;
1142 }
f56a1247 1143
a06db3ec 1144 for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
b931bfbf 1145 r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
f56a1247 1146 if (r < 0) {
a06db3ec 1147 goto fail;
f56a1247
MT
1148 }
1149 }
69e87b32
JW
1150
1151 if (busyloop_timeout) {
1152 for (i = 0; i < hdev->nvqs; ++i) {
1153 r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
1154 busyloop_timeout);
1155 if (r < 0) {
1156 goto fail_busyloop;
1157 }
1158 }
1159 }
1160
d5970055
MT
1161 hdev->features = features;
1162
04097f7c 1163 hdev->memory_listener = (MemoryListener) {
50c1e149
AK
1164 .begin = vhost_begin,
1165 .commit = vhost_commit,
04097f7c
AK
1166 .region_add = vhost_region_add,
1167 .region_del = vhost_region_del,
50c1e149 1168 .region_nop = vhost_region_nop,
04097f7c
AK
1169 .log_start = vhost_log_start,
1170 .log_stop = vhost_log_stop,
1171 .log_sync = vhost_log_sync,
1172 .log_global_start = vhost_log_global_start,
1173 .log_global_stop = vhost_log_global_stop,
80a1ea37
AK
1174 .eventfd_add = vhost_eventfd_add,
1175 .eventfd_del = vhost_eventfd_del,
72e22d2f 1176 .priority = 10
04097f7c 1177 };
d2fc4402 1178
375f74f4
JW
1179 hdev->iommu_listener = (MemoryListener) {
1180 .region_add = vhost_iommu_region_add,
1181 .region_del = vhost_iommu_region_del,
1182 };
c471ad0e 1183
d2fc4402
MAL
1184 if (hdev->migration_blocker == NULL) {
1185 if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
1186 error_setg(&hdev->migration_blocker,
1187 "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
0d34fbab 1188 } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_check()) {
31190ed7
MAL
1189 error_setg(&hdev->migration_blocker,
1190 "Migration disabled: failed to allocate shared memory");
d2fc4402
MAL
1191 }
1192 }
1193
1194 if (hdev->migration_blocker != NULL) {
fe44dc91
AA
1195 r = migrate_add_blocker(hdev->migration_blocker, &local_err);
1196 if (local_err) {
1197 error_report_err(local_err);
1198 error_free(hdev->migration_blocker);
1199 goto fail_busyloop;
1200 }
7145872e 1201 }
d2fc4402 1202
7267c094 1203 hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
2817b260
AK
1204 hdev->n_mem_sections = 0;
1205 hdev->mem_sections = NULL;
d5970055
MT
1206 hdev->log = NULL;
1207 hdev->log_size = 0;
1208 hdev->log_enabled = false;
1209 hdev->started = false;
f6790af6 1210 memory_listener_register(&hdev->memory_listener, &address_space_memory);
5be5f9be 1211 QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
d5970055 1212 return 0;
a06db3ec 1213
69e87b32
JW
1214fail_busyloop:
1215 while (--i >= 0) {
1216 vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0);
1217 }
d5970055 1218fail:
a06db3ec
MAL
1219 hdev->nvqs = n_initialized_vqs;
1220 vhost_dev_cleanup(hdev);
d5970055
MT
1221 return r;
1222}
1223
1224void vhost_dev_cleanup(struct vhost_dev *hdev)
1225{
f56a1247 1226 int i;
e0547b59 1227
f56a1247
MT
1228 for (i = 0; i < hdev->nvqs; ++i) {
1229 vhost_virtqueue_cleanup(hdev->vqs + i);
1230 }
5be5f9be
MAL
1231 if (hdev->mem) {
1232 /* those are only safe after successful init */
1233 memory_listener_unregister(&hdev->memory_listener);
1234 QLIST_REMOVE(hdev, entry);
1235 }
7145872e
MT
1236 if (hdev->migration_blocker) {
1237 migrate_del_blocker(hdev->migration_blocker);
1238 error_free(hdev->migration_blocker);
1239 }
7267c094 1240 g_free(hdev->mem);
2817b260 1241 g_free(hdev->mem_sections);
e0547b59
MAL
1242 if (hdev->vhost_ops) {
1243 hdev->vhost_ops->vhost_backend_cleanup(hdev);
1244 }
7b527247 1245 assert(!hdev->log);
e0547b59
MAL
1246
1247 memset(hdev, 0, sizeof(struct vhost_dev));
d5970055
MT
1248}
1249
b0b3db79
MT
1250/* Stop processing guest IO notifications in qemu.
1251 * Start processing them in vhost in kernel.
1252 */
1253int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
1254{
1c819449 1255 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
16617e36 1256 int i, r, e;
4afba631 1257
310837de
PB
1258 /* We will pass the notifiers to the kernel, make sure that QEMU
1259 * doesn't interfere.
1260 */
1261 r = virtio_device_grab_ioeventfd(vdev);
1262 if (r < 0) {
4afba631 1263 error_report("binding does not support host notifiers");
b0b3db79
MT
1264 goto fail;
1265 }
1266
1267 for (i = 0; i < hdev->nvqs; ++i) {
b1f0a33d
CH
1268 r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
1269 true);
b0b3db79 1270 if (r < 0) {
4afba631 1271 error_report("vhost VQ %d notifier binding failed: %d", i, -r);
b0b3db79
MT
1272 goto fail_vq;
1273 }
1274 }
1275
1276 return 0;
1277fail_vq:
1278 while (--i >= 0) {
b1f0a33d
CH
1279 e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
1280 false);
16617e36 1281 if (e < 0) {
4afba631 1282 error_report("vhost VQ %d notifier cleanup error: %d", i, -e);
b0b3db79 1283 }
16617e36 1284 assert (e >= 0);
76143618 1285 virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
b0b3db79 1286 }
310837de 1287 virtio_device_release_ioeventfd(vdev);
b0b3db79
MT
1288fail:
1289 return r;
1290}
1291
1292/* Stop processing guest IO notifications in vhost.
1293 * Start processing them in qemu.
1294 * This might actually run the qemu handlers right away,
1295 * so virtio in qemu must be completely setup when this is called.
1296 */
1297void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
1298{
1c819449 1299 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
b0b3db79
MT
1300 int i, r;
1301
1302 for (i = 0; i < hdev->nvqs; ++i) {
b1f0a33d
CH
1303 r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
1304 false);
b0b3db79 1305 if (r < 0) {
4afba631 1306 error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
b0b3db79
MT
1307 }
1308 assert (r >= 0);
76143618 1309 virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
b0b3db79 1310 }
310837de 1311 virtio_device_release_ioeventfd(vdev);
b0b3db79
MT
1312}
1313
f56a1247
MT
1314/* Test and clear event pending status.
1315 * Should be called after unmask to avoid losing events.
1316 */
1317bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
1318{
a9f98bb5 1319 struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
a9f98bb5 1320 assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
f56a1247
MT
1321 return event_notifier_test_and_clear(&vq->masked_notifier);
1322}
1323
1324/* Mask/unmask events from this vq. */
1325void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
1326 bool mask)
1327{
1328 struct VirtQueue *vvq = virtio_get_queue(vdev, n);
a9f98bb5 1329 int r, index = n - hdev->vq_index;
fc57fd99 1330 struct vhost_vring_file file;
f56a1247 1331
8695de0f
MAL
1332 /* should only be called after backend is connected */
1333 assert(hdev->vhost_ops);
1334
f56a1247 1335 if (mask) {
5669655a 1336 assert(vdev->use_guest_notifier_mask);
a9f98bb5 1337 file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
f56a1247
MT
1338 } else {
1339 file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
1340 }
fc57fd99 1341
21e70425
MAL
1342 file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
1343 r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
162bba7f
MAL
1344 if (r < 0) {
1345 VHOST_OPS_DEBUG("vhost_set_vring_call failed");
1346 }
f56a1247
MT
1347}
1348
9a2ba823
CH
1349uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
1350 uint64_t features)
2e6d46d7
NN
1351{
1352 const int *bit = feature_bits;
1353 while (*bit != VHOST_INVALID_FEATURE_BIT) {
9a2ba823 1354 uint64_t bit_mask = (1ULL << *bit);
2e6d46d7
NN
1355 if (!(hdev->features & bit_mask)) {
1356 features &= ~bit_mask;
1357 }
1358 bit++;
1359 }
1360 return features;
1361}
1362
1363void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
9a2ba823 1364 uint64_t features)
2e6d46d7
NN
1365{
1366 const int *bit = feature_bits;
1367 while (*bit != VHOST_INVALID_FEATURE_BIT) {
9a2ba823 1368 uint64_t bit_mask = (1ULL << *bit);
2e6d46d7
NN
1369 if (features & bit_mask) {
1370 hdev->acked_features |= bit_mask;
1371 }
1372 bit++;
1373 }
1374}
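/* Illustrative usage (hypothetical feature list, not from this file):
 *
 *   static const int feature_bits[] = {
 *       VIRTIO_F_VERSION_1,
 *       VIRTIO_RING_F_INDIRECT_DESC,
 *       VHOST_INVALID_FEATURE_BIT
 *   };
 *
 *   features = vhost_get_features(hdev, feature_bits, features);
 *   ... guest negotiates ...
 *   vhost_ack_features(hdev, feature_bits, guest_features);
 *
 * vhost_get_features() clears bits the backend does not support;
 * vhost_ack_features() records the guest-acked subset in
 * hdev->acked_features.
 */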
1375
4c3e257b
CL
1376int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
1377 uint32_t config_len)
1378{
1379 assert(hdev->vhost_ops);
1380
1381 if (hdev->vhost_ops->vhost_get_config) {
1382 return hdev->vhost_ops->vhost_get_config(hdev, config, config_len);
1383 }
1384
1385 return -1;
1386}
1387
1388int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
1389 uint32_t offset, uint32_t size, uint32_t flags)
1390{
1391 assert(hdev->vhost_ops);
1392
1393 if (hdev->vhost_ops->vhost_set_config) {
1394 return hdev->vhost_ops->vhost_set_config(hdev, data, offset,
1395 size, flags);
1396 }
1397
1398 return -1;
1399}
1400
1401void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
1402 const VhostDevConfigOps *ops)
1403{
1404 assert(hdev->vhost_ops);
1405 hdev->config_ops = ops;
1406}
1407
b0b3db79 1408/* Host notifiers must be enabled at this point. */
d5970055
MT
1409int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
1410{
1411 int i, r;
24f4fe34 1412
8695de0f
MAL
1413 /* should only be called after backend is connected */
1414 assert(hdev->vhost_ops);
1415
24f4fe34 1416 hdev->started = true;
c471ad0e 1417 hdev->vdev = vdev;
24f4fe34 1418
d5970055
MT
1419 r = vhost_dev_set_features(hdev, hdev->log_enabled);
1420 if (r < 0) {
54dd9321 1421 goto fail_features;
d5970055 1422 }
c471ad0e
JW
1423
1424 if (vhost_dev_has_iommu(hdev)) {
375f74f4 1425 memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
c471ad0e
JW
1426 }
1427
21e70425 1428 r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
d5970055 1429 if (r < 0) {
c6409692 1430 VHOST_OPS_DEBUG("vhost_set_mem_table failed");
d5970055 1431 r = -errno;
54dd9321 1432 goto fail_mem;
d5970055 1433 }
d154e0ba 1434 for (i = 0; i < hdev->nvqs; ++i) {
f56a1247 1435 r = vhost_virtqueue_start(hdev,
a9f98bb5
JW
1436 vdev,
1437 hdev->vqs + i,
1438 hdev->vq_index + i);
d154e0ba
MT
1439 if (r < 0) {
1440 goto fail_vq;
1441 }
1442 }
1443
d5970055 1444 if (hdev->log_enabled) {
e05ca820
MT
1445 uint64_t log_base;
1446
d5970055 1447 hdev->log_size = vhost_get_log_size(hdev);
15324404
MAL
1448 hdev->log = vhost_log_get(hdev->log_size,
1449 vhost_dev_log_is_shared(hdev));
309750fa 1450 log_base = (uintptr_t)hdev->log->log;
c2bea314 1451 r = hdev->vhost_ops->vhost_set_log_base(hdev,
9a78a5dd
MAL
1452 hdev->log_size ? log_base : 0,
1453 hdev->log);
d5970055 1454 if (r < 0) {
c6409692 1455 VHOST_OPS_DEBUG("vhost_set_log_base failed");
d5970055 1456 r = -errno;
54dd9321 1457 goto fail_log;
d5970055
MT
1458 }
1459 }
d154e0ba 1460
c471ad0e
JW
1461 if (vhost_dev_has_iommu(hdev)) {
1462 hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
1463
1464 /* Update used ring information for IOTLB to work correctly,
1465 * vhost-kernel code requires this. */
1466 for (i = 0; i < hdev->nvqs; ++i) {
1467 struct vhost_virtqueue *vq = hdev->vqs + i;
1468 vhost_device_iotlb_miss(hdev, vq->used_phys, true);
1469 }
1470 }
d5970055 1471 return 0;
54dd9321 1472fail_log:
24bfa207 1473 vhost_log_put(hdev, false);
d5970055
MT
1474fail_vq:
1475 while (--i >= 0) {
f56a1247 1476 vhost_virtqueue_stop(hdev,
a9f98bb5
JW
1477 vdev,
1478 hdev->vqs + i,
1479 hdev->vq_index + i);
d5970055 1480 }
a9f98bb5 1481 i = hdev->nvqs;
c471ad0e 1482
54dd9321
MT
1483fail_mem:
1484fail_features:
24f4fe34
MT
1485
1486 hdev->started = false;
d5970055
MT
1487 return r;
1488}
1489
b0b3db79 1490/* Host notifiers must be enabled at this point. */
d5970055
MT
1491void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
1492{
a9f98bb5 1493 int i;
54dd9321 1494
8695de0f
MAL
1495 /* should only be called after backend is connected */
1496 assert(hdev->vhost_ops);
1497
d5970055 1498 for (i = 0; i < hdev->nvqs; ++i) {
f56a1247 1499 vhost_virtqueue_stop(hdev,
a9f98bb5
JW
1500 vdev,
1501 hdev->vqs + i,
1502 hdev->vq_index + i);
d5970055 1503 }
54dd9321 1504
c471ad0e
JW
1505 if (vhost_dev_has_iommu(hdev)) {
1506 hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
375f74f4 1507 memory_listener_unregister(&hdev->iommu_listener);
c471ad0e 1508 }
309750fa 1509 vhost_log_put(hdev, true);
d5970055 1510 hdev->started = false;
c471ad0e 1511 hdev->vdev = NULL;
d5970055 1512}
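/* Illustrative lifecycle sketch (simplified, caller details assumed): a
 * virtio device backend typically drives this file as
 *
 *   vhost_dev_init(hdev, opaque, backend_type, busyloop_timeout);
 *   vhost_dev_enable_notifiers(hdev, vdev);
 *   vhost_dev_start(hdev, vdev);
 *   ... guest runs, vhost services the rings ...
 *   vhost_dev_stop(hdev, vdev);
 *   vhost_dev_disable_notifiers(hdev, vdev);
 *   vhost_dev_cleanup(hdev);
 *
 * Host notifiers must be enabled before vhost_dev_start() and released
 * only after vhost_dev_stop(), as the comments above require.
 */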
950d94ba
MAL
1513
1514int vhost_net_set_backend(struct vhost_dev *hdev,
1515 struct vhost_vring_file *file)
1516{
1517 if (hdev->vhost_ops->vhost_net_set_backend) {
1518 return hdev->vhost_ops->vhost_net_set_backend(hdev, file);
1519 }
1520
1521 return -1;
1522}