d5970055
MT
1/*
2 * vhost support
3 *
4 * Copyright Red Hat, Inc. 2010
5 *
6 * Authors:
7 * Michael S. Tsirkin <mst@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
6b620ca3
PB
11 *
12 * Contributions after 2012-01-13 are licensed under the terms of the
13 * GNU GPL, version 2 or (at your option) any later version.
d5970055
MT
14 */
15
9b8bfe21 16#include "qemu/osdep.h"
da34e65c 17#include "qapi/error.h"
0d09e41a 18#include "hw/virtio/vhost.h"
d5970055 19#include "hw/hw.h"
5444e768 20#include "qemu/atomic.h"
1de7afc9 21#include "qemu/range.h"
04b7a152 22#include "qemu/error-report.h"
15324404 23#include "qemu/memfd.h"
11078ae3 24#include <linux/vhost.h>
022c62cb 25#include "exec/address-spaces.h"
1c819449 26#include "hw/virtio/virtio-bus.h"
04b7a152 27#include "hw/virtio/virtio-access.h"
795c40b8 28#include "migration/blocker.h"
c471ad0e 29#include "sysemu/dma.h"
d5970055 30
162bba7f
MAL
31/* enabled until disconnected backend stabilizes */
32#define _VHOST_DEBUG 1
33
34#ifdef _VHOST_DEBUG
35#define VHOST_OPS_DEBUG(fmt, ...) \
36 do { error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
37 strerror(errno), errno); } while (0)
38#else
39#define VHOST_OPS_DEBUG(fmt, ...) \
40 do { } while (0)
41#endif
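/*
 * Usage note for the macro above: callers pass a printf-style message and
 * the macro appends strerror(errno) and errno, e.g.
 *
 *     VHOST_OPS_DEBUG("vhost_set_vring_num failed");
 *
 * reports something like "vhost_set_vring_num failed: Invalid argument (22)"
 * when _VHOST_DEBUG is defined, and compiles to an empty statement otherwise.
 * The errno value shown is illustrative only.
 */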
42
309750fa 43static struct vhost_log *vhost_log;
15324404 44static struct vhost_log *vhost_log_shm;
309750fa 45
2ce68e4c
IM
46static unsigned int used_memslots;
47static QLIST_HEAD(, vhost_dev) vhost_devices =
48 QLIST_HEAD_INITIALIZER(vhost_devices);
49
50bool vhost_has_free_slot(void)
51{
52 unsigned int slots_limit = ~0U;
53 struct vhost_dev *hdev;
54
55 QLIST_FOREACH(hdev, &vhost_devices, entry) {
56 unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
57 slots_limit = MIN(slots_limit, r);
58 }
59 return slots_limit > used_memslots;
60}
61
d5970055 62static void vhost_dev_sync_region(struct vhost_dev *dev,
2817b260 63 MemoryRegionSection *section,
d5970055
MT
64 uint64_t mfirst, uint64_t mlast,
65 uint64_t rfirst, uint64_t rlast)
66{
309750fa
JW
67 vhost_log_chunk_t *log = dev->log->log;
68
d5970055
MT
69 uint64_t start = MAX(mfirst, rfirst);
70 uint64_t end = MIN(mlast, rlast);
309750fa
JW
71 vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK;
72 vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1;
33c5793b 73 uint64_t addr = QEMU_ALIGN_DOWN(start, VHOST_LOG_CHUNK);
d5970055 74
d5970055
MT
75 if (end < start) {
76 return;
77 }
e314672a 78 assert(end / VHOST_LOG_CHUNK < dev->log_size);
fbbaf9ae 79 assert(start / VHOST_LOG_CHUNK < dev->log_size);
e314672a 80
d5970055
MT
81 for (;from < to; ++from) {
82 vhost_log_chunk_t log;
d5970055
MT
83 /* We first check with non-atomic: much cheaper,
84 * and we expect non-dirty to be the common case. */
85 if (!*from) {
0c600ce2 86 addr += VHOST_LOG_CHUNK;
d5970055
MT
87 continue;
88 }
5444e768
PB
89 /* Data must be read atomically. We don't really need barrier semantics
90 * but it's easier to use atomic_* than roll our own. */
91 log = atomic_xchg(from, 0);
747eb78b
NC
92 while (log) {
93 int bit = ctzl(log);
6b37a23d
MT
94 hwaddr page_addr;
95 hwaddr section_offset;
96 hwaddr mr_offset;
6b37a23d
MT
97 page_addr = addr + bit * VHOST_LOG_PAGE;
98 section_offset = page_addr - section->offset_within_address_space;
99 mr_offset = section_offset + section->offset_within_region;
100 memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
d5970055
MT
101 log &= ~(0x1ull << bit);
102 }
103 addr += VHOST_LOG_CHUNK;
104 }
105}
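/*
 * Layout note for the sync loop above (a sketch based on the VHOST_LOG_*
 * definitions in hw/virtio/vhost.h): the log is an array of
 * vhost_log_chunk_t words, each word covering VHOST_LOG_CHUNK bytes of
 * guest memory and each bit within it covering one VHOST_LOG_PAGE. For a
 * dirty page at guest physical address gpa this amounts to
 *
 *     chunk word:  log[gpa / VHOST_LOG_CHUNK]
 *     bit in word: (gpa % VHOST_LOG_CHUNK) / VHOST_LOG_PAGE
 *
 * so with 64-bit chunk words one word tracks the dirty state of 64 pages.
 */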
106
04097f7c 107static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
2817b260 108 MemoryRegionSection *section,
6b37a23d
MT
109 hwaddr first,
110 hwaddr last)
d5970055 111{
d5970055 112 int i;
6b37a23d
MT
113 hwaddr start_addr;
114 hwaddr end_addr;
04097f7c 115
d5970055
MT
116 if (!dev->log_enabled || !dev->started) {
117 return 0;
118 }
6b37a23d 119 start_addr = section->offset_within_address_space;
052e87b0 120 end_addr = range_get_last(start_addr, int128_get64(section->size));
6b37a23d
MT
121 start_addr = MAX(first, start_addr);
122 end_addr = MIN(last, end_addr);
123
d5970055
MT
124 for (i = 0; i < dev->mem->nregions; ++i) {
125 struct vhost_memory_region *reg = dev->mem->regions + i;
2817b260 126 vhost_dev_sync_region(dev, section, start_addr, end_addr,
d5970055
MT
127 reg->guest_phys_addr,
128 range_get_last(reg->guest_phys_addr,
129 reg->memory_size));
130 }
131 for (i = 0; i < dev->nvqs; ++i) {
132 struct vhost_virtqueue *vq = dev->vqs + i;
2817b260 133 vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
d5970055
MT
134 range_get_last(vq->used_phys, vq->used_size));
135 }
136 return 0;
137}
138
04097f7c
AK
139static void vhost_log_sync(MemoryListener *listener,
140 MemoryRegionSection *section)
141{
142 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
143 memory_listener);
6b37a23d
MT
144 vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
145}
04097f7c 146
6b37a23d
MT
147static void vhost_log_sync_range(struct vhost_dev *dev,
148 hwaddr first, hwaddr last)
149{
150 int i;
151 /* FIXME: this is N^2 in number of sections */
152 for (i = 0; i < dev->n_mem_sections; ++i) {
153 MemoryRegionSection *section = &dev->mem_sections[i];
154 vhost_sync_dirty_bitmap(dev, section, first, last);
155 }
04097f7c
AK
156}
157
d5970055
MT
158static uint64_t vhost_get_log_size(struct vhost_dev *dev)
159{
160 uint64_t log_size = 0;
161 int i;
162 for (i = 0; i < dev->mem->nregions; ++i) {
163 struct vhost_memory_region *reg = dev->mem->regions + i;
164 uint64_t last = range_get_last(reg->guest_phys_addr,
165 reg->memory_size);
166 log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
167 }
168 for (i = 0; i < dev->nvqs; ++i) {
169 struct vhost_virtqueue *vq = dev->vqs + i;
170 uint64_t last = vq->used_phys + vq->used_size - 1;
171 log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
172 }
173 return log_size;
174}
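/*
 * Worked example for the size computation above (hedged: it assumes the
 * usual 0x1000-byte VHOST_LOG_PAGE and 64-bit vhost_log_chunk_t, i.e.
 * 256 KiB of guest memory per chunk): a single 1 GiB RAM region starting at
 * GPA 0 ends at 0x3FFFFFFF, so it needs 0x3FFFFFFF / VHOST_LOG_CHUNK + 1 =
 * 4096 chunks, giving a 32 KiB dirty log. Vring used rings are accounted
 * for in the same way.
 */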
15324404
MAL
175
176static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
309750fa 177{
0f2956f9 178 Error *err = NULL;
15324404
MAL
179 struct vhost_log *log;
180 uint64_t logsize = size * sizeof(*(log->log));
181 int fd = -1;
182
183 log = g_new0(struct vhost_log, 1);
184 if (share) {
185 log->log = qemu_memfd_alloc("vhost-log", logsize,
186 F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
0f2956f9
MAL
187 &fd, &err);
188 if (err) {
189 error_report_err(err);
190 g_free(log);
191 return NULL;
192 }
15324404
MAL
193 memset(log->log, 0, logsize);
194 } else {
195 log->log = g_malloc0(logsize);
196 }
309750fa
JW
197
198 log->size = size;
199 log->refcnt = 1;
15324404 200 log->fd = fd;
309750fa
JW
201
202 return log;
203}
204
15324404 205static struct vhost_log *vhost_log_get(uint64_t size, bool share)
309750fa 206{
15324404
MAL
207 struct vhost_log *log = share ? vhost_log_shm : vhost_log;
208
209 if (!log || log->size != size) {
210 log = vhost_log_alloc(size, share);
211 if (share) {
212 vhost_log_shm = log;
213 } else {
214 vhost_log = log;
215 }
309750fa 216 } else {
15324404 217 ++log->refcnt;
309750fa
JW
218 }
219
15324404 220 return log;
309750fa
JW
221}
222
223static void vhost_log_put(struct vhost_dev *dev, bool sync)
224{
225 struct vhost_log *log = dev->log;
226
227 if (!log) {
228 return;
229 }
230
231 --log->refcnt;
232 if (log->refcnt == 0) {
233 /* Sync only the range covered by the old log */
234 if (dev->log_size && sync) {
235 vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
236 }
15324404 237
309750fa 238 if (vhost_log == log) {
15324404 239 g_free(log->log);
309750fa 240 vhost_log = NULL;
15324404
MAL
241 } else if (vhost_log_shm == log) {
242 qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
243 log->fd);
244 vhost_log_shm = NULL;
309750fa 245 }
15324404 246
309750fa
JW
247 g_free(log);
248 }
5c0ba1be
FF
249
250 dev->log = NULL;
251 dev->log_size = 0;
309750fa 252}
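/*
 * Note on the two helpers above: vhost_log and vhost_log_shm are
 * process-wide caches shared by all vhost devices. vhost_log_get() reuses
 * the cached log (bumping refcnt) when the requested size matches,
 * otherwise it allocates a replacement; vhost_log_put() drops the reference
 * and, for the last user, syncs the range the old log covered and frees
 * either the malloc'ed buffer or the memfd-backed one, depending on which
 * cache it came from.
 */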
d5970055 253
15324404
MAL
254static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
255{
256 return dev->vhost_ops->vhost_requires_shm_log &&
257 dev->vhost_ops->vhost_requires_shm_log(dev);
258}
259
260static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
d5970055 261{
15324404 262 struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
309750fa 263 uint64_t log_base = (uintptr_t)log->log;
6b37a23d 264 int r;
6528499f 265
636f4ddd
MAL
266 /* Inform the backend of the log switch; this must be done before
 267 releasing the current log, to ensure no logging is lost. */
9a78a5dd 268 r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
162bba7f
MAL
269 if (r < 0) {
270 VHOST_OPS_DEBUG("vhost_set_log_base failed");
271 }
272
309750fa 273 vhost_log_put(dev, true);
d5970055
MT
274 dev->log = log;
275 dev->log_size = size;
276}
277
c471ad0e
JW
278static int vhost_dev_has_iommu(struct vhost_dev *dev)
279{
280 VirtIODevice *vdev = dev->vdev;
c471ad0e 281
375f74f4 282 return virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
c471ad0e
JW
283}
284
285static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
286 hwaddr *plen, int is_write)
287{
288 if (!vhost_dev_has_iommu(dev)) {
289 return cpu_physical_memory_map(addr, plen, is_write);
290 } else {
291 return (void *)(uintptr_t)addr;
292 }
293}
294
295static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer,
296 hwaddr len, int is_write,
297 hwaddr access_len)
298{
299 if (!vhost_dev_has_iommu(dev)) {
300 cpu_physical_memory_unmap(buffer, len, is_write, access_len);
301 }
302}
f1f9e6c5 303
0ca1fd2d
DDAG
304static int vhost_verify_ring_part_mapping(void *ring_hva,
305 uint64_t ring_gpa,
306 uint64_t ring_size,
307 void *reg_hva,
308 uint64_t reg_gpa,
309 uint64_t reg_size)
f1f9e6c5 310{
0ca1fd2d
DDAG
311 uint64_t hva_ring_offset;
312 uint64_t ring_last = range_get_last(ring_gpa, ring_size);
313 uint64_t reg_last = range_get_last(reg_gpa, reg_size);
f1f9e6c5 314
0ca1fd2d 315 if (ring_last < reg_gpa || ring_gpa > reg_last) {
f1f9e6c5
GK
316 return 0;
317 }
0ca1fd2d
DDAG
318 /* check that the whole ring is mapped */
319 if (ring_last > reg_last) {
320 return -ENOMEM;
f1f9e6c5 321 }
0ca1fd2d
DDAG
322 /* check that ring's MemoryRegion wasn't replaced */
323 hva_ring_offset = ring_gpa - reg_gpa;
324 if (ring_hva != reg_hva + hva_ring_offset) {
325 return -EBUSY;
f1f9e6c5 326 }
0ca1fd2d
DDAG
327
328 return 0;
f1f9e6c5
GK
329}
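/*
 * Return values of the helper above: 0 when the ring part does not overlap
 * the region at all, or when it is fully contained and still mapped at the
 * same host address; -ENOMEM when the part spills past the end of the
 * region; -EBUSY when the region was replaced so the part's host address no
 * longer matches reg_hva + (ring_gpa - reg_gpa).
 */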
330
d5970055 331static int vhost_verify_ring_mappings(struct vhost_dev *dev,
0ca1fd2d
DDAG
332 void *reg_hva,
333 uint64_t reg_gpa,
334 uint64_t reg_size)
d5970055 335{
f1f9e6c5 336 int i, j;
8617343f 337 int r = 0;
f1f9e6c5
GK
338 const char *part_name[] = {
339 "descriptor table",
340 "available ring",
341 "used ring"
342 };
8617343f 343
f1f9e6c5 344 for (i = 0; i < dev->nvqs; ++i) {
d5970055 345 struct vhost_virtqueue *vq = dev->vqs + i;
d5970055 346
f1f9e6c5 347 j = 0;
0ca1fd2d
DDAG
348 r = vhost_verify_ring_part_mapping(
349 vq->desc, vq->desc_phys, vq->desc_size,
350 reg_hva, reg_gpa, reg_size);
2fe45ec3 351 if (r) {
f1f9e6c5 352 break;
d5970055 353 }
f1f9e6c5
GK
354
355 j++;
0ca1fd2d
DDAG
356 r = vhost_verify_ring_part_mapping(
357 vq->avail, vq->avail_phys, vq->avail_size,
358 reg_hva, reg_gpa, reg_size);
2fe45ec3 359 if (r) {
f1f9e6c5 360 break;
d5970055 361 }
f1f9e6c5
GK
362
363 j++;
0ca1fd2d
DDAG
364 r = vhost_verify_ring_part_mapping(
365 vq->used, vq->used_phys, vq->used_size,
366 reg_hva, reg_gpa, reg_size);
2fe45ec3 367 if (r) {
f1f9e6c5 368 break;
d5970055 369 }
f1f9e6c5
GK
370 }
371
372 if (r == -ENOMEM) {
373 error_report("Unable to map %s for ring %d", part_name[j], i);
374 } else if (r == -EBUSY) {
375 error_report("%s relocated for ring %d", part_name[j], i);
d5970055 376 }
8617343f 377 return r;
d5970055
MT
378}
379
af603142
NB
380static bool vhost_section(MemoryRegionSection *section)
381{
d56ec1e9
MT
382 return memory_region_is_ram(section->mr) &&
383 !memory_region_is_rom(section->mr);
af603142
NB
384}
385
386static void vhost_begin(MemoryListener *listener)
387{
388 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
389 memory_listener);
c44317ef
DDAG
390 dev->tmp_sections = NULL;
391 dev->n_tmp_sections = 0;
af603142 392}
d5970055 393
af603142
NB
394static void vhost_commit(MemoryListener *listener)
395{
396 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
397 memory_listener);
c44317ef
DDAG
398 MemoryRegionSection *old_sections;
399 int n_old_sections;
af603142 400 uint64_t log_size;
ade6d081 401 size_t regions_size;
af603142 402 int r;
0ca1fd2d 403 int i;
ade6d081 404 bool changed = false;
af603142 405
ade6d081
DDAG
406 /* Note we can be called before the device is started, but then
407 * starting the device calls set_mem_table, so we need to have
408 * built the data structures.
409 */
c44317ef
DDAG
410 old_sections = dev->mem_sections;
411 n_old_sections = dev->n_mem_sections;
412 dev->mem_sections = dev->tmp_sections;
413 dev->n_mem_sections = dev->n_tmp_sections;
414
ade6d081
DDAG
415 if (dev->n_mem_sections != n_old_sections) {
416 changed = true;
417 } else {
418 /* Same size, lets check the contents */
419 changed = n_old_sections && memcmp(dev->mem_sections, old_sections,
420 n_old_sections * sizeof(old_sections[0])) != 0;
af603142 421 }
ade6d081
DDAG
422
423 trace_vhost_commit(dev->started, changed);
424 if (!changed) {
c44317ef 425 goto out;
d5970055 426 }
ade6d081
DDAG
427
428 /* Rebuild the regions list from the new sections list */
429 regions_size = offsetof(struct vhost_memory, regions) +
430 dev->n_mem_sections * sizeof dev->mem->regions[0];
431 dev->mem = g_realloc(dev->mem, regions_size);
432 dev->mem->nregions = dev->n_mem_sections;
433 used_memslots = dev->mem->nregions;
434 for (i = 0; i < dev->n_mem_sections; i++) {
435 struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
436 struct MemoryRegionSection *mrs = dev->mem_sections + i;
437
438 cur_vmr->guest_phys_addr = mrs->offset_within_address_space;
439 cur_vmr->memory_size = int128_get64(mrs->size);
440 cur_vmr->userspace_addr =
441 (uintptr_t)memory_region_get_ram_ptr(mrs->mr) +
442 mrs->offset_within_region;
443 cur_vmr->flags_padding = 0;
444 }
445
446 if (!dev->started) {
c44317ef 447 goto out;
af603142 448 }
d5970055 449
0ca1fd2d
DDAG
450 for (i = 0; i < dev->mem->nregions; i++) {
451 if (vhost_verify_ring_mappings(dev,
452 (void *)(uintptr_t)dev->mem->regions[i].userspace_addr,
453 dev->mem->regions[i].guest_phys_addr,
454 dev->mem->regions[i].memory_size)) {
455 error_report("Verify ring failure on region %d", i);
456 abort();
457 }
d5970055
MT
458 }
459
460 if (!dev->log_enabled) {
21e70425 461 r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
162bba7f
MAL
462 if (r < 0) {
463 VHOST_OPS_DEBUG("vhost_set_mem_table failed");
464 }
c44317ef 465 goto out;
d5970055
MT
466 }
467 log_size = vhost_get_log_size(dev);
468 /* We allocate an extra 4K bytes to log,
469 * to reduce the number of reallocations. */
470#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
471 /* To log more, must increase log size before table update. */
472 if (dev->log_size < log_size) {
473 vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
474 }
21e70425 475 r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
162bba7f
MAL
476 if (r < 0) {
477 VHOST_OPS_DEBUG("vhost_set_mem_table failed");
478 }
d5970055
MT
479 /* To log less, can only decrease log size after table update. */
480 if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
481 vhost_dev_log_resize(dev, log_size);
482 }
c44317ef
DDAG
483
484out:
485 /* Deref the old list of sections, this must happen _after_ the
486 * vhost_set_mem_table to ensure the client isn't still using the
487 * section we're about to unref.
488 */
489 while (n_old_sections--) {
490 memory_region_unref(old_sections[n_old_sections].mr);
491 }
492 g_free(old_sections);
493 return;
494}
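/*
 * Shape of the table handed to the backend by vhost_commit() (one entry per
 * flattened section); the struct comes from <linux/vhost.h>:
 *
 *     struct vhost_memory_region {
 *         __u64 guest_phys_addr;  // mrs->offset_within_address_space
 *         __u64 memory_size;      // int128_get64(mrs->size)
 *         __u64 userspace_addr;   // ram_ptr(mrs->mr) + offset_within_region
 *         __u64 flags_padding;    // always 0 here
 *     };
 *
 * When dirty logging is active the log is grown before the table update and
 * only shrunk afterwards, so no write can land outside the current log.
 */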
495
48d7c975
DDAG
496/* Adds the section data to the tmp_section structure.
497 * It relies on the listener calling us in memory address order
498 * and on being called for each region (via the _add and _nop
 499 * callbacks) so that neighbouring sections can be joined.
500 */
501static void vhost_region_add_section(struct vhost_dev *dev,
502 MemoryRegionSection *section)
c44317ef 503{
48d7c975
DDAG
504 bool need_add = true;
505 uint64_t mrs_size = int128_get64(section->size);
506 uint64_t mrs_gpa = section->offset_within_address_space;
507 uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
508 section->offset_within_region;
509
510 trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
511 mrs_host);
512
513 bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &
514 ~(1 << DIRTY_MEMORY_MIGRATION);
515 if (log_dirty) {
516 return;
517 }
518
519 if (dev->n_tmp_sections) {
520 /* Since we already have at least one section, let's see if
521 * this extends it; since we're scanning in order, we only
522 * have to look at the last one, and the FlatView that calls
523 * us shouldn't have overlaps.
524 */
525 MemoryRegionSection *prev_sec = dev->tmp_sections +
526 (dev->n_tmp_sections - 1);
527 uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
528 uint64_t prev_size = int128_get64(prev_sec->size);
529 uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
530 uint64_t prev_host_start =
531 (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
532 prev_sec->offset_within_region;
533 uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
534
535 if (prev_gpa_end + 1 == mrs_gpa &&
536 prev_host_end + 1 == mrs_host &&
537 section->mr == prev_sec->mr &&
538 (!dev->vhost_ops->vhost_backend_can_merge ||
539 dev->vhost_ops->vhost_backend_can_merge(dev,
540 mrs_host, mrs_size,
541 prev_host_start, prev_size))) {
542 /* The two sections abut */
543 need_add = false;
544 prev_sec->size = int128_add(prev_sec->size, section->size);
545 trace_vhost_region_add_section_abut(section->mr->name,
546 mrs_size + prev_size);
547 }
548 }
549
550 if (need_add) {
551 ++dev->n_tmp_sections;
552 dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
553 dev->n_tmp_sections);
554 dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
555 /* The flatview isn't stable and we don't use it, making it NULL
556 * means we can memcmp the list.
557 */
558 dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
559 memory_region_ref(section->mr);
560 }
50c1e149
AK
561}
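/*
 * Merging example for the function above: two sections are folded into one
 * entry only when they are contiguous in guest physical space, contiguous
 * in host virtual space, belong to the same MemoryRegion, and the backend
 * (if it implements vhost_backend_can_merge) accepts the combined range.
 * This helps keep the resulting region table within the backend's memslot
 * limit.
 */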
562
938eeb64
DDAG
563/* Used for both add and nop callbacks */
564static void vhost_region_addnop(MemoryListener *listener,
565 MemoryRegionSection *section)
04097f7c 566{
2817b260
AK
567 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
568 memory_listener);
569
c49450b9
AK
570 if (!vhost_section(section)) {
571 return;
572 }
48d7c975 573 vhost_region_add_section(dev, section);
04097f7c
AK
574}
575
375f74f4
JW
576static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
577{
578 struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
579 struct vhost_dev *hdev = iommu->hdev;
580 hwaddr iova = iotlb->iova + iommu->iommu_offset;
581
020e571b
MC
582 if (vhost_backend_invalidate_device_iotlb(hdev, iova,
583 iotlb->addr_mask + 1)) {
375f74f4
JW
584 error_report("Fail to invalidate device iotlb");
585 }
586}
587
588static void vhost_iommu_region_add(MemoryListener *listener,
589 MemoryRegionSection *section)
590{
591 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
592 iommu_listener);
593 struct vhost_iommu *iommu;
698feb5e 594 Int128 end;
375f74f4
JW
595
596 if (!memory_region_is_iommu(section->mr)) {
597 return;
598 }
599
600 iommu = g_malloc0(sizeof(*iommu));
698feb5e
PX
601 end = int128_add(int128_make64(section->offset_within_region),
602 section->size);
603 end = int128_sub(end, int128_one());
604 iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
605 IOMMU_NOTIFIER_UNMAP,
606 section->offset_within_region,
607 int128_get64(end));
375f74f4
JW
608 iommu->mr = section->mr;
609 iommu->iommu_offset = section->offset_within_address_space -
610 section->offset_within_region;
611 iommu->hdev = dev;
612 memory_region_register_iommu_notifier(section->mr, &iommu->n);
613 QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next);
614 /* TODO: can replay help performance here? */
615}
616
617static void vhost_iommu_region_del(MemoryListener *listener,
618 MemoryRegionSection *section)
619{
620 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
621 iommu_listener);
622 struct vhost_iommu *iommu;
623
624 if (!memory_region_is_iommu(section->mr)) {
625 return;
626 }
627
628 QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
698feb5e
PX
629 if (iommu->mr == section->mr &&
630 iommu->n.start == section->offset_within_region) {
375f74f4
JW
631 memory_region_unregister_iommu_notifier(iommu->mr,
632 &iommu->n);
633 QLIST_REMOVE(iommu, iommu_next);
634 g_free(iommu);
635 break;
636 }
637 }
638}
639
d5970055
MT
640static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
641 struct vhost_virtqueue *vq,
642 unsigned idx, bool enable_log)
643{
644 struct vhost_vring_addr addr = {
645 .index = idx,
2b3af999
SW
646 .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
647 .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
648 .used_user_addr = (uint64_t)(unsigned long)vq->used,
d5970055
MT
649 .log_guest_addr = vq->used_phys,
650 .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
651 };
21e70425 652 int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
d5970055 653 if (r < 0) {
c6409692 654 VHOST_OPS_DEBUG("vhost_set_vring_addr failed");
d5970055
MT
655 return -errno;
656 }
657 return 0;
658}
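/*
 * Note on the addresses programmed above: desc/avail/used_user_addr carry
 * whatever vhost_memory_map() returned, i.e. host virtual addresses in the
 * non-IOMMU case and the untranslated addresses when
 * VIRTIO_F_IOMMU_PLATFORM is in use; log_guest_addr is vq->used_phys, which
 * is what the backend uses to mark used-ring updates in the dirty log.
 */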
659
c471ad0e
JW
660static int vhost_dev_set_features(struct vhost_dev *dev,
661 bool enable_log)
d5970055
MT
662{
663 uint64_t features = dev->acked_features;
664 int r;
665 if (enable_log) {
9a2ba823 666 features |= 0x1ULL << VHOST_F_LOG_ALL;
d5970055 667 }
21e70425 668 r = dev->vhost_ops->vhost_set_features(dev, features);
c6409692
MAL
669 if (r < 0) {
670 VHOST_OPS_DEBUG("vhost_set_features failed");
671 }
d5970055
MT
672 return r < 0 ? -errno : 0;
673}
674
675static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
676{
162bba7f 677 int r, i, idx;
d5970055
MT
678 r = vhost_dev_set_features(dev, enable_log);
679 if (r < 0) {
680 goto err_features;
681 }
682 for (i = 0; i < dev->nvqs; ++i) {
25a2a920
TC
683 idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
684 r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
d5970055
MT
685 enable_log);
686 if (r < 0) {
687 goto err_vq;
688 }
689 }
690 return 0;
691err_vq:
692 for (; i >= 0; --i) {
25a2a920 693 idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
162bba7f
MAL
694 vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
695 dev->log_enabled);
d5970055 696 }
162bba7f 697 vhost_dev_set_features(dev, dev->log_enabled);
d5970055
MT
698err_features:
699 return r;
700}
701
04097f7c 702static int vhost_migration_log(MemoryListener *listener, int enable)
d5970055 703{
04097f7c
AK
704 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
705 memory_listener);
d5970055
MT
706 int r;
707 if (!!enable == dev->log_enabled) {
708 return 0;
709 }
710 if (!dev->started) {
711 dev->log_enabled = enable;
712 return 0;
713 }
714 if (!enable) {
715 r = vhost_dev_set_log(dev, false);
716 if (r < 0) {
717 return r;
718 }
309750fa 719 vhost_log_put(dev, false);
d5970055
MT
720 } else {
721 vhost_dev_log_resize(dev, vhost_get_log_size(dev));
722 r = vhost_dev_set_log(dev, true);
723 if (r < 0) {
724 return r;
725 }
726 }
727 dev->log_enabled = enable;
728 return 0;
729}
730
04097f7c
AK
731static void vhost_log_global_start(MemoryListener *listener)
732{
733 int r;
734
735 r = vhost_migration_log(listener, true);
736 if (r < 0) {
737 abort();
738 }
739}
740
741static void vhost_log_global_stop(MemoryListener *listener)
742{
743 int r;
744
745 r = vhost_migration_log(listener, false);
746 if (r < 0) {
747 abort();
748 }
749}
750
751static void vhost_log_start(MemoryListener *listener,
b2dfd71c
PB
752 MemoryRegionSection *section,
753 int old, int new)
04097f7c
AK
754{
755 /* FIXME: implement */
756}
757
758static void vhost_log_stop(MemoryListener *listener,
b2dfd71c
PB
759 MemoryRegionSection *section,
760 int old, int new)
04097f7c
AK
761{
762 /* FIXME: implement */
763}
764
46f70ff1
GK
765/* The vhost driver natively knows how to handle the vrings of non
766 * cross-endian legacy devices and modern devices. Only legacy devices
767 * exposed to a bi-endian guest may require the vhost driver to use a
768 * specific endianness.
769 */
a122ab24
GK
770static inline bool vhost_needs_vring_endian(VirtIODevice *vdev)
771{
e5848123
GK
772 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
773 return false;
774 }
a122ab24 775#ifdef HOST_WORDS_BIGENDIAN
46f70ff1 776 return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE;
a122ab24 777#else
46f70ff1 778 return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
a122ab24 779#endif
a122ab24
GK
780}
781
04b7a152
GK
782static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
783 bool is_big_endian,
784 int vhost_vq_index)
785{
786 struct vhost_vring_state s = {
787 .index = vhost_vq_index,
788 .num = is_big_endian
789 };
790
21e70425 791 if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) {
04b7a152
GK
792 return 0;
793 }
794
c6409692 795 VHOST_OPS_DEBUG("vhost_set_vring_endian failed");
04b7a152
GK
796 if (errno == ENOTTY) {
797 error_report("vhost does not support cross-endian");
798 return -ENOSYS;
799 }
800
801 return -errno;
802}
803
c471ad0e
JW
804static int vhost_memory_region_lookup(struct vhost_dev *hdev,
805 uint64_t gpa, uint64_t *uaddr,
806 uint64_t *len)
807{
808 int i;
809
810 for (i = 0; i < hdev->mem->nregions; i++) {
811 struct vhost_memory_region *reg = hdev->mem->regions + i;
812
813 if (gpa >= reg->guest_phys_addr &&
814 reg->guest_phys_addr + reg->memory_size > gpa) {
815 *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr;
816 *len = reg->guest_phys_addr + reg->memory_size - gpa;
817 return 0;
818 }
819 }
820
821 return -EFAULT;
822}
823
fc58bd0d 824int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
c471ad0e
JW
825{
826 IOMMUTLBEntry iotlb;
827 uint64_t uaddr, len;
fc58bd0d 828 int ret = -EFAULT;
c471ad0e
JW
829
830 rcu_read_lock();
831
832 iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
833 iova, write);
834 if (iotlb.target_as != NULL) {
fc58bd0d
MC
835 ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
836 &uaddr, &len);
837 if (ret) {
c471ad0e
JW
838 error_report("Fail to lookup the translated address "
839 "%"PRIx64, iotlb.translated_addr);
840 goto out;
841 }
842
843 len = MIN(iotlb.addr_mask + 1, len);
844 iova = iova & ~iotlb.addr_mask;
845
020e571b
MC
846 ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
847 len, iotlb.perm);
fc58bd0d 848 if (ret) {
c471ad0e
JW
849 error_report("Fail to update device iotlb");
850 goto out;
851 }
852 }
853out:
854 rcu_read_unlock();
fc58bd0d
MC
855
856 return ret;
c471ad0e
JW
857}
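/*
 * Hedged note on the miss path above: this is typically invoked from the
 * backend code when it reports an IOTLB miss for iova. The flow is: ask the
 * device's DMA address space for the translation, map the translated guest
 * physical address back to a host virtual address via the registered memory
 * regions, then push an IOTLB update (clipped to the translation's
 * addr_mask) to the backend so it can retry the access.
 */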
858
f56a1247 859static int vhost_virtqueue_start(struct vhost_dev *dev,
d5970055
MT
860 struct VirtIODevice *vdev,
861 struct vhost_virtqueue *vq,
862 unsigned idx)
863{
96a3d98d
JW
864 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
865 VirtioBusState *vbus = VIRTIO_BUS(qbus);
866 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
a8170e5e 867 hwaddr s, l, a;
d5970055 868 int r;
21e70425 869 int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
d5970055 870 struct vhost_vring_file file = {
a9f98bb5 871 .index = vhost_vq_index
d5970055
MT
872 };
873 struct vhost_vring_state state = {
a9f98bb5 874 .index = vhost_vq_index
d5970055
MT
875 };
876 struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
877
a9f98bb5 878
d5970055 879 vq->num = state.num = virtio_queue_get_num(vdev, idx);
21e70425 880 r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
d5970055 881 if (r) {
c6409692 882 VHOST_OPS_DEBUG("vhost_set_vring_num failed");
d5970055
MT
883 return -errno;
884 }
885
886 state.num = virtio_queue_get_last_avail_idx(vdev, idx);
21e70425 887 r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
d5970055 888 if (r) {
c6409692 889 VHOST_OPS_DEBUG("vhost_set_vring_base failed");
d5970055
MT
890 return -errno;
891 }
892
e5848123 893 if (vhost_needs_vring_endian(vdev)) {
04b7a152
GK
894 r = vhost_virtqueue_set_vring_endian_legacy(dev,
895 virtio_is_big_endian(vdev),
896 vhost_vq_index);
897 if (r) {
898 return -errno;
899 }
900 }
901
f1f9e6c5
GK
902 vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
903 vq->desc_phys = a = virtio_queue_get_desc_addr(vdev, idx);
c471ad0e 904 vq->desc = vhost_memory_map(dev, a, &l, 0);
d5970055
MT
905 if (!vq->desc || l != s) {
906 r = -ENOMEM;
907 goto fail_alloc_desc;
908 }
f1f9e6c5
GK
909 vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
910 vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
c471ad0e 911 vq->avail = vhost_memory_map(dev, a, &l, 0);
d5970055
MT
912 if (!vq->avail || l != s) {
913 r = -ENOMEM;
914 goto fail_alloc_avail;
915 }
916 vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
917 vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
c471ad0e 918 vq->used = vhost_memory_map(dev, a, &l, 1);
d5970055
MT
919 if (!vq->used || l != s) {
920 r = -ENOMEM;
921 goto fail_alloc_used;
922 }
923
a9f98bb5 924 r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
d5970055
MT
925 if (r < 0) {
926 r = -errno;
927 goto fail_alloc;
928 }
a9f98bb5 929
d5970055 930 file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
21e70425 931 r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
d5970055 932 if (r) {
c6409692 933 VHOST_OPS_DEBUG("vhost_set_vring_kick failed");
c8852121 934 r = -errno;
d5970055
MT
935 goto fail_kick;
936 }
937
f56a1247
MT
938 /* Clear and discard previous events if any. */
939 event_notifier_test_and_clear(&vq->masked_notifier);
d5970055 940
5669655a
VK
941 /* Init vring in unmasked state, unless guest_notifier_mask
942 * will do it later.
943 */
944 if (!vdev->use_guest_notifier_mask) {
945 /* TODO: check and handle errors. */
946 vhost_virtqueue_mask(dev, vdev, idx, false);
947 }
948
96a3d98d
JW
949 if (k->query_guest_notifiers &&
950 k->query_guest_notifiers(qbus->parent) &&
951 virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) {
952 file.fd = -1;
953 r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
954 if (r) {
955 goto fail_vector;
956 }
957 }
958
d5970055
MT
959 return 0;
960
96a3d98d 961fail_vector:
d5970055 962fail_kick:
d5970055 963fail_alloc:
c471ad0e
JW
964 vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
965 0, 0);
d5970055 966fail_alloc_used:
c471ad0e
JW
967 vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
968 0, 0);
d5970055 969fail_alloc_avail:
c471ad0e
JW
970 vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
971 0, 0);
d5970055
MT
972fail_alloc_desc:
973 return r;
974}
975
f56a1247 976static void vhost_virtqueue_stop(struct vhost_dev *dev,
d5970055
MT
977 struct VirtIODevice *vdev,
978 struct vhost_virtqueue *vq,
979 unsigned idx)
980{
21e70425 981 int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
d5970055 982 struct vhost_vring_state state = {
04b7a152 983 .index = vhost_vq_index,
d5970055
MT
984 };
985 int r;
fc57fd99 986
21e70425 987 r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
d5970055 988 if (r < 0) {
c6409692 989 VHOST_OPS_DEBUG("vhost VQ %d ring restore failed: %d", idx, r);
2ae39a11
MC
990 /* Connection to the backend is broken, so let's sync internal
991 * last avail idx to the device used idx.
992 */
993 virtio_queue_restore_last_avail_idx(vdev, idx);
499c5579
MAL
994 } else {
995 virtio_queue_set_last_avail_idx(vdev, idx, state.num);
d5970055 996 }
3561ba14 997 virtio_queue_invalidate_signalled_used(vdev, idx);
aa94d521 998 virtio_queue_update_used_idx(vdev, idx);
04b7a152
GK
999
1000 /* In the cross-endian case, we need to reset the vring endianness to
1001 * native, as legacy devices expect by default.
1002 */
e5848123 1003 if (vhost_needs_vring_endian(vdev)) {
162bba7f
MAL
1004 vhost_virtqueue_set_vring_endian_legacy(dev,
1005 !virtio_is_big_endian(vdev),
1006 vhost_vq_index);
04b7a152
GK
1007 }
1008
c471ad0e
JW
1009 vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
1010 1, virtio_queue_get_used_size(vdev, idx));
1011 vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
1012 0, virtio_queue_get_avail_size(vdev, idx));
1013 vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
1014 0, virtio_queue_get_desc_size(vdev, idx));
d5970055
MT
1015}
1016
80a1ea37
AK
1017static void vhost_eventfd_add(MemoryListener *listener,
1018 MemoryRegionSection *section,
753d5e14 1019 bool match_data, uint64_t data, EventNotifier *e)
80a1ea37
AK
1020{
1021}
1022
1023static void vhost_eventfd_del(MemoryListener *listener,
1024 MemoryRegionSection *section,
753d5e14 1025 bool match_data, uint64_t data, EventNotifier *e)
80a1ea37
AK
1026{
1027}
1028
69e87b32
JW
1029static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
1030 int n, uint32_t timeout)
1031{
1032 int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
1033 struct vhost_vring_state state = {
1034 .index = vhost_vq_index,
1035 .num = timeout,
1036 };
1037 int r;
1038
1039 if (!dev->vhost_ops->vhost_set_vring_busyloop_timeout) {
1040 return -EINVAL;
1041 }
1042
1043 r = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &state);
1044 if (r) {
c6409692 1045 VHOST_OPS_DEBUG("vhost_set_vring_busyloop_timeout failed");
69e87b32
JW
1046 return r;
1047 }
1048
1049 return 0;
1050}
1051
f56a1247
MT
1052static int vhost_virtqueue_init(struct vhost_dev *dev,
1053 struct vhost_virtqueue *vq, int n)
1054{
21e70425 1055 int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
f56a1247 1056 struct vhost_vring_file file = {
b931bfbf 1057 .index = vhost_vq_index,
f56a1247
MT
1058 };
1059 int r = event_notifier_init(&vq->masked_notifier, 0);
1060 if (r < 0) {
1061 return r;
1062 }
1063
1064 file.fd = event_notifier_get_fd(&vq->masked_notifier);
21e70425 1065 r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
f56a1247 1066 if (r) {
c6409692 1067 VHOST_OPS_DEBUG("vhost_set_vring_call failed");
f56a1247
MT
1068 r = -errno;
1069 goto fail_call;
1070 }
c471ad0e
JW
1071
1072 vq->dev = dev;
1073
f56a1247
MT
1074 return 0;
1075fail_call:
1076 event_notifier_cleanup(&vq->masked_notifier);
1077 return r;
1078}
1079
1080static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
1081{
1082 event_notifier_cleanup(&vq->masked_notifier);
1083}
1084
81647a65 1085int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
69e87b32 1086 VhostBackendType backend_type, uint32_t busyloop_timeout)
d5970055
MT
1087{
1088 uint64_t features;
a06db3ec 1089 int i, r, n_initialized_vqs = 0;
fe44dc91 1090 Error *local_err = NULL;
81647a65 1091
c471ad0e 1092 hdev->vdev = NULL;
d2fc4402
MAL
1093 hdev->migration_blocker = NULL;
1094
7cb8a9b9
MAL
1095 r = vhost_set_backend_type(hdev, backend_type);
1096 assert(r >= 0);
1a1bfac9 1097
7cb8a9b9
MAL
1098 r = hdev->vhost_ops->vhost_backend_init(hdev, opaque);
1099 if (r < 0) {
1100 goto fail;
24d1eb33
NN
1101 }
1102
aebf8168 1103 if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
4afba631
MAL
1104 error_report("vhost backend memory slots limit is less"
1105 " than current number of present memory slots");
7cb8a9b9
MAL
1106 r = -1;
1107 goto fail;
aebf8168 1108 }
2ce68e4c 1109
21e70425 1110 r = hdev->vhost_ops->vhost_set_owner(hdev);
d5970055 1111 if (r < 0) {
c6409692 1112 VHOST_OPS_DEBUG("vhost_set_owner failed");
d5970055
MT
1113 goto fail;
1114 }
1115
21e70425 1116 r = hdev->vhost_ops->vhost_get_features(hdev, &features);
d5970055 1117 if (r < 0) {
c6409692 1118 VHOST_OPS_DEBUG("vhost_get_features failed");
d5970055
MT
1119 goto fail;
1120 }
f56a1247 1121
a06db3ec 1122 for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
b931bfbf 1123 r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
f56a1247 1124 if (r < 0) {
a06db3ec 1125 goto fail;
f56a1247
MT
1126 }
1127 }
69e87b32
JW
1128
1129 if (busyloop_timeout) {
1130 for (i = 0; i < hdev->nvqs; ++i) {
1131 r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
1132 busyloop_timeout);
1133 if (r < 0) {
1134 goto fail_busyloop;
1135 }
1136 }
1137 }
1138
d5970055
MT
1139 hdev->features = features;
1140
04097f7c 1141 hdev->memory_listener = (MemoryListener) {
50c1e149
AK
1142 .begin = vhost_begin,
1143 .commit = vhost_commit,
938eeb64
DDAG
1144 .region_add = vhost_region_addnop,
1145 .region_nop = vhost_region_addnop,
04097f7c
AK
1146 .log_start = vhost_log_start,
1147 .log_stop = vhost_log_stop,
1148 .log_sync = vhost_log_sync,
1149 .log_global_start = vhost_log_global_start,
1150 .log_global_stop = vhost_log_global_stop,
80a1ea37
AK
1151 .eventfd_add = vhost_eventfd_add,
1152 .eventfd_del = vhost_eventfd_del,
72e22d2f 1153 .priority = 10
04097f7c 1154 };
d2fc4402 1155
375f74f4
JW
1156 hdev->iommu_listener = (MemoryListener) {
1157 .region_add = vhost_iommu_region_add,
1158 .region_del = vhost_iommu_region_del,
1159 };
c471ad0e 1160
d2fc4402
MAL
1161 if (hdev->migration_blocker == NULL) {
1162 if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
1163 error_setg(&hdev->migration_blocker,
1164 "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
0d34fbab 1165 } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_check()) {
31190ed7
MAL
1166 error_setg(&hdev->migration_blocker,
1167 "Migration disabled: failed to allocate shared memory");
d2fc4402
MAL
1168 }
1169 }
1170
1171 if (hdev->migration_blocker != NULL) {
fe44dc91
AA
1172 r = migrate_add_blocker(hdev->migration_blocker, &local_err);
1173 if (local_err) {
1174 error_report_err(local_err);
1175 error_free(hdev->migration_blocker);
1176 goto fail_busyloop;
1177 }
7145872e 1178 }
d2fc4402 1179
7267c094 1180 hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
2817b260
AK
1181 hdev->n_mem_sections = 0;
1182 hdev->mem_sections = NULL;
d5970055
MT
1183 hdev->log = NULL;
1184 hdev->log_size = 0;
1185 hdev->log_enabled = false;
1186 hdev->started = false;
f6790af6 1187 memory_listener_register(&hdev->memory_listener, &address_space_memory);
5be5f9be 1188 QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
d5970055 1189 return 0;
a06db3ec 1190
69e87b32
JW
1191fail_busyloop:
1192 while (--i >= 0) {
1193 vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0);
1194 }
d5970055 1195fail:
a06db3ec
MAL
1196 hdev->nvqs = n_initialized_vqs;
1197 vhost_dev_cleanup(hdev);
d5970055
MT
1198 return r;
1199}
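/*
 * Hedged usage sketch for the external API in this file: a backend such as
 * vhost-net typically drives a device through roughly this sequence
 * (error handling omitted; the opaque value is a placeholder):
 *
 *     vhost_dev_init(&dev, opaque, VHOST_BACKEND_TYPE_KERNEL, 0);
 *     vhost_dev_enable_notifiers(&dev, vdev);   // hand ioeventfds to vhost
 *     vhost_dev_start(&dev, vdev);              // program mem table + vrings
 *     ...
 *     vhost_dev_stop(&dev, vdev);
 *     vhost_dev_disable_notifiers(&dev, vdev);
 *     vhost_dev_cleanup(&dev);
 *
 * vhost_dev_cleanup() is also what the failure path above falls back to,
 * after trimming nvqs to the number of successfully initialized queues.
 */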
1200
1201void vhost_dev_cleanup(struct vhost_dev *hdev)
1202{
f56a1247 1203 int i;
e0547b59 1204
f56a1247
MT
1205 for (i = 0; i < hdev->nvqs; ++i) {
1206 vhost_virtqueue_cleanup(hdev->vqs + i);
1207 }
5be5f9be
MAL
1208 if (hdev->mem) {
1209 /* those are only safe after successful init */
1210 memory_listener_unregister(&hdev->memory_listener);
1211 QLIST_REMOVE(hdev, entry);
1212 }
7145872e
MT
1213 if (hdev->migration_blocker) {
1214 migrate_del_blocker(hdev->migration_blocker);
1215 error_free(hdev->migration_blocker);
1216 }
7267c094 1217 g_free(hdev->mem);
2817b260 1218 g_free(hdev->mem_sections);
e0547b59
MAL
1219 if (hdev->vhost_ops) {
1220 hdev->vhost_ops->vhost_backend_cleanup(hdev);
1221 }
7b527247 1222 assert(!hdev->log);
e0547b59
MAL
1223
1224 memset(hdev, 0, sizeof(struct vhost_dev));
d5970055
MT
1225}
1226
b0b3db79
MT
1227/* Stop processing guest IO notifications in qemu.
1228 * Start processing them in vhost in kernel.
1229 */
1230int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
1231{
1c819449 1232 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
16617e36 1233 int i, r, e;
4afba631 1234
310837de
PB
1235 /* We will pass the notifiers to the kernel, make sure that QEMU
1236 * doesn't interfere.
1237 */
1238 r = virtio_device_grab_ioeventfd(vdev);
1239 if (r < 0) {
4afba631 1240 error_report("binding does not support host notifiers");
b0b3db79
MT
1241 goto fail;
1242 }
1243
1244 for (i = 0; i < hdev->nvqs; ++i) {
b1f0a33d
CH
1245 r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
1246 true);
b0b3db79 1247 if (r < 0) {
4afba631 1248 error_report("vhost VQ %d notifier binding failed: %d", i, -r);
b0b3db79
MT
1249 goto fail_vq;
1250 }
1251 }
1252
1253 return 0;
1254fail_vq:
1255 while (--i >= 0) {
b1f0a33d
CH
1256 e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
1257 false);
16617e36 1258 if (e < 0) {
4afba631 1259 error_report("vhost VQ %d notifier cleanup error: %d", i, -r);
b0b3db79 1260 }
16617e36 1261 assert (e >= 0);
76143618 1262 virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
b0b3db79 1263 }
310837de 1264 virtio_device_release_ioeventfd(vdev);
b0b3db79
MT
1265fail:
1266 return r;
1267}
1268
1269/* Stop processing guest IO notifications in vhost.
1270 * Start processing them in qemu.
1271 * This might actually run the qemu handlers right away,
1272 * so virtio in qemu must be completely setup when this is called.
1273 */
1274void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
1275{
1c819449 1276 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
b0b3db79
MT
1277 int i, r;
1278
1279 for (i = 0; i < hdev->nvqs; ++i) {
b1f0a33d
CH
1280 r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
1281 false);
b0b3db79 1282 if (r < 0) {
4afba631 1283 error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
b0b3db79
MT
1284 }
1285 assert (r >= 0);
76143618 1286 virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
b0b3db79 1287 }
310837de 1288 virtio_device_release_ioeventfd(vdev);
b0b3db79
MT
1289}
1290
f56a1247
MT
1291/* Test and clear event pending status.
1292 * Should be called after unmask to avoid losing events.
1293 */
1294bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
1295{
a9f98bb5 1296 struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
a9f98bb5 1297 assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
f56a1247
MT
1298 return event_notifier_test_and_clear(&vq->masked_notifier);
1299}
1300
1301/* Mask/unmask events from this vq. */
1302void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
1303 bool mask)
1304{
1305 struct VirtQueue *vvq = virtio_get_queue(vdev, n);
a9f98bb5 1306 int r, index = n - hdev->vq_index;
fc57fd99 1307 struct vhost_vring_file file;
f56a1247 1308
8695de0f
MAL
1309 /* should only be called after backend is connected */
1310 assert(hdev->vhost_ops);
1311
f56a1247 1312 if (mask) {
5669655a 1313 assert(vdev->use_guest_notifier_mask);
a9f98bb5 1314 file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
f56a1247
MT
1315 } else {
1316 file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
1317 }
fc57fd99 1318
21e70425
MAL
1319 file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
1320 r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
162bba7f
MAL
1321 if (r < 0) {
1322 VHOST_OPS_DEBUG("vhost_set_vring_call failed");
1323 }
f56a1247
MT
1324}
1325
9a2ba823
CH
1326uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
1327 uint64_t features)
2e6d46d7
NN
1328{
1329 const int *bit = feature_bits;
1330 while (*bit != VHOST_INVALID_FEATURE_BIT) {
9a2ba823 1331 uint64_t bit_mask = (1ULL << *bit);
2e6d46d7
NN
1332 if (!(hdev->features & bit_mask)) {
1333 features &= ~bit_mask;
1334 }
1335 bit++;
1336 }
1337 return features;
1338}
1339
1340void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
9a2ba823 1341 uint64_t features)
2e6d46d7
NN
1342{
1343 const int *bit = feature_bits;
1344 while (*bit != VHOST_INVALID_FEATURE_BIT) {
9a2ba823 1345 uint64_t bit_mask = (1ULL << *bit);
2e6d46d7
NN
1346 if (features & bit_mask) {
1347 hdev->acked_features |= bit_mask;
1348 }
1349 bit++;
1350 }
1351}
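/*
 * Usage sketch for the two helpers above (the array below is hypothetical;
 * real callers such as vhost-net define their own list): feature_bits is a
 * VHOST_INVALID_FEATURE_BIT-terminated list of negotiable bits, e.g.
 *
 *     static const int example_feature_bits[] = {
 *         VIRTIO_F_VERSION_1,
 *         VIRTIO_RING_F_INDIRECT_DESC,
 *         VHOST_INVALID_FEATURE_BIT
 *     };
 *
 * vhost_get_features() clears any listed bit the backend did not offer, and
 * vhost_ack_features() records the guest-accepted subset in
 * hdev->acked_features.
 */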
1352
4c3e257b
CL
1353int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
1354 uint32_t config_len)
1355{
1356 assert(hdev->vhost_ops);
1357
1358 if (hdev->vhost_ops->vhost_get_config) {
1359 return hdev->vhost_ops->vhost_get_config(hdev, config, config_len);
1360 }
1361
1362 return -1;
1363}
1364
1365int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
1366 uint32_t offset, uint32_t size, uint32_t flags)
1367{
1368 assert(hdev->vhost_ops);
1369
1370 if (hdev->vhost_ops->vhost_set_config) {
1371 return hdev->vhost_ops->vhost_set_config(hdev, data, offset,
1372 size, flags);
1373 }
1374
1375 return -1;
1376}
1377
1378void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
1379 const VhostDevConfigOps *ops)
1380{
1381 assert(hdev->vhost_ops);
1382 hdev->config_ops = ops;
1383}
1384
b0b3db79 1385/* Host notifiers must be enabled at this point. */
d5970055
MT
1386int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
1387{
1388 int i, r;
24f4fe34 1389
8695de0f
MAL
1390 /* should only be called after backend is connected */
1391 assert(hdev->vhost_ops);
1392
24f4fe34 1393 hdev->started = true;
c471ad0e 1394 hdev->vdev = vdev;
24f4fe34 1395
d5970055
MT
1396 r = vhost_dev_set_features(hdev, hdev->log_enabled);
1397 if (r < 0) {
54dd9321 1398 goto fail_features;
d5970055 1399 }
c471ad0e
JW
1400
1401 if (vhost_dev_has_iommu(hdev)) {
375f74f4 1402 memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
c471ad0e
JW
1403 }
1404
21e70425 1405 r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
d5970055 1406 if (r < 0) {
c6409692 1407 VHOST_OPS_DEBUG("vhost_set_mem_table failed");
d5970055 1408 r = -errno;
54dd9321 1409 goto fail_mem;
d5970055 1410 }
d154e0ba 1411 for (i = 0; i < hdev->nvqs; ++i) {
f56a1247 1412 r = vhost_virtqueue_start(hdev,
a9f98bb5
JW
1413 vdev,
1414 hdev->vqs + i,
1415 hdev->vq_index + i);
d154e0ba
MT
1416 if (r < 0) {
1417 goto fail_vq;
1418 }
1419 }
1420
d5970055 1421 if (hdev->log_enabled) {
e05ca820
MT
1422 uint64_t log_base;
1423
d5970055 1424 hdev->log_size = vhost_get_log_size(hdev);
15324404
MAL
1425 hdev->log = vhost_log_get(hdev->log_size,
1426 vhost_dev_log_is_shared(hdev));
309750fa 1427 log_base = (uintptr_t)hdev->log->log;
c2bea314 1428 r = hdev->vhost_ops->vhost_set_log_base(hdev,
9a78a5dd
MAL
1429 hdev->log_size ? log_base : 0,
1430 hdev->log);
d5970055 1431 if (r < 0) {
c6409692 1432 VHOST_OPS_DEBUG("vhost_set_log_base failed");
d5970055 1433 r = -errno;
54dd9321 1434 goto fail_log;
d5970055
MT
1435 }
1436 }
d154e0ba 1437
c471ad0e
JW
1438 if (vhost_dev_has_iommu(hdev)) {
1439 hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
1440
1441 /* Update used ring information for IOTLB to work correctly,
1442 * the vhost-kernel code requires this. */
1443 for (i = 0; i < hdev->nvqs; ++i) {
1444 struct vhost_virtqueue *vq = hdev->vqs + i;
1445 vhost_device_iotlb_miss(hdev, vq->used_phys, true);
1446 }
1447 }
d5970055 1448 return 0;
54dd9321 1449fail_log:
24bfa207 1450 vhost_log_put(hdev, false);
d5970055
MT
1451fail_vq:
1452 while (--i >= 0) {
f56a1247 1453 vhost_virtqueue_stop(hdev,
a9f98bb5
JW
1454 vdev,
1455 hdev->vqs + i,
1456 hdev->vq_index + i);
d5970055 1457 }
a9f98bb5 1458 i = hdev->nvqs;
c471ad0e 1459
54dd9321
MT
1460fail_mem:
1461fail_features:
24f4fe34
MT
1462
1463 hdev->started = false;
d5970055
MT
1464 return r;
1465}
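/*
 * Ordering note for vhost_dev_start() above: features are set first (so the
 * backend knows whether VHOST_F_LOG_ALL is on), then the memory table, then
 * each virtqueue; the dirty-log base is programmed only once logging is
 * known to be enabled, and for IOMMU setups the IOTLB callback is installed
 * last, with an explicit vhost_device_iotlb_miss() for each used ring,
 * which the vhost-kernel code requires for its used-ring updates to work
 * correctly.
 */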
1466
b0b3db79 1467/* Host notifiers must be enabled at this point. */
d5970055
MT
1468void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
1469{
a9f98bb5 1470 int i;
54dd9321 1471
8695de0f
MAL
1472 /* should only be called after backend is connected */
1473 assert(hdev->vhost_ops);
1474
d5970055 1475 for (i = 0; i < hdev->nvqs; ++i) {
f56a1247 1476 vhost_virtqueue_stop(hdev,
a9f98bb5
JW
1477 vdev,
1478 hdev->vqs + i,
1479 hdev->vq_index + i);
d5970055 1480 }
54dd9321 1481
c471ad0e
JW
1482 if (vhost_dev_has_iommu(hdev)) {
1483 hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
375f74f4 1484 memory_listener_unregister(&hdev->iommu_listener);
c471ad0e 1485 }
309750fa 1486 vhost_log_put(hdev, true);
d5970055 1487 hdev->started = false;
c471ad0e 1488 hdev->vdev = NULL;
d5970055 1489}
950d94ba
MAL
1490
1491int vhost_net_set_backend(struct vhost_dev *hdev,
1492 struct vhost_vring_file *file)
1493{
1494 if (hdev->vhost_ops->vhost_net_set_backend) {
1495 return hdev->vhost_ops->vhost_net_set_backend(hdev, file);
1496 }
1497
1498 return -1;
1499}