/*
 * vhost shadow virtqueue
 *
 * SPDX-FileCopyrightText: Red Hat, Inc. 2021
 * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"

#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "linux-headers/linux/vhost.h"

/**
 * Validate the transport device features that both the guest can use with the
 * SVQ and the SVQ can use with the device.
 *
 * @features: The features to validate
 * @errp: Error pointer
 */
bool vhost_svq_valid_features(uint64_t features, Error **errp)
{
    bool ok = true;
    uint64_t svq_features = features;

    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
         ++b) {
        switch (b) {
        case VIRTIO_F_ANY_LAYOUT:
            continue;

        case VIRTIO_F_ACCESS_PLATFORM:
            /* SVQ trusts the host's IOMMU to translate addresses */
        case VIRTIO_F_VERSION_1:
            /* SVQ trusts that the guest vring is little endian */
            if (!(svq_features & BIT_ULL(b))) {
                svq_features |= BIT_ULL(b);
                ok = false;
            }
            continue;

        default:
            if (svq_features & BIT_ULL(b)) {
                svq_features &= ~BIT_ULL(b);
                ok = false;
            }
        }
    }

    if (!ok) {
        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
                         ", ok: 0x%"PRIx64, features, svq_features);
    }
    return ok;
}

/**
 * Number of descriptors that the SVQ can make available from the guest.
 *
 * @svq: The svq
 */
static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
{
    return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
}
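
/*
 * Note on the arithmetic above: shadow_avail_idx and shadow_used_idx are
 * free-running uint16_t counters, so their difference stays correct across
 * wraparound. For example, shadow_avail_idx == 0x0001 and shadow_used_idx ==
 * 0xffff yields (uint16_t)(0x0001 - 0xffff) == 2 descriptors in flight.
 */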

/**
 * Translate addresses between qemu's virtual address space and the SVQ IOVA
 *
 * @svq: Shadow VirtQueue
 * @addrs: Destination array for the translated IOVA addresses
 * @iovec: Source iovec with qemu's VA addresses
 * @num: Length of iovec and minimum length of addrs
 */
static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
                                     hwaddr *addrs, const struct iovec *iovec,
                                     size_t num)
{
    if (num == 0) {
        return true;
    }

    for (size_t i = 0; i < num; ++i) {
        DMAMap needle = {
            .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
            .size = iovec[i].iov_len,
        };
        Int128 needle_last, map_last;
        size_t off;

        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
        /*
         * Map cannot be NULL, since the iova tree contains all guest space
         * and qemu already has this address mapped
         */
        if (unlikely(!map)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
                          needle.translated_addr);
            return false;
        }

        off = needle.translated_addr - map->translated_addr;
        addrs[i] = map->iova + off;

        needle_last = int128_add(int128_make64(needle.translated_addr),
                                 int128_make64(iovec[i].iov_len));
        map_last = int128_make64(map->translated_addr + map->size);
        if (unlikely(int128_gt(needle_last, map_last))) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Guest buffer expands over iova range");
            return false;
        }
    }

    return true;
}
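
/*
 * Translation example, assuming a single map with iova 0x1000,
 * translated_addr 0x7f0000001000 and size 0x2000 (all values illustrative):
 * an iovec entry with iov_base 0x7f0000001800 matches that map, off becomes
 * 0x800 and the resulting SVQ IOVA is 0x1800. An entry whose end crosses
 * 0x7f0000003000 would fail the needle_last/map_last check above.
 */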

/**
 * Write descriptors to SVQ vring
 *
 * @svq: The shadow virtqueue
 * @sg: Cache for the translated hwaddr addresses
 * @iovec: The iovec from the guest
 * @num: iovec length
 * @more_descs: True if more descriptors come in the chain
 * @write: True if they are writable descriptors
 *
 * Returns true on success, false otherwise and prints the error.
 */
static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
                                        const struct iovec *iovec, size_t num,
                                        bool more_descs, bool write)
{
    uint16_t i = svq->free_head, last = svq->free_head;
    unsigned n;
    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
    vring_desc_t *descs = svq->vring.desc;
    bool ok;

    if (num == 0) {
        return true;
    }

    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
    if (unlikely(!ok)) {
        return false;
    }

    for (n = 0; n < num; n++) {
        if (more_descs || (n + 1 < num)) {
            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
            /* desc_next is already little endian, like the vring field */
            descs[i].next = svq->desc_next[i];
        } else {
            descs[i].flags = flags;
        }
        descs[i].addr = cpu_to_le64(sg[n]);
        descs[i].len = cpu_to_le32(iovec[n].iov_len);

        last = i;
        i = le16_to_cpu(svq->desc_next[i]);
    }

    svq->free_head = le16_to_cpu(svq->desc_next[last]);
    return true;
}
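
/*
 * The descriptors form a singly linked free list threaded through
 * svq->desc_next. Writing a chain consumes entries starting at free_head and
 * leaves free_head pointing at the successor of the last descriptor used:
 * immediately after vhost_svq_start(), a 3-entry chain consumes descriptors
 * 0, 1 and 2, and free_head becomes 3.
 */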

static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                VirtQueueElement *elem, unsigned *head)
{
    unsigned avail_idx;
    vring_avail_t *avail = svq->vring.avail;
    bool ok;
    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));

    *head = svq->free_head;

    /* We need some descriptors here */
    if (unlikely(!elem->out_num && !elem->in_num)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Guest provided element with no descriptors");
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
                                     elem->in_num > 0, false);
    if (unlikely(!ok)) {
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false,
                                     true);
    if (unlikely(!ok)) {
        return false;
    }

    /*
     * Put the entry in the available array (but don't update avail->idx
     * until after the write barrier below).
     */
    avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
    avail->ring[avail_idx] = cpu_to_le16(*head);
    svq->shadow_avail_idx++;

    /* Update the avail index after writing the descriptor */
    smp_wmb();
    avail->idx = cpu_to_le16(svq->shadow_avail_idx);

    return true;
}
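
/*
 * The smp_wmb() above pairs with the device's read of avail->idx: the
 * descriptor and avail ring writes must be visible before the new index is,
 * or the device could read a stale descriptor. shadow_avail_idx is masked
 * with (vring.num - 1) because split ring sizes are powers of two.
 */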

/**
 * Add an element to an SVQ.
 *
 * The caller must check that there are enough slots for the new element. It
 * takes ownership of the element: in case of failure, it is freed and the SVQ
 * is considered broken.
 */
static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
{
    unsigned qemu_head;
    bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
    if (unlikely(!ok)) {
        g_free(elem);
        return false;
    }

    svq->ring_id_maps[qemu_head] = elem;
    return true;
}

static void vhost_svq_kick(VhostShadowVirtqueue *svq)
{
    /*
     * We need to expose the available array entries before checking the used
     * flags
     */
    smp_mb();
    if (le16_to_cpu(svq->vring.used->flags) & VRING_USED_F_NO_NOTIFY) {
        return;
    }

    event_notifier_set(&svq->hdev_kick);
}
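
/*
 * The smp_mb() in vhost_svq_kick() pairs with the barrier the device is
 * assumed to execute between writing VRING_USED_F_NO_NOTIFY and reading
 * avail->idx: either the device sees the new available entry, or SVQ sees
 * that notifications are not needed, so a kick is never lost.
 */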

/**
 * Forward available buffers.
 *
 * @svq: Shadow VirtQueue
 *
 * Note that this function does not guarantee that all the guest's available
 * buffers are available to the device in the SVQ avail ring. The guest may
 * have exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous
 * in qemu vaddr.
 *
 * If that happens, the guest's kick notifications will be disabled until the
 * device uses some buffers.
 */
static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
{
    /* Clear event notifier */
    event_notifier_test_and_clear(&svq->svq_kick);

    /* Forward to the device as many available buffers as possible */
    do {
        virtio_queue_set_notification(svq->vq, false);

        while (true) {
            VirtQueueElement *elem;
            bool ok;

            if (svq->next_guest_avail_elem) {
                elem = g_steal_pointer(&svq->next_guest_avail_elem);
            } else {
                elem = virtqueue_pop(svq->vq, sizeof(*elem));
            }

            if (!elem) {
                break;
            }

            if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
                /*
                 * This condition is possible since a contiguous buffer in GPA
                 * does not imply a contiguous buffer in qemu's VA
                 * scatter-gather segments. If that happens, the buffer exposed
                 * to the device needs to be a chain of descriptors at this
                 * moment.
                 *
                 * SVQ cannot hold more available buffers if we are here:
                 * queue the current guest descriptor and ignore further kicks
                 * until some elements are used.
                 */
                svq->next_guest_avail_elem = elem;
                return;
            }

            ok = vhost_svq_add(svq, elem);
            if (unlikely(!ok)) {
                /* VQ is broken, just return and ignore any other kicks */
                return;
            }
            vhost_svq_kick(svq);
        }

        virtio_queue_set_notification(svq->vq, true);
    } while (!virtio_queue_empty(svq->vq));
}
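
/*
 * The disable/pop/re-enable pattern above is the standard way to avoid
 * missing a kick: notifications are switched off while draining the queue,
 * switched back on, and the queue is checked one more time in case the guest
 * added buffers in the window before notifications were re-enabled.
 */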

/**
 * Handle guest's kick.
 *
 * @n: guest kick event notifier, the one that the guest set to notify the svq.
 */
static void vhost_handle_guest_kick_notifier(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
    event_notifier_test_and_clear(n);
    vhost_handle_guest_kick(svq);
}

static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
{
    uint16_t *used_idx = &svq->vring.used->idx;
    if (svq->last_used_idx != svq->shadow_used_idx) {
        return true;
    }

    svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);

    return svq->last_used_idx != svq->shadow_used_idx;
}
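
/*
 * The volatile read above forces a fresh load of the used index that the
 * device writes, while shadow_used_idx caches it so the common "no news"
 * case does not touch shared memory at all.
 */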

/**
 * Enable vhost device calls after disabling them.
 *
 * @svq: The svq
 *
 * It returns false if there are pending used buffers from the vhost device,
 * avoiding the possible races between SVQ checking for more work and enabling
 * callbacks. True if the SVQ used vring has no more pending buffers.
 */
static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
{
    svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    /* Make sure the flag is written before the read of used_idx */
    smp_mb();
    return !vhost_svq_more_used(svq);
}

static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
{
    svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
}
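
/*
 * Re-checking for used buffers right after clearing
 * VRING_AVAIL_F_NO_INTERRUPT closes the race where the device adds a used
 * entry after the last check but before it observes that interrupts are
 * wanted again: vhost_svq_flush() keeps looping until this function returns
 * true.
 */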

static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
                                             uint16_t num, uint16_t i)
{
    for (uint16_t j = 0; j < (num - 1); ++j) {
        i = le16_to_cpu(svq->desc_next[i]);
    }

    return i;
}
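
/*
 * The walk above takes num - 1 steps because a chain of num descriptors has
 * num - 1 next links; desc_next still holds the chain's links, since they are
 * only overwritten when the chain is returned to the free list.
 */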

static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
                                           uint32_t *len)
{
    const vring_used_t *used = svq->vring.used;
    vring_used_elem_t used_elem;
    uint16_t last_used, last_used_chain, num;

    if (!vhost_svq_more_used(svq)) {
        return NULL;
    }

    /* Only get used array entries after they have been exposed by dev */
    smp_rmb();
    last_used = svq->last_used_idx & (svq->vring.num - 1);
    used_elem.id = le32_to_cpu(used->ring[last_used].id);
    used_elem.len = le32_to_cpu(used->ring[last_used].len);

    svq->last_used_idx++;
    if (unlikely(used_elem.id >= svq->vring.num)) {
        qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
                      svq->vdev->name, used_elem.id);
        return NULL;
    }

    if (unlikely(!svq->ring_id_maps[used_elem.id])) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "Device %s says index %u is used, but it was not available",
            svq->vdev->name, used_elem.id);
        return NULL;
    }

    num = svq->ring_id_maps[used_elem.id]->in_num +
          svq->ring_id_maps[used_elem.id]->out_num;
    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
    svq->desc_next[last_used_chain] = svq->free_head;
    svq->free_head = used_elem.id;

    *len = used_elem.len;
    return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
}
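
/*
 * Returning the chain: pointing the chain's last desc_next entry at the old
 * free_head and making the chain's id the new free_head splices the whole
 * chain back onto the front of the free list in O(chain length).
 */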

static void vhost_svq_flush(VhostShadowVirtqueue *svq,
                            bool check_for_avail_queue)
{
    VirtQueue *vq = svq->vq;

    /* Forward as many used buffers as possible. */
    do {
        unsigned i = 0;

        vhost_svq_disable_notification(svq);
        while (true) {
            uint32_t len;
            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
            if (!elem) {
                break;
            }

            if (unlikely(i >= svq->vring.num)) {
                qemu_log_mask(LOG_GUEST_ERROR,
                    "More than %u used buffers obtained in a %u size SVQ",
                    i, svq->vring.num);
                virtqueue_fill(vq, elem, len, i);
                virtqueue_flush(vq, i);
                return;
            }
            virtqueue_fill(vq, elem, len, i++);
        }

        virtqueue_flush(vq, i);
        event_notifier_set(&svq->svq_call);

        if (check_for_avail_queue && svq->next_guest_avail_elem) {
            /*
             * Avail ring was full when vhost_svq_flush was called, so it's a
             * good moment to make more descriptors available if possible.
             */
            vhost_handle_guest_kick(svq);
        }
    } while (!vhost_svq_enable_notification(svq));
}

/**
 * Forward used buffers.
 *
 * @n: hdev call event notifier, the one that the device set to notify the svq.
 *
 * Note that we are not making any buffers available in the loop, so there is
 * no way it runs more than virtqueue size times.
 */
static void vhost_svq_handle_call(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
                                             hdev_call);
    event_notifier_test_and_clear(n);
    vhost_svq_flush(svq, true);
}

/**
 * Set the call notifier for the SVQ to call the guest
 *
 * @svq: Shadow virtqueue
 * @call_fd: call notifier
 *
 * Called on BQL context.
 */
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
{
    if (call_fd == VHOST_FILE_UNBIND) {
        /*
         * Make event_notifier_set fail if it is called while handling a
         * device call.
         *
         * SVQ still needs device notifications, since it needs to keep
         * forwarding used buffers even with the unbind.
         */
        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
    } else {
        event_notifier_init_fd(&svq->svq_call, call_fd);
    }
}

/**
 * Get the shadow vq vring address.
 * @svq: Shadow virtqueue
 * @addr: Destination to store address
 */
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
                              struct vhost_vring_addr *addr)
{
    addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
    addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
    addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
}

size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
{
    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
    size_t avail_size = offsetof(vring_avail_t, ring) +
                        sizeof(uint16_t) * svq->vring.num;

    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size());
}

size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
{
    size_t used_size = offsetof(vring_used_t, ring) +
                       sizeof(vring_used_elem_t) * svq->vring.num;
    return ROUND_UP(used_size, qemu_real_host_page_size());
}
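
/*
 * Worked example, assuming a 4 KiB host page and vring.num == 256: the driver
 * area needs 256 * 16 bytes of descriptors plus 4 + 256 * 2 bytes of avail
 * ring (4612 bytes, rounded up to 8192), and the device area needs
 * 4 + 256 * 8 bytes of used ring (2052 bytes, rounded up to 4096).
 */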

/**
 * Set a new file descriptor for the guest to kick the SVQ and notify for avail
 *
 * @svq: The svq
 * @svq_kick_fd: The svq kick fd
 *
 * Note that the SVQ will never close the old file descriptor.
 */
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
{
    EventNotifier *svq_kick = &svq->svq_kick;
    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;

    if (poll_stop) {
        event_notifier_set_handler(svq_kick, NULL);
    }

    /*
     * event_notifier_set_handler already checks for guest's notifications if
     * they arrive at the new file descriptor in the switch, so there is no
     * need to explicitly check for them.
     */
    if (poll_start) {
        event_notifier_init_fd(svq_kick, svq_kick_fd);
        event_notifier_set(svq_kick);
        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
    }
}

/**
 * Start the shadow virtqueue operation.
 *
 * @svq: Shadow Virtqueue
 * @vdev: VirtIO device
 * @vq: Virtqueue to shadow
 */
void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                     VirtQueue *vq)
{
    size_t desc_size, driver_size, device_size;

    svq->next_guest_avail_elem = NULL;
    svq->shadow_avail_idx = 0;
    svq->shadow_used_idx = 0;
    svq->last_used_idx = 0;
    svq->vdev = vdev;
    svq->vq = vq;

    svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
    driver_size = vhost_svq_driver_area_size(svq);
    device_size = vhost_svq_device_area_size(svq);
    svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size);
    desc_size = sizeof(vring_desc_t) * svq->vring.num;
    svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
    memset(svq->vring.desc, 0, driver_size);
    svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
    memset(svq->vring.used, 0, device_size);
    svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
    svq->desc_next = g_new0(uint16_t, svq->vring.num);
    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
        svq->desc_next[i] = cpu_to_le16(i + 1);
    }
}
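
/*
 * After vhost_svq_start() the driver area holds the descriptor table
 * immediately followed by the avail ring in one page-aligned allocation, the
 * used ring lives in its own page-aligned allocation, and desc_next forms
 * the initial free list 0 -> 1 -> ... -> num - 1.
 */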

/**
 * Stop the shadow virtqueue operation.
 * @svq: Shadow Virtqueue
 */
void vhost_svq_stop(VhostShadowVirtqueue *svq)
{
    event_notifier_set_handler(&svq->svq_kick, NULL);
    g_autofree VirtQueueElement *next_avail_elem = NULL;

    if (!svq->vq) {
        return;
    }

    /* Send all pending used descriptors to guest */
    vhost_svq_flush(svq, false);

    for (unsigned i = 0; i < svq->vring.num; ++i) {
        g_autofree VirtQueueElement *elem = NULL;
        elem = g_steal_pointer(&svq->ring_id_maps[i]);
        if (elem) {
            virtqueue_detach_element(svq->vq, elem, 0);
        }
    }

    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
    if (next_avail_elem) {
        virtqueue_detach_element(svq->vq, next_avail_elem, 0);
    }
    svq->vq = NULL;
    g_free(svq->desc_next);
    g_free(svq->ring_id_maps);
    qemu_vfree(svq->vring.desc);
    qemu_vfree(svq->vring.used);
}

/**
 * Creates vhost shadow virtqueue, and instructs the vhost device to use the
 * shadow methods and file descriptors.
 *
 * @iova_tree: Tree to perform descriptor translations
 *
 * Returns the new virtqueue or NULL.
 *
 * In case of error, reason is reported through error_report.
 */
VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
{
    g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
    int r;

    r = event_notifier_init(&svq->hdev_kick, 0);
    if (r != 0) {
        error_report("Couldn't create kick event notifier: %s (%d)",
                     g_strerror(errno), errno);
        goto err_init_hdev_kick;
    }

    r = event_notifier_init(&svq->hdev_call, 0);
    if (r != 0) {
        error_report("Couldn't create call event notifier: %s (%d)",
                     g_strerror(errno), errno);
        goto err_init_hdev_call;
    }

    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
    svq->iova_tree = iova_tree;
    return g_steal_pointer(&svq);

err_init_hdev_call:
    event_notifier_cleanup(&svq->hdev_kick);

err_init_hdev_kick:
    return NULL;
}
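
/*
 * Typical lifecycle, as suggested by the functions in this file (the actual
 * call sites live in the vhost-vdpa code; guest_kick_fd and guest_call_fd are
 * illustrative names):
 *
 *   VhostShadowVirtqueue *svq = vhost_svq_new(iova_tree);
 *   vhost_svq_set_svq_kick_fd(svq, guest_kick_fd);
 *   vhost_svq_set_svq_call_fd(svq, guest_call_fd);
 *   vhost_svq_start(svq, vdev, vq);
 *   ...
 *   vhost_svq_stop(svq);
 *   vhost_svq_free(svq);
 */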

/**
 * Free the resources of the shadow virtqueue.
 *
 * @pvq: gpointer to SVQ so it can be used by autofree functions.
 */
void vhost_svq_free(gpointer pvq)
{
    VhostShadowVirtqueue *vq = pvq;
    vhost_svq_stop(vq);
    event_notifier_cleanup(&vq->hdev_kick);
    event_notifier_set_handler(&vq->hdev_call, NULL);
    event_notifier_cleanup(&vq->hdev_call);
    g_free(vq);
}