/*
 * vhost shadow virtqueue
 *
 * SPDX-FileCopyrightText: Red Hat, Inc. 2021
 * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"

#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "linux-headers/linux/vhost.h"

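/*
 * A shadow virtqueue (SVQ) is placed between a guest virtqueue and a vhost
 * device: it forwards the guest's available buffers to the device and the
 * device's used buffers back to the guest, translating buffer addresses
 * through the SVQ IOVA tree along the way.
 */
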
/**
 * Validate the transport device features that the guest can use with the SVQ
 * and that the SVQ can use with the device.
 *
 * @features: The features
 * @errp: Error pointer
 */
bool vhost_svq_valid_features(uint64_t features, Error **errp)
{
    bool ok = true;
    uint64_t svq_features = features;

    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
         ++b) {
        switch (b) {
        case VIRTIO_F_ANY_LAYOUT:
        case VIRTIO_RING_F_EVENT_IDX:
            continue;

        case VIRTIO_F_ACCESS_PLATFORM:
            /* SVQ trusts in the host's IOMMU to translate addresses */
        case VIRTIO_F_VERSION_1:
            /* SVQ trusts that the guest vring is little endian */
            if (!(svq_features & BIT_ULL(b))) {
                svq_features |= BIT_ULL(b);
                ok = false;
            }
            continue;

        default:
            /* SVQ cannot forward other transport features, filter them out */
            if (svq_features & BIT_ULL(b)) {
                svq_features &= ~BIT_ULL(b);
                ok = false;
            }
        }
    }

    if (!ok) {
        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
                   ", ok: 0x%"PRIx64, features, svq_features);
    }
    return ok;
}

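/*
 * A minimal usage sketch (the caller and the dev_features variable here are
 * hypothetical, not taken from this file):
 *
 *     Error *err = NULL;
 *     if (!vhost_svq_valid_features(dev_features, &err)) {
 *         error_report_err(err);
 *         return -1;
 *     }
 */
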
/**
 * Number of descriptors that the SVQ can make available from the guest.
 *
 * @svq: The svq
 */
uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
{
    return svq->num_free;
}

/**
 * Translate addresses between QEMU's virtual addresses and the SVQ IOVA
 *
 * @svq: Shadow VirtQueue
 * @addrs: Destination array for the translated IOVA addresses
 * @iovec: Source QEMU virtual addresses
 * @num: Length of iovec and minimum length of addrs
 */
static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
                                     hwaddr *addrs, const struct iovec *iovec,
                                     size_t num)
{
    if (num == 0) {
        return true;
    }

    for (size_t i = 0; i < num; ++i) {
        DMAMap needle = {
            .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
            .size = iovec[i].iov_len,
        };
        Int128 needle_last, map_last;
        size_t off;

        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
        /*
         * The map cannot be NULL since the IOVA tree contains all guest
         * memory and QEMU already has it mapped to a host virtual address
         */
        if (unlikely(!map)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
                          needle.translated_addr);
            return false;
        }

        /* Preserve the buffer's offset within the mapping */
        off = needle.translated_addr - map->translated_addr;
        addrs[i] = map->iova + off;

        needle_last = int128_add(int128_make64(needle.translated_addr),
                                 int128_makes64(iovec[i].iov_len - 1));
        map_last = int128_make64(map->translated_addr + map->size);
        if (unlikely(int128_gt(needle_last, map_last))) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Guest buffer extends beyond the mapped iova range");
            return false;
        }
    }

    return true;
}

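/*
 * Worked example, assuming a mapping with iova = 0x3000,
 * translated_addr = 0x8000 and size = 0xfff (DMAMap sizes are inclusive, as
 * the bounds check above implies): a guest buffer at QEMU VA 0x8800 yields
 * off = 0x800 and is exposed to the device at IOVA 0x3800.
 */
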
/**
 * Write descriptors to SVQ vring
 *
 * @svq: The shadow virtqueue
 * @sg: Scratch array for the translated addresses (hwaddr)
 * @iovec: The iovec from the guest
 * @num: iovec length
 * @more_descs: True if more descriptors come in the chain
 * @write: True if they are writable descriptors
 *
 * Return true on success, false otherwise and print error.
 */
static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
                                        const struct iovec *iovec, size_t num,
                                        bool more_descs, bool write)
{
    uint16_t i = svq->free_head, last = svq->free_head;
    unsigned n;
    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
    vring_desc_t *descs = svq->vring.desc;
    bool ok;

    if (num == 0) {
        return true;
    }

    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
    if (unlikely(!ok)) {
        return false;
    }

    for (n = 0; n < num; n++) {
        if (more_descs || (n + 1 < num)) {
            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
            /* desc_next entries are already little endian */
            descs[i].next = svq->desc_next[i];
        } else {
            descs[i].flags = flags;
        }
        descs[i].addr = cpu_to_le64(sg[n]);
        descs[i].len = cpu_to_le32(iovec[n].iov_len);

        last = i;
        i = le16_to_cpu(svq->desc_next[i]);
    }

    svq->free_head = le16_to_cpu(svq->desc_next[last]);
    return true;
}

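/*
 * Write the guest element as a single descriptor chain: the read-only (out)
 * descriptors first, then the write-only (in) ones, and publish the head of
 * the chain in the avail ring.
 */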
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                const struct iovec *out_sg, size_t out_num,
                                const struct iovec *in_sg, size_t in_num,
                                unsigned *head)
{
    unsigned avail_idx;
    vring_avail_t *avail = svq->vring.avail;
    bool ok;
    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));

    *head = svq->free_head;

    /* We need some descriptors here */
    if (unlikely(!out_num && !in_num)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Guest provided element with no descriptors");
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
                                     false);
    if (unlikely(!ok)) {
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
    if (unlikely(!ok)) {
        return false;
    }

    /*
     * Put the entry in the available array, but do not update avail->idx
     * until after the write barrier below.
     */
    avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
    avail->ring[avail_idx] = cpu_to_le16(*head);
    svq->shadow_avail_idx++;

    /* Update the avail index after writing the descriptors */
    smp_wmb();
    avail->idx = cpu_to_le16(svq->shadow_avail_idx);

    return true;
}

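/**
 * Kick the device after making buffers available, unless notifications are
 * suppressed: via VRING_USED_F_NO_NOTIFY, or, with VIRTIO_RING_F_EVENT_IDX,
 * when the device's avail event index shows no kick is needed.
 */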
static void vhost_svq_kick(VhostShadowVirtqueue *svq)
{
    bool needs_kick;

    /*
     * We need to expose the available array entries before checking the used
     * flags
     */
    smp_mb();

    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t avail_event = le16_to_cpu(
                *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
                                      svq->shadow_avail_idx - 1);
    } else {
        needs_kick =
                !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
    }

    if (!needs_kick) {
        return;
    }

    event_notifier_set(&svq->hdev_kick);
}

/**
 * Add an element to an SVQ.
 *
 * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
 */
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
                  size_t out_num, const struct iovec *in_sg, size_t in_num,
                  VirtQueueElement *elem)
{
    unsigned qemu_head;
    unsigned ndescs = in_num + out_num;
    bool ok;

    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
        return -ENOSPC;
    }

    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
    if (unlikely(!ok)) {
        return -EINVAL;
    }

    /* Remember the element until the device marks its head index as used */
    svq->num_free -= ndescs;
    svq->desc_state[qemu_head].elem = elem;
    svq->desc_state[qemu_head].ndescs = ndescs;
    vhost_svq_kick(svq);
    return 0;
}

/* Convenience wrapper to add a guest's element to SVQ */
static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
                                 VirtQueueElement *elem)
{
    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
                         elem->in_num, elem);
}

/**
 * Forward available buffers.
 *
 * @svq: Shadow VirtQueue
 *
 * Note that this function does not guarantee that all of the guest's
 * available buffers are made available to the device in the SVQ avail ring.
 * The guest may have exposed a GPA / GIOVA contiguous buffer, but it may not
 * be contiguous in QEMU's vaddr.
 *
 * If that happens, the guest's kick notifications will be disabled until the
 * device uses some buffers.
 */
static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
{
    /* Clear event notifier */
    event_notifier_test_and_clear(&svq->svq_kick);

    /* Forward to the device as many available buffers as possible */
    do {
        virtio_queue_set_notification(svq->vq, false);

        while (true) {
            g_autofree VirtQueueElement *elem = NULL;
            int r;

            if (svq->next_guest_avail_elem) {
                elem = g_steal_pointer(&svq->next_guest_avail_elem);
            } else {
                elem = virtqueue_pop(svq->vq, sizeof(*elem));
            }

            if (!elem) {
                break;
            }

            if (svq->ops) {
                r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
            } else {
                r = vhost_svq_add_element(svq, elem);
            }
            if (unlikely(r != 0)) {
                if (r == -ENOSPC) {
                    /*
                     * This condition is possible since a contiguous buffer in
                     * GPA does not imply a contiguous buffer in QEMU's VA
                     * scatter-gather segments. If that happens, the buffer
                     * exposed to the device needs to be a chain of descriptors
                     * at this moment.
                     *
                     * SVQ cannot hold more available buffers if we are here:
                     * queue the current guest descriptor and ignore kicks
                     * until some elements are used.
                     */
                    svq->next_guest_avail_elem = g_steal_pointer(&elem);
                }

                /* VQ is full or broken, just return and ignore kicks */
                return;
            }
            /* elem belongs to SVQ or external caller now */
            elem = NULL;
        }

        virtio_queue_set_notification(svq->vq, true);
    } while (!virtio_queue_empty(svq->vq));
}

/**
 * Handle the guest's kick.
 *
 * @n: guest kick event notifier, the one that the guest set to notify the SVQ.
 */
static void vhost_handle_guest_kick_notifier(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
    event_notifier_test_and_clear(n);
    vhost_handle_guest_kick(svq);
}

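/**
 * True if the device has put new entries in the used ring since the last
 * check. On a miss, it refreshes the shadow used index from the vring.
 */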
static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
{
    uint16_t *used_idx = &svq->vring.used->idx;
    if (svq->last_used_idx != svq->shadow_used_idx) {
        return true;
    }

    /* Refresh the shadow copy with a single volatile read of the LE index */
    svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);

    return svq->last_used_idx != svq->shadow_used_idx;
}

/**
 * Enable vhost device calls after they have been disabled.
 *
 * @svq: The svq
 *
 * It returns false if there are pending used buffers from the vhost device,
 * avoiding the possible races between SVQ checking for more work and enabling
 * callbacks. True if SVQ used vring has no more pending buffers.
 */
static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
{
    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
        *used_event = cpu_to_le16(svq->shadow_used_idx);
    } else {
        svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* Make sure the event is enabled before the read of used_idx */
    smp_mb();
    return !vhost_svq_more_used(svq);
}

static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
{
    /*
     * No need to disable notification in the event idx case, since the used
     * event index is already an index too far away.
     */
    if (!virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    }
}

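/**
 * Walk a chain of @num descriptors starting at index @i through desc_next,
 * returning the index of the chain's last descriptor.
 */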
static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
                                             uint16_t num, uint16_t i)
{
    for (uint16_t j = 0; j < (num - 1); ++j) {
        i = le16_to_cpu(svq->desc_next[i]);
    }

    return i;
}

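/**
 * Obtain the next used element from the SVQ used ring, returning the chain
 * of descriptors it occupied to the free list.
 *
 * @svq: The svq
 * @len: Output for the used element's written length
 */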
static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
                                           uint32_t *len)
{
    const vring_used_t *used = svq->vring.used;
    vring_used_elem_t used_elem;
    uint16_t last_used, last_used_chain, num;

    if (!vhost_svq_more_used(svq)) {
        return NULL;
    }

    /* Only get used array entries after they have been exposed by dev */
    smp_rmb();
    last_used = svq->last_used_idx & (svq->vring.num - 1);
    used_elem.id = le32_to_cpu(used->ring[last_used].id);
    used_elem.len = le32_to_cpu(used->ring[last_used].len);

    svq->last_used_idx++;
    if (unlikely(used_elem.id >= svq->vring.num)) {
        qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used, "
                      "but it is out of range", svq->vdev->name, used_elem.id);
        return NULL;
    }

    if (unlikely(!svq->desc_state[used_elem.id].ndescs)) {
        qemu_log_mask(LOG_GUEST_ERROR,
            "Device %s says index %u is used, but it was not available",
            svq->vdev->name, used_elem.id);
        return NULL;
    }

    /* Return the whole chain to the free list */
    num = svq->desc_state[used_elem.id].ndescs;
    svq->desc_state[used_elem.id].ndescs = 0;
    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
    svq->desc_next[last_used_chain] = cpu_to_le16(svq->free_head);
    svq->free_head = used_elem.id;
    svq->num_free += num;

    *len = used_elem.len;
    return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
}

/**
 * Push an element to SVQ, returning it to the guest.
 */
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
                         const VirtQueueElement *elem, uint32_t len)
{
    virtqueue_push(svq->vq, elem, len);
    if (svq->next_guest_avail_elem) {
        /*
         * Avail ring was full when vhost_svq_flush was called, so it's a
         * good moment to make more descriptors available if possible.
         */
        vhost_handle_guest_kick(svq);
    }
}

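/**
 * Forward used buffers from the device to the guest, signalling the guest's
 * call notifier after each batch.
 *
 * @svq: The svq
 * @check_for_avail_queue: Also retry guest buffers that were queued while the
 *                         avail ring was full
 */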
static void vhost_svq_flush(VhostShadowVirtqueue *svq,
                            bool check_for_avail_queue)
{
    VirtQueue *vq = svq->vq;

    /* Forward as many used buffers as possible. */
    do {
        unsigned i = 0;

        vhost_svq_disable_notification(svq);
        while (true) {
            uint32_t len;
            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
            if (!elem) {
                break;
            }

            if (unlikely(i >= svq->vring.num)) {
                qemu_log_mask(LOG_GUEST_ERROR,
                         "More than %u used buffers obtained in a %u size SVQ",
                         i, svq->vring.num);
                virtqueue_fill(vq, elem, len, i);
                virtqueue_flush(vq, i);
                return;
            }
            virtqueue_fill(vq, elem, len, i++);
        }

        virtqueue_flush(vq, i);
        event_notifier_set(&svq->svq_call);

        if (check_for_avail_queue && svq->next_guest_avail_elem) {
            /*
             * Avail ring was full when vhost_svq_flush was called, so it's a
             * good moment to make more descriptors available if possible.
             */
            vhost_handle_guest_kick(svq);
        }
    } while (!vhost_svq_enable_notification(svq));
}

/**
 * Poll the SVQ to wait for the device to use the specified number
 * of elements and return the total length written by the device.
 *
 * This function races with the main event loop SVQ polling, so extra
 * synchronization is needed.
 *
 * @svq: The svq
 * @num: The number of elements that need to be used
 */
size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num)
{
    size_t len = 0;
    uint32_t r;

    while (num--) {
        int64_t start_us = g_get_monotonic_time();

        do {
            if (vhost_svq_more_used(svq)) {
                break;
            }

            /* Give up after 10 seconds (g_get_monotonic_time() is in us) */
            if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
                return len;
            }
        } while (true);

        vhost_svq_get_buf(svq, &r);
        len += r;
    }

    return len;
}

/**
 * Forward used buffers.
 *
 * @n: hdev call event notifier, the one that the device set to notify the SVQ.
 *
 * Note that we are not making any buffers available in the loop, so there is
 * no way it runs more than virtqueue-size times.
 */
static void vhost_svq_handle_call(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
                                             hdev_call);
    event_notifier_test_and_clear(n);
    vhost_svq_flush(svq, true);
}

/**
 * Set the call notifier for the SVQ to call the guest
 *
 * @svq: Shadow virtqueue
 * @call_fd: call notifier
 *
 * Called on BQL context.
 */
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
{
    if (call_fd == VHOST_FILE_UNBIND) {
        /*
         * Zero the notifier so that event_notifier_set fails if it is called
         * while handling a device call.
         *
         * The SVQ still needs device notifications, since it needs to keep
         * forwarding used buffers even with the unbind.
         */
        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
    } else {
        event_notifier_init_fd(&svq->svq_call, call_fd);
    }
}

/**
 * Get the shadow vq vring address.
 * @svq: Shadow virtqueue
 * @addr: Destination to store address
 */
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
                              struct vhost_vring_addr *addr)
{
    addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
    addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
    addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
}

size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
{
    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
    /* The extra uint16_t is the used_event field past the avail ring */
    size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) +
                        sizeof(uint16_t);

    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size());
}

size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
{
    /* The extra uint16_t is the avail_event field past the used ring */
    size_t used_size = offsetof(vring_used_t, ring[svq->vring.num]) +
                       sizeof(uint16_t);
    return ROUND_UP(used_size, qemu_real_host_page_size());
}

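/*
 * For example, with vring.num = 256 and 4 KiB host pages: the driver area is
 * 4096 (descriptor table) + 518 (avail ring plus used_event) = 4614 bytes,
 * rounded up to two pages, and the device area is 2054 bytes (used ring plus
 * avail_event), rounded up to one page.
 */
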
/**
 * Set a new file descriptor for the guest to kick the SVQ and notify for avail
 *
 * @svq: The svq
 * @svq_kick_fd: The svq kick fd
 *
 * Note that the SVQ will never close the old file descriptor.
 */
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
{
    EventNotifier *svq_kick = &svq->svq_kick;
    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;

    if (poll_stop) {
        event_notifier_set_handler(svq_kick, NULL);
    }

    event_notifier_init_fd(svq_kick, svq_kick_fd);
    /*
     * event_notifier_set_handler already checks for notifications that
     * arrived at the new file descriptor during the switch, so there is no
     * need to check for them explicitly.
     */
    if (poll_start) {
        event_notifier_set(svq_kick);
        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
    }
}

/**
 * Start the shadow virtqueue operation.
 *
 * @svq: Shadow Virtqueue
 * @vdev: VirtIO device
 * @vq: Virtqueue to shadow
 * @iova_tree: Tree to perform descriptor translations
 */
void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                     VirtQueue *vq, VhostIOVATree *iova_tree)
{
    size_t desc_size;

    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
    svq->next_guest_avail_elem = NULL;
    svq->shadow_avail_idx = 0;
    svq->shadow_used_idx = 0;
    svq->last_used_idx = 0;
    svq->vdev = vdev;
    svq->vq = vq;
    svq->iova_tree = iova_tree;

    svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
    svq->num_free = svq->vring.num;
    svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq),
                           PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                           -1, 0);
    desc_size = sizeof(vring_desc_t) * svq->vring.num;
    svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
    svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq),
                           PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                           -1, 0);
    svq->desc_state = g_new0(SVQDescState, svq->vring.num);
    svq->desc_next = g_new0(uint16_t, svq->vring.num);
    /* Chain the whole descriptor table into the initial free list */
    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
        svq->desc_next[i] = cpu_to_le16(i + 1);
    }
}

/**
 * Stop the shadow virtqueue operation.
 * @svq: Shadow Virtqueue
 */
void vhost_svq_stop(VhostShadowVirtqueue *svq)
{
    vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND);
    g_autofree VirtQueueElement *next_avail_elem = NULL;

    if (!svq->vq) {
        return;
    }

    /* Send all pending used descriptors to guest */
    vhost_svq_flush(svq, false);

    for (unsigned i = 0; i < svq->vring.num; ++i) {
        g_autofree VirtQueueElement *elem = NULL;
        elem = g_steal_pointer(&svq->desc_state[i].elem);
        if (elem) {
            /*
             * TODO: This is ok for networking, but other kinds of devices
             * might have problems with just unpopping these.
             */
            virtqueue_unpop(svq->vq, elem, 0);
        }
    }

    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
    if (next_avail_elem) {
        virtqueue_unpop(svq->vq, next_avail_elem, 0);
    }
    svq->vq = NULL;
    g_free(svq->desc_next);
    g_free(svq->desc_state);
    munmap(svq->vring.desc, vhost_svq_driver_area_size(svq));
    munmap(svq->vring.used, vhost_svq_device_area_size(svq));
    event_notifier_set_handler(&svq->hdev_call, NULL);
}

/**
 * Creates a vhost shadow virtqueue.
 *
 * The vhost device is instructed to use the shadow methods and file
 * descriptors through vhost_svq_set_svq_kick_fd(), vhost_svq_set_svq_call_fd()
 * and vhost_svq_start().
 *
 * @ops: SVQ owner callbacks
 * @ops_opaque: ops opaque pointer
 */
VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
                                    void *ops_opaque)
{
    VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);

    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
    svq->ops = ops;
    svq->ops_opaque = ops_opaque;
    return svq;
}

/**
 * Free the resources of the shadow virtqueue.
 *
 * @pvq: gpointer to SVQ so it can be used by autofree functions.
 */
void vhost_svq_free(gpointer pvq)
{
    VhostShadowVirtqueue *vq = pvq;
    vhost_svq_stop(vq);
    g_free(vq);
}
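
/*
 * Typical lifecycle, as a sketch (the fd and device variables are assumed to
 * be provided by the caller; the exact ordering may vary by user):
 *
 *     VhostShadowVirtqueue *svq = vhost_svq_new(ops, ops_opaque);
 *     vhost_svq_set_svq_kick_fd(svq, svq_kick_fd);
 *     vhost_svq_set_svq_call_fd(svq, call_fd);
 *     vhost_svq_start(svq, vdev, vq, iova_tree);
 *     ...
 *     vhost_svq_stop(svq);
 *     vhost_svq_free(svq);
 */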