/*
 * vhost shadow virtqueue
 *
 * SPDX-FileCopyrightText: Red Hat, Inc. 2021
 * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"

#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "linux-headers/linux/vhost.h"

/**
 * Validate the transport device features that both the guest can use with the
 * SVQ and the SVQ can offer to the device.
 *
 * @features: The features
 * @errp: Error pointer
 */
bool vhost_svq_valid_features(uint64_t features, Error **errp)
{
    bool ok = true;
    uint64_t svq_features = features;

    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
         ++b) {
        switch (b) {
        case VIRTIO_F_ANY_LAYOUT:
        case VIRTIO_RING_F_EVENT_IDX:
            continue;

        case VIRTIO_F_ACCESS_PLATFORM:
            /* SVQ trusts in the host's IOMMU to translate addresses */
        case VIRTIO_F_VERSION_1:
            /* SVQ trusts that the guest vring is little endian */
            if (!(svq_features & BIT_ULL(b))) {
                svq_features |= BIT_ULL(b);
                ok = false;
            }
            continue;

        default:
            if (svq_features & BIT_ULL(b)) {
                svq_features &= ~BIT_ULL(b);
                ok = false;
            }
        }
    }

    if (!ok) {
        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
                         ", ok: 0x%"PRIx64, features, svq_features);
    }
    return ok;
}

/**
 * Number of descriptors that the SVQ can make available from the guest.
 *
 * @svq: The svq
 */
static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
{
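    /*
     * shadow_avail_idx - shadow_used_idx is the number of descriptors the
     * device still owns; 16-bit unsigned arithmetic keeps the difference
     * correct across index wrap-around.
     */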
    return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
}

/**
 * Translate addresses between qemu's virtual addresses and the SVQ IOVA
 *
 * @svq: Shadow VirtQueue
 * @addrs: Destination array for the translated IOVA addresses
 * @iovec: Source qemu's VA addresses
 * @num: Length of iovec and minimum length of addrs
 */
static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
                                     hwaddr *addrs, const struct iovec *iovec,
                                     size_t num)
{
    if (num == 0) {
        return true;
    }

    for (size_t i = 0; i < num; ++i) {
        DMAMap needle = {
            .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
            .size = iovec[i].iov_len,
        };
        Int128 needle_last, map_last;
        size_t off;

        const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
        /*
         * The map cannot be NULL, since the IOVA tree covers all guest
         * memory and qemu already has a host address mapped for it
         */
        if (unlikely(!map)) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Invalid address 0x%"HWADDR_PRIx" given by guest",
                          needle.translated_addr);
            return false;
        }

        off = needle.translated_addr - map->translated_addr;
        addrs[i] = map->iova + off;

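        /*
         * Verify that the buffer does not run past the end of the mapped
         * region; Int128 arithmetic avoids wrap-around of addr + len in 64
         * bits.
         */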
        needle_last = int128_add(int128_make64(needle.translated_addr),
                                 int128_make64(iovec[i].iov_len));
        map_last = int128_make64(map->translated_addr + map->size);
        if (unlikely(int128_gt(needle_last, map_last))) {
            qemu_log_mask(LOG_GUEST_ERROR,
                          "Guest buffer expands over iova range");
            return false;
        }
    }

    return true;
}

/**
 * Write descriptors to SVQ vring
 *
 * @svq: The shadow virtqueue
 * @sg: Cache for hwaddr
 * @iovec: The iovec from the guest
 * @num: iovec length
 * @more_descs: True if more descriptors come in the chain
 * @write: True if they are writable descriptors
 *
 * Returns true on success, false otherwise and prints the error.
 */
static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
                                        const struct iovec *iovec, size_t num,
                                        bool more_descs, bool write)
{
    uint16_t i = svq->free_head, last = svq->free_head;
    unsigned n;
    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
    vring_desc_t *descs = svq->vring.desc;
    bool ok;

    if (num == 0) {
        return true;
    }

    ok = vhost_svq_translate_addr(svq, sg, iovec, num);
    if (unlikely(!ok)) {
        return false;
    }

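    /*
     * Walk the free list kept in desc_next, filling each vring descriptor
     * and chaining it to the next free slot while the chain continues.
     */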
    for (n = 0; n < num; n++) {
        if (more_descs || (n + 1 < num)) {
            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
            descs[i].next = svq->desc_next[i];
        } else {
            descs[i].flags = flags;
        }
        descs[i].addr = cpu_to_le64(sg[n]);
        descs[i].len = cpu_to_le32(iovec[n].iov_len);

        last = i;
        i = le16_to_cpu(svq->desc_next[i]);
    }

    svq->free_head = le16_to_cpu(svq->desc_next[last]);
    return true;
}

static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
                                const struct iovec *out_sg, size_t out_num,
                                const struct iovec *in_sg, size_t in_num,
                                unsigned *head)
{
    unsigned avail_idx;
    vring_avail_t *avail = svq->vring.avail;
    bool ok;
    g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));

    *head = svq->free_head;

    /* We need some descriptors here */
    if (unlikely(!out_num && !in_num)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Guest provided element with no descriptors");
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
                                     false);
    if (unlikely(!ok)) {
        return false;
    }

    ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
    if (unlikely(!ok)) {
        return false;
    }

    /*
     * Put the entry in the available array (but don't update avail->idx until
     * we sync).
     */
    avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
    avail->ring[avail_idx] = cpu_to_le16(*head);
    svq->shadow_avail_idx++;

    /* Update the avail index only after writing the descriptors */
    smp_wmb();
    avail->idx = cpu_to_le16(svq->shadow_avail_idx);

    return true;
}

static void vhost_svq_kick(VhostShadowVirtqueue *svq)
{
    bool needs_kick;

    /*
     * We need to expose the available array entries before checking the used
     * flags
     */
    smp_mb();

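    /*
     * With EVENT_IDX the device stores the avail index it wants to be kicked
     * at right after the used ring (avail_event); kick only when crossing it.
     * Otherwise, honor the classic VRING_USED_F_NO_NOTIFY suppression flag.
     */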
    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t avail_event =
            le16_to_cpu(*(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
                                      svq->shadow_avail_idx - 1);
    } else {
        needs_kick =
            !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
    }

    if (!needs_kick) {
        return;
    }

    event_notifier_set(&svq->hdev_kick);
}

/**
 * Add an element to an SVQ.
 *
 * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
 */
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
                  size_t out_num, const struct iovec *in_sg, size_t in_num,
                  VirtQueueElement *elem)
{
    unsigned qemu_head;
    unsigned ndescs = in_num + out_num;
    bool ok;

    if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
        return -ENOSPC;
    }

    ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
    if (unlikely(!ok)) {
        return -EINVAL;
    }

    svq->desc_state[qemu_head].elem = elem;
    svq->desc_state[qemu_head].ndescs = ndescs;
    vhost_svq_kick(svq);
    return 0;
}

/* Convenience wrapper to add a guest's element to SVQ */
static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
                                 VirtQueueElement *elem)
{
    return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
                         elem->in_num, elem);
}

/**
 * Forward available buffers.
 *
 * @svq: Shadow VirtQueue
 *
 * Note that this function does not guarantee that all of the guest's
 * available buffers are made available to the device in the SVQ avail ring.
 * The guest may have exposed a GPA / GIOVA contiguous buffer, but it may not
 * be contiguous in qemu's vaddr.
 *
 * If that happens, the guest's kick notifications will be disabled until the
 * device uses some buffers.
 */
static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
{
    /* Clear event notifier */
    event_notifier_test_and_clear(&svq->svq_kick);

    /* Forward to the device as many available buffers as possible */
    do {
        virtio_queue_set_notification(svq->vq, false);

        while (true) {
            g_autofree VirtQueueElement *elem = NULL;
            int r;

            if (svq->next_guest_avail_elem) {
                elem = g_steal_pointer(&svq->next_guest_avail_elem);
            } else {
                elem = virtqueue_pop(svq->vq, sizeof(*elem));
            }

            if (!elem) {
                break;
            }

            if (svq->ops) {
                r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
            } else {
                r = vhost_svq_add_element(svq, elem);
            }
            if (unlikely(r != 0)) {
                if (r == -ENOSPC) {
                    /*
                     * This condition is possible since a contiguous buffer in
                     * GPA does not imply a contiguous buffer in qemu's VA
                     * scatter-gather segments. If that happens, the buffer
                     * exposed to the device needs to be a chain of descriptors
                     * at this moment.
                     *
                     * SVQ cannot hold more available buffers if we are here:
                     * queue the current guest descriptor and ignore kicks
                     * until some elements are used.
                     */
                    svq->next_guest_avail_elem = g_steal_pointer(&elem);
                }

                /* VQ is full or broken, just return and ignore kicks */
                return;
            }
            /* elem belongs to SVQ or external caller now */
            elem = NULL;
        }

        virtio_queue_set_notification(svq->vq, true);
    } while (!virtio_queue_empty(svq->vq));
}

/**
 * Handle the guest's kick.
 *
 * @n: guest kick event notifier, the one that the guest set to notify the svq.
 */
static void vhost_handle_guest_kick_notifier(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
    event_notifier_test_and_clear(n);
    vhost_handle_guest_kick(svq);
}

static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
{
    uint16_t *used_idx = &svq->vring.used->idx;
    if (svq->last_used_idx != svq->shadow_used_idx) {
        return true;
    }

    svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx);

    return svq->last_used_idx != svq->shadow_used_idx;
}

/**
 * Enable vhost device calls after disabling them.
 *
 * @svq: The svq
 *
 * It returns false if there are pending used buffers from the vhost device,
 * avoiding the possible races between SVQ checking for more work and enabling
 * callbacks. True if the SVQ used vring has no more pending buffers.
 */
static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
{
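    /*
     * With EVENT_IDX, used_event lives right after the avail ring; writing
     * shadow_used_idx there asks the device to notify as soon as it uses a
     * buffer past that index.
     */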
    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
        *used_event = cpu_to_le16(svq->shadow_used_idx);
    } else {
        svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    }

    /* Make sure the event is enabled before the read of used_idx */
    smp_mb();
    return !vhost_svq_more_used(svq);
}

static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
{
    /*
     * No need to disable notification in the event idx case, since the used
     * event index is already an index too far away.
     */
    if (!virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
    }
}

static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
                                             uint16_t num, uint16_t i)
{
    for (uint16_t j = 0; j < (num - 1); ++j) {
        i = le16_to_cpu(svq->desc_next[i]);
    }

    return i;
}

static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
                                           uint32_t *len)
{
    const vring_used_t *used = svq->vring.used;
    vring_used_elem_t used_elem;
    uint16_t last_used, last_used_chain, num;

    if (!vhost_svq_more_used(svq)) {
        return NULL;
    }

    /* Only get used array entries after they have been exposed by dev */
    smp_rmb();
    last_used = svq->last_used_idx & (svq->vring.num - 1);
    used_elem.id = le32_to_cpu(used->ring[last_used].id);
    used_elem.len = le32_to_cpu(used->ring[last_used].len);

    svq->last_used_idx++;
    if (unlikely(used_elem.id >= svq->vring.num)) {
        qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
                      svq->vdev->name, used_elem.id);
        return NULL;
    }

    if (unlikely(!svq->desc_state[used_elem.id].ndescs)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "Device %s says index %u is used, but it was not available",
                      svq->vdev->name, used_elem.id);
        return NULL;
    }

    num = svq->desc_state[used_elem.id].ndescs;
    svq->desc_state[used_elem.id].ndescs = 0;
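    /*
     * Return the whole descriptor chain to the free list: link the tail of
     * the chain to the current free head and make the chain's head the new
     * free head.
     */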
    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
    svq->desc_next[last_used_chain] = cpu_to_le16(svq->free_head);
    svq->free_head = used_elem.id;

    *len = used_elem.len;
    return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
}

/**
 * Push an element to SVQ, returning it to the guest.
 */
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
                         const VirtQueueElement *elem, uint32_t len)
{
    virtqueue_push(svq->vq, elem, len);
    if (svq->next_guest_avail_elem) {
        /*
         * Avail ring was full when vhost_svq_flush was called, so it's a
         * good moment to make more descriptors available if possible.
         */
        vhost_handle_guest_kick(svq);
    }
}

static void vhost_svq_flush(VhostShadowVirtqueue *svq,
                            bool check_for_avail_queue)
{
    VirtQueue *vq = svq->vq;

    /* Forward as many used buffers as possible. */
    do {
        unsigned i = 0;

        vhost_svq_disable_notification(svq);
        while (true) {
            uint32_t len;
            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
            if (!elem) {
                break;
            }

            if (unlikely(i >= svq->vring.num)) {
                qemu_log_mask(LOG_GUEST_ERROR,
                              "More than %u used buffers obtained in a %u size SVQ",
                              i, svq->vring.num);
                virtqueue_fill(vq, elem, len, i);
                virtqueue_flush(vq, i);
                return;
            }
            virtqueue_fill(vq, elem, len, i++);
        }

        virtqueue_flush(vq, i);
        event_notifier_set(&svq->svq_call);

        if (check_for_avail_queue && svq->next_guest_avail_elem) {
            /*
             * Avail ring was full when vhost_svq_flush was called, so it's a
             * good moment to make more descriptors available if possible.
             */
            vhost_handle_guest_kick(svq);
        }
    } while (!vhost_svq_enable_notification(svq));
}

/**
 * Poll the SVQ for one device used buffer.
 *
 * This function races with the main event loop SVQ polling, so extra
 * synchronization is needed.
 *
 * Return the length written by the device.
 */
size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
{
    int64_t start_us = g_get_monotonic_time();
    uint32_t len;

    do {
        if (vhost_svq_more_used(svq)) {
            break;
        }

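        /* Assume something is wrong if the device takes more than 10 s */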
        if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
            return 0;
        }
    } while (true);

    vhost_svq_get_buf(svq, &len);
    return len;
}

/**
 * Forward used buffers.
 *
 * @n: hdev call event notifier, the one that the device set to notify the svq.
 *
 * Note that we are not making any buffers available in the loop, so there is
 * no way it runs more than virtqueue size times.
 */
static void vhost_svq_handle_call(EventNotifier *n)
{
    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
                                             hdev_call);
    event_notifier_test_and_clear(n);
    vhost_svq_flush(svq, true);
}

/**
 * Set the call notifier for the SVQ to call the guest
 *
 * @svq: Shadow virtqueue
 * @call_fd: call notifier
 *
 * Called on BQL context.
 */
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
{
    if (call_fd == VHOST_FILE_UNBIND) {
        /*
         * Fail event_notifier_set if called handling device call.
         *
         * SVQ still needs device notifications, since it needs to keep
         * forwarding used buffers even with the unbind.
         */
        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
    } else {
        event_notifier_init_fd(&svq->svq_call, call_fd);
    }
}

/**
 * Get the shadow vq vring address.
 * @svq: Shadow virtqueue
 * @addr: Destination to store address
 */
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
                              struct vhost_vring_addr *addr)
{
    addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
    addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
    addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
}

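/*
 * Note that both area sizes below reserve one trailing uint16_t: the
 * used_event field after the avail ring (driver area) and the avail_event
 * field after the used ring (device area), per the VIRTIO event idx layout.
 */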
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
{
    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
    size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) +
                        sizeof(uint16_t);

    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size());
}

size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
{
    size_t used_size = offsetof(vring_used_t, ring[svq->vring.num]) +
                       sizeof(uint16_t);
    return ROUND_UP(used_size, qemu_real_host_page_size());
}

/**
 * Set a new file descriptor for the guest to kick the SVQ and notify for avail
 *
 * @svq: The svq
 * @svq_kick_fd: The svq kick fd
 *
 * Note that the SVQ will never close the old file descriptor.
 */
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
{
    EventNotifier *svq_kick = &svq->svq_kick;
    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;

    if (poll_stop) {
        event_notifier_set_handler(svq_kick, NULL);
    }

    event_notifier_init_fd(svq_kick, svq_kick_fd);
    /*
     * event_notifier_set_handler already checks for guest notifications that
     * arrived at the new file descriptor during the switch, so there is no
     * need to check for them explicitly.
     */
    if (poll_start) {
        event_notifier_set(svq_kick);
        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
    }
}

/**
 * Start the shadow virtqueue operation.
 *
 * @svq: Shadow Virtqueue
 * @vdev: VirtIO device
 * @vq: Virtqueue to shadow
 */
void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                     VirtQueue *vq)
{
    size_t desc_size, driver_size, device_size;

    svq->next_guest_avail_elem = NULL;
    svq->shadow_avail_idx = 0;
    svq->shadow_used_idx = 0;
    svq->last_used_idx = 0;
    svq->vdev = vdev;
    svq->vq = vq;

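    /*
     * The driver area (descriptor table + avail ring) and the device area
     * (used ring) are allocated page-aligned so each can be mapped to the
     * device on its own.
     */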
    svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
    driver_size = vhost_svq_driver_area_size(svq);
    device_size = vhost_svq_device_area_size(svq);
    svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size);
    desc_size = sizeof(vring_desc_t) * svq->vring.num;
    svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
    memset(svq->vring.desc, 0, driver_size);
    svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
    memset(svq->vring.used, 0, device_size);
    svq->desc_state = g_new0(SVQDescState, svq->vring.num);
    svq->desc_next = g_new0(uint16_t, svq->vring.num);
    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
        svq->desc_next[i] = cpu_to_le16(i + 1);
    }
}

/**
 * Stop the shadow virtqueue operation.
 *
 * @svq: Shadow Virtqueue
 */
void vhost_svq_stop(VhostShadowVirtqueue *svq)
{
    vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND);
    g_autofree VirtQueueElement *next_avail_elem = NULL;

    if (!svq->vq) {
        return;
    }

    /* Send all pending used descriptors to guest */
    vhost_svq_flush(svq, false);

    for (unsigned i = 0; i < svq->vring.num; ++i) {
        g_autofree VirtQueueElement *elem = NULL;
        elem = g_steal_pointer(&svq->desc_state[i].elem);
        if (elem) {
            virtqueue_detach_element(svq->vq, elem, 0);
        }
    }

    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
    if (next_avail_elem) {
        virtqueue_detach_element(svq->vq, next_avail_elem, 0);
    }
    svq->vq = NULL;
    g_free(svq->desc_next);
    g_free(svq->desc_state);
    qemu_vfree(svq->vring.desc);
    qemu_vfree(svq->vring.used);
}

/**
 * Creates the vhost shadow virtqueue, and instructs the vhost device to use
 * the shadow methods and file descriptors.
 *
 * @iova_tree: Tree to perform descriptors translations
 * @ops: SVQ owner callbacks
 * @ops_opaque: ops opaque pointer
 *
 * Returns the new virtqueue or NULL.
 *
 * In case of error, the reason is reported through error_report.
 */
VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
                                    const VhostShadowVirtqueueOps *ops,
                                    void *ops_opaque)
{
    g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
    int r;

    r = event_notifier_init(&svq->hdev_kick, 0);
    if (r != 0) {
        error_report("Couldn't create kick event notifier: %s (%d)",
                     g_strerror(errno), errno);
        goto err_init_hdev_kick;
    }

    r = event_notifier_init(&svq->hdev_call, 0);
    if (r != 0) {
        error_report("Couldn't create call event notifier: %s (%d)",
                     g_strerror(errno), errno);
        goto err_init_hdev_call;
    }

    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
    svq->iova_tree = iova_tree;
    svq->ops = ops;
    svq->ops_opaque = ops_opaque;
    return g_steal_pointer(&svq);

err_init_hdev_call:
    event_notifier_cleanup(&svq->hdev_kick);

err_init_hdev_kick:
    return NULL;
}

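/*
 * Typical life cycle from a vhost backend, as a sketch only (vdev, vq, tree,
 * guest_kick_fd and guest_call_fd stand for caller-provided values, not
 * identifiers from this file):
 *
 *     VhostShadowVirtqueue *svq = vhost_svq_new(tree, NULL, NULL);
 *     vhost_svq_start(svq, vdev, vq);
 *     vhost_svq_set_svq_kick_fd(svq, guest_kick_fd);
 *     vhost_svq_set_svq_call_fd(svq, guest_call_fd);
 *     ...
 *     vhost_svq_stop(svq);
 *     vhost_svq_free(svq);
 */
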
/**
 * Free the resources of the shadow virtqueue.
 *
 * @pvq: gpointer to SVQ so it can be used by autofree functions.
 */
void vhost_svq_free(gpointer pvq)
{
    VhostShadowVirtqueue *vq = pvq;
    vhost_svq_stop(vq);
    event_notifier_cleanup(&vq->hdev_kick);
    event_notifier_set_handler(&vq->hdev_call, NULL);
    event_notifier_cleanup(&vq->hdev_call);
    g_free(vq);
}