/* hw/virtio/dataplane/vring.c */
1 /* Copyright 2012 Red Hat, Inc.
2 * Copyright IBM, Corp. 2012
3 *
4 * Based on Linux 2.6.39 vhost code:
5 * Copyright (C) 2009 Red Hat, Inc.
6 * Copyright (C) 2006 Rusty Russell IBM Corporation
7 *
8 * Author: Michael S. Tsirkin <mst@redhat.com>
9 * Stefan Hajnoczi <stefanha@redhat.com>
10 *
11 * Inspiration, some code, and most witty comments come from
12 * Documentation/virtual/lguest/lguest.c, by Rusty Russell
13 *
14 * This work is licensed under the terms of the GNU GPL, version 2.
15 */
16
17 #include "trace.h"
18 #include "hw/virtio/dataplane/vring.h"
19 #include "qemu/error-report.h"
20
21 /* Map the guest's vring to host memory */
22 bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
23 {
24 hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n);
25 hwaddr vring_size = virtio_queue_get_ring_size(vdev, n);
26 void *vring_ptr;
27
28 vring->broken = false;
29
30 hostmem_init(&vring->hostmem);
31 vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
32 if (!vring_ptr) {
33 error_report("Failed to map vring "
34 "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
35 vring_addr, vring_size);
36 vring->broken = true;
37 return false;
38 }
39
40 vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
41
42 vring->last_avail_idx = 0;
43 vring->last_used_idx = 0;
44 vring->signalled_used = 0;
45 vring->signalled_used_valid = false;
46
47 trace_vring_setup(virtio_queue_get_ring_addr(vdev, n),
48 vring->vr.desc, vring->vr.avail, vring->vr.used);
49 return true;
50 }
51
/* Undo vring_setup(): release the hostmem mapping state.
 *
 * Safe to call only after a successful vring_setup().
 */
void vring_teardown(Vring *vring)
{
    hostmem_finalize(&vring->hostmem);
}
56
57 /* Disable guest->host notifies */
58 void vring_disable_notification(VirtIODevice *vdev, Vring *vring)
59 {
60 if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
61 vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY;
62 }
63 }
64
65 /* Enable guest->host notifies
66 *
67 * Return true if the vring is empty, false if there are more requests.
68 */
69 bool vring_enable_notification(VirtIODevice *vdev, Vring *vring)
70 {
71 if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
72 vring_avail_event(&vring->vr) = vring->vr.avail->idx;
73 } else {
74 vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY;
75 }
76 smp_mb(); /* ensure update is seen before reading avail_idx */
77 return !vring_more_avail(vring);
78 }
79
80 /* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */
81 bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
82 {
83 uint16_t old, new;
84 bool v;
85 /* Flush out used index updates. This is paired
86 * with the barrier that the Guest executes when enabling
87 * interrupts. */
88 smp_mb();
89
90 if ((vdev->guest_features & VIRTIO_F_NOTIFY_ON_EMPTY) &&
91 unlikely(vring->vr.avail->idx == vring->last_avail_idx)) {
92 return true;
93 }
94
95 if (!(vdev->guest_features & VIRTIO_RING_F_EVENT_IDX)) {
96 return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
97 }
98 old = vring->signalled_used;
99 v = vring->signalled_used_valid;
100 new = vring->signalled_used = vring->last_used_idx;
101 vring->signalled_used_valid = true;
102
103 if (unlikely(!v)) {
104 return true;
105 }
106
107 return vring_need_event(vring_used_event(&vring->vr), new, old);
108 }
109
110 /* This is stolen from linux/drivers/vhost/vhost.c. */
111 static int get_indirect(Vring *vring,
112 struct iovec iov[], struct iovec *iov_end,
113 unsigned int *out_num, unsigned int *in_num,
114 struct vring_desc *indirect)
115 {
116 struct vring_desc desc;
117 unsigned int i = 0, count, found = 0;
118
119 /* Sanity check */
120 if (unlikely(indirect->len % sizeof(desc))) {
121 error_report("Invalid length in indirect descriptor: "
122 "len %#x not multiple of %#zx",
123 indirect->len, sizeof(desc));
124 vring->broken = true;
125 return -EFAULT;
126 }
127
128 count = indirect->len / sizeof(desc);
129 /* Buffers are chained via a 16 bit next field, so
130 * we can have at most 2^16 of these. */
131 if (unlikely(count > USHRT_MAX + 1)) {
132 error_report("Indirect buffer length too big: %d", indirect->len);
133 vring->broken = true;
134 return -EFAULT;
135 }
136
137 do {
138 struct vring_desc *desc_ptr;
139
140 /* Translate indirect descriptor */
141 desc_ptr = hostmem_lookup(&vring->hostmem,
142 indirect->addr + found * sizeof(desc),
143 sizeof(desc), false);
144 if (!desc_ptr) {
145 error_report("Failed to map indirect descriptor "
146 "addr %#" PRIx64 " len %zu",
147 (uint64_t)indirect->addr + found * sizeof(desc),
148 sizeof(desc));
149 vring->broken = true;
150 return -EFAULT;
151 }
152 desc = *desc_ptr;
153
154 /* Ensure descriptor has been loaded before accessing fields */
155 barrier(); /* read_barrier_depends(); */
156
157 if (unlikely(++found > count)) {
158 error_report("Loop detected: last one at %u "
159 "indirect size %u", i, count);
160 vring->broken = true;
161 return -EFAULT;
162 }
163
164 if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
165 error_report("Nested indirect descriptor");
166 vring->broken = true;
167 return -EFAULT;
168 }
169
170 /* Stop for now if there are not enough iovecs available. */
171 if (iov >= iov_end) {
172 return -ENOBUFS;
173 }
174
175 iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
176 desc.flags & VRING_DESC_F_WRITE);
177 if (!iov->iov_base) {
178 error_report("Failed to map indirect descriptor"
179 "addr %#" PRIx64 " len %u",
180 (uint64_t)desc.addr, desc.len);
181 vring->broken = true;
182 return -EFAULT;
183 }
184 iov->iov_len = desc.len;
185 iov++;
186
187 /* If this is an input descriptor, increment that count. */
188 if (desc.flags & VRING_DESC_F_WRITE) {
189 *in_num += 1;
190 } else {
191 /* If it's an output descriptor, they're all supposed
192 * to come before any input descriptors. */
193 if (unlikely(*in_num)) {
194 error_report("Indirect descriptor "
195 "has out after in: idx %u", i);
196 vring->broken = true;
197 return -EFAULT;
198 }
199 *out_num += 1;
200 }
201 i = desc.next;
202 } while (desc.flags & VRING_DESC_F_NEXT);
203 return 0;
204 }
205
/* This looks in the virtqueue and for the first available buffer, and converts
 * it to an iovec for convenient access. Since descriptors consist of some
 * number of output then some number of input descriptors, it's actually two
 * iovecs, but we pack them into one and note how many of each there were.
 *
 * This function returns the descriptor number found, or vq->num (which is
 * never a valid descriptor number) if none was found. A negative code is
 * returned on error.
 *
 * Error returns: -EFAULT for a broken/malformed vring (also sets
 * vring->broken), -EAGAIN when no new buffers are available, -ENOBUFS when
 * iov[] filled up before the descriptor chain ended.
 *
 * Stolen from linux/drivers/vhost/vhost.c.
 */
int vring_pop(VirtIODevice *vdev, Vring *vring,
              struct iovec iov[], struct iovec *iov_end,
              unsigned int *out_num, unsigned int *in_num)
{
    struct vring_desc desc;
    unsigned int i, head, found = 0, num = vring->vr.num;
    uint16_t avail_idx, last_avail_idx;

    /* If there was a fatal error then refuse operation */
    if (vring->broken) {
        return -EFAULT;
    }

    /* Check it isn't doing very strange things with descriptor numbers. */
    last_avail_idx = vring->last_avail_idx;
    avail_idx = vring->vr.avail->idx;
    barrier(); /* load indices now and not again later */

    /* Unsigned 16-bit subtraction handles index wrap-around correctly */
    if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) {
        error_report("Guest moved used index from %u to %u",
                     last_avail_idx, avail_idx);
        vring->broken = true;
        return -EFAULT;
    }

    /* If there's nothing new since last we looked. */
    if (avail_idx == last_avail_idx) {
        return -EAGAIN;
    }

    /* Only get avail ring entries after they have been exposed by guest. */
    smp_rmb();

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring->vr.avail->ring[last_avail_idx % num];

    /* If their number is silly, that's an error. */
    if (unlikely(head >= num)) {
        error_report("Guest says index %u > %u is available", head, num);
        vring->broken = true;
        return -EFAULT;
    }

    /* Publish how far we have consumed so the guest can suppress notifies */
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        vring_avail_event(&vring->vr) = vring->vr.avail->idx;
    }

    /* When we start there are none of either input nor output. */
    *out_num = *in_num = 0;

    i = head;
    do {
        if (unlikely(i >= num)) {
            error_report("Desc index is %u > %u, head = %u", i, num, head);
            vring->broken = true;
            return -EFAULT;
        }
        /* More than num links means the guest built a descriptor loop */
        if (unlikely(++found > num)) {
            error_report("Loop detected: last one at %u vq size %u head %u",
                         i, num, head);
            vring->broken = true;
            return -EFAULT;
        }
        desc = vring->vr.desc[i];

        /* Ensure descriptor is loaded before accessing fields */
        barrier();

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
            if (ret < 0) {
                return ret;
            }
            /* NOTE(review): the indirect descriptor itself may carry
             * VRING_DESC_F_NEXT, which the while condition below honours */
            continue;
        }

        /* If there are not enough iovecs left, stop for now. The caller
         * should check if there are more descs available once they have dealt
         * with the current set.
         */
        if (iov >= iov_end) {
            return -ENOBUFS;
        }

        /* TODO handle non-contiguous memory across region boundaries */
        iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
                                       desc.flags & VRING_DESC_F_WRITE);
        if (!iov->iov_base) {
            error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
                         (uint64_t)desc.addr, desc.len);
            vring->broken = true;
            return -EFAULT;
        }
        iov->iov_len = desc.len;
        iov++;

        if (desc.flags & VRING_DESC_F_WRITE) {
            /* If this is an input descriptor,
             * increment that count. */
            *in_num += 1;
        } else {
            /* If it's an output descriptor, they're all supposed
             * to come before any input descriptors. */
            if (unlikely(*in_num)) {
                error_report("Descriptor has out after in: idx %d", i);
                vring->broken = true;
                return -EFAULT;
            }
            *out_num += 1;
        }
        i = desc.next;
    } while (desc.flags & VRING_DESC_F_NEXT);

    /* On success, increment avail index. */
    vring->last_avail_idx++;
    return head;
}
335
336 /* After we've used one of their buffers, we tell them about it.
337 *
338 * Stolen from linux/drivers/vhost/vhost.c.
339 */
340 void vring_push(Vring *vring, unsigned int head, int len)
341 {
342 struct vring_used_elem *used;
343 uint16_t new;
344
345 /* Don't touch vring if a fatal error occurred */
346 if (vring->broken) {
347 return;
348 }
349
350 /* The virtqueue contains a ring of used buffers. Get a pointer to the
351 * next entry in that used ring. */
352 used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
353 used->id = head;
354 used->len = len;
355
356 /* Make sure buffer is written before we update index. */
357 smp_wmb();
358
359 new = vring->vr.used->idx = ++vring->last_used_idx;
360 if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) {
361 vring->signalled_used_valid = false;
362 }
363 }