]>
Commit | Line | Data |
---|---|---|
88807f89 SH |
1 | /* Copyright 2012 Red Hat, Inc. |
2 | * Copyright IBM, Corp. 2012 | |
3 | * | |
4 | * Based on Linux 2.6.39 vhost code: | |
5 | * Copyright (C) 2009 Red Hat, Inc. | |
6 | * Copyright (C) 2006 Rusty Russell IBM Corporation | |
7 | * | |
8 | * Author: Michael S. Tsirkin <mst@redhat.com> | |
9 | * Stefan Hajnoczi <stefanha@redhat.com> | |
10 | * | |
11 | * Inspiration, some code, and most witty comments come from | |
12 | * Documentation/virtual/lguest/lguest.c, by Rusty Russell | |
13 | * | |
14 | * This work is licensed under the terms of the GNU GPL, version 2. | |
15 | */ | |
16 | ||
17 | #include "trace.h" | |
87b7f2f8 PB |
18 | #include "hw/hw.h" |
19 | #include "exec/memory.h" | |
20 | #include "exec/address-spaces.h" | |
b0e5d90e | 21 | #include "hw/virtio/virtio-access.h" |
0d09e41a | 22 | #include "hw/virtio/dataplane/vring.h" |
b0e5d90e | 23 | #include "hw/virtio/dataplane/vring-accessors.h" |
b4a42f81 | 24 | #include "qemu/error-report.h" |
88807f89 | 25 | |
87b7f2f8 PB |
26 | /* vring_map can be coupled with vring_unmap or (if you still have the |
27 | * value returned in *mr) memory_region_unref. | |
28 | */ | |
29 | static void *vring_map(MemoryRegion **mr, hwaddr phys, hwaddr len, | |
30 | bool is_write) | |
31 | { | |
32 | MemoryRegionSection section = memory_region_find(get_system_memory(), phys, len); | |
33 | ||
34 | if (!section.mr || int128_get64(section.size) < len) { | |
35 | goto out; | |
36 | } | |
37 | if (is_write && section.readonly) { | |
38 | goto out; | |
39 | } | |
40 | if (!memory_region_is_ram(section.mr)) { | |
41 | goto out; | |
42 | } | |
43 | ||
44 | /* Ignore regions with dirty logging, we cannot mark them dirty */ | |
2d1a35be | 45 | if (memory_region_get_dirty_log_mask(section.mr)) { |
87b7f2f8 PB |
46 | goto out; |
47 | } | |
48 | ||
49 | *mr = section.mr; | |
50 | return memory_region_get_ram_ptr(section.mr) + section.offset_within_region; | |
51 | ||
52 | out: | |
53 | memory_region_unref(section.mr); | |
54 | *mr = NULL; | |
55 | return NULL; | |
56 | } | |
57 | ||
58 | static void vring_unmap(void *buffer, bool is_write) | |
59 | { | |
60 | ram_addr_t addr; | |
61 | MemoryRegion *mr; | |
62 | ||
63 | mr = qemu_ram_addr_from_host(buffer, &addr); | |
64 | memory_region_unref(mr); | |
65 | } | |
66 | ||
88807f89 SH |
67 | /* Map the guest's vring to host memory */ |
68 | bool vring_setup(Vring *vring, VirtIODevice *vdev, int n) | |
69 | { | |
70 | hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n); | |
71 | hwaddr vring_size = virtio_queue_get_ring_size(vdev, n); | |
72 | void *vring_ptr; | |
73 | ||
74 | vring->broken = false; | |
75 | ||
87b7f2f8 | 76 | vring_ptr = vring_map(&vring->mr, vring_addr, vring_size, true); |
88807f89 SH |
77 | if (!vring_ptr) { |
78 | error_report("Failed to map vring " | |
79 | "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu, | |
80 | vring_addr, vring_size); | |
81 | vring->broken = true; | |
82 | return false; | |
83 | } | |
84 | ||
85 | vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096); | |
86 | ||
9154b02c | 87 | vring->last_avail_idx = virtio_queue_get_last_avail_idx(vdev, n); |
b0e5d90e | 88 | vring->last_used_idx = vring_get_used_idx(vdev, vring); |
88807f89 SH |
89 | vring->signalled_used = 0; |
90 | vring->signalled_used_valid = false; | |
91 | ||
92 | trace_vring_setup(virtio_queue_get_ring_addr(vdev, n), | |
93 | vring->vr.desc, vring->vr.avail, vring->vr.used); | |
94 | return true; | |
95 | } | |
96 | ||
9154b02c | 97 | void vring_teardown(Vring *vring, VirtIODevice *vdev, int n) |
88807f89 | 98 | { |
9154b02c | 99 | virtio_queue_set_last_avail_idx(vdev, n, vring->last_avail_idx); |
6793dfd1 | 100 | virtio_queue_invalidate_signalled_used(vdev, n); |
9154b02c | 101 | |
87b7f2f8 | 102 | memory_region_unref(vring->mr); |
88807f89 SH |
103 | } |
104 | ||
105 | /* Disable guest->host notifies */ | |
106 | void vring_disable_notification(VirtIODevice *vdev, Vring *vring) | |
107 | { | |
95129d6f | 108 | if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { |
b0e5d90e | 109 | vring_set_used_flags(vdev, vring, VRING_USED_F_NO_NOTIFY); |
88807f89 SH |
110 | } |
111 | } | |
112 | ||
113 | /* Enable guest->host notifies | |
114 | * | |
115 | * Return true if the vring is empty, false if there are more requests. | |
116 | */ | |
117 | bool vring_enable_notification(VirtIODevice *vdev, Vring *vring) | |
118 | { | |
95129d6f | 119 | if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { |
88807f89 SH |
120 | vring_avail_event(&vring->vr) = vring->vr.avail->idx; |
121 | } else { | |
b0e5d90e | 122 | vring_clear_used_flags(vdev, vring, VRING_USED_F_NO_NOTIFY); |
88807f89 SH |
123 | } |
124 | smp_mb(); /* ensure update is seen before reading avail_idx */ | |
b0e5d90e | 125 | return !vring_more_avail(vdev, vring); |
88807f89 SH |
126 | } |
127 | ||
128 | /* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */ | |
129 | bool vring_should_notify(VirtIODevice *vdev, Vring *vring) | |
130 | { | |
131 | uint16_t old, new; | |
132 | bool v; | |
133 | /* Flush out used index updates. This is paired | |
134 | * with the barrier that the Guest executes when enabling | |
135 | * interrupts. */ | |
136 | smp_mb(); | |
137 | ||
95129d6f | 138 | if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) && |
b0e5d90e | 139 | unlikely(!vring_more_avail(vdev, vring))) { |
88807f89 SH |
140 | return true; |
141 | } | |
142 | ||
95129d6f | 143 | if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { |
b0e5d90e CH |
144 | return !(vring_get_avail_flags(vdev, vring) & |
145 | VRING_AVAIL_F_NO_INTERRUPT); | |
88807f89 SH |
146 | } |
147 | old = vring->signalled_used; | |
148 | v = vring->signalled_used_valid; | |
149 | new = vring->signalled_used = vring->last_used_idx; | |
150 | vring->signalled_used_valid = true; | |
151 | ||
152 | if (unlikely(!v)) { | |
153 | return true; | |
154 | } | |
155 | ||
be1e50a2 GK |
156 | return vring_need_event(virtio_tswap16(vdev, vring_used_event(&vring->vr)), |
157 | new, old); | |
88807f89 SH |
158 | } |
159 | ||
4d684832 | 160 | |
f329c74c | 161 | static int get_desc(Vring *vring, VirtQueueElement *elem, |
4d684832 PB |
162 | struct vring_desc *desc) |
163 | { | |
164 | unsigned *num; | |
8c1b566f PB |
165 | struct iovec *iov; |
166 | hwaddr *addr; | |
87b7f2f8 | 167 | MemoryRegion *mr; |
4d684832 | 168 | |
f329c74c | 169 | if (desc->flags & VRING_DESC_F_WRITE) { |
8c1b566f PB |
170 | num = &elem->in_num; |
171 | iov = &elem->in_sg[*num]; | |
172 | addr = &elem->in_addr[*num]; | |
4d684832 | 173 | } else { |
8c1b566f PB |
174 | num = &elem->out_num; |
175 | iov = &elem->out_sg[*num]; | |
176 | addr = &elem->out_addr[*num]; | |
4d684832 PB |
177 | |
178 | /* If it's an output descriptor, they're all supposed | |
179 | * to come before any input descriptors. */ | |
8c1b566f | 180 | if (unlikely(elem->in_num)) { |
4d684832 PB |
181 | error_report("Descriptor has out after in"); |
182 | return -EFAULT; | |
183 | } | |
184 | } | |
185 | ||
186 | /* Stop for now if there are not enough iovecs available. */ | |
8c1b566f | 187 | if (*num >= VIRTQUEUE_MAX_SIZE) { |
032f8b81 FZ |
188 | error_report("Invalid SG num: %u", *num); |
189 | return -EFAULT; | |
4d684832 PB |
190 | } |
191 | ||
192 | /* TODO handle non-contiguous memory across region boundaries */ | |
f329c74c CH |
193 | iov->iov_base = vring_map(&mr, desc->addr, desc->len, |
194 | desc->flags & VRING_DESC_F_WRITE); | |
4d684832 PB |
195 | if (!iov->iov_base) { |
196 | error_report("Failed to map descriptor addr %#" PRIx64 " len %u", | |
f329c74c | 197 | (uint64_t)desc->addr, desc->len); |
4d684832 PB |
198 | return -EFAULT; |
199 | } | |
200 | ||
87b7f2f8 PB |
201 | /* The MemoryRegion is looked up again and unref'ed later, leave the |
202 | * ref in place. */ | |
f329c74c CH |
203 | iov->iov_len = desc->len; |
204 | *addr = desc->addr; | |
4d684832 PB |
205 | *num += 1; |
206 | return 0; | |
207 | } | |
208 | ||
b0e5d90e CH |
209 | static void copy_in_vring_desc(VirtIODevice *vdev, |
210 | const struct vring_desc *guest, | |
211 | struct vring_desc *host) | |
212 | { | |
213 | host->addr = virtio_ldq_p(vdev, &guest->addr); | |
214 | host->len = virtio_ldl_p(vdev, &guest->len); | |
215 | host->flags = virtio_lduw_p(vdev, &guest->flags); | |
216 | host->next = virtio_lduw_p(vdev, &guest->next); | |
217 | } | |
218 | ||
88807f89 | 219 | /* This is stolen from linux/drivers/vhost/vhost.c. */ |
b0e5d90e CH |
220 | static int get_indirect(VirtIODevice *vdev, Vring *vring, |
221 | VirtQueueElement *elem, struct vring_desc *indirect) | |
88807f89 SH |
222 | { |
223 | struct vring_desc desc; | |
224 | unsigned int i = 0, count, found = 0; | |
4d684832 | 225 | int ret; |
88807f89 SH |
226 | |
227 | /* Sanity check */ | |
f329c74c | 228 | if (unlikely(indirect->len % sizeof(desc))) { |
88807f89 SH |
229 | error_report("Invalid length in indirect descriptor: " |
230 | "len %#x not multiple of %#zx", | |
f329c74c | 231 | indirect->len, sizeof(desc)); |
88807f89 SH |
232 | vring->broken = true; |
233 | return -EFAULT; | |
234 | } | |
235 | ||
f329c74c | 236 | count = indirect->len / sizeof(desc); |
88807f89 SH |
237 | /* Buffers are chained via a 16 bit next field, so |
238 | * we can have at most 2^16 of these. */ | |
239 | if (unlikely(count > USHRT_MAX + 1)) { | |
f329c74c | 240 | error_report("Indirect buffer length too big: %d", indirect->len); |
88807f89 SH |
241 | vring->broken = true; |
242 | return -EFAULT; | |
243 | } | |
244 | ||
245 | do { | |
246 | struct vring_desc *desc_ptr; | |
87b7f2f8 | 247 | MemoryRegion *mr; |
88807f89 SH |
248 | |
249 | /* Translate indirect descriptor */ | |
87b7f2f8 | 250 | desc_ptr = vring_map(&mr, |
f329c74c | 251 | indirect->addr + found * sizeof(desc), |
87b7f2f8 | 252 | sizeof(desc), false); |
88807f89 SH |
253 | if (!desc_ptr) { |
254 | error_report("Failed to map indirect descriptor " | |
255 | "addr %#" PRIx64 " len %zu", | |
f329c74c | 256 | (uint64_t)indirect->addr + found * sizeof(desc), |
88807f89 SH |
257 | sizeof(desc)); |
258 | vring->broken = true; | |
259 | return -EFAULT; | |
260 | } | |
b0e5d90e | 261 | copy_in_vring_desc(vdev, desc_ptr, &desc); |
87b7f2f8 | 262 | memory_region_unref(mr); |
88807f89 SH |
263 | |
264 | /* Ensure descriptor has been loaded before accessing fields */ | |
265 | barrier(); /* read_barrier_depends(); */ | |
266 | ||
267 | if (unlikely(++found > count)) { | |
268 | error_report("Loop detected: last one at %u " | |
269 | "indirect size %u", i, count); | |
270 | vring->broken = true; | |
271 | return -EFAULT; | |
272 | } | |
273 | ||
f329c74c | 274 | if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) { |
88807f89 SH |
275 | error_report("Nested indirect descriptor"); |
276 | vring->broken = true; | |
277 | return -EFAULT; | |
278 | } | |
279 | ||
f329c74c | 280 | ret = get_desc(vring, elem, &desc); |
4d684832 PB |
281 | if (ret < 0) { |
282 | vring->broken |= (ret == -EFAULT); | |
283 | return ret; | |
88807f89 | 284 | } |
f329c74c CH |
285 | i = desc.next; |
286 | } while (desc.flags & VRING_DESC_F_NEXT); | |
88807f89 SH |
287 | return 0; |
288 | } | |
289 | ||
abd76425 | 290 | static void vring_unmap_element(VirtQueueElement *elem) |
8c1b566f | 291 | { |
87b7f2f8 PB |
292 | int i; |
293 | ||
294 | /* This assumes that the iovecs, if changed, are never moved past | |
295 | * the end of the valid area. This is true if iovec manipulations | |
296 | * are done with iov_discard_front and iov_discard_back. | |
297 | */ | |
298 | for (i = 0; i < elem->out_num; i++) { | |
299 | vring_unmap(elem->out_sg[i].iov_base, false); | |
300 | } | |
301 | ||
302 | for (i = 0; i < elem->in_num; i++) { | |
303 | vring_unmap(elem->in_sg[i].iov_base, true); | |
304 | } | |
8c1b566f PB |
305 | } |
306 | ||
88807f89 SH |
307 | /* This looks in the virtqueue and for the first available buffer, and converts |
308 | * it to an iovec for convenient access. Since descriptors consist of some | |
309 | * number of output then some number of input descriptors, it's actually two | |
310 | * iovecs, but we pack them into one and note how many of each there were. | |
311 | * | |
312 | * This function returns the descriptor number found, or vq->num (which is | |
313 | * never a valid descriptor number) if none was found. A negative code is | |
314 | * returned on error. | |
315 | * | |
316 | * Stolen from linux/drivers/vhost/vhost.c. | |
317 | */ | |
318 | int vring_pop(VirtIODevice *vdev, Vring *vring, | |
f897bf75 | 319 | VirtQueueElement *elem) |
88807f89 SH |
320 | { |
321 | struct vring_desc desc; | |
322 | unsigned int i, head, found = 0, num = vring->vr.num; | |
323 | uint16_t avail_idx, last_avail_idx; | |
4d684832 | 324 | int ret; |
88807f89 | 325 | |
f897bf75 SH |
326 | /* Initialize elem so it can be safely unmapped */ |
327 | elem->in_num = elem->out_num = 0; | |
328 | ||
88807f89 SH |
329 | /* If there was a fatal error then refuse operation */ |
330 | if (vring->broken) { | |
781c117f PB |
331 | ret = -EFAULT; |
332 | goto out; | |
88807f89 SH |
333 | } |
334 | ||
335 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
336 | last_avail_idx = vring->last_avail_idx; | |
b0e5d90e | 337 | avail_idx = vring_get_avail_idx(vdev, vring); |
88807f89 SH |
338 | barrier(); /* load indices now and not again later */ |
339 | ||
340 | if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) { | |
341 | error_report("Guest moved used index from %u to %u", | |
342 | last_avail_idx, avail_idx); | |
781c117f PB |
343 | ret = -EFAULT; |
344 | goto out; | |
88807f89 SH |
345 | } |
346 | ||
347 | /* If there's nothing new since last we looked. */ | |
348 | if (avail_idx == last_avail_idx) { | |
781c117f PB |
349 | ret = -EAGAIN; |
350 | goto out; | |
88807f89 SH |
351 | } |
352 | ||
353 | /* Only get avail ring entries after they have been exposed by guest. */ | |
354 | smp_rmb(); | |
355 | ||
356 | /* Grab the next descriptor number they're advertising, and increment | |
357 | * the index we've seen. */ | |
b0e5d90e | 358 | head = vring_get_avail_ring(vdev, vring, last_avail_idx % num); |
88807f89 | 359 | |
8c1b566f | 360 | elem->index = head; |
f897bf75 | 361 | |
88807f89 SH |
362 | /* If their number is silly, that's an error. */ |
363 | if (unlikely(head >= num)) { | |
364 | error_report("Guest says index %u > %u is available", head, num); | |
781c117f PB |
365 | ret = -EFAULT; |
366 | goto out; | |
88807f89 SH |
367 | } |
368 | ||
88807f89 SH |
369 | i = head; |
370 | do { | |
371 | if (unlikely(i >= num)) { | |
372 | error_report("Desc index is %u > %u, head = %u", i, num, head); | |
781c117f PB |
373 | ret = -EFAULT; |
374 | goto out; | |
88807f89 SH |
375 | } |
376 | if (unlikely(++found > num)) { | |
377 | error_report("Loop detected: last one at %u vq size %u head %u", | |
378 | i, num, head); | |
781c117f PB |
379 | ret = -EFAULT; |
380 | goto out; | |
88807f89 | 381 | } |
b0e5d90e | 382 | copy_in_vring_desc(vdev, &vring->vr.desc[i], &desc); |
88807f89 SH |
383 | |
384 | /* Ensure descriptor is loaded before accessing fields */ | |
385 | barrier(); | |
386 | ||
f329c74c | 387 | if (desc.flags & VRING_DESC_F_INDIRECT) { |
b0e5d90e | 388 | ret = get_indirect(vdev, vring, elem, &desc); |
88807f89 | 389 | if (ret < 0) { |
781c117f | 390 | goto out; |
88807f89 SH |
391 | } |
392 | continue; | |
393 | } | |
394 | ||
f329c74c | 395 | ret = get_desc(vring, elem, &desc); |
4d684832 | 396 | if (ret < 0) { |
781c117f | 397 | goto out; |
88807f89 SH |
398 | } |
399 | ||
f329c74c CH |
400 | i = desc.next; |
401 | } while (desc.flags & VRING_DESC_F_NEXT); | |
88807f89 SH |
402 | |
403 | /* On success, increment avail index. */ | |
404 | vring->last_avail_idx++; | |
95129d6f | 405 | if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { |
be1e50a2 GK |
406 | vring_avail_event(&vring->vr) = |
407 | virtio_tswap16(vdev, vring->last_avail_idx); | |
a3614c65 BW |
408 | } |
409 | ||
88807f89 | 410 | return head; |
781c117f PB |
411 | |
412 | out: | |
413 | assert(ret < 0); | |
414 | if (ret == -EFAULT) { | |
415 | vring->broken = true; | |
416 | } | |
f897bf75 | 417 | vring_unmap_element(elem); |
781c117f | 418 | return ret; |
88807f89 SH |
419 | } |
420 | ||
421 | /* After we've used one of their buffers, we tell them about it. | |
422 | * | |
423 | * Stolen from linux/drivers/vhost/vhost.c. | |
424 | */ | |
b0e5d90e CH |
425 | void vring_push(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem, |
426 | int len) | |
88807f89 | 427 | { |
8c1b566f | 428 | unsigned int head = elem->index; |
88807f89 SH |
429 | uint16_t new; |
430 | ||
abd76425 | 431 | vring_unmap_element(elem); |
8c1b566f | 432 | |
88807f89 SH |
433 | /* Don't touch vring if a fatal error occurred */ |
434 | if (vring->broken) { | |
435 | return; | |
436 | } | |
437 | ||
438 | /* The virtqueue contains a ring of used buffers. Get a pointer to the | |
439 | * next entry in that used ring. */ | |
b0e5d90e CH |
440 | vring_set_used_ring_id(vdev, vring, vring->last_used_idx % vring->vr.num, |
441 | head); | |
442 | vring_set_used_ring_len(vdev, vring, vring->last_used_idx % vring->vr.num, | |
443 | len); | |
88807f89 SH |
444 | |
445 | /* Make sure buffer is written before we update index. */ | |
446 | smp_wmb(); | |
447 | ||
b0e5d90e CH |
448 | new = ++vring->last_used_idx; |
449 | vring_set_used_idx(vdev, vring, new); | |
88807f89 SH |
450 | if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) { |
451 | vring->signalled_used_valid = false; | |
452 | } | |
453 | } |