]>
Commit | Line | Data |
---|---|---|
967f97fa AL |
1 | /* |
2 | * Virtio Support | |
3 | * | |
4 | * Copyright IBM, Corp. 2007 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <aliguori@us.ibm.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include <inttypes.h> | |
15 | #include <err.h> | |
16 | ||
17 | #include "virtio.h" | |
18 | #include "sysemu.h" | |
19 | ||
20 | //#define VIRTIO_ZERO_COPY | |
21 | ||
/* from Linux's linux/virtio_pci.h */

/*
 * Byte offsets of the registers inside the device's I/O region; the port
 * handlers in this file subtract the region base and switch on these values.
 */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES 0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES 4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN 8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM 12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL 14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY 16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS 18

/* An 8-bit r/o interrupt status register. Reading the value will return the
 * current contents of the ISR and will also clear it. This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR 19

/* Offset at which the device-specific configuration bytes start; the
 * virtio_config_* accessors subtract it to index vdev->config. */
#define VIRTIO_PCI_CONFIG 20

/* Virtio ABI version, if we increment this, we break the guest driver. */
#define VIRTIO_PCI_ABI_VERSION 0

/* How many bits to shift physical queue address written to QUEUE_PFN.
 * 12 is historical, and due to x86 page size. */
#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN 4096
62 | ||
967f97fa AL |
/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step. We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 *
 * As defined here the macro expands to an empty statement, so it emits no
 * barrier instruction at all.
 */
#define wmb() do { } while (0)
68 | ||
/*
 * Layout of one entry of the descriptor table. The table lives at
 * vring.desc and is only accessed field by field through the
 * vring_desc_*() helpers, using offsetof() on this struct.
 */
typedef struct VRingDesc
{
    uint64_t addr;  /* physical address of the buffer */
    uint32_t len;   /* buffer length in bytes */
    uint16_t flags; /* tested against VRING_DESC_F_NEXT / VRING_DESC_F_WRITE */
    uint16_t next;  /* index of the next descriptor when F_NEXT is set */
} VRingDesc;
76 | ||
/*
 * Layout of the "available" ring located at vring.avail. This struct is
 * never instantiated; it is used only with offsetof() to compute field
 * addresses that are then read with lduw_phys().
 *
 * 'ring' is a C99 flexible array member (rather than the GNU zero-length
 * array extension); sizeof() and every field offset are unchanged.
 */
typedef struct VRingAvail
{
    uint16_t flags;  /* tested against VRING_AVAIL_F_NO_INTERRUPT */
    uint16_t idx;    /* free-running index of the next slot to be filled */
    uint16_t ring[]; /* descriptor head indices, vring.num entries */
} VRingAvail;
83 | ||
/* One entry of the "used" ring, written by virtqueue_fill(). */
typedef struct VRingUsedElem
{
    uint32_t id;  /* head index of the completed descriptor chain */
    uint32_t len; /* length value passed to virtqueue_fill() */
} VRingUsedElem;
89 | ||
90 | typedef struct VRingUsed | |
91 | { | |
92 | uint16_t flags; | |
93 | uint16_t idx; | |
94 | VRingUsedElem ring[0]; | |
95 | } VRingUsed; | |
96 | ||
/*
 * Location of the three parts of a ring, as computed by virtqueue_init()
 * from the base address supplied for the queue.
 */
typedef struct VRing
{
    unsigned int num;         /* number of descriptors; 0 means slot unused */
    target_phys_addr_t desc;  /* descriptor table base */
    target_phys_addr_t avail; /* available ring base */
    target_phys_addr_t used;  /* used ring base (aligned) */
} VRing;
104 | ||
/* Per-queue state kept by the device model. */
struct VirtQueue
{
    VRing vring;             /* ring geometry and addresses */
    uint32_t pfn;            /* last value written to VIRTIO_PCI_QUEUE_PFN */
    uint16_t last_avail_idx; /* next avail-ring index virtqueue_pop() will consume */
    int inuse;               /* incremented by virtqueue_pop(), decreased by virtqueue_flush() */
    /* Invoked when VIRTIO_PCI_QUEUE_NOTIFY is written with this queue's index. */
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
};
113 | ||
/* Number of VirtQueue slots allocated for every device (see virtio_init_pci). */
#define VIRTIO_PCI_QUEUE_MAX 16
115 | ||
116 | /* virt queue functions */ | |
117 | #ifdef VIRTIO_ZERO_COPY | |
118 | static void *virtio_map_gpa(target_phys_addr_t addr, size_t size) | |
119 | { | |
120 | ram_addr_t off; | |
121 | target_phys_addr_t addr1; | |
122 | ||
123 | off = cpu_get_physical_page_desc(addr); | |
124 | if ((off & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { | |
125 | fprintf(stderr, "virtio DMA to IO ram\n"); | |
126 | exit(1); | |
127 | } | |
128 | ||
129 | off = (off & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK); | |
130 | ||
131 | for (addr1 = addr + TARGET_PAGE_SIZE; | |
132 | addr1 < TARGET_PAGE_ALIGN(addr + size); | |
133 | addr1 += TARGET_PAGE_SIZE) { | |
134 | ram_addr_t off1; | |
135 | ||
136 | off1 = cpu_get_physical_page_desc(addr1); | |
137 | if ((off1 & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { | |
138 | fprintf(stderr, "virtio DMA to IO ram\n"); | |
139 | exit(1); | |
140 | } | |
141 | ||
142 | off1 = (off1 & TARGET_PAGE_MASK) | (addr1 & ~TARGET_PAGE_MASK); | |
143 | ||
144 | if (off1 != (off + (addr1 - addr))) { | |
145 | fprintf(stderr, "discontigous virtio memory\n"); | |
146 | exit(1); | |
147 | } | |
148 | } | |
149 | ||
150 | return phys_ram_base + off; | |
151 | } | |
152 | #endif | |
153 | ||
154 | static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa) | |
155 | { | |
156 | vq->vring.desc = pa; | |
157 | vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc); | |
f46f15bc AL |
158 | vq->vring.used = vring_align(vq->vring.avail + |
159 | offsetof(VRingAvail, ring[vq->vring.num]), | |
160 | VIRTIO_PCI_VRING_ALIGN); | |
967f97fa AL |
161 | } |
162 | ||
163 | static inline uint64_t vring_desc_addr(VirtQueue *vq, int i) | |
164 | { | |
165 | target_phys_addr_t pa; | |
166 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); | |
167 | return ldq_phys(pa); | |
168 | } | |
169 | ||
170 | static inline uint32_t vring_desc_len(VirtQueue *vq, int i) | |
171 | { | |
172 | target_phys_addr_t pa; | |
173 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); | |
174 | return ldl_phys(pa); | |
175 | } | |
176 | ||
177 | static inline uint16_t vring_desc_flags(VirtQueue *vq, int i) | |
178 | { | |
179 | target_phys_addr_t pa; | |
180 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); | |
181 | return lduw_phys(pa); | |
182 | } | |
183 | ||
184 | static inline uint16_t vring_desc_next(VirtQueue *vq, int i) | |
185 | { | |
186 | target_phys_addr_t pa; | |
187 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); | |
188 | return lduw_phys(pa); | |
189 | } | |
190 | ||
191 | static inline uint16_t vring_avail_flags(VirtQueue *vq) | |
192 | { | |
193 | target_phys_addr_t pa; | |
194 | pa = vq->vring.avail + offsetof(VRingAvail, flags); | |
195 | return lduw_phys(pa); | |
196 | } | |
197 | ||
198 | static inline uint16_t vring_avail_idx(VirtQueue *vq) | |
199 | { | |
200 | target_phys_addr_t pa; | |
201 | pa = vq->vring.avail + offsetof(VRingAvail, idx); | |
202 | return lduw_phys(pa); | |
203 | } | |
204 | ||
205 | static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) | |
206 | { | |
207 | target_phys_addr_t pa; | |
208 | pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); | |
209 | return lduw_phys(pa); | |
210 | } | |
211 | ||
212 | static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val) | |
213 | { | |
214 | target_phys_addr_t pa; | |
215 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].id); | |
216 | stl_phys(pa, val); | |
217 | } | |
218 | ||
219 | static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val) | |
220 | { | |
221 | target_phys_addr_t pa; | |
222 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].len); | |
223 | stl_phys(pa, val); | |
224 | } | |
225 | ||
226 | static uint16_t vring_used_idx(VirtQueue *vq) | |
227 | { | |
228 | target_phys_addr_t pa; | |
229 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
230 | return lduw_phys(pa); | |
231 | } | |
232 | ||
233 | static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val) | |
234 | { | |
235 | target_phys_addr_t pa; | |
236 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
237 | stw_phys(pa, vring_used_idx(vq) + val); | |
238 | } | |
239 | ||
240 | static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) | |
241 | { | |
242 | target_phys_addr_t pa; | |
243 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
244 | stw_phys(pa, lduw_phys(pa) | mask); | |
245 | } | |
246 | ||
247 | static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) | |
248 | { | |
249 | target_phys_addr_t pa; | |
250 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
251 | stw_phys(pa, lduw_phys(pa) & ~mask); | |
252 | } | |
253 | ||
254 | void virtio_queue_set_notification(VirtQueue *vq, int enable) | |
255 | { | |
256 | if (enable) | |
257 | vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); | |
258 | else | |
259 | vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); | |
260 | } | |
261 | ||
262 | int virtio_queue_ready(VirtQueue *vq) | |
263 | { | |
264 | return vq->vring.avail != 0; | |
265 | } | |
266 | ||
267 | int virtio_queue_empty(VirtQueue *vq) | |
268 | { | |
269 | return vring_avail_idx(vq) == vq->last_avail_idx; | |
270 | } | |
271 | ||
272 | void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, | |
273 | unsigned int len, unsigned int idx) | |
274 | { | |
275 | unsigned int offset; | |
276 | int i; | |
277 | ||
278 | #ifndef VIRTIO_ZERO_COPY | |
279 | for (i = 0; i < elem->out_num; i++) | |
280 | qemu_free(elem->out_sg[i].iov_base); | |
281 | #endif | |
282 | ||
283 | offset = 0; | |
284 | for (i = 0; i < elem->in_num; i++) { | |
285 | size_t size = MIN(len - offset, elem->in_sg[i].iov_len); | |
286 | ||
287 | #ifdef VIRTIO_ZERO_COPY | |
288 | if (size) { | |
289 | ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base; | |
290 | ram_addr_t off; | |
291 | ||
292 | for (off = 0; off < size; off += TARGET_PAGE_SIZE) | |
293 | cpu_physical_memory_set_dirty(addr + off); | |
294 | } | |
295 | #else | |
296 | if (size) | |
297 | cpu_physical_memory_write(elem->in_addr[i], | |
298 | elem->in_sg[i].iov_base, | |
299 | size); | |
300 | ||
301 | qemu_free(elem->in_sg[i].iov_base); | |
302 | #endif | |
303 | ||
304 | offset += size; | |
305 | } | |
306 | ||
307 | idx = (idx + vring_used_idx(vq)) % vq->vring.num; | |
308 | ||
309 | /* Get a pointer to the next entry in the used ring. */ | |
310 | vring_used_ring_id(vq, idx, elem->index); | |
311 | vring_used_ring_len(vq, idx, len); | |
312 | } | |
313 | ||
314 | void virtqueue_flush(VirtQueue *vq, unsigned int count) | |
315 | { | |
316 | /* Make sure buffer is written before we update index. */ | |
317 | wmb(); | |
318 | vring_used_idx_increment(vq, count); | |
319 | vq->inuse -= count; | |
320 | } | |
321 | ||
322 | void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, | |
323 | unsigned int len) | |
324 | { | |
325 | virtqueue_fill(vq, elem, len, 0); | |
326 | virtqueue_flush(vq, 1); | |
327 | } | |
328 | ||
329 | static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) | |
330 | { | |
331 | uint16_t num_heads = vring_avail_idx(vq) - idx; | |
332 | ||
333 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
334 | if (num_heads > vq->vring.num) | |
335 | errx(1, "Guest moved used index from %u to %u", | |
336 | idx, vring_avail_idx(vq)); | |
337 | ||
338 | return num_heads; | |
339 | } | |
340 | ||
341 | static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) | |
342 | { | |
343 | unsigned int head; | |
344 | ||
345 | /* Grab the next descriptor number they're advertising, and increment | |
346 | * the index we've seen. */ | |
347 | head = vring_avail_ring(vq, idx % vq->vring.num); | |
348 | ||
349 | /* If their number is silly, that's a fatal mistake. */ | |
350 | if (head >= vq->vring.num) | |
351 | errx(1, "Guest says index %u is available", head); | |
352 | ||
353 | return head; | |
354 | } | |
355 | ||
356 | static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) | |
357 | { | |
358 | unsigned int next; | |
359 | ||
360 | /* If this descriptor says it doesn't chain, we're done. */ | |
361 | if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) | |
362 | return vq->vring.num; | |
363 | ||
364 | /* Check they're not leading us off end of descriptors. */ | |
365 | next = vring_desc_next(vq, i); | |
366 | /* Make sure compiler knows to grab that: we don't want it changing! */ | |
367 | wmb(); | |
368 | ||
369 | if (next >= vq->vring.num) | |
370 | errx(1, "Desc next is %u", next); | |
371 | ||
372 | return next; | |
373 | } | |
374 | ||
375 | int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) | |
376 | { | |
377 | unsigned int idx; | |
378 | int num_bufs, in_total, out_total; | |
379 | ||
380 | idx = vq->last_avail_idx; | |
381 | ||
382 | num_bufs = in_total = out_total = 0; | |
383 | while (virtqueue_num_heads(vq, idx)) { | |
384 | int i; | |
385 | ||
386 | i = virtqueue_get_head(vq, idx++); | |
387 | do { | |
388 | /* If we've got too many, that implies a descriptor loop. */ | |
389 | if (++num_bufs > vq->vring.num) | |
390 | errx(1, "Looped descriptor"); | |
391 | ||
392 | if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
393 | if (in_bytes > 0 && | |
394 | (in_total += vring_desc_len(vq, i)) >= in_bytes) | |
395 | return 1; | |
396 | } else { | |
397 | if (out_bytes > 0 && | |
398 | (out_total += vring_desc_len(vq, i)) >= out_bytes) | |
399 | return 1; | |
400 | } | |
401 | } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); | |
402 | } | |
403 | ||
404 | return 0; | |
405 | } | |
406 | ||
407 | int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) | |
408 | { | |
409 | unsigned int i, head; | |
410 | ||
411 | if (!virtqueue_num_heads(vq, vq->last_avail_idx)) | |
412 | return 0; | |
413 | ||
414 | /* When we start there are none of either input nor output. */ | |
415 | elem->out_num = elem->in_num = 0; | |
416 | ||
417 | i = head = virtqueue_get_head(vq, vq->last_avail_idx++); | |
418 | do { | |
419 | struct iovec *sg; | |
420 | ||
421 | if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
422 | elem->in_addr[elem->in_num] = vring_desc_addr(vq, i); | |
423 | sg = &elem->in_sg[elem->in_num++]; | |
424 | } else | |
425 | sg = &elem->out_sg[elem->out_num++]; | |
426 | ||
427 | /* Grab the first descriptor, and check it's OK. */ | |
428 | sg->iov_len = vring_desc_len(vq, i); | |
429 | ||
430 | #ifdef VIRTIO_ZERO_COPY | |
431 | sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len); | |
432 | #else | |
433 | /* cap individual scatter element size to prevent unbounded allocations | |
434 | of memory from the guest. Practically speaking, no virtio driver | |
435 | will ever pass more than a page in each element. We set the cap to | |
436 | be 2MB in case for some reason a large page makes it way into the | |
437 | sg list. When we implement a zero copy API, this limitation will | |
438 | disappear */ | |
439 | if (sg->iov_len > (2 << 20)) | |
440 | sg->iov_len = 2 << 20; | |
441 | ||
442 | sg->iov_base = qemu_malloc(sg->iov_len); | |
443 | if (sg->iov_base && | |
444 | !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) { | |
445 | cpu_physical_memory_read(vring_desc_addr(vq, i), | |
446 | sg->iov_base, | |
447 | sg->iov_len); | |
448 | } | |
449 | #endif | |
450 | if (sg->iov_base == NULL) | |
451 | errx(1, "Invalid mapping\n"); | |
452 | ||
453 | /* If we've got too many, that implies a descriptor loop. */ | |
454 | if ((elem->in_num + elem->out_num) > vq->vring.num) | |
455 | errx(1, "Looped descriptor"); | |
456 | } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); | |
457 | ||
458 | elem->index = head; | |
459 | ||
460 | vq->inuse++; | |
461 | ||
462 | return elem->in_num + elem->out_num; | |
463 | } | |
464 | ||
465 | /* virtio device */ | |
466 | ||
467 | static VirtIODevice *to_virtio_device(PCIDevice *pci_dev) | |
468 | { | |
469 | return (VirtIODevice *)pci_dev; | |
470 | } | |
471 | ||
472 | static void virtio_update_irq(VirtIODevice *vdev) | |
473 | { | |
474 | qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1); | |
475 | } | |
476 | ||
477 | void virtio_reset(void *opaque) | |
478 | { | |
479 | VirtIODevice *vdev = opaque; | |
480 | int i; | |
481 | ||
482 | if (vdev->reset) | |
483 | vdev->reset(vdev); | |
484 | ||
485 | vdev->features = 0; | |
486 | vdev->queue_sel = 0; | |
487 | vdev->status = 0; | |
488 | vdev->isr = 0; | |
489 | virtio_update_irq(vdev); | |
490 | ||
491 | for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
492 | vdev->vq[i].vring.desc = 0; | |
493 | vdev->vq[i].vring.avail = 0; | |
494 | vdev->vq[i].vring.used = 0; | |
495 | vdev->vq[i].last_avail_idx = 0; | |
496 | vdev->vq[i].pfn = 0; | |
497 | } | |
498 | } | |
499 | ||
500 | static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) | |
501 | { | |
502 | VirtIODevice *vdev = to_virtio_device(opaque); | |
503 | ram_addr_t pa; | |
504 | ||
505 | addr -= vdev->addr; | |
506 | ||
507 | switch (addr) { | |
508 | case VIRTIO_PCI_GUEST_FEATURES: | |
509 | if (vdev->set_features) | |
510 | vdev->set_features(vdev, val); | |
511 | vdev->features = val; | |
512 | break; | |
513 | case VIRTIO_PCI_QUEUE_PFN: | |
f46f15bc | 514 | pa = (ram_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT; |
967f97fa AL |
515 | vdev->vq[vdev->queue_sel].pfn = val; |
516 | if (pa == 0) { | |
517 | virtio_reset(vdev); | |
518 | } else { | |
519 | virtqueue_init(&vdev->vq[vdev->queue_sel], pa); | |
520 | } | |
521 | break; | |
522 | case VIRTIO_PCI_QUEUE_SEL: | |
523 | if (val < VIRTIO_PCI_QUEUE_MAX) | |
524 | vdev->queue_sel = val; | |
525 | break; | |
526 | case VIRTIO_PCI_QUEUE_NOTIFY: | |
527 | if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc) | |
528 | vdev->vq[val].handle_output(vdev, &vdev->vq[val]); | |
529 | break; | |
530 | case VIRTIO_PCI_STATUS: | |
531 | vdev->status = val & 0xFF; | |
532 | if (vdev->status == 0) | |
533 | virtio_reset(vdev); | |
534 | break; | |
535 | } | |
536 | } | |
537 | ||
538 | static uint32_t virtio_ioport_read(void *opaque, uint32_t addr) | |
539 | { | |
540 | VirtIODevice *vdev = to_virtio_device(opaque); | |
541 | uint32_t ret = 0xFFFFFFFF; | |
542 | ||
543 | addr -= vdev->addr; | |
544 | ||
545 | switch (addr) { | |
546 | case VIRTIO_PCI_HOST_FEATURES: | |
547 | ret = vdev->get_features(vdev); | |
548 | ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY); | |
549 | break; | |
550 | case VIRTIO_PCI_GUEST_FEATURES: | |
551 | ret = vdev->features; | |
552 | break; | |
553 | case VIRTIO_PCI_QUEUE_PFN: | |
554 | ret = vdev->vq[vdev->queue_sel].pfn; | |
555 | break; | |
556 | case VIRTIO_PCI_QUEUE_NUM: | |
557 | ret = vdev->vq[vdev->queue_sel].vring.num; | |
558 | break; | |
559 | case VIRTIO_PCI_QUEUE_SEL: | |
560 | ret = vdev->queue_sel; | |
561 | break; | |
562 | case VIRTIO_PCI_STATUS: | |
563 | ret = vdev->status; | |
564 | break; | |
565 | case VIRTIO_PCI_ISR: | |
566 | /* reading from the ISR also clears it. */ | |
567 | ret = vdev->isr; | |
568 | vdev->isr = 0; | |
569 | virtio_update_irq(vdev); | |
570 | break; | |
571 | default: | |
572 | break; | |
573 | } | |
574 | ||
575 | return ret; | |
576 | } | |
577 | ||
578 | static uint32_t virtio_config_readb(void *opaque, uint32_t addr) | |
579 | { | |
580 | VirtIODevice *vdev = opaque; | |
581 | uint8_t val; | |
582 | ||
583 | vdev->get_config(vdev, vdev->config); | |
584 | ||
585 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
586 | if (addr > (vdev->config_len - sizeof(val))) | |
587 | return (uint32_t)-1; | |
588 | ||
589 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
590 | return val; | |
591 | } | |
592 | ||
593 | static uint32_t virtio_config_readw(void *opaque, uint32_t addr) | |
594 | { | |
595 | VirtIODevice *vdev = opaque; | |
596 | uint16_t val; | |
597 | ||
598 | vdev->get_config(vdev, vdev->config); | |
599 | ||
600 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
601 | if (addr > (vdev->config_len - sizeof(val))) | |
602 | return (uint32_t)-1; | |
603 | ||
604 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
605 | return val; | |
606 | } | |
607 | ||
608 | static uint32_t virtio_config_readl(void *opaque, uint32_t addr) | |
609 | { | |
610 | VirtIODevice *vdev = opaque; | |
611 | uint32_t val; | |
612 | ||
613 | vdev->get_config(vdev, vdev->config); | |
614 | ||
615 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
616 | if (addr > (vdev->config_len - sizeof(val))) | |
617 | return (uint32_t)-1; | |
618 | ||
619 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
620 | return val; | |
621 | } | |
622 | ||
623 | static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data) | |
624 | { | |
625 | VirtIODevice *vdev = opaque; | |
626 | uint8_t val = data; | |
627 | ||
628 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
629 | if (addr > (vdev->config_len - sizeof(val))) | |
630 | return; | |
631 | ||
632 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
633 | ||
634 | if (vdev->set_config) | |
635 | vdev->set_config(vdev, vdev->config); | |
636 | } | |
637 | ||
638 | static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) | |
639 | { | |
640 | VirtIODevice *vdev = opaque; | |
641 | uint16_t val = data; | |
642 | ||
643 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
644 | if (addr > (vdev->config_len - sizeof(val))) | |
645 | return; | |
646 | ||
647 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
648 | ||
649 | if (vdev->set_config) | |
650 | vdev->set_config(vdev, vdev->config); | |
651 | } | |
652 | ||
653 | static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) | |
654 | { | |
655 | VirtIODevice *vdev = opaque; | |
656 | uint32_t val = data; | |
657 | ||
658 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
659 | if (addr > (vdev->config_len - sizeof(val))) | |
660 | return; | |
661 | ||
662 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
663 | ||
664 | if (vdev->set_config) | |
665 | vdev->set_config(vdev, vdev->config); | |
666 | } | |
667 | ||
668 | static void virtio_map(PCIDevice *pci_dev, int region_num, | |
669 | uint32_t addr, uint32_t size, int type) | |
670 | { | |
671 | VirtIODevice *vdev = to_virtio_device(pci_dev); | |
672 | int i; | |
673 | ||
674 | vdev->addr = addr; | |
675 | for (i = 0; i < 3; i++) { | |
676 | register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev); | |
677 | register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev); | |
678 | } | |
679 | ||
680 | if (vdev->config_len) { | |
681 | register_ioport_write(addr + 20, vdev->config_len, 1, | |
682 | virtio_config_writeb, vdev); | |
683 | register_ioport_write(addr + 20, vdev->config_len, 2, | |
684 | virtio_config_writew, vdev); | |
685 | register_ioport_write(addr + 20, vdev->config_len, 4, | |
686 | virtio_config_writel, vdev); | |
687 | register_ioport_read(addr + 20, vdev->config_len, 1, | |
688 | virtio_config_readb, vdev); | |
689 | register_ioport_read(addr + 20, vdev->config_len, 2, | |
690 | virtio_config_readw, vdev); | |
691 | register_ioport_read(addr + 20, vdev->config_len, 4, | |
692 | virtio_config_readl, vdev); | |
693 | ||
694 | vdev->get_config(vdev, vdev->config); | |
695 | } | |
696 | } | |
697 | ||
698 | VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, | |
699 | void (*handle_output)(VirtIODevice *, VirtQueue *)) | |
700 | { | |
701 | int i; | |
702 | ||
703 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
704 | if (vdev->vq[i].vring.num == 0) | |
705 | break; | |
706 | } | |
707 | ||
708 | if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) | |
709 | abort(); | |
710 | ||
711 | vdev->vq[i].vring.num = queue_size; | |
712 | vdev->vq[i].handle_output = handle_output; | |
713 | ||
714 | return &vdev->vq[i]; | |
715 | } | |
716 | ||
717 | void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) | |
718 | { | |
719 | /* Always notify when queue is empty */ | |
720 | if ((vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx) && | |
721 | (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT)) | |
722 | return; | |
723 | ||
724 | vdev->isr |= 0x01; | |
725 | virtio_update_irq(vdev); | |
726 | } | |
727 | ||
728 | void virtio_notify_config(VirtIODevice *vdev) | |
729 | { | |
730 | vdev->isr |= 0x03; | |
731 | virtio_update_irq(vdev); | |
732 | } | |
733 | ||
734 | void virtio_save(VirtIODevice *vdev, QEMUFile *f) | |
735 | { | |
736 | int i; | |
737 | ||
738 | pci_device_save(&vdev->pci_dev, f); | |
739 | ||
740 | qemu_put_be32s(f, &vdev->addr); | |
741 | qemu_put_8s(f, &vdev->status); | |
742 | qemu_put_8s(f, &vdev->isr); | |
743 | qemu_put_be16s(f, &vdev->queue_sel); | |
744 | qemu_put_be32s(f, &vdev->features); | |
745 | qemu_put_be32(f, vdev->config_len); | |
746 | qemu_put_buffer(f, vdev->config, vdev->config_len); | |
747 | ||
748 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
749 | if (vdev->vq[i].vring.num == 0) | |
750 | break; | |
751 | } | |
752 | ||
753 | qemu_put_be32(f, i); | |
754 | ||
755 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
756 | if (vdev->vq[i].vring.num == 0) | |
757 | break; | |
758 | ||
759 | qemu_put_be32(f, vdev->vq[i].vring.num); | |
760 | qemu_put_be32s(f, &vdev->vq[i].pfn); | |
761 | qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); | |
762 | } | |
763 | } | |
764 | ||
765 | void virtio_load(VirtIODevice *vdev, QEMUFile *f) | |
766 | { | |
767 | int num, i; | |
768 | ||
769 | pci_device_load(&vdev->pci_dev, f); | |
770 | ||
771 | qemu_get_be32s(f, &vdev->addr); | |
772 | qemu_get_8s(f, &vdev->status); | |
773 | qemu_get_8s(f, &vdev->isr); | |
774 | qemu_get_be16s(f, &vdev->queue_sel); | |
775 | qemu_get_be32s(f, &vdev->features); | |
776 | vdev->config_len = qemu_get_be32(f); | |
777 | qemu_get_buffer(f, vdev->config, vdev->config_len); | |
778 | ||
779 | num = qemu_get_be32(f); | |
780 | ||
781 | for (i = 0; i < num; i++) { | |
782 | vdev->vq[i].vring.num = qemu_get_be32(f); | |
783 | qemu_get_be32s(f, &vdev->vq[i].pfn); | |
784 | qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); | |
785 | ||
786 | if (vdev->vq[i].pfn) { | |
787 | target_phys_addr_t pa; | |
788 | ||
f46f15bc | 789 | pa = (ram_addr_t)vdev->vq[i].pfn << VIRTIO_PCI_QUEUE_ADDR_SHIFT; |
967f97fa AL |
790 | virtqueue_init(&vdev->vq[i], pa); |
791 | } | |
792 | } | |
793 | ||
794 | virtio_update_irq(vdev); | |
795 | } | |
796 | ||
797 | VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, | |
798 | uint16_t vendor, uint16_t device, | |
799 | uint16_t subvendor, uint16_t subdevice, | |
800 | uint8_t class_code, uint8_t subclass_code, | |
801 | uint8_t pif, size_t config_size, | |
802 | size_t struct_size) | |
803 | { | |
804 | VirtIODevice *vdev; | |
805 | PCIDevice *pci_dev; | |
806 | uint8_t *config; | |
807 | uint32_t size; | |
808 | ||
809 | pci_dev = pci_register_device(bus, name, struct_size, | |
810 | -1, NULL, NULL); | |
811 | if (!pci_dev) | |
812 | return NULL; | |
813 | ||
814 | vdev = to_virtio_device(pci_dev); | |
815 | ||
816 | vdev->status = 0; | |
817 | vdev->isr = 0; | |
818 | vdev->queue_sel = 0; | |
819 | vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); | |
820 | ||
821 | config = pci_dev->config; | |
822 | config[0x00] = vendor & 0xFF; | |
823 | config[0x01] = (vendor >> 8) & 0xFF; | |
824 | config[0x02] = device & 0xFF; | |
825 | config[0x03] = (device >> 8) & 0xFF; | |
826 | ||
827 | config[0x08] = VIRTIO_PCI_ABI_VERSION; | |
828 | ||
829 | config[0x09] = pif; | |
830 | config[0x0a] = subclass_code; | |
831 | config[0x0b] = class_code; | |
832 | config[0x0e] = 0x00; | |
833 | ||
834 | config[0x2c] = subvendor & 0xFF; | |
835 | config[0x2d] = (subvendor >> 8) & 0xFF; | |
836 | config[0x2e] = subdevice & 0xFF; | |
837 | config[0x2f] = (subdevice >> 8) & 0xFF; | |
838 | ||
839 | config[0x3d] = 1; | |
840 | ||
841 | vdev->name = name; | |
842 | vdev->config_len = config_size; | |
843 | if (vdev->config_len) | |
844 | vdev->config = qemu_mallocz(config_size); | |
845 | else | |
846 | vdev->config = NULL; | |
847 | ||
848 | size = 20 + config_size; | |
849 | if (size & (size-1)) | |
850 | size = 1 << fls(size); | |
851 | ||
852 | pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO, | |
853 | virtio_map); | |
854 | qemu_register_reset(virtio_reset, vdev); | |
855 | ||
856 | return vdev; | |
857 | } |