/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
13 | ||
14 | #include <inttypes.h> | |
15 | #include <err.h> | |
16 | ||
17 | #include "virtio.h" | |
18 | #include "sysemu.h" | |
19 | ||
//#define VIRTIO_ZERO_COPY

/* Legacy virtio-PCI I/O BAR layout; offsets match Linux's linux/virtio_pci.h */

/* 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES        0

/* 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES       4

/* 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN            8

/* 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM            12

/* 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL            14

/* 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY         16

/* 8-bit device status register */
#define VIRTIO_PCI_STATUS               18

/* 8-bit r/o interrupt status register.  Reading returns the current
 * contents of the ISR and also clears it: a read-and-acknowledge. */
#define VIRTIO_PCI_ISR                  19

/* Device-specific configuration space starts here */
#define VIRTIO_PCI_CONFIG               20

/* Virtio ABI version; incrementing this breaks existing guest drivers */
#define VIRTIO_PCI_ABI_VERSION          0

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step.  We leave the wmb() call sites in place to make the ordering
 * requirement obvious for KVM or a future SMP-capable kqemu.
 */
#define wmb() do { } while (0)
61 | typedef struct VRingDesc | |
62 | { | |
63 | uint64_t addr; | |
64 | uint32_t len; | |
65 | uint16_t flags; | |
66 | uint16_t next; | |
67 | } VRingDesc; | |
68 | ||
69 | typedef struct VRingAvail | |
70 | { | |
71 | uint16_t flags; | |
72 | uint16_t idx; | |
73 | uint16_t ring[0]; | |
74 | } VRingAvail; | |
75 | ||
76 | typedef struct VRingUsedElem | |
77 | { | |
78 | uint32_t id; | |
79 | uint32_t len; | |
80 | } VRingUsedElem; | |
81 | ||
82 | typedef struct VRingUsed | |
83 | { | |
84 | uint16_t flags; | |
85 | uint16_t idx; | |
86 | VRingUsedElem ring[0]; | |
87 | } VRingUsed; | |
88 | ||
89 | typedef struct VRing | |
90 | { | |
91 | unsigned int num; | |
92 | target_phys_addr_t desc; | |
93 | target_phys_addr_t avail; | |
94 | target_phys_addr_t used; | |
95 | } VRing; | |
96 | ||
97 | struct VirtQueue | |
98 | { | |
99 | VRing vring; | |
100 | uint32_t pfn; | |
101 | uint16_t last_avail_idx; | |
102 | int inuse; | |
103 | void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); | |
104 | }; | |
105 | ||
106 | #define VIRTIO_PCI_QUEUE_MAX 16 | |
107 | ||
/* virt queue functions */
#ifdef VIRTIO_ZERO_COPY
/* Map a guest-physical range to a host pointer, verifying that every page
 * is RAM and that the whole range is physically contiguous; exits on any
 * violation since zero-copy cannot cross a discontinuity. */
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    ram_addr_t off;
    target_phys_addr_t addr1;

    off = cpu_get_physical_page_desc(addr);
    if ((off & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        fprintf(stderr, "virtio DMA to IO ram\n");
        exit(1);
    }

    off = (off & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);

    /* Walk every subsequent page in the range and check it follows the
     * first one contiguously in host RAM. */
    for (addr1 = addr + TARGET_PAGE_SIZE;
         addr1 < TARGET_PAGE_ALIGN(addr + size);
         addr1 += TARGET_PAGE_SIZE) {
        ram_addr_t off1;

        off1 = cpu_get_physical_page_desc(addr1);
        if ((off1 & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
            fprintf(stderr, "virtio DMA to IO ram\n");
            exit(1);
        }

        off1 = (off1 & TARGET_PAGE_MASK) | (addr1 & ~TARGET_PAGE_MASK);

        if (off1 != (off + (addr1 - addr))) {
            /* message typo fixed: was "discontigous" */
            fprintf(stderr, "discontiguous virtio memory\n");
            exit(1);
        }
    }

    return phys_ram_base + off;
}
#endif
145 | ||
146 | static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa) | |
147 | { | |
148 | vq->vring.desc = pa; | |
149 | vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc); | |
150 | vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.avail + offsetof(VRingAvail, ring[vq->vring.num])); | |
151 | } | |
152 | ||
153 | static inline uint64_t vring_desc_addr(VirtQueue *vq, int i) | |
154 | { | |
155 | target_phys_addr_t pa; | |
156 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); | |
157 | return ldq_phys(pa); | |
158 | } | |
159 | ||
160 | static inline uint32_t vring_desc_len(VirtQueue *vq, int i) | |
161 | { | |
162 | target_phys_addr_t pa; | |
163 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); | |
164 | return ldl_phys(pa); | |
165 | } | |
166 | ||
167 | static inline uint16_t vring_desc_flags(VirtQueue *vq, int i) | |
168 | { | |
169 | target_phys_addr_t pa; | |
170 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); | |
171 | return lduw_phys(pa); | |
172 | } | |
173 | ||
174 | static inline uint16_t vring_desc_next(VirtQueue *vq, int i) | |
175 | { | |
176 | target_phys_addr_t pa; | |
177 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); | |
178 | return lduw_phys(pa); | |
179 | } | |
180 | ||
181 | static inline uint16_t vring_avail_flags(VirtQueue *vq) | |
182 | { | |
183 | target_phys_addr_t pa; | |
184 | pa = vq->vring.avail + offsetof(VRingAvail, flags); | |
185 | return lduw_phys(pa); | |
186 | } | |
187 | ||
188 | static inline uint16_t vring_avail_idx(VirtQueue *vq) | |
189 | { | |
190 | target_phys_addr_t pa; | |
191 | pa = vq->vring.avail + offsetof(VRingAvail, idx); | |
192 | return lduw_phys(pa); | |
193 | } | |
194 | ||
195 | static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) | |
196 | { | |
197 | target_phys_addr_t pa; | |
198 | pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); | |
199 | return lduw_phys(pa); | |
200 | } | |
201 | ||
202 | static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val) | |
203 | { | |
204 | target_phys_addr_t pa; | |
205 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].id); | |
206 | stl_phys(pa, val); | |
207 | } | |
208 | ||
209 | static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val) | |
210 | { | |
211 | target_phys_addr_t pa; | |
212 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].len); | |
213 | stl_phys(pa, val); | |
214 | } | |
215 | ||
216 | static uint16_t vring_used_idx(VirtQueue *vq) | |
217 | { | |
218 | target_phys_addr_t pa; | |
219 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
220 | return lduw_phys(pa); | |
221 | } | |
222 | ||
223 | static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val) | |
224 | { | |
225 | target_phys_addr_t pa; | |
226 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
227 | stw_phys(pa, vring_used_idx(vq) + val); | |
228 | } | |
229 | ||
230 | static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) | |
231 | { | |
232 | target_phys_addr_t pa; | |
233 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
234 | stw_phys(pa, lduw_phys(pa) | mask); | |
235 | } | |
236 | ||
237 | static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) | |
238 | { | |
239 | target_phys_addr_t pa; | |
240 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
241 | stw_phys(pa, lduw_phys(pa) & ~mask); | |
242 | } | |
243 | ||
244 | void virtio_queue_set_notification(VirtQueue *vq, int enable) | |
245 | { | |
246 | if (enable) | |
247 | vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); | |
248 | else | |
249 | vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); | |
250 | } | |
251 | ||
252 | int virtio_queue_ready(VirtQueue *vq) | |
253 | { | |
254 | return vq->vring.avail != 0; | |
255 | } | |
256 | ||
257 | int virtio_queue_empty(VirtQueue *vq) | |
258 | { | |
259 | return vring_avail_idx(vq) == vq->last_avail_idx; | |
260 | } | |
261 | ||
262 | void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, | |
263 | unsigned int len, unsigned int idx) | |
264 | { | |
265 | unsigned int offset; | |
266 | int i; | |
267 | ||
268 | #ifndef VIRTIO_ZERO_COPY | |
269 | for (i = 0; i < elem->out_num; i++) | |
270 | qemu_free(elem->out_sg[i].iov_base); | |
271 | #endif | |
272 | ||
273 | offset = 0; | |
274 | for (i = 0; i < elem->in_num; i++) { | |
275 | size_t size = MIN(len - offset, elem->in_sg[i].iov_len); | |
276 | ||
277 | #ifdef VIRTIO_ZERO_COPY | |
278 | if (size) { | |
279 | ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base; | |
280 | ram_addr_t off; | |
281 | ||
282 | for (off = 0; off < size; off += TARGET_PAGE_SIZE) | |
283 | cpu_physical_memory_set_dirty(addr + off); | |
284 | } | |
285 | #else | |
286 | if (size) | |
287 | cpu_physical_memory_write(elem->in_addr[i], | |
288 | elem->in_sg[i].iov_base, | |
289 | size); | |
290 | ||
291 | qemu_free(elem->in_sg[i].iov_base); | |
292 | #endif | |
293 | ||
294 | offset += size; | |
295 | } | |
296 | ||
297 | idx = (idx + vring_used_idx(vq)) % vq->vring.num; | |
298 | ||
299 | /* Get a pointer to the next entry in the used ring. */ | |
300 | vring_used_ring_id(vq, idx, elem->index); | |
301 | vring_used_ring_len(vq, idx, len); | |
302 | } | |
303 | ||
304 | void virtqueue_flush(VirtQueue *vq, unsigned int count) | |
305 | { | |
306 | /* Make sure buffer is written before we update index. */ | |
307 | wmb(); | |
308 | vring_used_idx_increment(vq, count); | |
309 | vq->inuse -= count; | |
310 | } | |
311 | ||
312 | void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, | |
313 | unsigned int len) | |
314 | { | |
315 | virtqueue_fill(vq, elem, len, 0); | |
316 | virtqueue_flush(vq, 1); | |
317 | } | |
318 | ||
319 | static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) | |
320 | { | |
321 | uint16_t num_heads = vring_avail_idx(vq) - idx; | |
322 | ||
323 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
324 | if (num_heads > vq->vring.num) | |
325 | errx(1, "Guest moved used index from %u to %u", | |
326 | idx, vring_avail_idx(vq)); | |
327 | ||
328 | return num_heads; | |
329 | } | |
330 | ||
331 | static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) | |
332 | { | |
333 | unsigned int head; | |
334 | ||
335 | /* Grab the next descriptor number they're advertising, and increment | |
336 | * the index we've seen. */ | |
337 | head = vring_avail_ring(vq, idx % vq->vring.num); | |
338 | ||
339 | /* If their number is silly, that's a fatal mistake. */ | |
340 | if (head >= vq->vring.num) | |
341 | errx(1, "Guest says index %u is available", head); | |
342 | ||
343 | return head; | |
344 | } | |
345 | ||
346 | static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) | |
347 | { | |
348 | unsigned int next; | |
349 | ||
350 | /* If this descriptor says it doesn't chain, we're done. */ | |
351 | if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) | |
352 | return vq->vring.num; | |
353 | ||
354 | /* Check they're not leading us off end of descriptors. */ | |
355 | next = vring_desc_next(vq, i); | |
356 | /* Make sure compiler knows to grab that: we don't want it changing! */ | |
357 | wmb(); | |
358 | ||
359 | if (next >= vq->vring.num) | |
360 | errx(1, "Desc next is %u", next); | |
361 | ||
362 | return next; | |
363 | } | |
364 | ||
365 | int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) | |
366 | { | |
367 | unsigned int idx; | |
368 | int num_bufs, in_total, out_total; | |
369 | ||
370 | idx = vq->last_avail_idx; | |
371 | ||
372 | num_bufs = in_total = out_total = 0; | |
373 | while (virtqueue_num_heads(vq, idx)) { | |
374 | int i; | |
375 | ||
376 | i = virtqueue_get_head(vq, idx++); | |
377 | do { | |
378 | /* If we've got too many, that implies a descriptor loop. */ | |
379 | if (++num_bufs > vq->vring.num) | |
380 | errx(1, "Looped descriptor"); | |
381 | ||
382 | if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
383 | if (in_bytes > 0 && | |
384 | (in_total += vring_desc_len(vq, i)) >= in_bytes) | |
385 | return 1; | |
386 | } else { | |
387 | if (out_bytes > 0 && | |
388 | (out_total += vring_desc_len(vq, i)) >= out_bytes) | |
389 | return 1; | |
390 | } | |
391 | } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); | |
392 | } | |
393 | ||
394 | return 0; | |
395 | } | |
396 | ||
397 | int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) | |
398 | { | |
399 | unsigned int i, head; | |
400 | ||
401 | if (!virtqueue_num_heads(vq, vq->last_avail_idx)) | |
402 | return 0; | |
403 | ||
404 | /* When we start there are none of either input nor output. */ | |
405 | elem->out_num = elem->in_num = 0; | |
406 | ||
407 | i = head = virtqueue_get_head(vq, vq->last_avail_idx++); | |
408 | do { | |
409 | struct iovec *sg; | |
410 | ||
411 | if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
412 | elem->in_addr[elem->in_num] = vring_desc_addr(vq, i); | |
413 | sg = &elem->in_sg[elem->in_num++]; | |
414 | } else | |
415 | sg = &elem->out_sg[elem->out_num++]; | |
416 | ||
417 | /* Grab the first descriptor, and check it's OK. */ | |
418 | sg->iov_len = vring_desc_len(vq, i); | |
419 | ||
420 | #ifdef VIRTIO_ZERO_COPY | |
421 | sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len); | |
422 | #else | |
423 | /* cap individual scatter element size to prevent unbounded allocations | |
424 | of memory from the guest. Practically speaking, no virtio driver | |
425 | will ever pass more than a page in each element. We set the cap to | |
426 | be 2MB in case for some reason a large page makes it way into the | |
427 | sg list. When we implement a zero copy API, this limitation will | |
428 | disappear */ | |
429 | if (sg->iov_len > (2 << 20)) | |
430 | sg->iov_len = 2 << 20; | |
431 | ||
432 | sg->iov_base = qemu_malloc(sg->iov_len); | |
433 | if (sg->iov_base && | |
434 | !(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) { | |
435 | cpu_physical_memory_read(vring_desc_addr(vq, i), | |
436 | sg->iov_base, | |
437 | sg->iov_len); | |
438 | } | |
439 | #endif | |
440 | if (sg->iov_base == NULL) | |
441 | errx(1, "Invalid mapping\n"); | |
442 | ||
443 | /* If we've got too many, that implies a descriptor loop. */ | |
444 | if ((elem->in_num + elem->out_num) > vq->vring.num) | |
445 | errx(1, "Looped descriptor"); | |
446 | } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); | |
447 | ||
448 | elem->index = head; | |
449 | ||
450 | vq->inuse++; | |
451 | ||
452 | return elem->in_num + elem->out_num; | |
453 | } | |
454 | ||
455 | /* virtio device */ | |
456 | ||
457 | static VirtIODevice *to_virtio_device(PCIDevice *pci_dev) | |
458 | { | |
459 | return (VirtIODevice *)pci_dev; | |
460 | } | |
461 | ||
462 | static void virtio_update_irq(VirtIODevice *vdev) | |
463 | { | |
464 | qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1); | |
465 | } | |
466 | ||
467 | void virtio_reset(void *opaque) | |
468 | { | |
469 | VirtIODevice *vdev = opaque; | |
470 | int i; | |
471 | ||
472 | if (vdev->reset) | |
473 | vdev->reset(vdev); | |
474 | ||
475 | vdev->features = 0; | |
476 | vdev->queue_sel = 0; | |
477 | vdev->status = 0; | |
478 | vdev->isr = 0; | |
479 | virtio_update_irq(vdev); | |
480 | ||
481 | for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
482 | vdev->vq[i].vring.desc = 0; | |
483 | vdev->vq[i].vring.avail = 0; | |
484 | vdev->vq[i].vring.used = 0; | |
485 | vdev->vq[i].last_avail_idx = 0; | |
486 | vdev->vq[i].pfn = 0; | |
487 | } | |
488 | } | |
489 | ||
490 | static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) | |
491 | { | |
492 | VirtIODevice *vdev = to_virtio_device(opaque); | |
493 | ram_addr_t pa; | |
494 | ||
495 | addr -= vdev->addr; | |
496 | ||
497 | switch (addr) { | |
498 | case VIRTIO_PCI_GUEST_FEATURES: | |
499 | if (vdev->set_features) | |
500 | vdev->set_features(vdev, val); | |
501 | vdev->features = val; | |
502 | break; | |
503 | case VIRTIO_PCI_QUEUE_PFN: | |
504 | pa = (ram_addr_t)val << TARGET_PAGE_BITS; | |
505 | vdev->vq[vdev->queue_sel].pfn = val; | |
506 | if (pa == 0) { | |
507 | virtio_reset(vdev); | |
508 | } else { | |
509 | virtqueue_init(&vdev->vq[vdev->queue_sel], pa); | |
510 | } | |
511 | break; | |
512 | case VIRTIO_PCI_QUEUE_SEL: | |
513 | if (val < VIRTIO_PCI_QUEUE_MAX) | |
514 | vdev->queue_sel = val; | |
515 | break; | |
516 | case VIRTIO_PCI_QUEUE_NOTIFY: | |
517 | if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc) | |
518 | vdev->vq[val].handle_output(vdev, &vdev->vq[val]); | |
519 | break; | |
520 | case VIRTIO_PCI_STATUS: | |
521 | vdev->status = val & 0xFF; | |
522 | if (vdev->status == 0) | |
523 | virtio_reset(vdev); | |
524 | break; | |
525 | } | |
526 | } | |
527 | ||
528 | static uint32_t virtio_ioport_read(void *opaque, uint32_t addr) | |
529 | { | |
530 | VirtIODevice *vdev = to_virtio_device(opaque); | |
531 | uint32_t ret = 0xFFFFFFFF; | |
532 | ||
533 | addr -= vdev->addr; | |
534 | ||
535 | switch (addr) { | |
536 | case VIRTIO_PCI_HOST_FEATURES: | |
537 | ret = vdev->get_features(vdev); | |
538 | ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY); | |
539 | break; | |
540 | case VIRTIO_PCI_GUEST_FEATURES: | |
541 | ret = vdev->features; | |
542 | break; | |
543 | case VIRTIO_PCI_QUEUE_PFN: | |
544 | ret = vdev->vq[vdev->queue_sel].pfn; | |
545 | break; | |
546 | case VIRTIO_PCI_QUEUE_NUM: | |
547 | ret = vdev->vq[vdev->queue_sel].vring.num; | |
548 | break; | |
549 | case VIRTIO_PCI_QUEUE_SEL: | |
550 | ret = vdev->queue_sel; | |
551 | break; | |
552 | case VIRTIO_PCI_STATUS: | |
553 | ret = vdev->status; | |
554 | break; | |
555 | case VIRTIO_PCI_ISR: | |
556 | /* reading from the ISR also clears it. */ | |
557 | ret = vdev->isr; | |
558 | vdev->isr = 0; | |
559 | virtio_update_irq(vdev); | |
560 | break; | |
561 | default: | |
562 | break; | |
563 | } | |
564 | ||
565 | return ret; | |
566 | } | |
567 | ||
568 | static uint32_t virtio_config_readb(void *opaque, uint32_t addr) | |
569 | { | |
570 | VirtIODevice *vdev = opaque; | |
571 | uint8_t val; | |
572 | ||
573 | vdev->get_config(vdev, vdev->config); | |
574 | ||
575 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
576 | if (addr > (vdev->config_len - sizeof(val))) | |
577 | return (uint32_t)-1; | |
578 | ||
579 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
580 | return val; | |
581 | } | |
582 | ||
583 | static uint32_t virtio_config_readw(void *opaque, uint32_t addr) | |
584 | { | |
585 | VirtIODevice *vdev = opaque; | |
586 | uint16_t val; | |
587 | ||
588 | vdev->get_config(vdev, vdev->config); | |
589 | ||
590 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
591 | if (addr > (vdev->config_len - sizeof(val))) | |
592 | return (uint32_t)-1; | |
593 | ||
594 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
595 | return val; | |
596 | } | |
597 | ||
598 | static uint32_t virtio_config_readl(void *opaque, uint32_t addr) | |
599 | { | |
600 | VirtIODevice *vdev = opaque; | |
601 | uint32_t val; | |
602 | ||
603 | vdev->get_config(vdev, vdev->config); | |
604 | ||
605 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
606 | if (addr > (vdev->config_len - sizeof(val))) | |
607 | return (uint32_t)-1; | |
608 | ||
609 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
610 | return val; | |
611 | } | |
612 | ||
613 | static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data) | |
614 | { | |
615 | VirtIODevice *vdev = opaque; | |
616 | uint8_t val = data; | |
617 | ||
618 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
619 | if (addr > (vdev->config_len - sizeof(val))) | |
620 | return; | |
621 | ||
622 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
623 | ||
624 | if (vdev->set_config) | |
625 | vdev->set_config(vdev, vdev->config); | |
626 | } | |
627 | ||
628 | static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) | |
629 | { | |
630 | VirtIODevice *vdev = opaque; | |
631 | uint16_t val = data; | |
632 | ||
633 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
634 | if (addr > (vdev->config_len - sizeof(val))) | |
635 | return; | |
636 | ||
637 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
638 | ||
639 | if (vdev->set_config) | |
640 | vdev->set_config(vdev, vdev->config); | |
641 | } | |
642 | ||
643 | static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) | |
644 | { | |
645 | VirtIODevice *vdev = opaque; | |
646 | uint32_t val = data; | |
647 | ||
648 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
649 | if (addr > (vdev->config_len - sizeof(val))) | |
650 | return; | |
651 | ||
652 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
653 | ||
654 | if (vdev->set_config) | |
655 | vdev->set_config(vdev, vdev->config); | |
656 | } | |
657 | ||
658 | static void virtio_map(PCIDevice *pci_dev, int region_num, | |
659 | uint32_t addr, uint32_t size, int type) | |
660 | { | |
661 | VirtIODevice *vdev = to_virtio_device(pci_dev); | |
662 | int i; | |
663 | ||
664 | vdev->addr = addr; | |
665 | for (i = 0; i < 3; i++) { | |
666 | register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev); | |
667 | register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev); | |
668 | } | |
669 | ||
670 | if (vdev->config_len) { | |
671 | register_ioport_write(addr + 20, vdev->config_len, 1, | |
672 | virtio_config_writeb, vdev); | |
673 | register_ioport_write(addr + 20, vdev->config_len, 2, | |
674 | virtio_config_writew, vdev); | |
675 | register_ioport_write(addr + 20, vdev->config_len, 4, | |
676 | virtio_config_writel, vdev); | |
677 | register_ioport_read(addr + 20, vdev->config_len, 1, | |
678 | virtio_config_readb, vdev); | |
679 | register_ioport_read(addr + 20, vdev->config_len, 2, | |
680 | virtio_config_readw, vdev); | |
681 | register_ioport_read(addr + 20, vdev->config_len, 4, | |
682 | virtio_config_readl, vdev); | |
683 | ||
684 | vdev->get_config(vdev, vdev->config); | |
685 | } | |
686 | } | |
687 | ||
688 | VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, | |
689 | void (*handle_output)(VirtIODevice *, VirtQueue *)) | |
690 | { | |
691 | int i; | |
692 | ||
693 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
694 | if (vdev->vq[i].vring.num == 0) | |
695 | break; | |
696 | } | |
697 | ||
698 | if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) | |
699 | abort(); | |
700 | ||
701 | vdev->vq[i].vring.num = queue_size; | |
702 | vdev->vq[i].handle_output = handle_output; | |
703 | ||
704 | return &vdev->vq[i]; | |
705 | } | |
706 | ||
707 | void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) | |
708 | { | |
709 | /* Always notify when queue is empty */ | |
710 | if ((vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx) && | |
711 | (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT)) | |
712 | return; | |
713 | ||
714 | vdev->isr |= 0x01; | |
715 | virtio_update_irq(vdev); | |
716 | } | |
717 | ||
718 | void virtio_notify_config(VirtIODevice *vdev) | |
719 | { | |
720 | vdev->isr |= 0x03; | |
721 | virtio_update_irq(vdev); | |
722 | } | |
723 | ||
724 | void virtio_save(VirtIODevice *vdev, QEMUFile *f) | |
725 | { | |
726 | int i; | |
727 | ||
728 | pci_device_save(&vdev->pci_dev, f); | |
729 | ||
730 | qemu_put_be32s(f, &vdev->addr); | |
731 | qemu_put_8s(f, &vdev->status); | |
732 | qemu_put_8s(f, &vdev->isr); | |
733 | qemu_put_be16s(f, &vdev->queue_sel); | |
734 | qemu_put_be32s(f, &vdev->features); | |
735 | qemu_put_be32(f, vdev->config_len); | |
736 | qemu_put_buffer(f, vdev->config, vdev->config_len); | |
737 | ||
738 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
739 | if (vdev->vq[i].vring.num == 0) | |
740 | break; | |
741 | } | |
742 | ||
743 | qemu_put_be32(f, i); | |
744 | ||
745 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
746 | if (vdev->vq[i].vring.num == 0) | |
747 | break; | |
748 | ||
749 | qemu_put_be32(f, vdev->vq[i].vring.num); | |
750 | qemu_put_be32s(f, &vdev->vq[i].pfn); | |
751 | qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); | |
752 | } | |
753 | } | |
754 | ||
755 | void virtio_load(VirtIODevice *vdev, QEMUFile *f) | |
756 | { | |
757 | int num, i; | |
758 | ||
759 | pci_device_load(&vdev->pci_dev, f); | |
760 | ||
761 | qemu_get_be32s(f, &vdev->addr); | |
762 | qemu_get_8s(f, &vdev->status); | |
763 | qemu_get_8s(f, &vdev->isr); | |
764 | qemu_get_be16s(f, &vdev->queue_sel); | |
765 | qemu_get_be32s(f, &vdev->features); | |
766 | vdev->config_len = qemu_get_be32(f); | |
767 | qemu_get_buffer(f, vdev->config, vdev->config_len); | |
768 | ||
769 | num = qemu_get_be32(f); | |
770 | ||
771 | for (i = 0; i < num; i++) { | |
772 | vdev->vq[i].vring.num = qemu_get_be32(f); | |
773 | qemu_get_be32s(f, &vdev->vq[i].pfn); | |
774 | qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); | |
775 | ||
776 | if (vdev->vq[i].pfn) { | |
777 | target_phys_addr_t pa; | |
778 | ||
779 | pa = (ram_addr_t)vdev->vq[i].pfn << TARGET_PAGE_BITS; | |
780 | virtqueue_init(&vdev->vq[i], pa); | |
781 | } | |
782 | } | |
783 | ||
784 | virtio_update_irq(vdev); | |
785 | } | |
786 | ||
787 | VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, | |
788 | uint16_t vendor, uint16_t device, | |
789 | uint16_t subvendor, uint16_t subdevice, | |
790 | uint8_t class_code, uint8_t subclass_code, | |
791 | uint8_t pif, size_t config_size, | |
792 | size_t struct_size) | |
793 | { | |
794 | VirtIODevice *vdev; | |
795 | PCIDevice *pci_dev; | |
796 | uint8_t *config; | |
797 | uint32_t size; | |
798 | ||
799 | pci_dev = pci_register_device(bus, name, struct_size, | |
800 | -1, NULL, NULL); | |
801 | if (!pci_dev) | |
802 | return NULL; | |
803 | ||
804 | vdev = to_virtio_device(pci_dev); | |
805 | ||
806 | vdev->status = 0; | |
807 | vdev->isr = 0; | |
808 | vdev->queue_sel = 0; | |
809 | vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); | |
810 | ||
811 | config = pci_dev->config; | |
812 | config[0x00] = vendor & 0xFF; | |
813 | config[0x01] = (vendor >> 8) & 0xFF; | |
814 | config[0x02] = device & 0xFF; | |
815 | config[0x03] = (device >> 8) & 0xFF; | |
816 | ||
817 | config[0x08] = VIRTIO_PCI_ABI_VERSION; | |
818 | ||
819 | config[0x09] = pif; | |
820 | config[0x0a] = subclass_code; | |
821 | config[0x0b] = class_code; | |
822 | config[0x0e] = 0x00; | |
823 | ||
824 | config[0x2c] = subvendor & 0xFF; | |
825 | config[0x2d] = (subvendor >> 8) & 0xFF; | |
826 | config[0x2e] = subdevice & 0xFF; | |
827 | config[0x2f] = (subdevice >> 8) & 0xFF; | |
828 | ||
829 | config[0x3d] = 1; | |
830 | ||
831 | vdev->name = name; | |
832 | vdev->config_len = config_size; | |
833 | if (vdev->config_len) | |
834 | vdev->config = qemu_mallocz(config_size); | |
835 | else | |
836 | vdev->config = NULL; | |
837 | ||
838 | size = 20 + config_size; | |
839 | if (size & (size-1)) | |
840 | size = 1 << fls(size); | |
841 | ||
842 | pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO, | |
843 | virtio_map); | |
844 | qemu_register_reset(virtio_reset, vdev); | |
845 | ||
846 | return vdev; | |
847 | } |