]>
Commit | Line | Data |
---|---|---|
967f97fa AL |
1 | /* |
2 | * Virtio Support | |
3 | * | |
4 | * Copyright IBM, Corp. 2007 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <aliguori@us.ibm.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include <inttypes.h> | |
967f97fa AL |
15 | |
16 | #include "virtio.h" | |
17 | #include "sysemu.h" | |
18 | ||
19 | //#define VIRTIO_ZERO_COPY | |
20 | ||
/* from Linux's linux/virtio_pci.h */

/*
 * The register constants below (HOST_FEATURES through CONFIG) are byte
 * offsets from the start of the device's I/O region: the port handlers in
 * this file subtract the region base address before comparing against them.
 */

/* A 32-bit r/o bitmask of the features supported by the host */
#define VIRTIO_PCI_HOST_FEATURES 0

/* A 32-bit r/w bitmask of features activated by the guest */
#define VIRTIO_PCI_GUEST_FEATURES 4

/* A 32-bit r/w PFN for the currently selected queue */
#define VIRTIO_PCI_QUEUE_PFN 8

/* A 16-bit r/o queue size for the currently selected queue */
#define VIRTIO_PCI_QUEUE_NUM 12

/* A 16-bit r/w queue selector */
#define VIRTIO_PCI_QUEUE_SEL 14

/* A 16-bit r/w queue notifier */
#define VIRTIO_PCI_QUEUE_NOTIFY 16

/* An 8-bit device status register. */
#define VIRTIO_PCI_STATUS 18

/* An 8-bit r/o interrupt status register. Reading the value will return the
 * current contents of the ISR and will also clear it. This is effectively
 * a read-and-acknowledge. */
#define VIRTIO_PCI_ISR 19

/* Offset at which the device-specific configuration bytes start; the
 * virtio_config_* handlers subtract this (plus the region base) from the
 * port address to index into the device's config buffer. */
#define VIRTIO_PCI_CONFIG 20

/* Virtio ABI version, if we increment this, we break the guest driver.
 * Stored at offset 0x08 of the PCI configuration space by virtio_init_pci(). */
#define VIRTIO_PCI_ABI_VERSION 0

/* How many bits to shift physical queue address written to QUEUE_PFN.
 * 12 is historical, and due to x86 page size. */
#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12

/* The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN 4096

/* QEMU doesn't strictly need write barriers since everything runs in
 * lock-step. We'll leave the calls to wmb() in though to make it obvious for
 * KVM or if kqemu gets SMP support.
 *
 * As defined here wmb() expands to an empty statement, so it has no effect
 * on the generated code.
 */
#define wmb() do { } while (0)
67 | ||
/*
 * One entry of the descriptor table as laid out in guest memory.
 * The struct is only used for sizeof()/offsetof(); the fields themselves are
 * fetched with the ld*_phys helpers.
 */
typedef struct VRingDesc {
    uint64_t addr;   /* guest-physical address of the buffer */
    uint32_t len;    /* length of the buffer in bytes */
    uint16_t flags;  /* VRING_DESC_F_* bits (NEXT, WRITE) */
    uint16_t next;   /* index of the chained descriptor when NEXT is set */
} VRingDesc;
75 | ||
/*
 * Header of the "available" ring as laid out in guest memory, followed by
 * vring.num 16-bit entries.  Only used for offsetof() computations.
 *
 * The trailing array is a C99 flexible array member rather than the GNU
 * zero-length-array extension; sizeof() and every offsetof() are unchanged.
 */
typedef struct VRingAvail
{
    uint16_t flags;   /* VRING_AVAIL_F_* bits written by the guest */
    uint16_t idx;     /* free-running index of the next slot the guest fills */
    uint16_t ring[];  /* head descriptor indices offered by the guest */
} VRingAvail;
82 | ||
/*
 * One entry of the "used" ring: which descriptor chain was completed and the
 * length value reported for it.
 */
typedef struct VRingUsedElem {
    uint32_t id;   /* head index of the completed descriptor chain */
    uint32_t len;  /* length reported back to the guest */
} VRingUsedElem;
88 | ||
89 | typedef struct VRingUsed | |
90 | { | |
91 | uint16_t flags; | |
92 | uint16_t idx; | |
93 | VRingUsedElem ring[0]; | |
94 | } VRingUsed; | |
95 | ||
96 | typedef struct VRing | |
97 | { | |
98 | unsigned int num; | |
99 | target_phys_addr_t desc; | |
100 | target_phys_addr_t avail; | |
101 | target_phys_addr_t used; | |
102 | } VRing; | |
103 | ||
104 | struct VirtQueue | |
105 | { | |
106 | VRing vring; | |
107 | uint32_t pfn; | |
108 | uint16_t last_avail_idx; | |
109 | int inuse; | |
110 | void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); | |
111 | }; | |
112 | ||
/* Number of VirtQueue slots allocated for every device by virtio_init_pci();
 * also the exclusive upper bound for the queue selector and notifier. */
#define VIRTIO_PCI_QUEUE_MAX 16
114 | ||
115 | /* virt queue functions */ | |
#ifdef VIRTIO_ZERO_COPY
/*
 * Turn the guest-physical range [addr, addr + size) into a host pointer.
 *
 * Every page touched by the range must be RAM, and consecutive pages must map
 * to consecutive RAM offsets; if either condition fails a message is printed
 * and the process exits.  Only compiled when VIRTIO_ZERO_COPY is defined.
 */
static void *virtio_map_gpa(target_phys_addr_t addr, size_t size)
{
    target_phys_addr_t page;
    ram_addr_t base;

    base = cpu_get_physical_page_desc(addr);
    if ((base & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        fprintf(stderr, "virtio DMA to IO ram\n");
        exit(1);
    }

    /* RAM offset of the first byte: page frame plus offset within the page. */
    base = (base & TARGET_PAGE_MASK) | (addr & ~TARGET_PAGE_MASK);

    page = addr + TARGET_PAGE_SIZE;
    while (page < TARGET_PAGE_ALIGN(addr + size)) {
        ram_addr_t cur = cpu_get_physical_page_desc(page);

        if ((cur & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
            fprintf(stderr, "virtio DMA to IO ram\n");
            exit(1);
        }

        cur = (cur & TARGET_PAGE_MASK) | (page & ~TARGET_PAGE_MASK);

        /* The follow-on page must sit exactly where a linear mapping puts it. */
        if (cur != (base + (page - addr))) {
            fprintf(stderr, "discontigous virtio memory\n");
            exit(1);
        }

        page += TARGET_PAGE_SIZE;
    }

    return phys_ram_base + base;
}
#endif
152 | ||
153 | static void virtqueue_init(VirtQueue *vq, target_phys_addr_t pa) | |
154 | { | |
155 | vq->vring.desc = pa; | |
156 | vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc); | |
f46f15bc AL |
157 | vq->vring.used = vring_align(vq->vring.avail + |
158 | offsetof(VRingAvail, ring[vq->vring.num]), | |
159 | VIRTIO_PCI_VRING_ALIGN); | |
967f97fa AL |
160 | } |
161 | ||
162 | static inline uint64_t vring_desc_addr(VirtQueue *vq, int i) | |
163 | { | |
164 | target_phys_addr_t pa; | |
165 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr); | |
166 | return ldq_phys(pa); | |
167 | } | |
168 | ||
169 | static inline uint32_t vring_desc_len(VirtQueue *vq, int i) | |
170 | { | |
171 | target_phys_addr_t pa; | |
172 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len); | |
173 | return ldl_phys(pa); | |
174 | } | |
175 | ||
176 | static inline uint16_t vring_desc_flags(VirtQueue *vq, int i) | |
177 | { | |
178 | target_phys_addr_t pa; | |
179 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags); | |
180 | return lduw_phys(pa); | |
181 | } | |
182 | ||
183 | static inline uint16_t vring_desc_next(VirtQueue *vq, int i) | |
184 | { | |
185 | target_phys_addr_t pa; | |
186 | pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next); | |
187 | return lduw_phys(pa); | |
188 | } | |
189 | ||
190 | static inline uint16_t vring_avail_flags(VirtQueue *vq) | |
191 | { | |
192 | target_phys_addr_t pa; | |
193 | pa = vq->vring.avail + offsetof(VRingAvail, flags); | |
194 | return lduw_phys(pa); | |
195 | } | |
196 | ||
197 | static inline uint16_t vring_avail_idx(VirtQueue *vq) | |
198 | { | |
199 | target_phys_addr_t pa; | |
200 | pa = vq->vring.avail + offsetof(VRingAvail, idx); | |
201 | return lduw_phys(pa); | |
202 | } | |
203 | ||
204 | static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) | |
205 | { | |
206 | target_phys_addr_t pa; | |
207 | pa = vq->vring.avail + offsetof(VRingAvail, ring[i]); | |
208 | return lduw_phys(pa); | |
209 | } | |
210 | ||
211 | static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val) | |
212 | { | |
213 | target_phys_addr_t pa; | |
214 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].id); | |
215 | stl_phys(pa, val); | |
216 | } | |
217 | ||
218 | static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val) | |
219 | { | |
220 | target_phys_addr_t pa; | |
221 | pa = vq->vring.used + offsetof(VRingUsed, ring[i].len); | |
222 | stl_phys(pa, val); | |
223 | } | |
224 | ||
225 | static uint16_t vring_used_idx(VirtQueue *vq) | |
226 | { | |
227 | target_phys_addr_t pa; | |
228 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
229 | return lduw_phys(pa); | |
230 | } | |
231 | ||
232 | static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val) | |
233 | { | |
234 | target_phys_addr_t pa; | |
235 | pa = vq->vring.used + offsetof(VRingUsed, idx); | |
236 | stw_phys(pa, vring_used_idx(vq) + val); | |
237 | } | |
238 | ||
239 | static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) | |
240 | { | |
241 | target_phys_addr_t pa; | |
242 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
243 | stw_phys(pa, lduw_phys(pa) | mask); | |
244 | } | |
245 | ||
246 | static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) | |
247 | { | |
248 | target_phys_addr_t pa; | |
249 | pa = vq->vring.used + offsetof(VRingUsed, flags); | |
250 | stw_phys(pa, lduw_phys(pa) & ~mask); | |
251 | } | |
252 | ||
253 | void virtio_queue_set_notification(VirtQueue *vq, int enable) | |
254 | { | |
255 | if (enable) | |
256 | vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY); | |
257 | else | |
258 | vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY); | |
259 | } | |
260 | ||
261 | int virtio_queue_ready(VirtQueue *vq) | |
262 | { | |
263 | return vq->vring.avail != 0; | |
264 | } | |
265 | ||
266 | int virtio_queue_empty(VirtQueue *vq) | |
267 | { | |
268 | return vring_avail_idx(vq) == vq->last_avail_idx; | |
269 | } | |
270 | ||
271 | void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, | |
272 | unsigned int len, unsigned int idx) | |
273 | { | |
274 | unsigned int offset; | |
275 | int i; | |
276 | ||
277 | #ifndef VIRTIO_ZERO_COPY | |
278 | for (i = 0; i < elem->out_num; i++) | |
279 | qemu_free(elem->out_sg[i].iov_base); | |
280 | #endif | |
281 | ||
282 | offset = 0; | |
283 | for (i = 0; i < elem->in_num; i++) { | |
284 | size_t size = MIN(len - offset, elem->in_sg[i].iov_len); | |
285 | ||
286 | #ifdef VIRTIO_ZERO_COPY | |
287 | if (size) { | |
288 | ram_addr_t addr = (uint8_t *)elem->in_sg[i].iov_base - phys_ram_base; | |
289 | ram_addr_t off; | |
290 | ||
291 | for (off = 0; off < size; off += TARGET_PAGE_SIZE) | |
292 | cpu_physical_memory_set_dirty(addr + off); | |
293 | } | |
294 | #else | |
295 | if (size) | |
296 | cpu_physical_memory_write(elem->in_addr[i], | |
297 | elem->in_sg[i].iov_base, | |
298 | size); | |
299 | ||
300 | qemu_free(elem->in_sg[i].iov_base); | |
301 | #endif | |
302 | ||
303 | offset += size; | |
304 | } | |
305 | ||
306 | idx = (idx + vring_used_idx(vq)) % vq->vring.num; | |
307 | ||
308 | /* Get a pointer to the next entry in the used ring. */ | |
309 | vring_used_ring_id(vq, idx, elem->index); | |
310 | vring_used_ring_len(vq, idx, len); | |
311 | } | |
312 | ||
313 | void virtqueue_flush(VirtQueue *vq, unsigned int count) | |
314 | { | |
315 | /* Make sure buffer is written before we update index. */ | |
316 | wmb(); | |
317 | vring_used_idx_increment(vq, count); | |
318 | vq->inuse -= count; | |
319 | } | |
320 | ||
321 | void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, | |
322 | unsigned int len) | |
323 | { | |
324 | virtqueue_fill(vq, elem, len, 0); | |
325 | virtqueue_flush(vq, 1); | |
326 | } | |
327 | ||
328 | static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) | |
329 | { | |
330 | uint16_t num_heads = vring_avail_idx(vq) - idx; | |
331 | ||
332 | /* Check it isn't doing very strange things with descriptor numbers. */ | |
bb6834cf AL |
333 | if (num_heads > vq->vring.num) { |
334 | fprintf(stderr, "Guest moved used index from %u to %u", | |
335 | idx, vring_avail_idx(vq)); | |
336 | exit(1); | |
337 | } | |
967f97fa AL |
338 | |
339 | return num_heads; | |
340 | } | |
341 | ||
342 | static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) | |
343 | { | |
344 | unsigned int head; | |
345 | ||
346 | /* Grab the next descriptor number they're advertising, and increment | |
347 | * the index we've seen. */ | |
348 | head = vring_avail_ring(vq, idx % vq->vring.num); | |
349 | ||
350 | /* If their number is silly, that's a fatal mistake. */ | |
bb6834cf AL |
351 | if (head >= vq->vring.num) { |
352 | fprintf(stderr, "Guest says index %u is available", head); | |
353 | exit(1); | |
354 | } | |
967f97fa AL |
355 | |
356 | return head; | |
357 | } | |
358 | ||
359 | static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) | |
360 | { | |
361 | unsigned int next; | |
362 | ||
363 | /* If this descriptor says it doesn't chain, we're done. */ | |
364 | if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) | |
365 | return vq->vring.num; | |
366 | ||
367 | /* Check they're not leading us off end of descriptors. */ | |
368 | next = vring_desc_next(vq, i); | |
369 | /* Make sure compiler knows to grab that: we don't want it changing! */ | |
370 | wmb(); | |
371 | ||
bb6834cf AL |
372 | if (next >= vq->vring.num) { |
373 | fprintf(stderr, "Desc next is %u", next); | |
374 | exit(1); | |
375 | } | |
967f97fa AL |
376 | |
377 | return next; | |
378 | } | |
379 | ||
380 | int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes) | |
381 | { | |
382 | unsigned int idx; | |
383 | int num_bufs, in_total, out_total; | |
384 | ||
385 | idx = vq->last_avail_idx; | |
386 | ||
387 | num_bufs = in_total = out_total = 0; | |
388 | while (virtqueue_num_heads(vq, idx)) { | |
389 | int i; | |
390 | ||
391 | i = virtqueue_get_head(vq, idx++); | |
392 | do { | |
393 | /* If we've got too many, that implies a descriptor loop. */ | |
bb6834cf AL |
394 | if (++num_bufs > vq->vring.num) { |
395 | fprintf(stderr, "Looped descriptor"); | |
396 | exit(1); | |
397 | } | |
967f97fa AL |
398 | |
399 | if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
400 | if (in_bytes > 0 && | |
401 | (in_total += vring_desc_len(vq, i)) >= in_bytes) | |
402 | return 1; | |
403 | } else { | |
404 | if (out_bytes > 0 && | |
405 | (out_total += vring_desc_len(vq, i)) >= out_bytes) | |
406 | return 1; | |
407 | } | |
408 | } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); | |
409 | } | |
410 | ||
411 | return 0; | |
412 | } | |
413 | ||
414 | int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem) | |
415 | { | |
416 | unsigned int i, head; | |
417 | ||
418 | if (!virtqueue_num_heads(vq, vq->last_avail_idx)) | |
419 | return 0; | |
420 | ||
421 | /* When we start there are none of either input nor output. */ | |
422 | elem->out_num = elem->in_num = 0; | |
423 | ||
424 | i = head = virtqueue_get_head(vq, vq->last_avail_idx++); | |
425 | do { | |
426 | struct iovec *sg; | |
427 | ||
428 | if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) { | |
429 | elem->in_addr[elem->in_num] = vring_desc_addr(vq, i); | |
430 | sg = &elem->in_sg[elem->in_num++]; | |
431 | } else | |
432 | sg = &elem->out_sg[elem->out_num++]; | |
433 | ||
434 | /* Grab the first descriptor, and check it's OK. */ | |
435 | sg->iov_len = vring_desc_len(vq, i); | |
436 | ||
437 | #ifdef VIRTIO_ZERO_COPY | |
438 | sg->iov_base = virtio_map_gpa(vring_desc_addr(vq, i), sg->iov_len); | |
439 | #else | |
440 | /* cap individual scatter element size to prevent unbounded allocations | |
441 | of memory from the guest. Practically speaking, no virtio driver | |
442 | will ever pass more than a page in each element. We set the cap to | |
443 | be 2MB in case for some reason a large page makes it way into the | |
444 | sg list. When we implement a zero copy API, this limitation will | |
445 | disappear */ | |
446 | if (sg->iov_len > (2 << 20)) | |
447 | sg->iov_len = 2 << 20; | |
448 | ||
449 | sg->iov_base = qemu_malloc(sg->iov_len); | |
487414f1 | 450 | if (!(vring_desc_flags(vq, i) & VRING_DESC_F_WRITE)) { |
967f97fa AL |
451 | cpu_physical_memory_read(vring_desc_addr(vq, i), |
452 | sg->iov_base, | |
453 | sg->iov_len); | |
454 | } | |
455 | #endif | |
bb6834cf AL |
456 | if (sg->iov_base == NULL) { |
457 | fprintf(stderr, "Invalid mapping\n"); | |
458 | exit(1); | |
459 | } | |
967f97fa AL |
460 | |
461 | /* If we've got too many, that implies a descriptor loop. */ | |
bb6834cf AL |
462 | if ((elem->in_num + elem->out_num) > vq->vring.num) { |
463 | fprintf(stderr, "Looped descriptor"); | |
464 | exit(1); | |
465 | } | |
967f97fa AL |
466 | } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); |
467 | ||
468 | elem->index = head; | |
469 | ||
470 | vq->inuse++; | |
471 | ||
472 | return elem->in_num + elem->out_num; | |
473 | } | |
474 | ||
475 | /* virtio device */ | |
476 | ||
477 | static VirtIODevice *to_virtio_device(PCIDevice *pci_dev) | |
478 | { | |
479 | return (VirtIODevice *)pci_dev; | |
480 | } | |
481 | ||
482 | static void virtio_update_irq(VirtIODevice *vdev) | |
483 | { | |
484 | qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1); | |
485 | } | |
486 | ||
69d6451c | 487 | static void virtio_reset(void *opaque) |
967f97fa AL |
488 | { |
489 | VirtIODevice *vdev = opaque; | |
490 | int i; | |
491 | ||
492 | if (vdev->reset) | |
493 | vdev->reset(vdev); | |
494 | ||
495 | vdev->features = 0; | |
496 | vdev->queue_sel = 0; | |
497 | vdev->status = 0; | |
498 | vdev->isr = 0; | |
499 | virtio_update_irq(vdev); | |
500 | ||
501 | for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
502 | vdev->vq[i].vring.desc = 0; | |
503 | vdev->vq[i].vring.avail = 0; | |
504 | vdev->vq[i].vring.used = 0; | |
505 | vdev->vq[i].last_avail_idx = 0; | |
506 | vdev->vq[i].pfn = 0; | |
507 | } | |
508 | } | |
509 | ||
510 | static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) | |
511 | { | |
512 | VirtIODevice *vdev = to_virtio_device(opaque); | |
513 | ram_addr_t pa; | |
514 | ||
515 | addr -= vdev->addr; | |
516 | ||
517 | switch (addr) { | |
518 | case VIRTIO_PCI_GUEST_FEATURES: | |
519 | if (vdev->set_features) | |
520 | vdev->set_features(vdev, val); | |
521 | vdev->features = val; | |
522 | break; | |
523 | case VIRTIO_PCI_QUEUE_PFN: | |
f46f15bc | 524 | pa = (ram_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT; |
967f97fa AL |
525 | vdev->vq[vdev->queue_sel].pfn = val; |
526 | if (pa == 0) { | |
527 | virtio_reset(vdev); | |
528 | } else { | |
529 | virtqueue_init(&vdev->vq[vdev->queue_sel], pa); | |
530 | } | |
531 | break; | |
532 | case VIRTIO_PCI_QUEUE_SEL: | |
533 | if (val < VIRTIO_PCI_QUEUE_MAX) | |
534 | vdev->queue_sel = val; | |
535 | break; | |
536 | case VIRTIO_PCI_QUEUE_NOTIFY: | |
537 | if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc) | |
538 | vdev->vq[val].handle_output(vdev, &vdev->vq[val]); | |
539 | break; | |
540 | case VIRTIO_PCI_STATUS: | |
541 | vdev->status = val & 0xFF; | |
542 | if (vdev->status == 0) | |
543 | virtio_reset(vdev); | |
544 | break; | |
545 | } | |
546 | } | |
547 | ||
548 | static uint32_t virtio_ioport_read(void *opaque, uint32_t addr) | |
549 | { | |
550 | VirtIODevice *vdev = to_virtio_device(opaque); | |
551 | uint32_t ret = 0xFFFFFFFF; | |
552 | ||
553 | addr -= vdev->addr; | |
554 | ||
555 | switch (addr) { | |
556 | case VIRTIO_PCI_HOST_FEATURES: | |
557 | ret = vdev->get_features(vdev); | |
558 | ret |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY); | |
559 | break; | |
560 | case VIRTIO_PCI_GUEST_FEATURES: | |
561 | ret = vdev->features; | |
562 | break; | |
563 | case VIRTIO_PCI_QUEUE_PFN: | |
564 | ret = vdev->vq[vdev->queue_sel].pfn; | |
565 | break; | |
566 | case VIRTIO_PCI_QUEUE_NUM: | |
567 | ret = vdev->vq[vdev->queue_sel].vring.num; | |
568 | break; | |
569 | case VIRTIO_PCI_QUEUE_SEL: | |
570 | ret = vdev->queue_sel; | |
571 | break; | |
572 | case VIRTIO_PCI_STATUS: | |
573 | ret = vdev->status; | |
574 | break; | |
575 | case VIRTIO_PCI_ISR: | |
576 | /* reading from the ISR also clears it. */ | |
577 | ret = vdev->isr; | |
578 | vdev->isr = 0; | |
579 | virtio_update_irq(vdev); | |
580 | break; | |
581 | default: | |
582 | break; | |
583 | } | |
584 | ||
585 | return ret; | |
586 | } | |
587 | ||
588 | static uint32_t virtio_config_readb(void *opaque, uint32_t addr) | |
589 | { | |
590 | VirtIODevice *vdev = opaque; | |
591 | uint8_t val; | |
592 | ||
593 | vdev->get_config(vdev, vdev->config); | |
594 | ||
595 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
596 | if (addr > (vdev->config_len - sizeof(val))) | |
597 | return (uint32_t)-1; | |
598 | ||
599 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
600 | return val; | |
601 | } | |
602 | ||
603 | static uint32_t virtio_config_readw(void *opaque, uint32_t addr) | |
604 | { | |
605 | VirtIODevice *vdev = opaque; | |
606 | uint16_t val; | |
607 | ||
608 | vdev->get_config(vdev, vdev->config); | |
609 | ||
610 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
611 | if (addr > (vdev->config_len - sizeof(val))) | |
612 | return (uint32_t)-1; | |
613 | ||
614 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
615 | return val; | |
616 | } | |
617 | ||
618 | static uint32_t virtio_config_readl(void *opaque, uint32_t addr) | |
619 | { | |
620 | VirtIODevice *vdev = opaque; | |
621 | uint32_t val; | |
622 | ||
623 | vdev->get_config(vdev, vdev->config); | |
624 | ||
625 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
626 | if (addr > (vdev->config_len - sizeof(val))) | |
627 | return (uint32_t)-1; | |
628 | ||
629 | memcpy(&val, vdev->config + addr, sizeof(val)); | |
630 | return val; | |
631 | } | |
632 | ||
633 | static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data) | |
634 | { | |
635 | VirtIODevice *vdev = opaque; | |
636 | uint8_t val = data; | |
637 | ||
638 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
639 | if (addr > (vdev->config_len - sizeof(val))) | |
640 | return; | |
641 | ||
642 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
643 | ||
644 | if (vdev->set_config) | |
645 | vdev->set_config(vdev, vdev->config); | |
646 | } | |
647 | ||
648 | static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) | |
649 | { | |
650 | VirtIODevice *vdev = opaque; | |
651 | uint16_t val = data; | |
652 | ||
653 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
654 | if (addr > (vdev->config_len - sizeof(val))) | |
655 | return; | |
656 | ||
657 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
658 | ||
659 | if (vdev->set_config) | |
660 | vdev->set_config(vdev, vdev->config); | |
661 | } | |
662 | ||
663 | static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) | |
664 | { | |
665 | VirtIODevice *vdev = opaque; | |
666 | uint32_t val = data; | |
667 | ||
668 | addr -= vdev->addr + VIRTIO_PCI_CONFIG; | |
669 | if (addr > (vdev->config_len - sizeof(val))) | |
670 | return; | |
671 | ||
672 | memcpy(vdev->config + addr, &val, sizeof(val)); | |
673 | ||
674 | if (vdev->set_config) | |
675 | vdev->set_config(vdev, vdev->config); | |
676 | } | |
677 | ||
678 | static void virtio_map(PCIDevice *pci_dev, int region_num, | |
679 | uint32_t addr, uint32_t size, int type) | |
680 | { | |
681 | VirtIODevice *vdev = to_virtio_device(pci_dev); | |
682 | int i; | |
683 | ||
684 | vdev->addr = addr; | |
685 | for (i = 0; i < 3; i++) { | |
686 | register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev); | |
687 | register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev); | |
688 | } | |
689 | ||
690 | if (vdev->config_len) { | |
691 | register_ioport_write(addr + 20, vdev->config_len, 1, | |
692 | virtio_config_writeb, vdev); | |
693 | register_ioport_write(addr + 20, vdev->config_len, 2, | |
694 | virtio_config_writew, vdev); | |
695 | register_ioport_write(addr + 20, vdev->config_len, 4, | |
696 | virtio_config_writel, vdev); | |
697 | register_ioport_read(addr + 20, vdev->config_len, 1, | |
698 | virtio_config_readb, vdev); | |
699 | register_ioport_read(addr + 20, vdev->config_len, 2, | |
700 | virtio_config_readw, vdev); | |
701 | register_ioport_read(addr + 20, vdev->config_len, 4, | |
702 | virtio_config_readl, vdev); | |
703 | ||
704 | vdev->get_config(vdev, vdev->config); | |
705 | } | |
706 | } | |
707 | ||
708 | VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, | |
709 | void (*handle_output)(VirtIODevice *, VirtQueue *)) | |
710 | { | |
711 | int i; | |
712 | ||
713 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
714 | if (vdev->vq[i].vring.num == 0) | |
715 | break; | |
716 | } | |
717 | ||
718 | if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) | |
719 | abort(); | |
720 | ||
721 | vdev->vq[i].vring.num = queue_size; | |
722 | vdev->vq[i].handle_output = handle_output; | |
723 | ||
724 | return &vdev->vq[i]; | |
725 | } | |
726 | ||
727 | void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) | |
728 | { | |
729 | /* Always notify when queue is empty */ | |
730 | if ((vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx) && | |
731 | (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT)) | |
732 | return; | |
733 | ||
734 | vdev->isr |= 0x01; | |
735 | virtio_update_irq(vdev); | |
736 | } | |
737 | ||
738 | void virtio_notify_config(VirtIODevice *vdev) | |
739 | { | |
7625162c AL |
740 | if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) |
741 | return; | |
742 | ||
967f97fa AL |
743 | vdev->isr |= 0x03; |
744 | virtio_update_irq(vdev); | |
745 | } | |
746 | ||
747 | void virtio_save(VirtIODevice *vdev, QEMUFile *f) | |
748 | { | |
749 | int i; | |
750 | ||
751 | pci_device_save(&vdev->pci_dev, f); | |
752 | ||
753 | qemu_put_be32s(f, &vdev->addr); | |
754 | qemu_put_8s(f, &vdev->status); | |
755 | qemu_put_8s(f, &vdev->isr); | |
756 | qemu_put_be16s(f, &vdev->queue_sel); | |
757 | qemu_put_be32s(f, &vdev->features); | |
758 | qemu_put_be32(f, vdev->config_len); | |
759 | qemu_put_buffer(f, vdev->config, vdev->config_len); | |
760 | ||
761 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
762 | if (vdev->vq[i].vring.num == 0) | |
763 | break; | |
764 | } | |
765 | ||
766 | qemu_put_be32(f, i); | |
767 | ||
768 | for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { | |
769 | if (vdev->vq[i].vring.num == 0) | |
770 | break; | |
771 | ||
772 | qemu_put_be32(f, vdev->vq[i].vring.num); | |
773 | qemu_put_be32s(f, &vdev->vq[i].pfn); | |
774 | qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); | |
775 | } | |
776 | } | |
777 | ||
778 | void virtio_load(VirtIODevice *vdev, QEMUFile *f) | |
779 | { | |
780 | int num, i; | |
781 | ||
782 | pci_device_load(&vdev->pci_dev, f); | |
783 | ||
784 | qemu_get_be32s(f, &vdev->addr); | |
785 | qemu_get_8s(f, &vdev->status); | |
786 | qemu_get_8s(f, &vdev->isr); | |
787 | qemu_get_be16s(f, &vdev->queue_sel); | |
788 | qemu_get_be32s(f, &vdev->features); | |
789 | vdev->config_len = qemu_get_be32(f); | |
790 | qemu_get_buffer(f, vdev->config, vdev->config_len); | |
791 | ||
792 | num = qemu_get_be32(f); | |
793 | ||
794 | for (i = 0; i < num; i++) { | |
795 | vdev->vq[i].vring.num = qemu_get_be32(f); | |
796 | qemu_get_be32s(f, &vdev->vq[i].pfn); | |
797 | qemu_get_be16s(f, &vdev->vq[i].last_avail_idx); | |
798 | ||
799 | if (vdev->vq[i].pfn) { | |
800 | target_phys_addr_t pa; | |
801 | ||
f46f15bc | 802 | pa = (ram_addr_t)vdev->vq[i].pfn << VIRTIO_PCI_QUEUE_ADDR_SHIFT; |
967f97fa AL |
803 | virtqueue_init(&vdev->vq[i], pa); |
804 | } | |
805 | } | |
806 | ||
807 | virtio_update_irq(vdev); | |
808 | } | |
809 | ||
810 | VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, | |
811 | uint16_t vendor, uint16_t device, | |
812 | uint16_t subvendor, uint16_t subdevice, | |
173a543b BS |
813 | uint16_t class_code, uint8_t pif, |
814 | size_t config_size, size_t struct_size) | |
967f97fa AL |
815 | { |
816 | VirtIODevice *vdev; | |
817 | PCIDevice *pci_dev; | |
818 | uint8_t *config; | |
819 | uint32_t size; | |
820 | ||
821 | pci_dev = pci_register_device(bus, name, struct_size, | |
822 | -1, NULL, NULL); | |
823 | if (!pci_dev) | |
824 | return NULL; | |
825 | ||
826 | vdev = to_virtio_device(pci_dev); | |
827 | ||
828 | vdev->status = 0; | |
829 | vdev->isr = 0; | |
830 | vdev->queue_sel = 0; | |
831 | vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); | |
832 | ||
833 | config = pci_dev->config; | |
deb54399 AL |
834 | pci_config_set_vendor_id(config, vendor); |
835 | pci_config_set_device_id(config, device); | |
967f97fa AL |
836 | |
837 | config[0x08] = VIRTIO_PCI_ABI_VERSION; | |
838 | ||
839 | config[0x09] = pif; | |
173a543b | 840 | pci_config_set_class(config, class_code); |
967f97fa AL |
841 | config[0x0e] = 0x00; |
842 | ||
843 | config[0x2c] = subvendor & 0xFF; | |
844 | config[0x2d] = (subvendor >> 8) & 0xFF; | |
845 | config[0x2e] = subdevice & 0xFF; | |
846 | config[0x2f] = (subdevice >> 8) & 0xFF; | |
847 | ||
848 | config[0x3d] = 1; | |
849 | ||
850 | vdev->name = name; | |
851 | vdev->config_len = config_size; | |
852 | if (vdev->config_len) | |
853 | vdev->config = qemu_mallocz(config_size); | |
854 | else | |
855 | vdev->config = NULL; | |
856 | ||
857 | size = 20 + config_size; | |
858 | if (size & (size-1)) | |
ad46db9a | 859 | size = 1 << qemu_fls(size); |
967f97fa AL |
860 | |
861 | pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO, | |
862 | virtio_map); | |
863 | qemu_register_reset(virtio_reset, vdev); | |
864 | ||
865 | return vdev; | |
866 | } |