/*
 * virtio-iommu device
 *
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/dma.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"

#include "standard-headers/linux/virtio_ids.h"

#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci.h"
#define VIOMMU_DEFAULT_QUEUE_SIZE 256
#define VIOMMU_PROBE_SIZE 512
typedef struct VirtIOIOMMUDomain {
    uint32_t id;
    bool bypass;
    GTree *mappings;
    QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list;
} VirtIOIOMMUDomain;

typedef struct VirtIOIOMMUEndpoint {
    uint32_t id;
    VirtIOIOMMUDomain *domain;
    IOMMUMemoryRegion *iommu_mr;
    QLIST_ENTRY(VirtIOIOMMUEndpoint) next;
} VirtIOIOMMUEndpoint;

typedef struct VirtIOIOMMUInterval {
    uint64_t low;
    uint64_t high;
} VirtIOIOMMUInterval;

typedef struct VirtIOIOMMUMapping {
    uint64_t phys_addr;
    uint32_t flags;
} VirtIOIOMMUMapping;
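
/*
 * Domains and endpoints are kept in GTrees keyed by their 32-bit IDs;
 * each domain additionally owns a GTree of VirtIOIOMMUInterval keys to
 * VirtIOIOMMUMapping values describing its virtual-to-physical ranges.
 */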
static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
{
    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
}
/*
 * The bus number is used for lookup when SID based operations occur.
 * In that case we lazily populate the IOMMUPciBus array from the bus hash
 * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus
 * numbers may not all be initialized yet.
 */
static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num)
{
    IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num];

    if (!iommu_pci_bus) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, s->as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) {
            if (pci_bus_num(iommu_pci_bus->bus) == bus_num) {
                s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus;
                return iommu_pci_bus;
            }
        }
        return NULL;
    }
    return iommu_pci_bus;
}
static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid)
{
    uint8_t bus_n, devfn;
    IOMMUPciBus *iommu_pci_bus;
    IOMMUDevice *dev;

    bus_n = PCI_BUS_NUM(sid);
    iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n);
    if (iommu_pci_bus) {
        devfn = sid & (PCI_DEVFN_MAX - 1);
        dev = iommu_pci_bus->pbdev[devfn];
        if (dev) {
            return &dev->iommu_mr;
        }
    }
    return NULL;
}
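
/*
 * GTree comparator for mapping intervals: intervals compare equal as soon
 * as they overlap, so looking up the degenerate interval [addr, addr + 1]
 * finds whichever mapping contains addr.
 */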
static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a;
    VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b;

    if (inta->high < intb->low) {
        return -1;
    } else if (intb->high < inta->low) {
        return 1;
    } else {
        return 0;
    }
}
static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                    hwaddr virt_end, hwaddr paddr,
                                    uint32_t flags)
{
    IOMMUTLBEvent event;
    IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ,
                                              flags & VIRTIO_IOMMU_MAP_F_WRITE);

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_MAP) ||
        (flags & VIRTIO_IOMMU_MAP_F_MMIO) || !perm) {
        return;
    }

    trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end,
                                  paddr, perm);

    event.type = IOMMU_NOTIFIER_MAP;
    event.entry.target_as = &address_space_memory;
    event.entry.addr_mask = virt_end - virt_start;
    event.entry.iova = virt_start;
    event.entry.perm = perm;
    event.entry.translated_addr = paddr;

    memory_region_notify_iommu(mr, 0, event);
}
static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start,
                                      hwaddr virt_end)
{
    IOMMUTLBEvent event;
    uint64_t delta = virt_end - virt_start;

    if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) {
        return;
    }

    trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end);

    event.type = IOMMU_NOTIFIER_UNMAP;
    event.entry.target_as = &address_space_memory;
    event.entry.perm = IOMMU_NONE;
    event.entry.translated_addr = 0;
    event.entry.addr_mask = delta;
    event.entry.iova = virt_start;

    /* A full 64-bit range wraps the split loop below, notify it directly */
    if (delta == UINT64_MAX) {
        memory_region_notify_iommu(mr, 0, event);
    }

    /* Split the range into power-of-two aligned chunks for the notifiers */
    while (virt_start != virt_end + 1) {
        uint64_t mask = dma_aligned_pow2_mask(virt_start, virt_end, 64);

        event.entry.addr_mask = mask;
        event.entry.iova = virt_start;
        memory_region_notify_iommu(mr, 0, event);
        virt_start += mask + 1;
    }
}
static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value,
                                             gpointer data)
{
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_unmap(mr, interval->low, interval->high);

    return false;
}
static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
                                           gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);

    return false;
}
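
/*
 * Detaching an endpoint from its domain also tears down the shadow
 * mappings: every interval recorded in the domain is unmapped on the
 * endpoint's IOMMU memory region before the endpoint leaves the list.
 */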
static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
{
    VirtIOIOMMUDomain *domain = ep->domain;

    if (!ep->domain) {
        return;
    }

    g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb,
                   ep->iommu_mr);
    QLIST_REMOVE(ep, next);
    ep->domain = NULL;
}
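
/*
 * Endpoints are created lazily, on the first request that names them;
 * returns NULL if the endpoint ID does not correspond to any IOMMU
 * memory region.
 */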
static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
                                                      uint32_t ep_id)
{
    VirtIOIOMMUEndpoint *ep;
    IOMMUMemoryRegion *mr;

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (ep) {
        return ep;
    }
    mr = virtio_iommu_mr(s, ep_id);
    if (!mr) {
        return NULL;
    }
    ep = g_malloc0(sizeof(*ep));
    ep->id = ep_id;
    ep->iommu_mr = mr;
    trace_virtio_iommu_get_endpoint(ep_id);
    g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep);
    return ep;
}
static void virtio_iommu_put_endpoint(gpointer data)
{
    VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data;

    if (ep->domain) {
        virtio_iommu_detach_endpoint_from_domain(ep);
    }

    trace_virtio_iommu_put_endpoint(ep->id);
    g_free(ep);
}
static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s,
                                                  uint32_t domain_id,
                                                  bool bypass)
{
    VirtIOIOMMUDomain *domain;

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (domain) {
        if (domain->bypass != bypass) {
            return NULL;
        }
        return domain;
    }
    domain = g_malloc0(sizeof(*domain));
    domain->id = domain_id;
    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, (GDestroyNotify)g_free,
                                       (GDestroyNotify)g_free);
    domain->bypass = bypass;
    g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain);
    QLIST_INIT(&domain->endpoint_list);
    trace_virtio_iommu_get_domain(domain_id);
    return domain;
}
static void virtio_iommu_put_domain(gpointer data)
{
    VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data;
    VirtIOIOMMUEndpoint *iter, *tmp;

    QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) {
        virtio_iommu_detach_endpoint_from_domain(iter);
    }
    g_tree_destroy(domain->mappings);
    trace_virtio_iommu_put_domain(domain->id);
    g_free(domain);
}
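
/*
 * pci_setup_iommu() callback: each (bus, devfn) pair gets its own
 * IOMMUDevice with a dedicated IOMMU memory region and address space,
 * created on first use and cached in the per-bus pbdev[] array.
 */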
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
                                              int devfn)
{
    VirtIOIOMMU *s = opaque;
    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
    static uint32_t mr_index;
    IOMMUDevice *sdev;

    if (!sbus) {
        sbus = g_malloc0(sizeof(IOMMUPciBus) +
                         sizeof(IOMMUDevice *) * PCI_DEVFN_MAX);
        sbus->bus = bus;
        g_hash_table_insert(s->as_by_busptr, bus, sbus);
    }

    sdev = sbus->pbdev[devfn];
    if (!sdev) {
        char *name = g_strdup_printf("%s-%d-%d",
                                     TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                     mr_index++, devfn);
        sdev = sbus->pbdev[devfn] = g_new0(IOMMUDevice, 1);

        sdev->viommu = s;
        sdev->bus = bus;
        sdev->devfn = devfn;

        trace_virtio_iommu_init_iommu_mr(name);

        memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                 TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                 OBJECT(s), name,
                                 UINT64_MAX);
        address_space_init(&sdev->as,
                           MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
        g_free(name);
    }
    return &sdev->as;
}
static int virtio_iommu_attach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_attach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_attach(domain_id, ep_id);

    if (flags & ~VIRTIO_IOMMU_ATTACH_F_BYPASS) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    ep = virtio_iommu_get_endpoint(s, ep_id);
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (ep->domain) {
        VirtIOIOMMUDomain *previous_domain = ep->domain;
        /*
         * the device is already attached to a domain,
         * detach it first
         */
        virtio_iommu_detach_endpoint_from_domain(ep);
        if (QLIST_EMPTY(&previous_domain->endpoint_list)) {
            g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id));
        }
    }

    domain = virtio_iommu_get_domain(s, domain_id,
                                     flags & VIRTIO_IOMMU_ATTACH_F_BYPASS);
    if (!domain) {
        /* Incompatible bypass flag */
        return VIRTIO_IOMMU_S_INVAL;
    }
    QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);

    ep->domain = domain;

    /* Replay domain mappings on the associated memory region */
    g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
                   ep->iommu_mr);

    return VIRTIO_IOMMU_S_OK;
}
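
/*
 * VIRTIO_IOMMU_T_DETACH: the endpoint must exist and be attached to the
 * domain named in the request; detaching the last endpoint destroys the
 * domain.
 */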
static int virtio_iommu_detach(VirtIOIOMMU *s,
                               struct virtio_iommu_req_detach *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;

    trace_virtio_iommu_detach(domain_id, ep_id);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id));
    if (!ep) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    domain = ep->domain;

    if (!domain || domain->id != domain_id) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    virtio_iommu_detach_endpoint_from_domain(ep);

    if (QLIST_EMPTY(&domain->endpoint_list)) {
        g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
    }
    return VIRTIO_IOMMU_S_OK;
}
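
/*
 * VIRTIO_IOMMU_T_MAP: record the new mapping in the domain and propagate
 * it to every endpoint already attached, keeping registered IOMMU
 * notifiers in sync.
 */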
static int virtio_iommu_map(VirtIOIOMMU *s,
                            struct virtio_iommu_req_map *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t phys_start = le64_to_cpu(req->phys_start);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    uint32_t flags = le32_to_cpu(req->flags);
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUInterval *interval;
    VirtIOIOMMUMapping *mapping;
    VirtIOIOMMUEndpoint *ep;

    if (flags & ~VIRTIO_IOMMU_MAP_F_MASK) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval = g_malloc0(sizeof(*interval));

    interval->low = virt_start;
    interval->high = virt_end;

    mapping = g_tree_lookup(domain->mappings, (gpointer)interval);
    if (mapping) {
        g_free(interval);
        return VIRTIO_IOMMU_S_INVAL;
    }

    trace_virtio_iommu_map(domain_id, virt_start, virt_end, phys_start, flags);

    mapping = g_malloc0(sizeof(*mapping));
    mapping->phys_addr = phys_start;
    mapping->flags = flags;

    g_tree_insert(domain->mappings, interval, mapping);

    QLIST_FOREACH(ep, &domain->endpoint_list, next) {
        virtio_iommu_notify_map(ep->iommu_mr, virt_start, virt_end, phys_start,
                                flags);
    }

    return VIRTIO_IOMMU_S_OK;
}
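
/*
 * VIRTIO_IOMMU_T_UNMAP: mappings are only removed whole; a request that
 * partially covers an existing mapping fails with VIRTIO_IOMMU_S_RANGE.
 */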
static int virtio_iommu_unmap(VirtIOIOMMU *s,
                              struct virtio_iommu_req_unmap *req)
{
    uint32_t domain_id = le32_to_cpu(req->domain);
    uint64_t virt_start = le64_to_cpu(req->virt_start);
    uint64_t virt_end = le64_to_cpu(req->virt_end);
    VirtIOIOMMUMapping *iter_val;
    VirtIOIOMMUInterval interval, *iter_key;
    VirtIOIOMMUDomain *domain;
    VirtIOIOMMUEndpoint *ep;
    int ret = VIRTIO_IOMMU_S_OK;

    trace_virtio_iommu_unmap(domain_id, virt_start, virt_end);

    domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id));
    if (!domain) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    if (domain->bypass) {
        return VIRTIO_IOMMU_S_INVAL;
    }

    interval.low = virt_start;
    interval.high = virt_end;

    while (g_tree_lookup_extended(domain->mappings, &interval,
                                  (void **)&iter_key, (void**)&iter_val)) {
        uint64_t current_low = iter_key->low;
        uint64_t current_high = iter_key->high;

        if (interval.low <= current_low && interval.high >= current_high) {
            QLIST_FOREACH(ep, &domain->endpoint_list, next) {
                virtio_iommu_notify_unmap(ep->iommu_mr, current_low,
                                          current_high);
            }
            g_tree_remove(domain->mappings, iter_key);
            trace_virtio_iommu_unmap_done(domain_id, current_low, current_high);
        } else {
            ret = VIRTIO_IOMMU_S_RANGE;
            break;
        }
    }
    return ret;
}
static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
                                               uint8_t *buf, size_t free)
{
    struct virtio_iommu_probe_resv_mem prop = {};
    size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
    int i;

    total = size * s->nb_reserved_regions;

    if (total > free) {
        return -ENOSPC;
    }

    for (i = 0; i < s->nb_reserved_regions; i++) {
        unsigned subtype = s->reserved_regions[i].type;

        assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
               subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
        prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
        prop.head.length = cpu_to_le16(length);
        prop.subtype = subtype;
        prop.start = cpu_to_le64(s->reserved_regions[i].low);
        prop.end = cpu_to_le64(s->reserved_regions[i].high);

        memcpy(buf, &prop, size);

        trace_virtio_iommu_fill_resv_property(ep, prop.subtype,
                                              prop.start, prop.end);
        buf += size;
    }
    return total;
}
/**
 * virtio_iommu_probe - Fill the probe request buffer with
 * the properties the device is able to return
 */
static int virtio_iommu_probe(VirtIOIOMMU *s,
                              struct virtio_iommu_req_probe *req,
                              uint8_t *buf)
{
    uint32_t ep_id = le32_to_cpu(req->endpoint);
    size_t free = VIOMMU_PROBE_SIZE;
    ssize_t count;

    if (!virtio_iommu_mr(s, ep_id)) {
        return VIRTIO_IOMMU_S_NOENT;
    }

    count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free);
    if (count < 0) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    buf += count;
    free -= count;

    return VIRTIO_IOMMU_S_OK;
}
static int virtio_iommu_iov_to_req(struct iovec *iov,
                                   unsigned int iov_cnt,
                                   void *req, size_t req_sz)
{
    size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail);

    sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz);
    if (unlikely(sz != payload_sz)) {
        return VIRTIO_IOMMU_S_INVAL;
    }
    return 0;
}
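
/*
 * Generates virtio_iommu_handle_attach/detach/map/unmap: each wrapper
 * copies the request payload out of the iovec and dispatches to the
 * matching virtio_iommu_<req>() handler above.
 */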
#define virtio_iommu_handle_req(__req)                                  \
static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s,                \
                                         struct iovec *iov,             \
                                         unsigned int iov_cnt)          \
{                                                                       \
    struct virtio_iommu_req_ ## __req req;                              \
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \
                                                                        \
    return ret ? ret : virtio_iommu_ ## __req(s, &req);                 \
}

virtio_iommu_handle_req(attach)
virtio_iommu_handle_req(detach)
virtio_iommu_handle_req(map)
virtio_iommu_handle_req(unmap)
static int virtio_iommu_handle_probe(VirtIOIOMMU *s,
                                     struct iovec *iov,
                                     unsigned int iov_cnt,
                                     uint8_t *buf)
{
    struct virtio_iommu_req_probe req;
    int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req));

    return ret ? ret : virtio_iommu_probe(s, &req, buf);
}
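
/*
 * Request virtqueue handler. Each element carries a request head plus
 * payload in the out direction and receives a status tail in the in
 * direction; VIRTIO_IOMMU_T_PROBE additionally returns probe_size bytes
 * of properties ahead of the tail.
 */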
static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_req_head head;
    struct virtio_iommu_req_tail tail = {};
    size_t output_size = sizeof(tail), sz;
    VirtQueueElement *elem;
    unsigned int iov_cnt;
    struct iovec *iov;
    void *buf = NULL;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            return;
        }

        if (iov_size(elem->in_sg, elem->in_num) < sizeof(tail) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(head)) {
            virtio_error(vdev, "virtio-iommu bad head/tail size");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov = elem->out_sg;
        sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
        if (unlikely(sz != sizeof(head))) {
            tail.status = VIRTIO_IOMMU_S_DEVERR;
            goto out;
        }
        qemu_mutex_lock(&s->mutex);
        switch (head.type) {
        case VIRTIO_IOMMU_T_ATTACH:
            tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_DETACH:
            tail.status = virtio_iommu_handle_detach(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_MAP:
            tail.status = virtio_iommu_handle_map(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_UNMAP:
            tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt);
            break;
        case VIRTIO_IOMMU_T_PROBE:
        {
            struct virtio_iommu_req_tail *ptail;

            output_size = s->config.probe_size + sizeof(tail);
            buf = g_malloc0(output_size);

            ptail = (struct virtio_iommu_req_tail *)
                        (buf + s->config.probe_size);
            ptail->status = virtio_iommu_handle_probe(s, iov, iov_cnt, buf);
            break;
        }
        default:
            tail.status = VIRTIO_IOMMU_S_UNSUPP;
        }
        qemu_mutex_unlock(&s->mutex);

out:
        sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                          buf ? buf : &tail, output_size);
        assert(sz == output_size);

        virtqueue_push(vq, elem, sz);
        virtio_notify(vdev, vq);
        g_free(elem);
        g_free(buf);
        buf = NULL;
    }
}
static void virtio_iommu_report_fault(VirtIOIOMMU *viommu, uint8_t reason,
                                      int flags, uint32_t endpoint,
                                      uint64_t address)
{
    VirtIODevice *vdev = &viommu->parent_obj;
    VirtQueue *vq = viommu->event_vq;
    struct virtio_iommu_fault fault;
    VirtQueueElement *elem;
    size_t sz;

    memset(&fault, 0, sizeof(fault));
    fault.reason = reason;
    fault.flags = cpu_to_le32(flags);
    fault.endpoint = cpu_to_le32(endpoint);
    fault.address = cpu_to_le64(address);

    elem = virtqueue_pop(vq, sizeof(VirtQueueElement));

    if (!elem) {
        error_report_once(
            "no buffer available in event queue to report event");
        return;
    }

    if (iov_size(elem->in_sg, elem->in_num) < sizeof(fault)) {
        virtio_error(vdev, "error buffer of wrong size");
        virtqueue_detach_element(vq, elem, 0);
        g_free(elem);
        return;
    }

    sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
                      &fault, sizeof(fault));
    assert(sz == sizeof(fault));

    trace_virtio_iommu_report_fault(reason, flags, endpoint, address);
    virtqueue_push(vq, elem, sz);
    virtio_notify(vdev, vq);
    g_free(elem);
}
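
/*
 * Translation resolves in this order: unknown endpoint (global bypass or
 * fault), reserved regions (MSI bypass or fault), endpoint without a
 * domain, domain-level bypass, then the domain's mapping tree with
 * read/write permission checks.
 */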
static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                            IOMMUAccessFlags flag,
                                            int iommu_idx)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMUInterval interval, *mapping_key;
    VirtIOIOMMUMapping *mapping_value;
    VirtIOIOMMU *s = sdev->viommu;
    bool read_fault, write_fault;
    VirtIOIOMMUEndpoint *ep;
    uint32_t sid, flags;
    bool bypass_allowed;
    bool found;
    int i;

    interval.low = addr;
    interval.high = addr + 1;

    IOMMUTLBEntry entry = {
        .target_as = &address_space_memory,
        .iova = addr,
        .translated_addr = addr,
        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
        .perm = IOMMU_NONE,
    };

    bypass_allowed = s->config.bypass;

    sid = virtio_iommu_get_bdf(sdev);

    trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
    qemu_mutex_lock(&s->mutex);

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep) {
        if (!bypass_allowed) {
            error_report_once("%s sid=%d is not known!!", __func__, sid);
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_UNKNOWN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }

    for (i = 0; i < s->nb_reserved_regions; i++) {
        ReservedRegion *reg = &s->reserved_regions[i];

        if (addr >= reg->low && addr <= reg->high) {
            switch (reg->type) {
            case VIRTIO_IOMMU_RESV_MEM_T_MSI:
                entry.perm = flag;
                break;
            case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
            default:
                virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                          VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                          sid, addr);
                break;
            }
            goto unlock;
        }
    }

    if (!ep->domain) {
        if (!bypass_allowed) {
            error_report_once("%s %02x:%02x.%01x not attached to any domain",
                              __func__, PCI_BUS_NUM(sid),
                              PCI_SLOT(sid), PCI_FUNC(sid));
            virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_DOMAIN,
                                      VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                      sid, addr);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    } else if (ep->domain->bypass) {
        entry.perm = flag;
        goto unlock;
    }

    found = g_tree_lookup_extended(ep->domain->mappings, (gpointer)(&interval),
                                   (void **)&mapping_key,
                                   (void **)&mapping_value);
    if (!found) {
        error_report_once("%s no mapping for 0x%"PRIx64" for sid=%d",
                          __func__, addr, sid);
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }

    read_fault = (flag & IOMMU_RO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_READ);
    write_fault = (flag & IOMMU_WO) &&
                    !(mapping_value->flags & VIRTIO_IOMMU_MAP_F_WRITE);

    flags = read_fault ? VIRTIO_IOMMU_FAULT_F_READ : 0;
    flags |= write_fault ? VIRTIO_IOMMU_FAULT_F_WRITE : 0;
    if (flags) {
        error_report_once("%s permission error on 0x%"PRIx64"(%d): allowed=%d",
                          __func__, addr, flag, mapping_value->flags);
        flags |= VIRTIO_IOMMU_FAULT_F_ADDRESS;
        virtio_iommu_report_fault(s, VIRTIO_IOMMU_FAULT_R_MAPPING,
                                  flags | VIRTIO_IOMMU_FAULT_F_ADDRESS,
                                  sid, addr);
        goto unlock;
    }
    entry.translated_addr = addr - mapping_key->low + mapping_value->phys_addr;
    entry.perm = flag;
    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);

unlock:
    qemu_mutex_unlock(&s->mutex);
    return entry;
}
static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    struct virtio_iommu_config *out_config = (void *)config_data;

    out_config->page_size_mask = cpu_to_le64(dev_config->page_size_mask);
    out_config->input_range.start = cpu_to_le64(dev_config->input_range.start);
    out_config->input_range.end = cpu_to_le64(dev_config->input_range.end);
    out_config->domain_range.start = cpu_to_le32(dev_config->domain_range.start);
    out_config->domain_range.end = cpu_to_le32(dev_config->domain_range.end);
    out_config->probe_size = cpu_to_le32(dev_config->probe_size);
    out_config->bypass = dev_config->bypass;

    trace_virtio_iommu_get_config(dev_config->page_size_mask,
                                  dev_config->input_range.start,
                                  dev_config->input_range.end,
                                  dev_config->domain_range.start,
                                  dev_config->domain_range.end,
                                  dev_config->probe_size,
                                  dev_config->bypass);
}
static void virtio_iommu_set_config(VirtIODevice *vdev,
                                    const uint8_t *config_data)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);
    struct virtio_iommu_config *dev_config = &dev->config;
    const struct virtio_iommu_config *in_config = (void *)config_data;

    if (in_config->bypass != dev_config->bypass) {
        if (!virtio_vdev_has_feature(vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
            virtio_error(vdev, "cannot set config.bypass");
            return;
        } else if (in_config->bypass != 0 && in_config->bypass != 1) {
            virtio_error(vdev, "invalid config.bypass value '%u'",
                         in_config->bypass);
            return;
        }
        dev_config->bypass = in_config->bypass;
    }

    trace_virtio_iommu_set_config(in_config->bypass);
}
static uint64_t virtio_iommu_get_features(VirtIODevice *vdev, uint64_t f,
                                          Error **errp)
{
    VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev);

    f |= dev->features;
    trace_virtio_iommu_get_features(f);
    return f;
}
static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
{
    guint ua = GPOINTER_TO_UINT(a);
    guint ub = GPOINTER_TO_UINT(b);
    return (ua > ub) - (ua < ub);
}
static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
{
    VirtIOIOMMUMapping *mapping = (VirtIOIOMMUMapping *) value;
    VirtIOIOMMUInterval *interval = (VirtIOIOMMUInterval *) key;
    IOMMUMemoryRegion *mr = (IOMMUMemoryRegion *) data;

    trace_virtio_iommu_remap(mr->parent_obj.name, interval->low, interval->high,
                             mapping->phys_addr);
    virtio_iommu_notify_map(mr, interval->low, interval->high,
                            mapping->phys_addr, mapping->flags);
    return false;
}
static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint32_t sid;
    VirtIOIOMMUEndpoint *ep;

    sid = virtio_iommu_get_bdf(sdev);

    qemu_mutex_lock(&s->mutex);

    if (!s->endpoints) {
        goto unlock;
    }

    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep || !ep->domain) {
        goto unlock;
    }

    g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr);

unlock:
    qemu_mutex_unlock(&s->mutex);
}
static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
                                            IOMMUNotifierFlag old,
                                            IOMMUNotifierFlag new,
                                            Error **errp)
{
    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
        error_setg(errp, "Virtio-iommu does not support dev-iotlb yet");
        return -EINVAL;
    }

    if (old == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        trace_virtio_iommu_notify_flag_del(iommu_mr->parent_obj.name);
    }
    return 0;
}
/*
 * The default mask (TARGET_PAGE_MASK) is the smallest supported guest granule,
 * for example 0xfffffffffffff000. When an assigned device has page size
 * restrictions due to the hardware IOMMU configuration, apply this restriction
 * to the mask.
 */
static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
                                           uint64_t new_mask,
                                           Error **errp)
{
    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
    VirtIOIOMMU *s = sdev->viommu;
    uint64_t cur_mask = s->config.page_size_mask;

    trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask,
                                          new_mask);

    if ((cur_mask & new_mask) == 0) {
        error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                   " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask);
        return -1;
    }

    /*
     * After the machine is finalized, we can't change the mask anymore. If by
     * chance the hotplugged device supports the same granule, we can still
     * accept it. Having different masks is possible but the guest will use
     * sub-optimal block sizes, so warn about it.
     */
    if (phase_check(PHASE_MACHINE_READY)) {
        int new_granule = ctz64(new_mask);
        int cur_granule = ctz64(cur_mask);

        if (new_granule != cur_granule) {
            error_setg(errp, "virtio-iommu page mask 0x%"PRIx64
                       " is incompatible with mask 0x%"PRIx64, cur_mask,
                       new_mask);
            return -1;
        } else if (new_mask != cur_mask) {
            warn_report("virtio-iommu page mask 0x%"PRIx64
                        " does not match 0x%"PRIx64, cur_mask, new_mask);
        }
        return 0;
    }

    s->config.page_size_mask &= new_mask;
    return 0;
}
static void virtio_iommu_system_reset(void *opaque)
{
    VirtIOIOMMU *s = opaque;

    trace_virtio_iommu_system_reset();

    /*
     * config.bypass is sticky across device reset, but should be restored on
     * system reset
     */
    s->config.bypass = s->boot_bypass;
}
static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
                sizeof(struct virtio_iommu_config));

    memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num));

    s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE,
                                 virtio_iommu_handle_command);
    s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL);

    s->config.page_size_mask = TARGET_PAGE_MASK;
    s->config.input_range.end = UINT64_MAX;
    s->config.domain_range.end = UINT32_MAX;
    s->config.probe_size = VIOMMU_PROBE_SIZE;

    virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX);
    virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC);
    virtio_add_feature(&s->features, VIRTIO_F_VERSION_1);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_INPUT_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_DOMAIN_RANGE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE);
    virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG);

    qemu_mutex_init(&s->mutex);

    s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);

    if (s->primary_bus) {
        pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
    } else {
        error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
    }

    qemu_register_reset(virtio_iommu_system_reset, s);
}
static void virtio_iommu_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

    qemu_unregister_reset(virtio_iommu_system_reset, s);

    g_hash_table_destroy(s->as_by_busptr);
    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }

    virtio_delete_queue(s->req_vq);
    virtio_delete_queue(s->event_vq);
    virtio_cleanup(vdev);
}
static void virtio_iommu_device_reset(VirtIODevice *vdev)
{
    VirtIOIOMMU *s = VIRTIO_IOMMU(vdev);

    trace_virtio_iommu_device_reset();

    if (s->domains) {
        g_tree_destroy(s->domains);
    }
    if (s->endpoints) {
        g_tree_destroy(s->endpoints);
    }
    s->domains = g_tree_new_full((GCompareDataFunc)int_cmp,
                                 NULL, NULL, virtio_iommu_put_domain);
    s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp,
                                   NULL, NULL, virtio_iommu_put_endpoint);
}
static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status)
{
    trace_virtio_iommu_device_status(status);
}

static void virtio_iommu_instance_init(Object *obj)
{
}
#define VMSTATE_INTERVAL                               \
{                                                      \
    .name = "interval",                                \
    .version_id = 1,                                   \
    .minimum_version_id = 1,                           \
    .fields = (VMStateField[]) {                       \
        VMSTATE_UINT64(low, VirtIOIOMMUInterval),      \
        VMSTATE_UINT64(high, VirtIOIOMMUInterval),     \
        VMSTATE_END_OF_LIST()                          \
    }                                                  \
}

#define VMSTATE_MAPPING                               \
{                                                     \
    .name = "mapping",                                \
    .version_id = 1,                                  \
    .minimum_version_id = 1,                          \
    .fields = (VMStateField[]) {                      \
        VMSTATE_UINT64(phys_addr, VirtIOIOMMUMapping),\
        VMSTATE_UINT32(flags, VirtIOIOMMUMapping),    \
        VMSTATE_END_OF_LIST()                         \
    },                                                \
}

static const VMStateDescription vmstate_interval_mapping[2] = {
    VMSTATE_MAPPING,   /* value */
    VMSTATE_INTERVAL   /* key   */
};
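
/*
 * The mappings GTree is not restored by the generic VMState code, so
 * recreate an empty tree before the incoming domain state is loaded.
 */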
static int domain_preload(void *opaque)
{
    VirtIOIOMMUDomain *domain = opaque;

    domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
                                       NULL, g_free, g_free);
    return 0;
}
static const VMStateDescription vmstate_endpoint = {
    .name = "endpoint",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUEndpoint),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_domain = {
    .name = "domain",
    .version_id = 2,
    .minimum_version_id = 2,
    .pre_load = domain_preload,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(id, VirtIOIOMMUDomain),
        VMSTATE_GTREE_V(mappings, VirtIOIOMMUDomain, 1,
                        vmstate_interval_mapping,
                        VirtIOIOMMUInterval, VirtIOIOMMUMapping),
        VMSTATE_QLIST_V(endpoint_list, VirtIOIOMMUDomain, 1,
                        vmstate_endpoint, VirtIOIOMMUEndpoint, next),
        VMSTATE_BOOL_V(bypass, VirtIOIOMMUDomain, 2),
        VMSTATE_END_OF_LIST()
    }
};
static gboolean reconstruct_endpoints(gpointer key, gpointer value,
                                      gpointer data)
{
    VirtIOIOMMU *s = (VirtIOIOMMU *)data;
    VirtIOIOMMUDomain *d = (VirtIOIOMMUDomain *)value;
    VirtIOIOMMUEndpoint *iter;
    IOMMUMemoryRegion *mr;

    QLIST_FOREACH(iter, &d->endpoint_list, next) {
        mr = virtio_iommu_mr(s, iter->id);
        assert(mr);

        iter->domain = d;
        iter->iommu_mr = mr;
        g_tree_insert(s->endpoints, GUINT_TO_POINTER(iter->id), iter);
    }
    return false; /* continue the domain traversal */
}
static int iommu_post_load(void *opaque, int version_id)
{
    VirtIOIOMMU *s = opaque;

    g_tree_foreach(s->domains, reconstruct_endpoints, s);
    return 0;
}
static const VMStateDescription vmstate_virtio_iommu_device = {
    .name = "virtio-iommu-device",
    .version_id = 2,
    .minimum_version_id = 2,
    .post_load = iommu_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_GTREE_DIRECT_KEY_V(domains, VirtIOIOMMU, 2,
                                   &vmstate_domain, VirtIOIOMMUDomain),
        VMSTATE_UINT8_V(config.bypass, VirtIOIOMMU, 2),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_iommu = {
    .name = "virtio-iommu",
    .minimum_version_id = 2,
    .priority = MIG_PRI_IOMMU,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
};
static Property virtio_iommu_properties[] = {
    DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, "PCI", PCIBus *),
    DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true),
    DEFINE_PROP_END_OF_LIST(),
};
static void virtio_iommu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_iommu_properties);
    dc->vmsd = &vmstate_virtio_iommu;

    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
    vdc->realize = virtio_iommu_device_realize;
    vdc->unrealize = virtio_iommu_device_unrealize;
    vdc->reset = virtio_iommu_device_reset;
    vdc->get_config = virtio_iommu_get_config;
    vdc->set_config = virtio_iommu_set_config;
    vdc->get_features = virtio_iommu_get_features;
    vdc->set_status = virtio_iommu_set_status;
    vdc->vmsd = &vmstate_virtio_iommu_device;
}
static void virtio_iommu_memory_region_class_init(ObjectClass *klass,
                                                  void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = virtio_iommu_translate;
    imrc->replay = virtio_iommu_replay;
    imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
    imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
}
static const TypeInfo virtio_iommu_info = {
    .name = TYPE_VIRTIO_IOMMU,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIOIOMMU),
    .instance_init = virtio_iommu_instance_init,
    .class_init = virtio_iommu_class_init,
};

static const TypeInfo virtio_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION,
    .class_init = virtio_iommu_memory_region_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_iommu_info);
    type_register_static(&virtio_iommu_memory_region_info);
}

type_init(virtio_register_types)