/*
 * QEMU emulation of an Intel IOMMU (VT-d)
 *   (DMA Remapping device)
 *
 * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com>
 * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "exec/address-spaces.h"
#include "intel_iommu_internal.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/i386/pc.h"
#include "hw/i386/apic-msidef.h"
#include "hw/boards.h"
#include "hw/i386/x86-iommu.h"
#include "hw/pci-host/q35.h"
#include "sysemu/kvm.h"
#include "hw/i386/apic_internal.h"
#include "trace.h"
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                            uint64_t wmask, uint64_t w1cmask)
{
    stq_le_p(&s->csr[addr], val);
    stq_le_p(&s->wmask[addr], wmask);
    stq_le_p(&s->w1cmask[addr], w1cmask);
}

static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask)
{
    stq_le_p(&s->womask[addr], mask);
}

static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
                            uint32_t wmask, uint32_t w1cmask)
{
    stl_le_p(&s->csr[addr], val);
    stl_le_p(&s->wmask[addr], wmask);
    stl_le_p(&s->w1cmask[addr], w1cmask);
}

static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask)
{
    stl_le_p(&s->womask[addr], mask);
}
/* "External" get/set operations */
static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
{
    uint64_t oldval = ldq_le_p(&s->csr[addr]);
    uint64_t wmask = ldq_le_p(&s->wmask[addr]);
    uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
    stq_le_p(&s->csr[addr],
             ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
}

static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
{
    uint32_t oldval = ldl_le_p(&s->csr[addr]);
    uint32_t wmask = ldl_le_p(&s->wmask[addr]);
    uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
    stl_le_p(&s->csr[addr],
             ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
}
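
/*
 * Illustrative note (not in the original source): vtd_set_quad() and
 * vtd_set_long() implement the usual RW/W1C register semantics.  For a
 * register where bit 0 is read-write (wmask bit 0 set) and bit 31 is
 * write-1-to-clear (w1cmask bit 31 set), a guest write of 0x80000001 to
 * an old value of 0x80000000 yields:
 *
 *     ((0x80000000 & ~0x1) | (0x80000001 & 0x1)) & ~(0x80000000 & 0x80000001)
 *   = (0x80000000 | 0x1) & ~0x80000000
 *   = 0x00000001
 *
 * i.e. the RW bit is updated and the W1C status bit is cleared.
 */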
static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr)
{
    uint64_t val = ldq_le_p(&s->csr[addr]);
    uint64_t womask = ldq_le_p(&s->womask[addr]);
    return val & ~womask;
}

static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr)
{
    uint32_t val = ldl_le_p(&s->csr[addr]);
    uint32_t womask = ldl_le_p(&s->womask[addr]);
    return val & ~womask;
}
/* "Internal" get/set operations */
static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr)
{
    return ldq_le_p(&s->csr[addr]);
}

static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr)
{
    return ldl_le_p(&s->csr[addr]);
}

static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val)
{
    stq_le_p(&s->csr[addr], val);
}

static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
                                        uint32_t clear, uint32_t mask)
{
    uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask;
    stl_le_p(&s->csr[addr], new_val);
    return new_val;
}

static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
                                        uint64_t clear, uint64_t mask)
{
    uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask;
    stq_le_p(&s->csr[addr], new_val);
    return new_val;
}
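
/*
 * Illustrative usage (not in the original): the set/clear helpers do a
 * read-modify-write on the raw CSR content, e.g. setting the IP bit of
 * FECTL without touching the other bits:
 *
 *     vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
 *
 * and clearing it again with the two mask arguments swapped, as the
 * fault-event code below does.
 */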
static inline void vtd_iommu_lock(IntelIOMMUState *s)
{
    qemu_mutex_lock(&s->iommu_lock);
}

static inline void vtd_iommu_unlock(IntelIOMMUState *s)
{
    qemu_mutex_unlock(&s->iommu_lock);
}

/* Whether the address space needs to notify new mappings */
static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
{
    return as->notifier_flags & IOMMU_NOTIFIER_MAP;
}

/* GHashTable functions */
static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
{
    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
}

static guint vtd_uint64_hash(gconstpointer v)
{
    return (guint)*(const uint64_t *)v;
}

static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
                                          gpointer user_data)
{
    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
    uint16_t domain_id = *(uint16_t *)user_data;
    return entry->domain_id == domain_id;
}
/* The shift of an addr for a certain level of paging structure */
static inline uint32_t vtd_slpt_level_shift(uint32_t level)
{
    assert(level != 0);
    return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
}

static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
{
    return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
}
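
/*
 * Worked example (illustrative): with VTD_PAGE_SHIFT_4K == 12 and
 * VTD_SL_LEVEL_BITS == 9, level 1 entries shift by 12 (4K pages),
 * level 2 by 21 (2M pages) and level 3 by 30 (1G pages), so
 * vtd_slpt_level_page_mask(2) == ~((1ULL << 21) - 1), the frame mask
 * of a 2M page.
 */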
static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
                                        gpointer user_data)
{
    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
    uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
    uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
    return (entry->domain_id == info->domain_id) &&
            (((entry->gfn & info->mask) == gfn) ||
             (entry->gfn == gfn_tlb));
}
/* Reset all the gen of VTDAddressSpace to zero and set the gen of
 * IntelIOMMUState to 1. Must be called with IOMMU lock held.
 */
static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
{
    VTDAddressSpace *vtd_as;
    VTDBus *vtd_bus;
    GHashTableIter bus_it;
    uint32_t devfn_it;

    trace_vtd_context_cache_reset();

    g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);

    while (g_hash_table_iter_next(&bus_it, NULL, (void **)&vtd_bus)) {
        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
            vtd_as = vtd_bus->dev_as[devfn_it];
            if (!vtd_as) {
                continue;
            }
            vtd_as->context_cache_entry.context_cache_gen = 0;
        }
    }
    s->context_cache_gen = 1;
}
/* Must be called with IOMMU lock held. */
static void vtd_reset_iotlb_locked(IntelIOMMUState *s)
{
    g_hash_table_remove_all(s->iotlb);
}

static void vtd_reset_iotlb(IntelIOMMUState *s)
{
    vtd_iommu_lock(s);
    vtd_reset_iotlb_locked(s);
    vtd_iommu_unlock(s);
}

static void vtd_reset_caches(IntelIOMMUState *s)
{
    vtd_iommu_lock(s);
    vtd_reset_iotlb_locked(s);
    vtd_reset_context_cache_locked(s);
    vtd_iommu_unlock(s);
}
static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
                                  uint32_t level)
{
    return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
           ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
}

static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
{
    return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
}
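
/*
 * Illustrative layout note: an IOTLB key packs the gfn in the low bits,
 * the source id above it (VTD_IOTLB_SID_SHIFT) and the page-table level
 * above that (VTD_IOTLB_LVL_SHIFT), so the per-level lookups in
 * vtd_lookup_iotlb() below can probe 4K, 2M and 1G mappings of the same
 * address independently.
 */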
/* Must be called with IOMMU lock held */
static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
                                       hwaddr addr)
{
    VTDIOTLBEntry *entry;
    uint64_t key;
    int level;

    for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
        key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
                                source_id, level);
        entry = g_hash_table_lookup(s->iotlb, &key);
        if (entry) {
            goto out;
        }
    }

out:
    return entry;
}
/* Must be with IOMMU lock held */
static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
                             uint16_t domain_id, hwaddr addr, uint64_t slpte,
                             uint8_t access_flags, uint32_t level)
{
    VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
    uint64_t *key = g_malloc(sizeof(*key));
    uint64_t gfn = vtd_get_iotlb_gfn(addr, level);

    trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
    if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
        trace_vtd_iotlb_reset("iotlb exceeds size limit");
        vtd_reset_iotlb_locked(s);
    }

    entry->gfn = gfn;
    entry->domain_id = domain_id;
    entry->slpte = slpte;
    entry->access_flags = access_flags;
    entry->mask = vtd_slpt_level_page_mask(level);
    *key = vtd_get_iotlb_key(gfn, source_id, level);
    g_hash_table_replace(s->iotlb, key, entry);
}
/* Given the reg addr of both the message data and address, generate an
 * interrupt via MSI.
 */
static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
                                   hwaddr mesg_data_reg)
{
    MSIMessage msi;

    assert(mesg_data_reg < DMAR_REG_SIZE);
    assert(mesg_addr_reg < DMAR_REG_SIZE);

    msi.address = vtd_get_long_raw(s, mesg_addr_reg);
    msi.data = vtd_get_long_raw(s, mesg_data_reg);

    trace_vtd_irq_generate(msi.address, msi.data);

    apic_get_class()->send_msi(&msi);
}
/* Generate a fault event to software via MSI if conditions are met.
 * Notice that the value of FSTS_REG being passed to it should be the one
 * before any update.
 */
static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
{
    if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
        pre_fsts & VTD_FSTS_IQE) {
        error_report_once("There are previous interrupt conditions "
                          "to be serviced by software, fault event "
                          "is not generated");
        return;
    }
    vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
    if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
        error_report_once("Interrupt Mask set, irq is not generated");
    } else {
        vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
    }
}
/* Check if the Fault (F) field of the Fault Recording Register referenced by
 * @index is Set.
 */
static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index)
{
    /* Each reg is 128-bit */
    hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
    addr += 8; /* Access the high 64-bit half */

    assert(index < DMAR_FRCD_REG_NR);

    return vtd_get_quad_raw(s, addr) & VTD_FRCD_F;
}

/* Update the PPF field of Fault Status Register.
 * Should be called whenever the F field of any fault recording register
 * changes.
 */
static void vtd_update_fsts_ppf(IntelIOMMUState *s)
{
    uint32_t i;
    uint32_t ppf_mask = 0;

    for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
        if (vtd_is_frcd_set(s, i)) {
            ppf_mask = VTD_FSTS_PPF;
            break;
        }
    }
    vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
    trace_vtd_fsts_ppf(!!ppf_mask);
}

static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
{
    /* Each reg is 128-bit */
    hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
    addr += 8; /* Access the high 64-bit half */

    assert(index < DMAR_FRCD_REG_NR);

    vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F);
    vtd_update_fsts_ppf(s);
}
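
/*
 * Addressing note (illustrative): each Fault Recording Register is 128
 * bits wide, hence the "index << 4" byte offset from
 * DMAR_FRCD_REG_OFFSET above and the "+ 8" to reach the high half that
 * carries the SID, fault reason and F bit.
 */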
/* Must not update F field now, should be done later */
static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
                            uint16_t source_id, hwaddr addr,
                            VTDFaultReason fault, bool is_write)
{
    uint64_t hi = 0, lo;
    hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);

    assert(index < DMAR_FRCD_REG_NR);

    lo = VTD_FRCD_FI(addr);
    hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
    if (!is_write) {
        hi |= VTD_FRCD_T;
    }
    vtd_set_quad_raw(s, frcd_reg_addr, lo);
    vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);

    trace_vtd_frr_new(index, hi, lo);
}
/* Try to collapse multiple pending faults from the same requester */
static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
{
    uint32_t i;
    uint64_t frcd_reg;
    hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */

    for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
        frcd_reg = vtd_get_quad_raw(s, addr);
        if ((frcd_reg & VTD_FRCD_F) &&
            ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
            return true;
        }
        addr += 16; /* 128-bit for each */
    }
    return false;
}
/* Log and report a DMAR (address translation) fault to software */
static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
                                  hwaddr addr, VTDFaultReason fault,
                                  bool is_write)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);

    assert(fault < VTD_FR_MAX);

    if (fault == VTD_FR_RESERVED_ERR) {
        /* This is not a normal fault reason case. Drop it. */
        return;
    }

    trace_vtd_dmar_fault(source_id, fault, addr, is_write);

    if (fsts_reg & VTD_FSTS_PFO) {
        error_report_once("New fault is not recorded due to "
                          "Primary Fault Overflow");
        return;
    }

    if (vtd_try_collapse_fault(s, source_id)) {
        error_report_once("New fault is not recorded due to "
                          "compression of faults");
        return;
    }

    if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
        error_report_once("Next Fault Recording Reg is used, "
                          "new fault is not recorded, set PFO field");
        vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
        return;
    }

    vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);

    if (fsts_reg & VTD_FSTS_PPF) {
        error_report_once("There are pending faults already, "
                          "fault event is not generated");
        vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
        s->next_frcd_reg++;
        if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
            s->next_frcd_reg = 0;
        }
    } else {
        vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK,
                                VTD_FSTS_FRI(s->next_frcd_reg));
        vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); /* Will set PPF */
        s->next_frcd_reg++;
        if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
            s->next_frcd_reg = 0;
        }
        /* This case actually causes the PPF to be Set.
         * So generate a fault event (interrupt).
         */
        vtd_generate_fault_event(s, fsts_reg);
    }
}
/* Handle Invalidation Queue Errors of queued invalidation interface error
 * conditions.
 */
static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);

    vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE);
    vtd_generate_fault_event(s, fsts_reg);
}

/* Set the IWC field and try to generate an invalidation completion interrupt */
static void vtd_generate_completion_event(IntelIOMMUState *s)
{
    if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
        trace_vtd_inv_desc_wait_irq("One pending, skip current");
        return;
    }
    vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
    vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
    if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
        trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, "
                                    "new event not generated");
        return;
    } else {
        /* Generate the interrupt event */
        trace_vtd_inv_desc_wait_irq("Generating complete event");
        vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
    }
}
static inline bool vtd_root_entry_present(VTDRootEntry *root)
{
    return root->val & VTD_ROOT_ENTRY_P;
}

static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
                              VTDRootEntry *re)
{
    dma_addr_t addr;

    addr = s->root + index * sizeof(*re);
    if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
        trace_vtd_re_invalid(re->rsvd, re->val);
        re->val = 0;
        return -VTD_FR_ROOT_TABLE_INV;
    }
    re->val = le64_to_cpu(re->val);
    return 0;
}
static inline bool vtd_ce_present(VTDContextEntry *context)
{
    return context->lo & VTD_CONTEXT_ENTRY_P;
}

static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
                                           VTDContextEntry *ce)
{
    dma_addr_t addr;

    /* we have checked that root entry is present */
    addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
    if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
        trace_vtd_re_invalid(root->rsvd, root->val);
        return -VTD_FR_CONTEXT_TABLE_INV;
    }
    ce->lo = le64_to_cpu(ce->lo);
    ce->hi = le64_to_cpu(ce->hi);
    return 0;
}
static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce)
{
    return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
}

static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
{
    return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
}

/* Whether the pte indicates the address of the page frame */
static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
{
    return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
}
/* Get the content of a spte located in @base_addr[@index] */
static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
{
    uint64_t slpte;

    assert(index < VTD_SL_PT_ENTRY_NR);

    if (dma_memory_read(&address_space_memory,
                        base_addr + index * sizeof(slpte), &slpte,
                        sizeof(slpte))) {
        slpte = (uint64_t)-1;
        return slpte;
    }
    slpte = le64_to_cpu(slpte);
    return slpte;
}
/* Given an iova and the level of paging structure, return the offset
 * of current level.
 */
static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
{
    return (iova >> vtd_slpt_level_shift(level)) &
           ((1ULL << VTD_SL_LEVEL_BITS) - 1);
}

/* Check Capability Register to see if the @level of page-table is supported */
static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
{
    return VTD_CAP_SAGAW_MASK & s->cap &
           (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
}
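
/*
 * Worked example (illustrative): for a 48-bit guest address width the
 * context entry programs a 4-level table (vtd_ce_get_level() below
 * returns 4, and AGAW == 30 + 2 * 9 == 48), which corresponds to SAGAW
 * bit 2 in the capability register; vtd_iova_level_offset() then picks
 * bits 47:39 at level 4 down to bits 20:12 at level 1, nine bits per
 * level.
 */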
/* Get the page-table level that hardware should use for the second-level
 * page-table walk from the Address Width field of context-entry.
 */
static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce)
{
    return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW);
}

static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce)
{
    return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
}

static inline uint32_t vtd_ce_get_type(VTDContextEntry *ce)
{
    return ce->lo & VTD_CONTEXT_ENTRY_TT;
}
/* Return true if check passed, otherwise false */
static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
                                     VTDContextEntry *ce)
{
    switch (vtd_ce_get_type(ce)) {
    case VTD_CONTEXT_TT_MULTI_LEVEL:
        /* Always supported */
        break;
    case VTD_CONTEXT_TT_DEV_IOTLB:
        if (!x86_iommu->dt_supported) {
            return false;
        }
        break;
    case VTD_CONTEXT_TT_PASS_THROUGH:
        if (!x86_iommu->pt_supported) {
            return false;
        }
        break;
    default:
        /* Unknown type */
        return false;
    }
    return true;
}
static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
{
    uint32_t ce_agaw = vtd_ce_get_agaw(ce);
    return 1ULL << MIN(ce_agaw, aw);
}

/* Return true if IOVA passes range check, otherwise false. */
static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
                                        uint8_t aw)
{
    /*
     * Check if @iova is above 2^X-1, where X is the minimum of MGAW
     * in CAP_REG and AW in context-entry.
     */
    return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
}
/*
 * Rsvd field masks for spte:
 *     Index [1] to [4] 4k pages
 *     Index [5] to [8] large pages
 */
static uint64_t vtd_paging_entry_rsvd_field[9];

static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
{
    if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) {
        /* Maybe large page */
        return slpte & vtd_paging_entry_rsvd_field[level + 4];
    } else {
        return slpte & vtd_paging_entry_rsvd_field[level];
    }
}
/* Find the VTD address space associated with a given bus number */
static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
{
    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
    GHashTableIter iter;

    if (!vtd_bus) {
        /*
         * Iterate over the registered buses to find the one which
         * currently holds this bus number, and update the bus_num
         * lookup table.
         */
        g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
            if (pci_bus_num(vtd_bus->bus) == bus_num) {
                s->vtd_as_by_bus_num[bus_num] = vtd_bus;
                return vtd_bus;
            }
        }
        vtd_bus = NULL;
    }
    return vtd_bus;
}
/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
 * of the translation, can be used for deciding the size of large page.
 */
static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
                             uint64_t *slptep, uint32_t *slpte_level,
                             bool *reads, bool *writes, uint8_t aw_bits)
{
    dma_addr_t addr = vtd_ce_get_slpt_base(ce);
    uint32_t level = vtd_ce_get_level(ce);
    uint32_t offset;
    uint64_t slpte;
    uint64_t access_right_check;

    if (!vtd_iova_range_check(iova, ce, aw_bits)) {
        error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
                          __func__, iova);
        return -VTD_FR_ADDR_BEYOND_MGAW;
    }

    /* FIXME: what is the Atomics request here? */
    access_right_check = is_write ? VTD_SL_W : VTD_SL_R;

    while (true) {
        offset = vtd_iova_level_offset(iova, level);
        slpte = vtd_get_slpte(addr, offset);

        if (slpte == (uint64_t)-1) {
            error_report_once("%s: detected read error on DMAR slpte "
                              "(iova=0x%" PRIx64 ")", __func__, iova);
            if (level == vtd_ce_get_level(ce)) {
                /* Invalid programming of context-entry */
                return -VTD_FR_CONTEXT_ENTRY_INV;
            } else {
                return -VTD_FR_PAGING_ENTRY_INV;
            }
        }
        *reads = (*reads) && (slpte & VTD_SL_R);
        *writes = (*writes) && (slpte & VTD_SL_W);
        if (!(slpte & access_right_check)) {
            error_report_once("%s: detected slpte permission error "
                              "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
                              "slpte=0x%" PRIx64 ", write=%d)", __func__,
                              iova, level, slpte, is_write);
            return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
        }
        if (vtd_slpte_nonzero_rsvd(slpte, level)) {
            error_report_once("%s: detected slpte reserved non-zero "
                              "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", "
                              "slpte=0x%" PRIx64 ")", __func__, iova,
                              level, slpte);
            return -VTD_FR_PAGING_ENTRY_RSVD;
        }

        if (vtd_is_last_slpte(slpte, level)) {
            *slptep = slpte;
            *slpte_level = level;
            return 0;
        }
        addr = vtd_get_slpte_addr(slpte, aw_bits);
        level--;
    }
}
)(IOMMUTLBEntry
*entry
, void *private);
768 * Constant information used during page walking
770 * @hook_fn: hook func to be called when detected page
771 * @private: private data to be passed into hook func
772 * @notify_unmap: whether we should notify invalid entries
773 * @as: VT-d address space of the device
774 * @aw: maximum address width
775 * @domain: domain ID of the page walk
779 vtd_page_walk_hook hook_fn
;
784 } vtd_page_walk_info
;
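
/*
 * Illustrative usage (mirrors vtd_sync_shadow_page_table_range() further
 * below; my_hook is a hypothetical callback): a caller fills the constant
 * info once, then starts the walk:
 *
 *     vtd_page_walk_info info = {
 *         .hook_fn = my_hook,
 *         .private = opaque,
 *         .notify_unmap = true,
 *         .aw = s->aw_bits,
 *         .as = vtd_as,
 *         .domain_id = VTD_CONTEXT_ENTRY_DID(ce.hi),
 *     };
 *     vtd_page_walk(&ce, start, end, &info);
 */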
static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info)
{
    VTDAddressSpace *as = info->as;
    vtd_page_walk_hook hook_fn = info->hook_fn;
    void *private = info->private;
    DMAMap target = {
        .iova = entry->iova,
        .size = entry->addr_mask,
        .translated_addr = entry->translated_addr,
        .perm = entry->perm,
    };
    DMAMap *mapped = iova_tree_find(as->iova_tree, &target);

    if (entry->perm == IOMMU_NONE && !info->notify_unmap) {
        trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
        return 0;
    }

    assert(hook_fn);

    /* Update local IOVA mapped ranges */
    if (entry->perm) {
        if (mapped) {
            /* If it's exactly the same translation, skip */
            if (!memcmp(mapped, &target, sizeof(target))) {
                trace_vtd_page_walk_one_skip_map(entry->iova, entry->addr_mask,
                                                 entry->translated_addr);
                return 0;
            } else {
                /*
                 * Translation changed. Normally this should not
                 * happen, but it can happen with buggy guest
                 * OSes. Note that there will be a small window that
                 * we don't have map at all. But that's the best
                 * effort we can do. The ideal way to emulate this is
                 * atomically modify the PTE to follow what has
                 * changed, but we can't. One example is that vfio
                 * driver only has VFIO_IOMMU_[UN]MAP_DMA but no
                 * interface to modify a mapping (meanwhile it seems
                 * meaningless to even provide one). Anyway, let's
                 * mark this as a TODO in case one day we'll have
                 * a better solution.
                 */
                IOMMUAccessFlags cache_perm = entry->perm;
                int ret;

                /* Emulate an UNMAP */
                entry->perm = IOMMU_NONE;
                trace_vtd_page_walk_one(info->domain_id,
                                        entry->iova,
                                        entry->translated_addr,
                                        entry->addr_mask,
                                        entry->perm);
                ret = hook_fn(entry, private);
                if (ret) {
                    return ret;
                }
                /* Drop any existing mapping */
                iova_tree_remove(as->iova_tree, &target);
                /* Recover the correct permission */
                entry->perm = cache_perm;
            }
        }
        iova_tree_insert(as->iova_tree, &target);
    } else {
        if (!mapped) {
            /* Skip since we didn't map this range at all */
            trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
            return 0;
        }
        iova_tree_remove(as->iova_tree, &target);
    }

    trace_vtd_page_walk_one(info->domain_id, entry->iova,
                            entry->translated_addr, entry->addr_mask,
                            entry->perm);
    return hook_fn(entry, private);
}
/**
 * vtd_page_walk_level - walk over specific level for IOVA range
 *
 * @addr: base GPA addr to start the walk
 * @start: IOVA range start address
 * @end: IOVA range end address (start <= addr < end)
 * @read: whether parent level has read permission
 * @write: whether parent level has write permission
 * @info: constant information for the page walk
 */
static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
                               uint64_t end, uint32_t level, bool read,
                               bool write, vtd_page_walk_info *info)
{
    bool read_cur, write_cur, entry_valid;
    uint32_t offset;
    uint64_t slpte;
    uint64_t subpage_size, subpage_mask;
    IOMMUTLBEntry entry;
    uint64_t iova = start;
    uint64_t iova_next;
    int ret = 0;

    trace_vtd_page_walk_level(addr, level, start, end);

    subpage_size = 1ULL << vtd_slpt_level_shift(level);
    subpage_mask = vtd_slpt_level_page_mask(level);

    while (iova < end) {
        iova_next = (iova & subpage_mask) + subpage_size;

        offset = vtd_iova_level_offset(iova, level);
        slpte = vtd_get_slpte(addr, offset);

        if (slpte == (uint64_t)-1) {
            trace_vtd_page_walk_skip_read(iova, iova_next);
            goto next;
        }

        if (vtd_slpte_nonzero_rsvd(slpte, level)) {
            trace_vtd_page_walk_skip_reserve(iova, iova_next);
            goto next;
        }

        /* Permissions are stacked with parents' */
        read_cur = read && (slpte & VTD_SL_R);
        write_cur = write && (slpte & VTD_SL_W);

        /*
         * As long as we have either read/write permission, this is a
         * valid entry. The rule works for both page entries and page
         * directory entries.
         */
        entry_valid = read_cur | write_cur;

        if (!vtd_is_last_slpte(slpte, level) && entry_valid) {
            /*
             * This is a valid PDE (or even bigger than PDE). We need
             * to walk one further level.
             */
            ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw),
                                      iova, MIN(iova_next, end), level - 1,
                                      read_cur, write_cur, info);
        } else {
            /*
             * This means we are either:
             *
             * (1) the real page entry (either 4K page, or huge page)
             * (2) the whole range is invalid
             *
             * In either case, we send an IOTLB notification down.
             */
            entry.target_as = &address_space_memory;
            entry.iova = iova & subpage_mask;
            entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
            entry.addr_mask = ~subpage_mask;
            /* NOTE: this is only meaningful if entry_valid == true */
            entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw);
            ret = vtd_page_walk_one(&entry, info);
        }

        if (ret < 0) {
            return ret;
        }

next:
        iova = iova_next;
    }

    return 0;
}
/**
 * vtd_page_walk - walk specific IOVA range, and call the hook
 *
 * @ce: context entry to walk upon
 * @start: IOVA address to start the walk
 * @end: IOVA range end address (start <= addr < end)
 * @info: page walking information struct
 */
static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
                         vtd_page_walk_info *info)
{
    dma_addr_t addr = vtd_ce_get_slpt_base(ce);
    uint32_t level = vtd_ce_get_level(ce);

    if (!vtd_iova_range_check(start, ce, info->aw)) {
        return -VTD_FR_ADDR_BEYOND_MGAW;
    }

    if (!vtd_iova_range_check(end, ce, info->aw)) {
        /* Fix end so that it reaches the maximum */
        end = vtd_iova_limit(ce, info->aw);
    }

    return vtd_page_walk_level(addr, start, end, level, true, true, info);
}
/* Map a device to its corresponding domain (context-entry) */
static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
                                    uint8_t devfn, VTDContextEntry *ce)
{
    VTDRootEntry re;
    int ret_fr;
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);

    ret_fr = vtd_get_root_entry(s, bus_num, &re);
    if (ret_fr) {
        return ret_fr;
    }

    if (!vtd_root_entry_present(&re)) {
        /* Not error - it's okay we don't have root entry. */
        trace_vtd_re_not_present(bus_num);
        return -VTD_FR_ROOT_ENTRY_P;
    }

    if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) {
        trace_vtd_re_invalid(re.rsvd, re.val);
        return -VTD_FR_ROOT_ENTRY_RSVD;
    }

    ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce);
    if (ret_fr) {
        return ret_fr;
    }

    if (!vtd_ce_present(ce)) {
        /* Not error - it's okay we don't have context entry. */
        trace_vtd_ce_not_present(bus_num, devfn);
        return -VTD_FR_CONTEXT_ENTRY_P;
    }

    if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
        (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
        trace_vtd_ce_invalid(ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_RSVD;
    }

    /* Check if the programming of context-entry is valid */
    if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
        trace_vtd_ce_invalid(ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_INV;
    }

    /* Do translation type check */
    if (!vtd_ce_type_check(x86_iommu, ce)) {
        trace_vtd_ce_invalid(ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_INV;
    }

    return 0;
}
static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry,
                                     void *private)
{
    memory_region_notify_iommu((IOMMUMemoryRegion *)private, 0, *entry);
    return 0;
}

/* If context entry is NULL, we'll try to fetch it on our own. */
static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
                                            VTDContextEntry *ce,
                                            hwaddr addr, hwaddr size)
{
    IntelIOMMUState *s = vtd_as->iommu_state;
    vtd_page_walk_info info = {
        .hook_fn = vtd_sync_shadow_page_hook,
        .private = (void *)&vtd_as->iommu,
        .notify_unmap = true,
        .aw = s->aw_bits,
        .as = vtd_as,
    };
    VTDContextEntry ce_cache;
    int ret;

    if (ce) {
        /* If the caller provided context entry, use it */
        ce_cache = *ce;
    } else {
        /* If the caller didn't provide ce, try to fetch */
        ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
                                       vtd_as->devfn, &ce_cache);
        if (ret) {
            /*
             * This should not really happen, but in case it happens,
             * we just skip the sync for this time. After all we even
             * don't have the root table pointer!
             */
            error_report_once("%s: invalid context entry for bus 0x%x"
                              " devfn 0x%x",
                              __func__, pci_bus_num(vtd_as->bus),
                              vtd_as->devfn);
            return 0;
        }
    }

    info.domain_id = VTD_CONTEXT_ENTRY_DID(ce_cache.hi);

    return vtd_page_walk(&ce_cache, addr, addr + size, &info);
}

static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
{
    return vtd_sync_shadow_page_table_range(vtd_as, NULL, 0, UINT64_MAX);
}
/*
 * Fetch translation type for specific device. Returns <0 if error
 * happens, otherwise return the shifted type to check against
 * VTD_CONTEXT_TT_*.
 */
static int vtd_dev_get_trans_type(VTDAddressSpace *as)
{
    IntelIOMMUState *s;
    VTDContextEntry ce;
    int ret;

    s = as->iommu_state;

    ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
                                   as->devfn, &ce);
    if (ret) {
        return ret;
    }

    return vtd_ce_get_type(&ce);
}

static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
{
    int ret;

    assert(as);

    ret = vtd_dev_get_trans_type(as);
    if (ret < 0) {
        /*
         * Possibly failed to parse the context entry for some reason
         * (e.g., during init, or any guest configuration errors on
         * context entries). We should assume PT not enabled for
         * safety.
         */
        return false;
    }

    return ret == VTD_CONTEXT_TT_PASS_THROUGH;
}
/* Return whether the device is using IOMMU translation. */
static bool vtd_switch_address_space(VTDAddressSpace *as)
{
    bool use_iommu;
    /* Whether we need to take the BQL on our own */
    bool take_bql = !qemu_mutex_iothread_locked();

    assert(as);

    use_iommu = as->iommu_state->dmar_enabled && !vtd_dev_pt_enabled(as);

    trace_vtd_switch_address_space(pci_bus_num(as->bus),
                                   VTD_PCI_SLOT(as->devfn),
                                   VTD_PCI_FUNC(as->devfn),
                                   use_iommu);

    /*
     * It's possible that we reach here without BQL, e.g., when called
     * from vtd_pt_enable_fast_path(). However the memory APIs need
     * it. We'd better make sure we have had it already, or, take it.
     */
    if (take_bql) {
        qemu_mutex_lock_iothread();
    }

    /* Turn off first then on the other */
    if (use_iommu) {
        memory_region_set_enabled(&as->sys_alias, false);
        memory_region_set_enabled(MEMORY_REGION(&as->iommu), true);
    } else {
        memory_region_set_enabled(MEMORY_REGION(&as->iommu), false);
        memory_region_set_enabled(&as->sys_alias, true);
    }

    if (take_bql) {
        qemu_mutex_unlock_iothread();
    }

    return use_iommu;
}
static void vtd_switch_address_space_all(IntelIOMMUState *s)
{
    GHashTableIter iter;
    VTDBus *vtd_bus;
    int i;

    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
        for (i = 0; i < PCI_DEVFN_MAX; i++) {
            if (!vtd_bus->dev_as[i]) {
                continue;
            }
            vtd_switch_address_space(vtd_bus->dev_as[i]);
        }
    }
}

static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
{
    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
}
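
/*
 * Worked example (illustrative): for the device at 00:03.0, devfn is
 * (3 << 3) | 0 == 0x18, so the source id is (0x00 << 8) | 0x18 ==
 * 0x0018, matching the requester ID the device would put on the bus.
 */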
static const bool vtd_qualified_faults[] = {
    [VTD_FR_RESERVED] = false,
    [VTD_FR_ROOT_ENTRY_P] = false,
    [VTD_FR_CONTEXT_ENTRY_P] = true,
    [VTD_FR_CONTEXT_ENTRY_INV] = true,
    [VTD_FR_ADDR_BEYOND_MGAW] = true,
    [VTD_FR_WRITE] = true,
    [VTD_FR_READ] = true,
    [VTD_FR_PAGING_ENTRY_INV] = true,
    [VTD_FR_ROOT_TABLE_INV] = false,
    [VTD_FR_CONTEXT_TABLE_INV] = false,
    [VTD_FR_ROOT_ENTRY_RSVD] = false,
    [VTD_FR_PAGING_ENTRY_RSVD] = true,
    [VTD_FR_CONTEXT_ENTRY_TT] = true,
    [VTD_FR_RESERVED_ERR] = false,
    [VTD_FR_MAX] = false,
};

/* To see if a fault condition is "qualified", which is reported to software
 * only if the FPD field in the context-entry used to process the faulting
 * request is 0.
 */
static inline bool vtd_is_qualified_fault(VTDFaultReason fault)
{
    return vtd_qualified_faults[fault];
}
static inline bool vtd_is_interrupt_addr(hwaddr addr)
{
    return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}

static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
{
    VTDBus *vtd_bus;
    VTDAddressSpace *vtd_as;
    bool success = false;

    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
    if (!vtd_bus) {
        goto out;
    }

    vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
    if (!vtd_as) {
        goto out;
    }

    if (vtd_switch_address_space(vtd_as) == false) {
        /* We switched off IOMMU region successfully. */
        success = true;
    }

out:
    trace_vtd_pt_enable_fast_path(source_id, success);
}
/* Map dev to context-entry then do a paging-structures walk to do an iommu
 * translation.
 *
 * Called from RCU critical section.
 *
 * @bus_num: The bus number
 * @devfn: The devfn, which combines the device and function numbers
 * @is_write: The access is a write operation
 * @entry: IOMMUTLBEntry that contain the addr to be translated and result
 *
 * Returns true if translation is successful, otherwise false.
 */
static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
                                   uint8_t devfn, hwaddr addr, bool is_write,
                                   IOMMUTLBEntry *entry)
{
    IntelIOMMUState *s = vtd_as->iommu_state;
    VTDContextEntry ce;
    uint8_t bus_num = pci_bus_num(bus);
    VTDContextCacheEntry *cc_entry;
    uint64_t slpte, page_mask;
    uint32_t level;
    uint16_t source_id = vtd_make_source_id(bus_num, devfn);
    int ret_fr;
    bool is_fpd_set = false;
    bool reads = true;
    bool writes = true;
    uint8_t access_flags;
    VTDIOTLBEntry *iotlb_entry;

    /*
     * We have standalone memory region for interrupt addresses, we
     * should never receive translation requests in this region.
     */
    assert(!vtd_is_interrupt_addr(addr));

    vtd_iommu_lock(s);

    cc_entry = &vtd_as->context_cache_entry;

    /* Try to fetch slpte from IOTLB */
    iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
    if (iotlb_entry) {
        trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
                                 iotlb_entry->domain_id);
        slpte = iotlb_entry->slpte;
        access_flags = iotlb_entry->access_flags;
        page_mask = iotlb_entry->mask;
        goto out;
    }

    /* Try to fetch context-entry from cache first */
    if (cc_entry->context_cache_gen == s->context_cache_gen) {
        trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
                               cc_entry->context_entry.lo,
                               cc_entry->context_cache_gen);
        ce = cc_entry->context_entry;
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
    } else {
        ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
        if (ret_fr) {
            ret_fr = -ret_fr;
            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
                trace_vtd_fault_disabled();
            } else {
                vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
            }
            goto error;
        }
        /* Update context-cache */
        trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
                                  cc_entry->context_cache_gen,
                                  s->context_cache_gen);
        cc_entry->context_entry = ce;
        cc_entry->context_cache_gen = s->context_cache_gen;
    }

    /*
     * We don't need to translate for pass-through context entries.
     * Also, let's ignore IOTLB caching as well for PT devices.
     */
    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
        entry->iova = addr & VTD_PAGE_MASK_4K;
        entry->translated_addr = entry->iova;
        entry->addr_mask = ~VTD_PAGE_MASK_4K;
        entry->perm = IOMMU_RW;
        trace_vtd_translate_pt(source_id, entry->iova);

        /*
         * When this happens, it means firstly caching-mode is not
         * enabled, and this is the first passthrough translation for
         * the device. Let's enable the fast path for passthrough.
         *
         * When passthrough is disabled again for the device, we can
         * capture it via the context entry invalidation, then the
         * IOMMU region can be swapped back.
         */
        vtd_pt_enable_fast_path(s, source_id);
        vtd_iommu_unlock(s);
        return true;
    }

    ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
                               &reads, &writes, s->aw_bits);
    if (ret_fr) {
        ret_fr = -ret_fr;
        if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
            trace_vtd_fault_disabled();
        } else {
            vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
        }
        goto error;
    }

    page_mask = vtd_slpt_level_page_mask(level);
    access_flags = IOMMU_ACCESS_FLAG(reads, writes);
    vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
                     access_flags, level);
out:
    vtd_iommu_unlock(s);
    entry->iova = addr & page_mask;
    entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
    entry->addr_mask = ~page_mask;
    entry->perm = access_flags;
    return true;

error:
    vtd_iommu_unlock(s);
    entry->iova = 0;
    entry->translated_addr = 0;
    entry->addr_mask = 0;
    entry->perm = IOMMU_NONE;
    return false;
}
static void vtd_root_table_setup(IntelIOMMUState *s)
{
    s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
    s->root_extended = s->root & VTD_RTADDR_RTT;
    s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits);

    trace_vtd_reg_dmar_root(s->root, s->root_extended);
}

static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
                               uint32_t index, uint32_t mask)
{
    x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
}
static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
{
    uint64_t value = 0;
    value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
    s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
    s->intr_root = value & VTD_IRTA_ADDR_MASK(s->aw_bits);
    s->intr_eime = value & VTD_IRTA_EIME;

    /* Notify global invalidation */
    vtd_iec_notify_all(s, true, 0, 0);

    trace_vtd_reg_ir_root(s->intr_root, s->intr_size);
}
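
/*
 * Decode example (illustrative): the IRTA size field encodes
 * 2^(X + 1) interrupt remap table entries, so a register value with
 * size field 7 yields s->intr_size == 256 entries.
 */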
static void vtd_iommu_replay_all(IntelIOMMUState *s)
{
    VTDAddressSpace *vtd_as;

    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
        vtd_sync_shadow_page_table(vtd_as);
    }
}

static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
    trace_vtd_inv_desc_cc_global();
    /* Protects context cache */
    vtd_iommu_lock(s);
    s->context_cache_gen++;
    if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
        vtd_reset_context_cache_locked(s);
    }
    vtd_iommu_unlock(s);
    vtd_switch_address_space_all(s);
    /*
     * From VT-d spec 6.5.2.1, a global context entry invalidation
     * should be followed by a IOTLB global invalidation, so we should
     * be safe even without this. However, let's replay the region as
     * well to be safer, and go back here when we need finer tunes for
     * VT-d emulation codes.
     */
    vtd_iommu_replay_all(s);
}
/* Do a context-cache device-selective invalidation.
 * @func_mask: FM field after shifting
 */
static void vtd_context_device_invalidate(IntelIOMMUState *s,
                                          uint16_t source_id,
                                          uint16_t func_mask)
{
    uint16_t mask;
    VTDBus *vtd_bus;
    VTDAddressSpace *vtd_as;
    uint8_t bus_n, devfn;
    uint16_t devfn_it;

    trace_vtd_inv_desc_cc_devices(source_id, func_mask);

    switch (func_mask & 3) {
    case 0:
        mask = 0;   /* No bits in the SID field masked */
        break;
    case 1:
        mask = 4;   /* Mask bit 2 in the SID field */
        break;
    case 2:
        mask = 6;   /* Mask bit 2:1 in the SID field */
        break;
    case 3:
        mask = 7;   /* Mask bit 2:0 in the SID field */
        break;
    }
    mask = ~mask;

    bus_n = VTD_SID_TO_BUS(source_id);
    vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
    if (vtd_bus) {
        devfn = VTD_SID_TO_DEVFN(source_id);
        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
            vtd_as = vtd_bus->dev_as[devfn_it];
            if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
                trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
                                             VTD_PCI_FUNC(devfn_it));
                vtd_iommu_lock(s);
                vtd_as->context_cache_entry.context_cache_gen = 0;
                vtd_iommu_unlock(s);
                /*
                 * Do switch address space when needed, in case if the
                 * device passthrough bit is switched.
                 */
                vtd_switch_address_space(vtd_as);
                /*
                 * So a device is moving out of (or moving into) a
                 * domain, resync the shadow page table.
                 * This won't bring bad even if we have no such
                 * notifier registered - the IOMMU notification
                 * framework will skip MAP notifications if that
                 * happened.
                 */
                vtd_sync_shadow_page_table(vtd_as);
            }
        }
    }
}
/* Context-cache invalidation
 * Returns the Context Actual Invalidation Granularity.
 * @val: the content of the CCMD_REG
 */
static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
{
    uint64_t caig;
    uint64_t type = val & VTD_CCMD_CIRG_MASK;

    switch (type) {
    case VTD_CCMD_DOMAIN_INVL:
        /* Fall through */
    case VTD_CCMD_GLOBAL_INVL:
        caig = VTD_CCMD_GLOBAL_INVL_A;
        vtd_context_global_invalidate(s);
        break;

    case VTD_CCMD_DEVICE_INVL:
        caig = VTD_CCMD_DEVICE_INVL_A;
        vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val));
        break;

    default:
        error_report_once("%s: invalid context: 0x%" PRIx64,
                          __func__, val);
        caig = 0;
    }
    return caig;
}
static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
{
    trace_vtd_inv_desc_iotlb_global();
    vtd_reset_iotlb(s);
    vtd_iommu_replay_all(s);
}

static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
{
    VTDContextEntry ce;
    VTDAddressSpace *vtd_as;

    trace_vtd_inv_desc_iotlb_domain(domain_id);

    vtd_iommu_lock(s);
    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                &domain_id);
    vtd_iommu_unlock(s);

    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
                                      vtd_as->devfn, &ce) &&
            domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
            vtd_sync_shadow_page_table(vtd_as);
        }
    }
}
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
                                             uint16_t domain_id, hwaddr addr,
                                             uint8_t am)
{
    VTDAddressSpace *vtd_as;
    VTDContextEntry ce;
    int ret;
    hwaddr size = (1 << am) * VTD_PAGE_SIZE;

    QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
        ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
                                       vtd_as->devfn, &ce);
        if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
            if (vtd_as_has_map_notifier(vtd_as)) {
                /*
                 * As long as we have MAP notifications registered in
                 * any of our IOMMU notifiers, we need to sync the
                 * shadow page table.
                 */
                vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
            } else {
                /*
                 * For UNMAP-only notifiers, we don't need to walk the
                 * page tables. We just deliver the PSI down to
                 * invalidate caches.
                 */
                IOMMUTLBEntry entry = {
                    .target_as = &address_space_memory,
                    .iova = addr,
                    .translated_addr = 0,
                    .addr_mask = size - 1,
                    .perm = IOMMU_NONE,
                };
                memory_region_notify_iommu(&vtd_as->iommu, 0, entry);
            }
        }
    }
}
static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
                                      hwaddr addr, uint8_t am)
{
    VTDIOTLBPageInvInfo info;

    trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);

    assert(am <= VTD_MAMV);
    info.domain_id = domain_id;
    info.addr = addr;
    info.mask = ~((1 << am) - 1);
    vtd_iommu_lock(s);
    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
    vtd_iommu_unlock(s);
    vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
}
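
/*
 * Worked example (illustrative): am is the address mask in units of 4K
 * pages, so am == 0 invalidates a single page while am == 4 covers 16
 * contiguous pages; info.mask == ~((1 << 4) - 1) then makes
 * vtd_hash_remove_by_page() compare gfns with the low 4 bits ignored.
 */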
/* Flush IOTLB
 * Returns the IOTLB Actual Invalidation Granularity.
 * @val: the content of the IOTLB_REG
 */
static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
{
    uint64_t iaig;
    uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK;
    uint16_t domain_id;
    hwaddr addr;
    uint8_t am;

    switch (type) {
    case VTD_TLB_GLOBAL_FLUSH:
        iaig = VTD_TLB_GLOBAL_FLUSH_A;
        vtd_iotlb_global_invalidate(s);
        break;

    case VTD_TLB_DSI_FLUSH:
        domain_id = VTD_TLB_DID(val);
        iaig = VTD_TLB_DSI_FLUSH_A;
        vtd_iotlb_domain_invalidate(s, domain_id);
        break;

    case VTD_TLB_PSI_FLUSH:
        domain_id = VTD_TLB_DID(val);
        addr = vtd_get_quad_raw(s, DMAR_IVA_REG);
        am = VTD_IVA_AM(addr);
        addr = VTD_IVA_ADDR(addr);
        if (am > VTD_MAMV) {
            error_report_once("%s: address mask overflow: 0x%" PRIx64,
                              __func__, vtd_get_quad_raw(s, DMAR_IVA_REG));
            iaig = 0;
            break;
        }
        iaig = VTD_TLB_PSI_FLUSH_A;
        vtd_iotlb_page_invalidate(s, domain_id, addr, am);
        break;

    default:
        error_report_once("%s: invalid granularity: 0x%" PRIx64,
                          __func__, val);
        iaig = 0;
    }
    return iaig;
}

static void vtd_fetch_inv_desc(IntelIOMMUState *s);

static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s)
{
    return s->qi_enabled && (s->iq_tail == s->iq_head) &&
           (s->iq_last_desc_type == VTD_INV_DESC_WAIT);
}
static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
{
    uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG);

    trace_vtd_inv_qi_enable(en);

    if (en) {
        s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits);
        /* 2^(x+8) entries */
        s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
        s->qi_enabled = true;
        trace_vtd_inv_qi_setup(s->iq, s->iq_size);
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);

        if (s->iq_tail != 0) {
            /*
             * This is a spec violation but Windows guests are known to set up
             * Queued Invalidation this way so we allow the write and process
             * Invalidation Descriptors right away.
             */
            trace_vtd_warn_invalid_qi_tail(s->iq_tail);
            if (!(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
                vtd_fetch_inv_desc(s);
            }
        }
    } else {
        if (vtd_queued_inv_disable_check(s)) {
            /* disable Queued Invalidation */
            vtd_set_quad_raw(s, DMAR_IQH_REG, 0);
            s->iq_head = 0;
            s->qi_enabled = false;
            /* Ok - report back to driver */
            vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
        } else {
            error_report_once("%s: detected improper state when disable QI "
                              "(head=0x%x, tail=0x%x, last_type=%d)",
                              __func__,
                              s->iq_head, s->iq_tail, s->iq_last_desc_type);
        }
    }
}
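
/*
 * Decode example (illustrative): the QS field of IQA_REG selects
 * 2^(QS + 8) descriptors, so QS == 0 gives a 256-entry queue; with
 * 16-byte invalidation descriptors that is exactly one 4K page.
 */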
/* Set Root Table Pointer */
static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
{
    vtd_root_table_setup(s);
    /* Ok - report back to driver */
    vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
}

/* Set Interrupt Remap Table Pointer */
static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
{
    vtd_interrupt_remap_table_setup(s);
    /* Ok - report back to driver */
    vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
}

/* Handle Translation Enable/Disable */
static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
{
    if (s->dmar_enabled == en) {
        return;
    }

    trace_vtd_dmar_enable(en);

    if (en) {
        s->dmar_enabled = true;
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES);
    } else {
        s->dmar_enabled = false;

        /* Clear the index of Fault Recording Register */
        s->next_frcd_reg = 0;
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
    }

    vtd_reset_caches(s);
    vtd_switch_address_space_all(s);
}
/* Handle Interrupt Remap Enable/Disable */
static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
{
    trace_vtd_ir_enable(en);

    if (en) {
        s->intr_enabled = true;
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES);
    } else {
        s->intr_enabled = false;
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0);
    }
}

/* Handle write to Global Command Register */
static void vtd_handle_gcmd_write(IntelIOMMUState *s)
{
    uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
    uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
    uint32_t changed = status ^ val;

    trace_vtd_reg_write_gcmd(status, val);
    if (changed & VTD_GCMD_TE) {
        /* Translation enable/disable */
        vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
    }
    if (val & VTD_GCMD_SRTP) {
        /* Set/update the root-table pointer */
        vtd_handle_gcmd_srtp(s);
    }
    if (changed & VTD_GCMD_QIE) {
        /* Queued Invalidation Enable */
        vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
    }
    if (val & VTD_GCMD_SIRTP) {
        /* Set/update the interrupt remapping root-table pointer */
        vtd_handle_gcmd_sirtp(s);
    }
    if (changed & VTD_GCMD_IRE) {
        /* Interrupt remap enable/disable */
        vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
    }
}
/* Handle write to Context Command Register */
static void vtd_handle_ccmd_write(IntelIOMMUState *s)
{
    uint64_t ret;
    uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG);

    /* Context-cache invalidation request */
    if (val & VTD_CCMD_ICC) {
        if (s->qi_enabled) {
            error_report_once("Queued Invalidation enabled, "
                              "should not use register-based invalidation");
            return;
        }
        ret = vtd_context_cache_invalidate(s, val);
        /* Invalidation completed. Change something to show */
        vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL);
        ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK,
                                      ret);
    }
}

/* Handle write to IOTLB Invalidation Register */
static void vtd_handle_iotlb_write(IntelIOMMUState *s)
{
    uint64_t ret;
    uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG);

    /* IOTLB invalidation request */
    if (val & VTD_TLB_IVT) {
        if (s->qi_enabled) {
            error_report_once("Queued Invalidation enabled, "
                              "should not use register-based invalidation");
            return;
        }
        ret = vtd_iotlb_flush(s, val);
        /* Invalidation completed. Change something to show */
        vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL);
        ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG,
                                      VTD_TLB_FLUSH_GRANU_MASK_A, ret);
    }
}
/* Fetch an Invalidation Descriptor from the Invalidation Queue */
static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset,
                             VTDInvDesc *inv_desc)
{
    dma_addr_t addr = base_addr + offset * sizeof(*inv_desc);
    if (dma_memory_read(&address_space_memory, addr, inv_desc,
                        sizeof(*inv_desc))) {
        error_report_once("Read INV DESC failed");
        inv_desc->lo = 0;
        inv_desc->hi = 0;
        return false;
    }
    inv_desc->lo = le64_to_cpu(inv_desc->lo);
    inv_desc->hi = le64_to_cpu(inv_desc->hi);
    return true;
}
static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
    if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
        (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
        trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
        /* Status Write */
        uint32_t status_data = (uint32_t)(inv_desc->lo >>
                               VTD_INV_DESC_WAIT_DATA_SHIFT);

        assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF));

        /* FIXME: need to be masked with HAW? */
        dma_addr_t status_addr = inv_desc->hi;
        trace_vtd_inv_desc_wait_sw(status_addr, status_data);
        status_data = cpu_to_le32(status_data);
        if (dma_memory_write(&address_space_memory, status_addr, &status_data,
                             sizeof(status_data))) {
            trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
            return false;
        }
    } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
        /* Interrupt flag */
        vtd_generate_completion_event(s);
    } else {
        trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}
static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
                                           VTDInvDesc *inv_desc)
{
    uint16_t sid, fmask;

    if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
        trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
    case VTD_INV_DESC_CC_DOMAIN:
        trace_vtd_inv_desc_cc_domain(
            (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
        /* Fall through */
    case VTD_INV_DESC_CC_GLOBAL:
        vtd_context_global_invalidate(s);
        break;

    case VTD_INV_DESC_CC_DEVICE:
        sid = VTD_INV_DESC_CC_SID(inv_desc->lo);
        fmask = VTD_INV_DESC_CC_FM(inv_desc->lo);
        vtd_context_device_invalidate(s, sid, fmask);
        break;

    default:
        trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}
static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
    uint16_t domain_id;
    uint8_t am;
    hwaddr addr;

    if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
        (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }

    switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
    case VTD_INV_DESC_IOTLB_GLOBAL:
        vtd_iotlb_global_invalidate(s);
        break;

    case VTD_INV_DESC_IOTLB_DOMAIN:
        domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
        vtd_iotlb_domain_invalidate(s, domain_id);
        break;

    case VTD_INV_DESC_IOTLB_PAGE:
        domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
        addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
        am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
        if (am > VTD_MAMV) {
            trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
            return false;
        }
        vtd_iotlb_page_invalidate(s, domain_id, addr, am);
        break;

    default:
        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}
static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
                                     VTDInvDesc *inv_desc)
{
    trace_vtd_inv_desc_iec(inv_desc->iec.granularity,
                           inv_desc->iec.index,
                           inv_desc->iec.index_mask);

    vtd_iec_notify_all(s, !inv_desc->iec.granularity,
                       inv_desc->iec.index,
                       inv_desc->iec.index_mask);
    return true;
}
static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
                                          VTDInvDesc *inv_desc)
{
    VTDAddressSpace *vtd_dev_as;
    IOMMUTLBEntry entry;
    struct VTDBus *vtd_bus;
    hwaddr addr;
    uint64_t sz;
    uint16_t sid;
    uint8_t devfn;
    bool size;
    uint8_t bus_num;

    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
    devfn = sid & 0xff;
    bus_num = sid >> 8;
    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);

    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }

    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
    if (!vtd_bus) {
        goto done;
    }

    vtd_dev_as = vtd_bus->dev_as[devfn];
    if (!vtd_dev_as) {
        goto done;
    }

    /* According to ATS spec table 2.4:
     * S = 0, bits 15:12 = xxxx     range size: 4K
     * S = 1, bits 15:12 = xxx0     range size: 8K
     * S = 1, bits 15:12 = xx01     range size: 16K
     * S = 1, bits 15:12 = x011     range size: 32K
     * S = 1, bits 15:12 = 0111     range size: 64K
     * ...
     */
    if (size) {
        sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
        addr &= ~(sz - 1);
    } else {
        sz = VTD_PAGE_SIZE;
    }

    entry.target_as = &vtd_dev_as->as;
    entry.addr_mask = sz - 1;
    entry.iova = addr;
    entry.perm = IOMMU_NONE;
    entry.translated_addr = 0;
    memory_region_notify_iommu(&vtd_dev_as->iommu, 0, entry);

done:
    return true;
}
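
/*
 * Decode example (illustrative): with S == 1 the invalidation size is
 * encoded by the trailing ones of the page-aligned address, e.g. addr
 * bits 15:12 == 0b0111 gives cto64() == 3 and sz == 8K << 3 == 64K,
 * matching the ATS table above.
 */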
static bool vtd_process_inv_desc(IntelIOMMUState *s)
{
    VTDInvDesc inv_desc;
    uint8_t desc_type;

    trace_vtd_inv_qi_head(s->iq_head);
    if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) {
        s->iq_last_desc_type = VTD_INV_DESC_NONE;
        return false;
    }
    desc_type = inv_desc.lo & VTD_INV_DESC_TYPE;
    /* FIXME: should update at first or at last? */
    s->iq_last_desc_type = desc_type;

    switch (desc_type) {
    case VTD_INV_DESC_CC:
        trace_vtd_inv_desc("context-cache", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_context_cache_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_IOTLB:
        trace_vtd_inv_desc("iotlb", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_iotlb_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_WAIT:
        trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_wait_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_IEC:
        trace_vtd_inv_desc("iec", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_DEVICE:
        trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
            return false;
        }
        break;

    default:
        trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
        return false;
    }
    s->iq_head++;
    if (s->iq_head == s->iq_size) {
        s->iq_head = 0;
    }
    return true;
}
/* Try to fetch and process more Invalidation Descriptors */
static void vtd_fetch_inv_desc(IntelIOMMUState *s)
{
    trace_vtd_inv_qi_fetch();

    if (s->iq_tail >= s->iq_size) {
        /* Detects an invalid Tail pointer */
        error_report_once("%s: detected invalid QI tail "
                          "(tail=0x%x, size=0x%x)",
                          __func__, s->iq_tail, s->iq_size);
        vtd_handle_inv_queue_error(s);
        return;
    }
    while (s->iq_head != s->iq_tail) {
        if (!vtd_process_inv_desc(s)) {
            /* Invalidation Queue Errors */
            vtd_handle_inv_queue_error(s);
            break;
        }
        /* Must update the IQH_REG in time */
        vtd_set_quad_raw(s, DMAR_IQH_REG,
                         (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) &
                         VTD_IQH_QH_MASK);
    }
}
/* Handle write to Invalidation Queue Tail Register */
static void vtd_handle_iqt_write(IntelIOMMUState *s)
{
    uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG);

    s->iq_tail = VTD_IQT_QT(val);
    trace_vtd_inv_qi_tail(s->iq_tail);

    if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
        /* Process Invalidation Queue here */
        vtd_fetch_inv_desc(s);
    }
}

static void vtd_handle_fsts_write(IntelIOMMUState *s)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
    uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
    uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE;

    if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) {
        vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
        trace_vtd_fsts_clear_ip();
    }
    /* FIXME: when IQE is Clear, should we try to fetch some Invalidation
     * Descriptors if there are any when Queued Invalidation is enabled?
     */
}
static void vtd_handle_fectl_write(IntelIOMMUState *s)
{
    uint32_t fectl_reg;
    /* FIXME: when software clears the IM field, check the IP field. But do we
     * need to compare the old value and the new value to conclude that
     * software clears the IM field? Or just check if the IM field is zero?
     */
    fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);

    trace_vtd_reg_write_fectl(fectl_reg);

    if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) {
        vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
    }
}
static void vtd_handle_ics_write(IntelIOMMUState *s)
{
    uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG);
    uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);

    if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) {
        trace_vtd_reg_ics_clear_ip();
        vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
    }
}
static void vtd_handle_iectl_write(IntelIOMMUState *s)
{
    uint32_t iectl_reg;
    /* FIXME: when software clears the IM field, check the IP field. But do we
     * need to compare the old value and the new value to conclude that
     * software clears the IM field? Or just check if the IM field is zero?
     */
    iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);

    trace_vtd_reg_write_iectl(iectl_reg);

    if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) {
        vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
    }
}
static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
{
    IntelIOMMUState *s = opaque;
    uint64_t val;

    trace_vtd_reg_read(addr, size);

    if (addr + size > DMAR_REG_SIZE) {
        error_report_once("%s: MMIO over range: addr=0x%" PRIx64
                          " size=%u", __func__, addr, size);
        return (uint64_t)-1;
    }

    switch (addr) {
    /* Root Table Address Register, 64-bit */
    case DMAR_RTADDR_REG:
        if (size == 4) {
            val = s->root & ((1ULL << 32) - 1);
        } else {
            val = s->root;
        }
        break;

    case DMAR_RTADDR_REG_HI:
        assert(size == 4);
        val = s->root >> 32;
        break;

    /* Invalidation Queue Address Register, 64-bit */
    case DMAR_IQA_REG:
        val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS);
        if (size == 4) {
            val = val & ((1ULL << 32) - 1);
        }
        break;

    case DMAR_IQA_REG_HI:
        assert(size == 4);
        val = s->iq >> 32;
        break;

    default:
        if (size == 4) {
            val = vtd_get_long(s, addr);
        } else {
            val = vtd_get_quad(s, addr);
        }
    }

    return val;
}
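/*
 * Illustrative example of the split handling above: a guest reading
 * DMAR_RTADDR_REG with two 32-bit accesses first hits the
 * DMAR_RTADDR_REG case (returning root & 0xffffffff) and then the
 * DMAR_RTADDR_REG_HI case (returning root >> 32), so a root table at
 * 0x123456000 reads back as 0x23456000 followed by 0x1.
 */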
static void vtd_mem_write(void *opaque, hwaddr addr,
                          uint64_t val, unsigned size)
{
    IntelIOMMUState *s = opaque;

    trace_vtd_reg_write(addr, size, val);

    if (addr + size > DMAR_REG_SIZE) {
        error_report_once("%s: MMIO over range: addr=0x%" PRIx64
                          " size=%u", __func__, addr, size);
        return;
    }

    switch (addr) {
    /* Global Command Register, 32-bit */
    case DMAR_GCMD_REG:
        vtd_set_long(s, addr, val);
        vtd_handle_gcmd_write(s);
        break;

    /* Context Command Register, 64-bit */
    case DMAR_CCMD_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
            vtd_handle_ccmd_write(s);
        }
        break;

    case DMAR_CCMD_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_ccmd_write(s);
        break;

    /* IOTLB Invalidation Register, 64-bit */
    case DMAR_IOTLB_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
            vtd_handle_iotlb_write(s);
        }
        break;

    case DMAR_IOTLB_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_iotlb_write(s);
        break;

    /* Invalidate Address Register, 64-bit */
    case DMAR_IVA_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_IVA_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Fault Status Register, 32-bit */
    case DMAR_FSTS_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_fsts_write(s);
        break;

    /* Fault Event Control Register, 32-bit */
    case DMAR_FECTL_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_fectl_write(s);
        break;

    /* Fault Event Data Register, 32-bit */
    case DMAR_FEDATA_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Fault Event Address Register, 32-bit */
    case DMAR_FEADDR_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            /*
             * While the register is 32-bit only, some guests (Xen...) write
             * to it with 64-bit accesses.
             */
            vtd_set_quad(s, addr, val);
        }
        break;

    /* Fault Event Upper Address Register, 32-bit */
    case DMAR_FEUADDR_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Protected Memory Enable Register, 32-bit */
    case DMAR_PMEN_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Root Table Address Register, 64-bit */
    case DMAR_RTADDR_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_RTADDR_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Invalidation Queue Tail Register, 64-bit */
    case DMAR_IQT_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        vtd_handle_iqt_write(s);
        break;

    case DMAR_IQT_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        /* 19:63 of IQT_REG is RsvdZ, do nothing here */
        break;

    /* Invalidation Queue Address Register, 64-bit */
    case DMAR_IQA_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_IQA_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Invalidation Completion Status Register, 32-bit */
    case DMAR_ICS_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_ics_write(s);
        break;

    /* Invalidation Event Control Register, 32-bit */
    case DMAR_IECTL_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_iectl_write(s);
        break;

    /* Invalidation Event Data Register, 32-bit */
    case DMAR_IEDATA_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Invalidation Event Address Register, 32-bit */
    case DMAR_IEADDR_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Invalidation Event Upper Address Register, 32-bit */
    case DMAR_IEUADDR_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Fault Recording Registers, 128-bit */
    case DMAR_FRCD_REG_0_0:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_FRCD_REG_0_1:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    case DMAR_FRCD_REG_0_2:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
            /* May clear bit 127 (Fault), update PPF */
            vtd_update_fsts_ppf(s);
        }
        break;

    case DMAR_FRCD_REG_0_3:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        /* May clear bit 127 (Fault), update PPF */
        vtd_update_fsts_ppf(s);
        break;

    /* Interrupt Remapping Table Address Register, 64-bit */
    case DMAR_IRTA_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_IRTA_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    default:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
    }
}
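/*
 * A note on the 64-bit/32-bit split above (illustrative): for a
 * register such as DMAR_CCMD_REG the command bit (ICC) sits in the
 * upper half, so the handler runs either on a full 64-bit write or on
 * the 32-bit write to DMAR_CCMD_REG_HI, never on the low-half write
 * alone.  A guest doing two 32-bit stores therefore triggers
 * vtd_handle_ccmd_write() exactly once, after the second store.
 */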
static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
                                         IOMMUAccessFlags flag, int iommu_idx)
{
    VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
    IntelIOMMUState *s = vtd_as->iommu_state;
    IOMMUTLBEntry iotlb = {
        /* We'll fill in the rest later. */
        .target_as = &address_space_memory,
    };
    bool success;

    if (likely(s->dmar_enabled)) {
        success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
                                         addr, flag & IOMMU_WO, &iotlb);
    } else {
        /* DMAR disabled, passthrough, use 4k-page */
        iotlb.iova = addr & VTD_PAGE_MASK_4K;
        iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
        iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
        iotlb.perm = IOMMU_RW;
        success = true;
    }

    if (likely(success)) {
        trace_vtd_dmar_translate(pci_bus_num(vtd_as->bus),
                                 VTD_PCI_SLOT(vtd_as->devfn),
                                 VTD_PCI_FUNC(vtd_as->devfn),
                                 iotlb.iova, iotlb.translated_addr,
                                 iotlb.addr_mask);
    } else {
        error_report_once("%s: detected translation failure "
                          "(dev=%02x:%02x:%02x, iova=0x%" PRIx64 ")",
                          __func__, pci_bus_num(vtd_as->bus),
                          VTD_PCI_SLOT(vtd_as->devfn),
                          VTD_PCI_FUNC(vtd_as->devfn),
                          addr);
    }

    return iotlb;
}
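/*
 * Worked example of the passthrough path above (illustrative): with
 * DMAR disabled, an access to 0x12345678 yields
 *     iova = translated_addr = 0x12345678 & VTD_PAGE_MASK_4K = 0x12345000
 *     addr_mask = 0xfff
 * i.e. an identity mapping of the whole 4K page with RW permission.
 */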
static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                          IOMMUNotifierFlag old,
                                          IOMMUNotifierFlag new)
{
    VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
    IntelIOMMUState *s = vtd_as->iommu_state;

    if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
        error_report("We need to set caching-mode=1 for intel-iommu to enable "
                     "device assignment with IOMMU protection.");
        exit(1);
    }

    /* Update per-address-space notifier flags */
    vtd_as->notifier_flags = new;

    if (old == IOMMU_NOTIFIER_NONE) {
        QLIST_INSERT_HEAD(&s->vtd_as_with_notifiers, vtd_as, next);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        QLIST_REMOVE(vtd_as, next);
    }
}
static int vtd_post_load(void *opaque, int version_id)
{
    IntelIOMMUState *iommu = opaque;

    /*
     * Memory regions are dynamically turned on/off depending on
     * context entry configurations from the guest. After migration,
     * we need to make sure the memory regions are still correct.
     */
    vtd_switch_address_space_all(iommu);

    return 0;
}
static const VMStateDescription vtd_vmstate = {
    .name = "iommu-intel",
    .version_id = 1,
    .minimum_version_id = 1,
    .priority = MIG_PRI_IOMMU,
    .post_load = vtd_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(root, IntelIOMMUState),
        VMSTATE_UINT64(intr_root, IntelIOMMUState),
        VMSTATE_UINT64(iq, IntelIOMMUState),
        VMSTATE_UINT32(intr_size, IntelIOMMUState),
        VMSTATE_UINT16(iq_head, IntelIOMMUState),
        VMSTATE_UINT16(iq_tail, IntelIOMMUState),
        VMSTATE_UINT16(iq_size, IntelIOMMUState),
        VMSTATE_UINT16(next_frcd_reg, IntelIOMMUState),
        VMSTATE_UINT8_ARRAY(csr, IntelIOMMUState, DMAR_REG_SIZE),
        VMSTATE_UINT8(iq_last_desc_type, IntelIOMMUState),
        VMSTATE_BOOL(root_extended, IntelIOMMUState),
        VMSTATE_BOOL(dmar_enabled, IntelIOMMUState),
        VMSTATE_BOOL(qi_enabled, IntelIOMMUState),
        VMSTATE_BOOL(intr_enabled, IntelIOMMUState),
        VMSTATE_BOOL(intr_eime, IntelIOMMUState),
        VMSTATE_END_OF_LIST()
    }
};
static const MemoryRegionOps vtd_mem_ops = {
    .read = vtd_mem_read,
    .write = vtd_mem_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
};
static Property vtd_properties[] = {
    DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0),
    DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
                            ON_OFF_AUTO_AUTO),
    DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
    DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits,
                      VTD_HOST_ADDRESS_WIDTH),
    DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
    DEFINE_PROP_END_OF_LIST(),
};
/* Read IRTE entry with specific index */
static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
                        VTD_IR_TableEntry *entry, uint16_t sid)
{
    static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
        {0xffff, 0xfffb, 0xfff9, 0xfff8};
    dma_addr_t addr = 0x00;
    uint16_t mask, source_id;
    uint8_t bus, bus_max, bus_min;

    addr = iommu->intr_root + index * sizeof(*entry);
    if (dma_memory_read(&address_space_memory, addr, entry,
                        sizeof(*entry))) {
        error_report_once("%s: read failed: ind=0x%x addr=0x%" PRIx64,
                          __func__, index, addr);
        return -VTD_FR_IR_ROOT_INVAL;
    }

    trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
                          le64_to_cpu(entry->data[0]));

    if (!entry->irte.present) {
        error_report_once("%s: detected non-present IRTE "
                          "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
                          __func__, index, le64_to_cpu(entry->data[1]),
                          le64_to_cpu(entry->data[0]));
        return -VTD_FR_IR_ENTRY_P;
    }

    if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
        entry->irte.__reserved_2) {
        error_report_once("%s: detected non-zero reserved IRTE "
                          "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")",
                          __func__, index, le64_to_cpu(entry->data[1]),
                          le64_to_cpu(entry->data[0]));
        return -VTD_FR_IR_IRTE_RSVD;
    }

    if (sid != X86_IOMMU_SID_INVALID) {
        /* Validate IRTE SID */
        source_id = le32_to_cpu(entry->irte.source_id);
        switch (entry->irte.sid_vtype) {
        case VTD_SVT_NONE:
            break;

        case VTD_SVT_ALL:
            mask = vtd_svt_mask[entry->irte.sid_q];
            if ((source_id & mask) != (sid & mask)) {
                error_report_once("%s: invalid IRTE SID "
                                  "(index=%u, sid=%u, source_id=%u)",
                                  __func__, index, sid, source_id);
                return -VTD_FR_IR_SID_ERR;
            }
            break;

        case VTD_SVT_BUS:
            bus_max = source_id >> 8;
            bus_min = source_id & 0xff;
            bus = sid >> 8;
            if (bus > bus_max || bus < bus_min) {
                error_report_once("%s: invalid SVT_BUS "
                                  "(index=%u, bus=%u, min=%u, max=%u)",
                                  __func__, index, bus, bus_min, bus_max);
                return -VTD_FR_IR_SID_ERR;
            }
            break;

        default:
            error_report_once("%s: detected invalid IRTE SVT "
                              "(index=%u, type=%d)", __func__,
                              index, entry->irte.sid_vtype);
            /* Take this as verification failure. */
            return -VTD_FR_IR_SID_ERR;
        }
    }

    return 0;
}
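/*
 * Worked example of the SVT_ALL masks above (illustrative): a source
 * id is bus[15:8] | dev[7:3] | fn[2:0].  With sid_q == 3 the mask is
 * 0xfff8, so all three function bits are ignored: any function of
 * device 02:03 (sid 0x0218..0x021f) matches an IRTE programmed for
 * sid 0x0218.  sid_q == 0 (mask 0xffff) requires an exact match.
 */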
/* Fetch IRQ information of specific IR index */
static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
                             VTDIrq *irq, uint16_t sid)
{
    VTD_IR_TableEntry irte = {};
    int ret = 0;

    ret = vtd_irte_get(iommu, index, &irte, sid);
    if (ret) {
        return ret;
    }

    irq->trigger_mode = irte.irte.trigger_mode;
    irq->vector = irte.irte.vector;
    irq->delivery_mode = irte.irte.delivery_mode;
    irq->dest = le32_to_cpu(irte.irte.dest_id);
    if (!iommu->intr_eime) {
#define  VTD_IR_APIC_DEST_MASK         (0xff00ULL)
#define  VTD_IR_APIC_DEST_SHIFT        (8)
        irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >>
            VTD_IR_APIC_DEST_SHIFT;
    }
    irq->dest_mode = irte.irte.dest_mode;
    irq->redir_hint = irte.irte.redir_hint;

    trace_vtd_ir_remap(index, irq->trigger_mode, irq->vector,
                       irq->delivery_mode, irq->dest, irq->dest_mode);

    return 0;
}
/* Generate one MSI message from VTDIrq info */
static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
{
    VTD_MSIMessage msg = {};

    /* Generate address bits */
    msg.dest_mode = irq->dest_mode;
    msg.redir_hint = irq->redir_hint;
    msg.dest = irq->dest;
    msg.__addr_hi = irq->dest & 0xffffff00;
    msg.__addr_head = cpu_to_le32(0xfee);
    /* Keep this from original MSI address bits */
    msg.__not_used = irq->msi_addr_last_bits;

    /* Generate data bits */
    msg.vector = irq->vector;
    msg.delivery_mode = irq->delivery_mode;
    msg.level = 1;
    msg.trigger_mode = irq->trigger_mode;

    msg_out->address = msg.msi_addr;
    msg_out->data = msg.msi_data;
}
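/*
 * Worked example of the message assembled above (illustrative values):
 * a remapped interrupt with dest == 0x05, redir_hint == 0,
 * dest_mode == 0 (physical) and vector == 0x31 produces roughly
 *     address = 0xfee05000   (0xfee head, dest id in bits 19:12)
 *     data    = 0x00000031   (vector in the low byte)
 * which is the standard x86 MSI format the APIC expects.
 */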
/* Interrupt remapping for MSI/MSI-X entry */
static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
                                   MSIMessage *origin,
                                   MSIMessage *translated,
                                   uint16_t sid)
{
    int ret = 0;
    VTD_IR_MSIAddress addr;
    uint16_t index;
    VTDIrq irq = {};

    assert(origin && translated);

    trace_vtd_ir_remap_msi_req(origin->address, origin->data);

    if (!iommu || !iommu->intr_enabled) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    if (origin->address & VTD_MSI_ADDR_HI_MASK) {
        error_report_once("%s: MSI address high 32 bits non-zero detected: "
                          "address=0x%" PRIx64, __func__, origin->address);
        return -VTD_FR_IR_REQ_RSVD;
    }

    addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
    if (addr.addr.__head != 0xfee) {
        error_report_once("%s: MSI address low 32 bit invalid: 0x%" PRIx32,
                          __func__, addr.data);
        return -VTD_FR_IR_REQ_RSVD;
    }

    /* This is compatible mode. */
    if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);

#define  VTD_IR_MSI_DATA_SUBHANDLE       (0x0000ffff)
#define  VTD_IR_MSI_DATA_RESERVED        (0xffff0000)

    if (addr.addr.sub_valid) {
        /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */
        index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
    }

    ret = vtd_remap_irq_get(iommu, index, &irq, sid);
    if (ret) {
        return ret;
    }

    if (addr.addr.sub_valid) {
        trace_vtd_ir_remap_type("MSI");
        if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
            error_report_once("%s: invalid IR MSI "
                              "(sid=%u, address=0x%" PRIx64
                              ", data=0x%" PRIx32 ")",
                              __func__, sid, origin->address, origin->data);
            return -VTD_FR_IR_REQ_RSVD;
        }
    } else {
        uint8_t vector = origin->data & 0xff;
        uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;

        trace_vtd_ir_remap_type("IOAPIC");
        /* IOAPIC entry vector should be aligned with IRTE vector
         * (see vt-d spec 5.1.5.1). */
        if (vector != irq.vector) {
            trace_vtd_warn_ir_vector(sid, index, vector, irq.vector);
        }

        /* The Trigger Mode field must match the Trigger Mode in the IRTE.
         * (see vt-d spec 5.1.5.1). */
        if (trigger_mode != irq.trigger_mode) {
            trace_vtd_warn_ir_trigger(sid, index, trigger_mode,
                                      irq.trigger_mode);
        }
    }

    /*
     * Keep the last two bits of the MSI address: the guest OS may
     * rely on them, and preserving them does no harm.
     */
    irq.msi_addr_last_bits = addr.addr.__not_care;

    /* Translate VTDIrq to MSI message */
    vtd_generate_msi_message(&irq, translated);

out:
    trace_vtd_ir_remap_msi(origin->address, origin->data,
                           translated->address, translated->data);
    return 0;
}
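/*
 * Worked example of the handle decode above (illustrative): the IRTE
 * index is split across the MSI address as a 15-bit index_l plus
 * index_h as bit 15, so index_h == 1 with index_l == 0x0012 selects
 * IRTE 0x8012.  With sub_valid set, the 16-bit subhandle in the MSI
 * data is added on top, e.g. data 0x0003 remaps through IRTE 0x8015.
 */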
static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
                         MSIMessage *dst, uint16_t sid)
{
    return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
                                   src, dst, sid);
}
static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
                                   uint64_t *data, unsigned size,
                                   MemTxAttrs attrs)
{
    return MEMTX_OK;
}

static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
                                    uint64_t value, unsigned size,
                                    MemTxAttrs attrs)
{
    int ret = 0;
    MSIMessage from = {}, to = {};
    uint16_t sid = X86_IOMMU_SID_INVALID;

    from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
    from.data = (uint32_t) value;

    if (!attrs.unspecified) {
        /* We have explicit Source ID */
        sid = attrs.requester_id;
    }

    ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
    if (ret) {
        /* TODO: report error */
        /* Drop this interrupt */
        return MEMTX_ERROR;
    }

    apic_get_class()->send_msi(&to);

    return MEMTX_OK;
}

static const MemoryRegionOps vtd_mem_ir_ops = {
    .read_with_attrs = vtd_mem_ir_read,
    .write_with_attrs = vtd_mem_ir_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
{
    uintptr_t key = (uintptr_t)bus;
    VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
    VTDAddressSpace *vtd_dev_as;
    char name[128];

    if (!vtd_bus) {
        uintptr_t *new_key = g_malloc(sizeof(*new_key));
        *new_key = (uintptr_t)bus;
        /* No corresponding free() */
        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
                            PCI_DEVFN_MAX);
        vtd_bus->bus = bus;
        g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
    }

    vtd_dev_as = vtd_bus->dev_as[devfn];

    if (!vtd_dev_as) {
        snprintf(name, sizeof(name), "intel_iommu_devfn_%d", devfn);
        vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));

        vtd_dev_as->bus = bus;
        vtd_dev_as->devfn = (uint8_t)devfn;
        vtd_dev_as->iommu_state = s;
        vtd_dev_as->context_cache_entry.context_cache_gen = 0;
        vtd_dev_as->iova_tree = iova_tree_new();

        /*
         * Memory region relationships look like this (the address
         * range shows only the lower 32 bits, to keep it short):
         *
         * |-----------------+-------------------+----------|
         * | Name            | Address range     | Priority |
         * |-----------------+-------------------+----------+
         * | vtd_root        | 00000000-ffffffff |        0 |
         * |  intel_iommu    | 00000000-ffffffff |        1 |
         * |  vtd_sys_alias  | 00000000-ffffffff |        1 |
         * |  intel_iommu_ir | fee00000-feefffff |       64 |
         * |-----------------+-------------------+----------|
         *
         * We enable/disable DMAR by switching enablement for the
         * vtd_sys_alias and intel_iommu regions. The IR region is
         * always enabled.
         */
        memory_region_init_iommu(&vtd_dev_as->iommu, sizeof(vtd_dev_as->iommu),
                                 TYPE_INTEL_IOMMU_MEMORY_REGION, OBJECT(s),
                                 "intel_iommu_dmar",
                                 UINT64_MAX);
        memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s),
                                 "vtd_sys_alias", get_system_memory(),
                                 0, memory_region_size(get_system_memory()));
        memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
                              &vtd_mem_ir_ops, s, "intel_iommu_ir",
                              VTD_INTERRUPT_ADDR_SIZE);
        memory_region_init(&vtd_dev_as->root, OBJECT(s),
                           "vtd_root", UINT64_MAX);
        memory_region_add_subregion_overlap(&vtd_dev_as->root,
                                            VTD_INTERRUPT_ADDR_FIRST,
                                            &vtd_dev_as->iommu_ir, 64);
        address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name);
        memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
                                            &vtd_dev_as->sys_alias, 1);
        memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
                                            MEMORY_REGION(&vtd_dev_as->iommu),
                                            1);
        vtd_switch_address_space(vtd_dev_as);
    }
    return vtd_dev_as;
}
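/*
 * Illustrative consequence of the layering above: with DMAR disabled,
 * vtd_switch_address_space() enables vtd_sys_alias and disables the
 * intel_iommu IOMMU region, so DMA to e.g. 0x1000 flows straight to
 * system memory; with DMAR enabled the IOMMU region wins instead and
 * every access goes through vtd_iommu_translate().  The fee00000
 * interrupt window stays on top (priority 64) in both modes.
 */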
/* Unmap the whole range in the notifier's scope. */
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
{
    IOMMUTLBEntry entry;
    hwaddr size;
    hwaddr start = n->start;
    hwaddr end = n->end;
    IntelIOMMUState *s = as->iommu_state;
    DMAMap map;

    /*
     * Note: all the code in this function assumes that IOVA bits are
     * no more than VTD_MGAW bits (which is restricted by the VT-d
     * spec); otherwise we would need to consider 64-bit overflow.
     */

    if (end > VTD_ADDRESS_SIZE(s->aw_bits)) {
        /*
         * No need to unmap regions that are bigger than the whole
         * VT-d supported address space size.
         */
        end = VTD_ADDRESS_SIZE(s->aw_bits);
    }

    assert(start <= end);
    size = end - start;

    if (ctpop64(size) != 1) {
        /*
         * This size cannot form a correct mask; enlarge it to the
         * smallest power of two that covers the range.
         */
        int bits = 64 - clz64(size);
        if (bits > s->aw_bits) {
            /* should not happen, but in case it happens, limit it */
            bits = s->aw_bits;
        }
        size = 1ULL << bits;
    }

    entry.target_as = &address_space_memory;
    /* Adjust iova for the size */
    entry.iova = n->start & ~(size - 1);
    /* This field is meaningless for unmap */
    entry.translated_addr = 0;
    entry.perm = IOMMU_NONE;
    entry.addr_mask = size - 1;

    trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
                             VTD_PCI_SLOT(as->devfn),
                             VTD_PCI_FUNC(as->devfn),
                             entry.iova, size);

    map.iova = entry.iova;
    map.size = entry.addr_mask;
    iova_tree_remove(as->iova_tree, &map);

    memory_region_notify_one(n, &entry);
}
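/*
 * Worked example of the power-of-two rounding above (illustrative):
 * a notifier with start == 0 and end == 0x2fff gives size == 0x2fff,
 * not a power of two, so bits = 64 - clz64(0x2fff) = 14 and the
 * unmap is widened to size == 0x4000 with addr_mask == 0x3fff.
 * Widening is safe here because the notification only invalidates
 * mappings; it never creates any.
 */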
static void vtd_address_space_unmap_all(IntelIOMMUState *s)
{
    VTDAddressSpace *vtd_as;
    IOMMUNotifier *n;

    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
        IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
            vtd_address_space_unmap(vtd_as, n);
        }
    }
}
static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
{
    memory_region_notify_one((IOMMUNotifier *)private, entry);
    return 0;
}

static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
{
    VTDAddressSpace *vtd_as = container_of(iommu_mr, VTDAddressSpace, iommu);
    IntelIOMMUState *s = vtd_as->iommu_state;
    uint8_t bus_n = pci_bus_num(vtd_as->bus);
    VTDContextEntry ce;

    /*
     * The replay can be triggered by either an invalidation or a
     * newly created entry. No matter what, we release existing
     * mappings (it means flushing caches for UNMAP-only registers).
     */
    vtd_address_space_unmap(vtd_as, n);

    if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
        trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
                                  PCI_FUNC(vtd_as->devfn),
                                  VTD_CONTEXT_ENTRY_DID(ce.hi),
                                  ce.hi, ce.lo);
        if (vtd_as_has_map_notifier(vtd_as)) {
            /* This is required only for MAP typed notifiers */
            vtd_page_walk_info info = {
                .hook_fn = vtd_replay_hook,
                .private = (void *)n,
                .notify_unmap = false,
                .aw = s->aw_bits,
                .as = vtd_as,
                .domain_id = VTD_CONTEXT_ENTRY_DID(ce.hi),
            };

            vtd_page_walk(&ce, 0, ~0ULL, &info);
        }
    } else {
        trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
                                    PCI_FUNC(vtd_as->devfn));
    }
}
/* Do the initialization. It will also be called when reset, so pay
 * attention when adding new initialization stuff.
 */
static void vtd_init(IntelIOMMUState *s)
{
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);

    memset(s->csr, 0, DMAR_REG_SIZE);
    memset(s->wmask, 0, DMAR_REG_SIZE);
    memset(s->w1cmask, 0, DMAR_REG_SIZE);
    memset(s->womask, 0, DMAR_REG_SIZE);

    s->root = 0;
    s->root_extended = false;
    s->dmar_enabled = false;
    s->iq_head = 0;
    s->iq_tail = 0;
    s->iq = 0;
    s->iq_size = 0;
    s->qi_enabled = false;
    s->iq_last_desc_type = VTD_INV_DESC_NONE;
    s->next_frcd_reg = 0;
    s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
             VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
    if (s->aw_bits == VTD_HOST_AW_48BIT) {
        s->cap |= VTD_CAP_SAGAW_48bit;
    }
    s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;

    /*
     * Rsvd field masks for spte
     */
    vtd_paging_entry_rsvd_field[0] = ~0ULL;
    vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits);

    if (x86_iommu->intr_supported) {
        s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
        if (s->intr_eim == ON_OFF_AUTO_ON) {
            s->ecap |= VTD_ECAP_EIM;
        }
        assert(s->intr_eim != ON_OFF_AUTO_AUTO);
    }

    if (x86_iommu->dt_supported) {
        s->ecap |= VTD_ECAP_DT;
    }

    if (x86_iommu->pt_supported) {
        s->ecap |= VTD_ECAP_PT;
    }

    if (s->caching_mode) {
        s->cap |= VTD_CAP_CM;
    }

    vtd_reset_caches(s);

    /* Define registers with default values and bit semantics */
    vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
    vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0);
    vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0);
    vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0);
    vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL);
    vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0);
    vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0);
    vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0);
    vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL);

    /* Advanced Fault Logging not supported */
    vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL);
    vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0);
    vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0);
    vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0);

    /* Treated as RsvdZ when EIM in ECAP_REG is not supported
     * vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0);
     */
    vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0);

    /* Treated as RO for implementations whose PLMR and PHMR fields are
     * reported as Clear in the CAP_REG.
     * vtd_define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0);
     */
    vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0);

    vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0);
    vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0);
    vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0);
    vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL);
    vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0);
    vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0);
    vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0);
    /* Treated as RsvdZ when EIM in ECAP_REG is not supported */
    vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0);

    /* IOTLB registers */
    vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0xb003ffff00000000ULL, 0);
    vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0);
    vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL);

    /* Fault Recording Registers, 128-bit */
    vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0);
    vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);

    /*
     * Interrupt remapping registers.
     */
    vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
}
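/*
 * Illustrative reading of the register definitions above: the triple
 * (val, wmask, w1cmask) gives the reset value, the software-writable
 * bits, and the write-1-to-clear bits.  For example, DMAR_FSTS_REG is
 * defined with val 0, wmask 0, w1cmask 0x11: no bit is plainly
 * writable, but writing 1 to bit 0 (PFO) or bit 4 (IQE) clears it,
 * which is how vtd_set_long() implements the RW1C semantics.
 */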
/* Should not reset address_spaces when reset because devices will still use
 * the address space they got at first (won't ask the bus again).
 */
static void vtd_reset(DeviceState *dev)
{
    IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);

    vtd_init(s);

    /*
     * On device reset, throw away all mappings and external caches.
     */
    vtd_address_space_unmap_all(s);
}
static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
{
    IntelIOMMUState *s = opaque;
    VTDAddressSpace *vtd_as;

    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);

    vtd_as = vtd_find_add_as(s, bus, devfn);
    return &vtd_as->as;
}
static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
{
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);

    /* Currently Intel IOMMU IR only supports "kernel-irqchip={off|split}" */
    if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
        !kvm_irqchip_is_split()) {
        error_setg(errp, "Intel Interrupt Remapping cannot work with "
                         "kernel-irqchip=on, please use 'split|off'.");
        return false;
    }
    if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) {
        error_setg(errp, "eim=on cannot be selected without intremap=on");
        return false;
    }

    if (s->intr_eim == ON_OFF_AUTO_AUTO) {
        s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim)
                      && x86_iommu->intr_supported ?
                                              ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
    }
    if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
        if (!kvm_irqchip_in_kernel()) {
            error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
            return false;
        }
        if (!kvm_enable_x2apic()) {
            error_setg(errp, "eim=on requires support on the KVM side "
                             "(X2APIC_API, first shipped in v4.7)");
            return false;
        }
    }

    /* Currently only address widths supported are 39 and 48 bits */
    if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
        (s->aw_bits != VTD_HOST_AW_48BIT)) {
        error_setg(errp, "Supported values for x-aw-bits are: %d, %d",
                   VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
        return false;
    }

    return true;
}
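/*
 * A configuration that passes these checks might look like this (an
 * illustrative command line, not taken from this file):
 *
 *   qemu-system-x86_64 -machine q35,accel=kvm,kernel-irqchip=split \
 *       -device intel-iommu,intremap=on,eim=on,caching-mode=on
 *
 * kernel-irqchip=split satisfies the interrupt-remapping constraint
 * checked first, and caching-mode=on is what
 * vtd_iommu_notify_flag_changed() requires before MAP notifiers
 * (e.g. vfio device assignment) can attach.
 */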
static void vtd_realize(DeviceState *dev, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    PCMachineState *pcms = PC_MACHINE(ms);
    PCIBus *bus = pcms->bus;
    IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);

    x86_iommu->type = TYPE_INTEL;

    if (!vtd_decide_config(s, errp)) {
        return;
    }

    QLIST_INIT(&s->vtd_as_with_notifiers);
    qemu_mutex_init(&s->iommu_lock);
    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
    memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                          "intel_iommu", DMAR_REG_SIZE);
    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
    /* No corresponding destroy */
    s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
                                     g_free, g_free);
    s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash,
                                                vtd_uint64_equal,
                                                g_free, g_free);
    vtd_init(s);
    sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
    pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
    /* Pseudo address space under root PCI bus. */
    pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
}
static void vtd_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass);

    dc->reset = vtd_reset;
    dc->vmsd = &vtd_vmstate;
    dc->props = vtd_properties;
    dc->hotpluggable = false;
    x86_class->realize = vtd_realize;
    x86_class->int_remap = vtd_int_remap;
    /* Supported by the pc-q35-* machine types */
    dc->user_creatable = true;
}

static const TypeInfo vtd_info = {
    .name          = TYPE_INTEL_IOMMU_DEVICE,
    .parent        = TYPE_X86_IOMMU_DEVICE,
    .instance_size = sizeof(IntelIOMMUState),
    .class_init    = vtd_class_init,
};
static void vtd_iommu_memory_region_class_init(ObjectClass *klass,
                                               void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = vtd_iommu_translate;
    imrc->notify_flag_changed = vtd_iommu_notify_flag_changed;
    imrc->replay = vtd_iommu_replay;
}

static const TypeInfo vtd_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_INTEL_IOMMU_MEMORY_REGION,
    .class_init = vtd_iommu_memory_region_class_init,
};

static void vtd_register_types(void)
{
    type_register_static(&vtd_info);
    type_register_static(&vtd_iommu_memory_region_info);
}

type_init(vtd_register_types)