1/*
2 * QEMU emulation of an Intel IOMMU (VT-d)
3 * (DMA Remapping device)
4 *
5 * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com>
6 * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include "qemu/osdep.h"
23#include "qemu/error-report.h"
24#include "qapi/error.h"
25#include "hw/sysbus.h"
26#include "exec/address-spaces.h"
27#include "intel_iommu_internal.h"
28#include "hw/pci/pci.h"
29#include "hw/pci/pci_bus.h"
30#include "hw/i386/pc.h"
31#include "hw/i386/apic-msidef.h"
32#include "hw/boards.h"
33#include "hw/i386/x86-iommu.h"
34#include "hw/pci-host/q35.h"
35#include "sysemu/kvm.h"
36#include "hw/i386/apic_internal.h"
37#include "kvm_i386.h"
38#include "trace.h"
39
40/*#define DEBUG_INTEL_IOMMU*/
41#ifdef DEBUG_INTEL_IOMMU
42enum {
43 DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
44 DEBUG_CACHE, DEBUG_IR,
45};
46#define VTD_DBGBIT(x) (1 << DEBUG_##x)
47static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
48
49#define VTD_DPRINTF(what, fmt, ...) do { \
50 if (vtd_dbgflags & VTD_DBGBIT(what)) { \
51 fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
52 ## __VA_ARGS__); } \
53 } while (0)
54#else
55#define VTD_DPRINTF(what, fmt, ...) do {} while (0)
56#endif
57
58static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
59 uint64_t wmask, uint64_t w1cmask)
60{
61 stq_le_p(&s->csr[addr], val);
62 stq_le_p(&s->wmask[addr], wmask);
63 stq_le_p(&s->w1cmask[addr], w1cmask);
64}
65
66static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask)
67{
68 stq_le_p(&s->womask[addr], mask);
69}
70
71static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
72 uint32_t wmask, uint32_t w1cmask)
73{
74 stl_le_p(&s->csr[addr], val);
75 stl_le_p(&s->wmask[addr], wmask);
76 stl_le_p(&s->w1cmask[addr], w1cmask);
77}
78
79static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask)
80{
81 stl_le_p(&s->womask[addr], mask);
82}
83
84/* "External" get/set operations */
85static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
86{
87 uint64_t oldval = ldq_le_p(&s->csr[addr]);
88 uint64_t wmask = ldq_le_p(&s->wmask[addr]);
89 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
90 stq_le_p(&s->csr[addr],
91 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
92}
93
94static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
95{
96 uint32_t oldval = ldl_le_p(&s->csr[addr]);
97 uint32_t wmask = ldl_le_p(&s->wmask[addr]);
98 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
99 stl_le_p(&s->csr[addr],
100 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
101}
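/*
 * Illustrative note on the update rule above: bits outside wmask keep
 * their previous value no matter what is written, and a bit that is set
 * in w1cmask is cleared when software writes 1 to it (write-1-to-clear),
 * exactly as ((old & ~wmask) | (val & wmask)) & ~(w1cmask & val) encodes.
 */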
102
103static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr)
104{
105 uint64_t val = ldq_le_p(&s->csr[addr]);
106 uint64_t womask = ldq_le_p(&s->womask[addr]);
107 return val & ~womask;
108}
109
110static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr)
111{
112 uint32_t val = ldl_le_p(&s->csr[addr]);
113 uint32_t womask = ldl_le_p(&s->womask[addr]);
114 return val & ~womask;
115}
116
117/* "Internal" get/set operations */
118static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr)
119{
120 return ldq_le_p(&s->csr[addr]);
121}
122
123static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr)
124{
125 return ldl_le_p(&s->csr[addr]);
126}
127
128static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val)
129{
130 stq_le_p(&s->csr[addr], val);
131}
132
133static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
134 uint32_t clear, uint32_t mask)
135{
136 uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask;
137 stl_le_p(&s->csr[addr], new_val);
138 return new_val;
139}
140
141static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
142 uint64_t clear, uint64_t mask)
143{
144 uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask;
145 stq_le_p(&s->csr[addr], new_val);
146 return new_val;
147}
148
149/* GHashTable functions */
150static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
151{
152 return *((const uint64_t *)v1) == *((const uint64_t *)v2);
153}
154
155static guint vtd_uint64_hash(gconstpointer v)
156{
157 return (guint)*(const uint64_t *)v;
158}
159
160static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
161 gpointer user_data)
162{
163 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
164 uint16_t domain_id = *(uint16_t *)user_data;
165 return entry->domain_id == domain_id;
166}
167
168/* The shift of an addr for a certain level of paging structure */
169static inline uint32_t vtd_slpt_level_shift(uint32_t level)
170{
171 assert(level != 0);
172 return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
173}
174
175static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
176{
177 return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
178}
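/*
 * Illustrative values, assuming the usual VTD_PAGE_SHIFT_4K == 12 and
 * VTD_SL_LEVEL_BITS == 9: level 1 gives a shift of 12 (4KiB pages),
 * level 2 gives 21 (2MiB) and level 3 gives 30 (1GiB).
 */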
179
180static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
181 gpointer user_data)
182{
183 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
184 VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
185 uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
186 uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
187 return (entry->domain_id == info->domain_id) &&
188 (((entry->gfn & info->mask) == gfn) ||
189 (entry->gfn == gfn_tlb));
190}
191
192/* Reset all the gen of VTDAddressSpace to zero and set the gen of
193 * IntelIOMMUState to 1.
194 */
195static void vtd_reset_context_cache(IntelIOMMUState *s)
196{
197 VTDAddressSpace *vtd_as;
198 VTDBus *vtd_bus;
199 GHashTableIter bus_it;
200 uint32_t devfn_it;
201
202 g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
203
204 VTD_DPRINTF(CACHE, "global context_cache_gen=1");
205 while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
206 for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
207 vtd_as = vtd_bus->dev_as[devfn_it];
208 if (!vtd_as) {
209 continue;
210 }
211 vtd_as->context_cache_entry.context_cache_gen = 0;
212 }
213 }
214 s->context_cache_gen = 1;
215}
216
217static void vtd_reset_iotlb(IntelIOMMUState *s)
218{
219 assert(s->iotlb);
220 g_hash_table_remove_all(s->iotlb);
221}
222
223static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
224 uint32_t level)
225{
226 return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
227 ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
228}
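/*
 * The key packs the gfn in the low bits with the source id and the
 * page-table level above it (VTD_IOTLB_SID_SHIFT / VTD_IOTLB_LVL_SHIFT),
 * so the same gfn cached for different devices or levels cannot collide.
 */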
229
230static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
231{
232 return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
233}
234
235static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
236 hwaddr addr)
237{
238 VTDIOTLBEntry *entry;
239 uint64_t key;
240 int level;
241
242 for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
243 key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
244 source_id, level);
245 entry = g_hash_table_lookup(s->iotlb, &key);
246 if (entry) {
247 goto out;
248 }
249 }
250
251out:
252 return entry;
253}
254
255static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
256 uint16_t domain_id, hwaddr addr, uint64_t slpte,
257 bool read_flags, bool write_flags,
258 uint32_t level)
259{
260 VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
261 uint64_t *key = g_malloc(sizeof(*key));
262 uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
263
264 trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
265 if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
266 trace_vtd_iotlb_reset("iotlb exceeds size limit");
267 vtd_reset_iotlb(s);
268 }
269
270 entry->gfn = gfn;
271 entry->domain_id = domain_id;
272 entry->slpte = slpte;
273 entry->read_flags = read_flags;
274 entry->write_flags = write_flags;
275 entry->mask = vtd_slpt_level_page_mask(level);
276 *key = vtd_get_iotlb_key(gfn, source_id, level);
277 g_hash_table_replace(s->iotlb, key, entry);
278}
279
280/* Given the reg addr of both the message data and address, generate an
281 * interrupt via MSI.
282 */
283static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
284 hwaddr mesg_data_reg)
285{
286 MSIMessage msi;
287
288 assert(mesg_data_reg < DMAR_REG_SIZE);
289 assert(mesg_addr_reg < DMAR_REG_SIZE);
290
291 msi.address = vtd_get_long_raw(s, mesg_addr_reg);
292 msi.data = vtd_get_long_raw(s, mesg_data_reg);
293
294 VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32,
295 msi.address, msi.data);
296 apic_get_class()->send_msi(&msi);
297}
298
299/* Generate a fault event to software via MSI if conditions are met.
300 * Notice that the value of FSTS_REG being passed to it should be the one
301 * before any update.
302 */
303static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
304{
305 if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
306 pre_fsts & VTD_FSTS_IQE) {
307 VTD_DPRINTF(FLOG, "there are previous interrupt conditions "
308 "to be serviced by software, fault event is not generated "
309 "(FSTS_REG 0x%"PRIx32 ")", pre_fsts);
310 return;
311 }
312 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
313 if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
314 VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated");
315 } else {
316 vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
317 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
318 }
319}
320
321/* Check if the Fault (F) field of the Fault Recording Register referenced by
322 * @index is Set.
323 */
324static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index)
325{
326 /* Each reg is 128-bit */
327 hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
328 addr += 8; /* Access the high 64-bit half */
329
330 assert(index < DMAR_FRCD_REG_NR);
331
332 return vtd_get_quad_raw(s, addr) & VTD_FRCD_F;
333}
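/*
 * Each fault recording register is 128 bits wide, so register @index
 * lives at DMAR_FRCD_REG_OFFSET + index * 16, with the F bit kept in
 * the upper 64-bit half (hence the "addr += 8" above).
 */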
334
335/* Update the PPF field of Fault Status Register.
336 * Should be called whenever the F field of any fault recording
337 * register is changed.
338 */
339static void vtd_update_fsts_ppf(IntelIOMMUState *s)
340{
341 uint32_t i;
342 uint32_t ppf_mask = 0;
343
344 for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
345 if (vtd_is_frcd_set(s, i)) {
346 ppf_mask = VTD_FSTS_PPF;
347 break;
348 }
349 }
350 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
351 VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 1 : 0);
352}
353
354static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
355{
356 /* Each reg is 128-bit */
357 hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
358 addr += 8; /* Access the high 64-bit half */
359
360 assert(index < DMAR_FRCD_REG_NR);
361
362 vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F);
363 vtd_update_fsts_ppf(s);
364}
365
366/* Must not update F field now, should be done later */
367static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
368 uint16_t source_id, hwaddr addr,
369 VTDFaultReason fault, bool is_write)
370{
371 uint64_t hi = 0, lo;
372 hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
373
374 assert(index < DMAR_FRCD_REG_NR);
375
376 lo = VTD_FRCD_FI(addr);
377 hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
378 if (!is_write) {
379 hi |= VTD_FRCD_T;
380 }
381 vtd_set_quad_raw(s, frcd_reg_addr, lo);
382 vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
383 VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64
384 ", lo 0x%"PRIx64, index, hi, lo);
385}
386
387/* Try to collapse multiple pending faults from the same requester */
388static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
389{
390 uint32_t i;
391 uint64_t frcd_reg;
392 hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */
393
394 for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
395 frcd_reg = vtd_get_quad_raw(s, addr);
396 VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg);
397 if ((frcd_reg & VTD_FRCD_F) &&
398 ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
399 return true;
400 }
401 addr += 16; /* 128-bit for each */
402 }
403 return false;
404}
405
406/* Log and report a DMAR (address translation) fault to software */
407static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
408 hwaddr addr, VTDFaultReason fault,
409 bool is_write)
410{
411 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
412
413 assert(fault < VTD_FR_MAX);
414
415 if (fault == VTD_FR_RESERVED_ERR) {
416 /* This is not a normal fault reason case. Drop it. */
417 return;
418 }
419 VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
420 ", is_write %d", source_id, fault, addr, is_write);
421 if (fsts_reg & VTD_FSTS_PFO) {
422 VTD_DPRINTF(FLOG, "new fault is not recorded due to "
423 "Primary Fault Overflow");
424 return;
425 }
426 if (vtd_try_collapse_fault(s, source_id)) {
427 VTD_DPRINTF(FLOG, "new fault is not recorded due to "
428 "compression of faults");
429 return;
430 }
431 if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
432 VTD_DPRINTF(FLOG, "Primary Fault Overflow and "
433 "new fault is not recorded, set PFO field");
434 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
435 return;
436 }
437
438 vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
439
440 if (fsts_reg & VTD_FSTS_PPF) {
441 VTD_DPRINTF(FLOG, "there are pending faults already, "
442 "fault event is not generated");
443 vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
444 s->next_frcd_reg++;
445 if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
446 s->next_frcd_reg = 0;
447 }
448 } else {
449 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK,
450 VTD_FSTS_FRI(s->next_frcd_reg));
451 vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); /* Will set PPF */
452 s->next_frcd_reg++;
453 if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
454 s->next_frcd_reg = 0;
455 }
456 /* This case actually causes the PPF to be Set.
457 * So generate fault event (interrupt).
458 */
459 vtd_generate_fault_event(s, fsts_reg);
460 }
461}
462
463/* Handle Invalidation Queue Errors of queued invalidation interface error
464 * conditions.
465 */
466static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
467{
468 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
469
470 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE);
471 vtd_generate_fault_event(s, fsts_reg);
472}
473
474/* Set the IWC field and try to generate an invalidation completion interrupt */
475static void vtd_generate_completion_event(IntelIOMMUState *s)
476{
477 if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
478 trace_vtd_inv_desc_wait_irq("One pending, skip current");
479 return;
480 }
481 vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
482 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
483 if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
484 trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, "
485 "new event not generated");
486 return;
487 } else {
488 /* Generate the interrupt event */
489 trace_vtd_inv_desc_wait_irq("Generating complete event");
490 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
491 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
492 }
493}
494
495static inline bool vtd_root_entry_present(VTDRootEntry *root)
496{
497 return root->val & VTD_ROOT_ENTRY_P;
498}
499
500static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
501 VTDRootEntry *re)
502{
503 dma_addr_t addr;
504
505 addr = s->root + index * sizeof(*re);
506 if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
507 trace_vtd_re_invalid(re->rsvd, re->val);
508 re->val = 0;
509 return -VTD_FR_ROOT_TABLE_INV;
510 }
511 re->val = le64_to_cpu(re->val);
512 return 0;
513}
514
515static inline bool vtd_ce_present(VTDContextEntry *context)
516{
517 return context->lo & VTD_CONTEXT_ENTRY_P;
518}
519
520static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
521 VTDContextEntry *ce)
522{
523 dma_addr_t addr;
524
525 /* we have checked that root entry is present */
526 addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
527 if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
528 trace_vtd_re_invalid(root->rsvd, root->val);
529 return -VTD_FR_CONTEXT_TABLE_INV;
530 }
531 ce->lo = le64_to_cpu(ce->lo);
532 ce->hi = le64_to_cpu(ce->hi);
533 return 0;
534}
535
536static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce)
537{
538 return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
539}
540
541static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
542{
543 return slpte & VTD_SL_PT_BASE_ADDR_MASK;
544}
545
546/* Whether the pte indicates the address of the page frame */
547static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
548{
549 return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
550}
551
552/* Get the content of a spte located in @base_addr[@index] */
553static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
554{
555 uint64_t slpte;
556
557 assert(index < VTD_SL_PT_ENTRY_NR);
558
559 if (dma_memory_read(&address_space_memory,
560 base_addr + index * sizeof(slpte), &slpte,
561 sizeof(slpte))) {
562 slpte = (uint64_t)-1;
563 return slpte;
564 }
565 slpte = le64_to_cpu(slpte);
566 return slpte;
567}
568
569/* Given an iova and the level of paging structure, return the offset
570 * of current level.
571 */
572static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
573{
574 return (iova >> vtd_slpt_level_shift(level)) &
575 ((1ULL << VTD_SL_LEVEL_BITS) - 1);
576}
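/*
 * Illustrative example, assuming the usual 4KiB page shift and 9-bit
 * table indexes: level 1 selects iova bits 20:12, level 2 selects bits
 * 29:21 and level 3 selects bits 38:30.
 */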
577
578/* Check Capability Register to see if the @level of page-table is supported */
579static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
580{
581 return VTD_CAP_SAGAW_MASK & s->cap &
582 (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
583}
584
585/* Get the page-table level that hardware should use for the second-level
586 * page-table walk from the Address Width field of context-entry.
587 */
588static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce)
589{
590 return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW);
591}
592
593static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce)
594{
595 return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
596}
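/*
 * For example, AW == 1 yields a 39-bit AGAW (3-level table) and AW == 2
 * yields 48 bits (4-level), consistent with vtd_ce_get_level() above.
 */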
597
598static inline uint32_t vtd_ce_get_type(VTDContextEntry *ce)
599{
600 return ce->lo & VTD_CONTEXT_ENTRY_TT;
601}
602
603/* Return true if check passed, otherwise false */
604static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
605 VTDContextEntry *ce)
606{
607 switch (vtd_ce_get_type(ce)) {
608 case VTD_CONTEXT_TT_MULTI_LEVEL:
609 /* Always supported */
610 break;
611 case VTD_CONTEXT_TT_DEV_IOTLB:
612 if (!x86_iommu->dt_supported) {
613 return false;
614 }
615 break;
616 case VTD_CONTEXT_TT_PASS_THROUGH:
617 if (!x86_iommu->pt_supported) {
618 return false;
619 }
620 break;
621 default:
622 /* Unknown type */
623 return false;
624 }
625 return true;
626}
627
628static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
629{
630 uint32_t ce_agaw = vtd_ce_get_agaw(ce);
631 return 1ULL << MIN(ce_agaw, VTD_MGAW);
632}
633
634/* Return true if IOVA passes range check, otherwise false. */
635static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
636{
637 /*
638 * Check if @iova is above 2^X-1, where X is the minimum of MGAW
639 * in CAP_REG and AW in context-entry.
640 */
641 return !(iova & ~(vtd_iova_limit(ce) - 1));
642}
643
644static const uint64_t vtd_paging_entry_rsvd_field[] = {
645 [0] = ~0ULL,
646 /* For not large page */
647 [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
648 [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
649 [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
650 [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
651 /* For large page */
652 [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
653 [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
654 [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
655 [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
656};
657
658static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
659{
660 if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) {
661 /* Maybe large page */
662 return slpte & vtd_paging_entry_rsvd_field[level + 4];
663 } else {
664 return slpte & vtd_paging_entry_rsvd_field[level];
665 }
666}
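/*
 * Example: a 2MiB mapping (level 2 entry with the PS bit set) is checked
 * against vtd_paging_entry_rsvd_field[6] (level + 4), while an ordinary
 * level-2 table entry uses index 2.
 */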
667
668/* Find the VTD address space associated with a given bus number */
669static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
670{
671 VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
672 if (!vtd_bus) {
673 /*
674 * Iterate over the registered buses to find the one which
675 * currently hold this bus number, and update the bus_num
676 * lookup table:
677 */
678 GHashTableIter iter;
679
680 g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
681 while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
682 if (pci_bus_num(vtd_bus->bus) == bus_num) {
683 s->vtd_as_by_bus_num[bus_num] = vtd_bus;
684 return vtd_bus;
685 }
686 }
687 }
688 return vtd_bus;
689}
690
691/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
692 * of the translation, can be used for deciding the size of large page.
693 */
694static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
695 uint64_t *slptep, uint32_t *slpte_level,
696 bool *reads, bool *writes)
697{
698 dma_addr_t addr = vtd_ce_get_slpt_base(ce);
699 uint32_t level = vtd_ce_get_level(ce);
700 uint32_t offset;
701 uint64_t slpte;
702 uint64_t access_right_check;
703
704 if (!vtd_iova_range_check(iova, ce)) {
705 VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
706 return -VTD_FR_ADDR_BEYOND_MGAW;
707 }
708
709 /* FIXME: what is the Atomics request here? */
710 access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
711
712 while (true) {
713 offset = vtd_iova_level_offset(iova, level);
714 slpte = vtd_get_slpte(addr, offset);
715
716 if (slpte == (uint64_t)-1) {
717 VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
718 "entry at level %"PRIu32 " for iova 0x%"PRIx64,
719 level, iova);
720 if (level == vtd_ce_get_level(ce)) {
721 /* Invalid programming of context-entry */
722 return -VTD_FR_CONTEXT_ENTRY_INV;
723 } else {
724 return -VTD_FR_PAGING_ENTRY_INV;
725 }
726 }
727 *reads = (*reads) && (slpte & VTD_SL_R);
728 *writes = (*writes) && (slpte & VTD_SL_W);
729 if (!(slpte & access_right_check)) {
730 VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
731 "iova 0x%"PRIx64 " slpte 0x%"PRIx64,
732 (is_write ? "write" : "read"), iova, slpte);
733 return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
734 }
735 if (vtd_slpte_nonzero_rsvd(slpte, level)) {
736 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second "
737 "level paging entry level %"PRIu32 " slpte 0x%"PRIx64,
738 level, slpte);
739 return -VTD_FR_PAGING_ENTRY_RSVD;
740 }
741
742 if (vtd_is_last_slpte(slpte, level)) {
743 *slptep = slpte;
744 *slpte_level = level;
745 return 0;
746 }
747 addr = vtd_get_slpte_addr(slpte);
748 level--;
749 }
750}
751
752typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
753
754/**
755 * vtd_page_walk_level - walk over specific level for IOVA range
756 *
757 * @addr: base GPA addr to start the walk
758 * @start: IOVA range start address
759 * @end: IOVA range end address (start <= addr < end)
760 * @hook_fn: hook func to be called when detected page
761 * @private: private data to be passed into hook func
762 * @read: whether parent level has read permission
763 * @write: whether parent level has write permission
764 * @notify_unmap: whether we should notify invalid entries
765 */
766static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
767 uint64_t end, vtd_page_walk_hook hook_fn,
768 void *private, uint32_t level,
769 bool read, bool write, bool notify_unmap)
770{
771 bool read_cur, write_cur, entry_valid;
772 uint32_t offset;
773 uint64_t slpte;
774 uint64_t subpage_size, subpage_mask;
775 IOMMUTLBEntry entry;
776 uint64_t iova = start;
777 uint64_t iova_next;
778 int ret = 0;
779
780 trace_vtd_page_walk_level(addr, level, start, end);
781
782 subpage_size = 1ULL << vtd_slpt_level_shift(level);
783 subpage_mask = vtd_slpt_level_page_mask(level);
784
785 while (iova < end) {
786 iova_next = (iova & subpage_mask) + subpage_size;
787
788 offset = vtd_iova_level_offset(iova, level);
789 slpte = vtd_get_slpte(addr, offset);
790
791 if (slpte == (uint64_t)-1) {
792 trace_vtd_page_walk_skip_read(iova, iova_next);
793 goto next;
794 }
795
796 if (vtd_slpte_nonzero_rsvd(slpte, level)) {
797 trace_vtd_page_walk_skip_reserve(iova, iova_next);
798 goto next;
799 }
800
801 /* Permissions are stacked with parents' */
802 read_cur = read && (slpte & VTD_SL_R);
803 write_cur = write && (slpte & VTD_SL_W);
804
805 /*
806 * As long as we have either read/write permission, this is a
807 * valid entry. The rule works for both page entries and page
808 * table entries.
809 */
810 entry_valid = read_cur | write_cur;
811
812 if (vtd_is_last_slpte(slpte, level)) {
813 entry.target_as = &address_space_memory;
814 entry.iova = iova & subpage_mask;
815 /* NOTE: this is only meaningful if entry_valid == true */
816 entry.translated_addr = vtd_get_slpte_addr(slpte);
817 entry.addr_mask = ~subpage_mask;
818 entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
819 if (!entry_valid && !notify_unmap) {
820 trace_vtd_page_walk_skip_perm(iova, iova_next);
821 goto next;
822 }
823 trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr,
824 entry.addr_mask, entry.perm);
825 if (hook_fn) {
826 ret = hook_fn(&entry, private);
827 if (ret < 0) {
828 return ret;
829 }
830 }
831 } else {
832 if (!entry_valid) {
833 trace_vtd_page_walk_skip_perm(iova, iova_next);
834 goto next;
835 }
836 ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova,
837 MIN(iova_next, end), hook_fn, private,
838 level - 1, read_cur, write_cur,
839 notify_unmap);
840 if (ret < 0) {
841 return ret;
842 }
843 }
844
845next:
846 iova = iova_next;
847 }
848
849 return 0;
850}
851
852/**
853 * vtd_page_walk - walk specific IOVA range, and call the hook
854 *
855 * @ce: context entry to walk upon
856 * @start: IOVA address to start the walk
857 * @end: IOVA range end address (start <= addr < end)
858 * @hook_fn: the hook that to be called for each detected area
859 * @private: private data for the hook function
860 */
861static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
862 vtd_page_walk_hook hook_fn, void *private,
863 bool notify_unmap)
864{
865 dma_addr_t addr = vtd_ce_get_slpt_base(ce);
866 uint32_t level = vtd_ce_get_level(ce);
867
868 if (!vtd_iova_range_check(start, ce)) {
869 return -VTD_FR_ADDR_BEYOND_MGAW;
870 }
871
872 if (!vtd_iova_range_check(end, ce)) {
873 /* Fix end so that it reaches the maximum */
874 end = vtd_iova_limit(ce);
875 }
876
877 return vtd_page_walk_level(addr, start, end, hook_fn, private,
878 level, true, true, notify_unmap);
879}
880
881/* Map a device to its corresponding domain (context-entry) */
882static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
883 uint8_t devfn, VTDContextEntry *ce)
884{
885 VTDRootEntry re;
886 int ret_fr;
887 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
888
889 ret_fr = vtd_get_root_entry(s, bus_num, &re);
890 if (ret_fr) {
891 return ret_fr;
892 }
893
894 if (!vtd_root_entry_present(&re)) {
895 /* Not error - it's okay we don't have root entry. */
896 trace_vtd_re_not_present(bus_num);
897 return -VTD_FR_ROOT_ENTRY_P;
898 }
899
900 if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
901 trace_vtd_re_invalid(re.rsvd, re.val);
902 return -VTD_FR_ROOT_ENTRY_RSVD;
903 }
904
905 ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce);
906 if (ret_fr) {
907 return ret_fr;
908 }
909
910 if (!vtd_ce_present(ce)) {
911 /* Not error - it's okay we don't have context entry. */
912 trace_vtd_ce_not_present(bus_num, devfn);
913 return -VTD_FR_CONTEXT_ENTRY_P;
914 }
915
916 if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
917 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
918 trace_vtd_ce_invalid(ce->hi, ce->lo);
919 return -VTD_FR_CONTEXT_ENTRY_RSVD;
920 }
921
922 /* Check if the programming of context-entry is valid */
923 if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
924 trace_vtd_ce_invalid(ce->hi, ce->lo);
925 return -VTD_FR_CONTEXT_ENTRY_INV;
926 }
927
928 /* Do translation type check */
929 if (!vtd_ce_type_check(x86_iommu, ce)) {
930 trace_vtd_ce_invalid(ce->hi, ce->lo);
931 return -VTD_FR_CONTEXT_ENTRY_INV;
932 }
933
934 return 0;
935}
936
937/*
938 * Fetch translation type for specific device. Returns <0 if error
939 * happens, otherwise return the shifted type to check against
940 * VTD_CONTEXT_TT_*.
941 */
942static int vtd_dev_get_trans_type(VTDAddressSpace *as)
943{
944 IntelIOMMUState *s;
945 VTDContextEntry ce;
946 int ret;
947
948 s = as->iommu_state;
949
950 ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
951 as->devfn, &ce);
952 if (ret) {
953 return ret;
954 }
955
956 return vtd_ce_get_type(&ce);
957}
958
959static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
960{
961 int ret;
962
963 assert(as);
964
965 ret = vtd_dev_get_trans_type(as);
966 if (ret < 0) {
967 /*
968 * Possibly failed to parse the context entry for some reason
969 * (e.g., during init, or any guest configuration errors on
970 * context entries). We should assume PT not enabled for
971 * safety.
972 */
973 return false;
974 }
975
976 return ret == VTD_CONTEXT_TT_PASS_THROUGH;
977}
978
979/* Return whether the device is using IOMMU translation. */
980static bool vtd_switch_address_space(VTDAddressSpace *as)
981{
982 bool use_iommu;
983
984 assert(as);
985
986 use_iommu = as->iommu_state->dmar_enabled & !vtd_dev_pt_enabled(as);
987
988 trace_vtd_switch_address_space(pci_bus_num(as->bus),
989 VTD_PCI_SLOT(as->devfn),
990 VTD_PCI_FUNC(as->devfn),
991 use_iommu);
992
993 /* Turn off first then on the other */
994 if (use_iommu) {
995 memory_region_set_enabled(&as->sys_alias, false);
996 memory_region_set_enabled(&as->iommu, true);
997 } else {
998 memory_region_set_enabled(&as->iommu, false);
999 memory_region_set_enabled(&as->sys_alias, true);
1000 }
1001
1002 return use_iommu;
1003}
1004
1005static void vtd_switch_address_space_all(IntelIOMMUState *s)
1006{
1007 GHashTableIter iter;
1008 VTDBus *vtd_bus;
1009 int i;
1010
1011 g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
1012 while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
1013 for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) {
1014 if (!vtd_bus->dev_as[i]) {
1015 continue;
1016 }
1017 vtd_switch_address_space(vtd_bus->dev_as[i]);
1018 }
1019 }
1020}
1021
1022static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
1023{
1024 return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
1025}
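/*
 * Example: bus 0x00, device 0x02, function 0 (devfn 0x10) yields the
 * source id 0x0010, i.e. the PCI requester ID (bus << 8 | devfn).
 */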
1026
1027static const bool vtd_qualified_faults[] = {
1028 [VTD_FR_RESERVED] = false,
1029 [VTD_FR_ROOT_ENTRY_P] = false,
1030 [VTD_FR_CONTEXT_ENTRY_P] = true,
1031 [VTD_FR_CONTEXT_ENTRY_INV] = true,
1032 [VTD_FR_ADDR_BEYOND_MGAW] = true,
1033 [VTD_FR_WRITE] = true,
1034 [VTD_FR_READ] = true,
1035 [VTD_FR_PAGING_ENTRY_INV] = true,
1036 [VTD_FR_ROOT_TABLE_INV] = false,
1037 [VTD_FR_CONTEXT_TABLE_INV] = false,
1038 [VTD_FR_ROOT_ENTRY_RSVD] = false,
1039 [VTD_FR_PAGING_ENTRY_RSVD] = true,
1040 [VTD_FR_CONTEXT_ENTRY_TT] = true,
1041 [VTD_FR_RESERVED_ERR] = false,
1042 [VTD_FR_MAX] = false,
1043};
1044
1045/* To see if a fault condition is "qualified", which is reported to software
1046 * only if the FPD field in the context-entry used to process the faulting
1047 * request is 0.
1048 */
1049static inline bool vtd_is_qualified_fault(VTDFaultReason fault)
1050{
1051 return vtd_qualified_faults[fault];
1052}
1053
1054static inline bool vtd_is_interrupt_addr(hwaddr addr)
1055{
1056 return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
1057}
1058
1059static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
1060{
1061 VTDBus *vtd_bus;
1062 VTDAddressSpace *vtd_as;
1063 bool success = false;
1064
1065 vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
1066 if (!vtd_bus) {
1067 goto out;
1068 }
1069
1070 vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
1071 if (!vtd_as) {
1072 goto out;
1073 }
1074
1075 if (vtd_switch_address_space(vtd_as) == false) {
1076 /* We switched off IOMMU region successfully. */
1077 success = true;
1078 }
1079
1080out:
1081 trace_vtd_pt_enable_fast_path(source_id, success);
1082}
1083
1084/* Map dev to context-entry then do a paging-structures walk to do an iommu
1085 * translation.
1086 *
1087 * Called from RCU critical section.
1088 *
1089 * @bus_num: The bus number
1090 * @devfn: The devfn, which is the combination of device and function number
1091 * @is_write: The access is a write operation
1092 * @entry: IOMMUTLBEntry that contain the addr to be translated and result
1093 */
1094static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
1095 uint8_t devfn, hwaddr addr, bool is_write,
1096 IOMMUTLBEntry *entry)
1097{
1098 IntelIOMMUState *s = vtd_as->iommu_state;
1099 VTDContextEntry ce;
1100 uint8_t bus_num = pci_bus_num(bus);
1101 VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
1102 uint64_t slpte, page_mask;
1103 uint32_t level;
1104 uint16_t source_id = vtd_make_source_id(bus_num, devfn);
1105 int ret_fr;
1106 bool is_fpd_set = false;
1107 bool reads = true;
1108 bool writes = true;
1109 VTDIOTLBEntry *iotlb_entry;
1110
1111 /*
1112 * We have standalone memory region for interrupt addresses, we
1113 * should never receive translation requests in this region.
1114 */
1115 assert(!vtd_is_interrupt_addr(addr));
1116
1117 /* Try to fetch slpte from IOTLB */
1118 iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
1119 if (iotlb_entry) {
1120 trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
1121 iotlb_entry->domain_id);
1122 slpte = iotlb_entry->slpte;
1123 reads = iotlb_entry->read_flags;
1124 writes = iotlb_entry->write_flags;
1125 page_mask = iotlb_entry->mask;
1126 goto out;
1127 }
1128 /* Try to fetch context-entry from cache first */
1129 if (cc_entry->context_cache_gen == s->context_cache_gen) {
1130 trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
1131 cc_entry->context_entry.lo,
1132 cc_entry->context_cache_gen);
1133 ce = cc_entry->context_entry;
1134 is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
1135 } else {
1136 ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
1137 is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
1138 if (ret_fr) {
1139 ret_fr = -ret_fr;
1140 if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
1141 trace_vtd_fault_disabled();
1142 } else {
1143 vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
1144 }
1145 return;
1146 }
1147 /* Update context-cache */
1148 trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
1149 cc_entry->context_cache_gen,
1150 s->context_cache_gen);
1151 cc_entry->context_entry = ce;
1152 cc_entry->context_cache_gen = s->context_cache_gen;
1153 }
1154
1155 /*
1156 * We don't need to translate for pass-through context entries.
1157 * Also, let's ignore IOTLB caching as well for PT devices.
1158 */
1159 if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
1160 entry->translated_addr = entry->iova;
1161 entry->addr_mask = VTD_PAGE_SIZE - 1;
1162 entry->perm = IOMMU_RW;
1163 trace_vtd_translate_pt(source_id, entry->iova);
1164
1165 /*
1166 * When this happens, it means firstly caching-mode is not
1167 * enabled, and this is the first passthrough translation for
1168 * the device. Let's enable the fast path for passthrough.
1169 *
1170 * When passthrough is disabled again for the device, we can
1171 * capture it via the context entry invalidation, then the
1172 * IOMMU region can be swapped back.
1173 */
1174 vtd_pt_enable_fast_path(s, source_id);
1175
1176 return;
1177 }
1178
1179 ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
1180 &reads, &writes);
1181 if (ret_fr) {
1182 ret_fr = -ret_fr;
1183 if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
1184 trace_vtd_fault_disabled();
1185 } else {
1186 vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
1187 }
1188 return;
1189 }
1190
1191 page_mask = vtd_slpt_level_page_mask(level);
1192 vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
1193 reads, writes, level);
1194out:
1195 entry->iova = addr & page_mask;
1196 entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
1197 entry->addr_mask = ~page_mask;
1198 entry->perm = IOMMU_ACCESS_FLAG(reads, writes);
1199}
1200
1201static void vtd_root_table_setup(IntelIOMMUState *s)
1202{
1203 s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
1204 s->root_extended = s->root & VTD_RTADDR_RTT;
1205 s->root &= VTD_RTADDR_ADDR_MASK;
1206
1207 VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root,
1208 (s->root_extended ? "(extended)" : ""));
1209}
1210
1211static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
1212 uint32_t index, uint32_t mask)
1213{
1214 x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
1215}
1216
1217static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
1218{
1219 uint64_t value = 0;
1220 value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
1221 s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
1222 s->intr_root = value & VTD_IRTA_ADDR_MASK;
1223 s->intr_eime = value & VTD_IRTA_EIME;
1224
1225 /* Notify global invalidation */
1226 vtd_iec_notify_all(s, true, 0, 0);
1227
1228 VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32,
1229 s->intr_root, s->intr_size);
1230}
1231
1232static void vtd_iommu_replay_all(IntelIOMMUState *s)
1233{
1234 IntelIOMMUNotifierNode *node;
1235
1236 QLIST_FOREACH(node, &s->notifiers_list, next) {
1237 memory_region_iommu_replay_all(&node->vtd_as->iommu);
1238 }
1239}
1240
1241static void vtd_context_global_invalidate(IntelIOMMUState *s)
1242{
1243 trace_vtd_inv_desc_cc_global();
1244 s->context_cache_gen++;
1245 if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
1246 vtd_reset_context_cache(s);
1247 }
1248 vtd_switch_address_space_all(s);
1249 /*
1250 * From VT-d spec 6.5.2.1, a global context entry invalidation
1251 * should be followed by a IOTLB global invalidation, so we should
1252 * be safe even without this. However, let's replay the region as
1253 * well to be safer, and come back here when we need finer tuning for
1254 * VT-d emulation codes.
1255 */
1256 vtd_iommu_replay_all(s);
1257}
1258
1259/* Do a context-cache device-selective invalidation.
1260 * @func_mask: FM field after shifting
1261 */
1262static void vtd_context_device_invalidate(IntelIOMMUState *s,
1263 uint16_t source_id,
1264 uint16_t func_mask)
1265{
1266 uint16_t mask;
1267 VTDBus *vtd_bus;
1268 VTDAddressSpace *vtd_as;
1269 uint8_t bus_n, devfn;
1270 uint16_t devfn_it;
1271
1272 trace_vtd_inv_desc_cc_devices(source_id, func_mask);
1273
1274 switch (func_mask & 3) {
1275 case 0:
1276 mask = 0; /* No bits in the SID field masked */
1277 break;
1278 case 1:
1279 mask = 4; /* Mask bit 2 in the SID field */
1280 break;
1281 case 2:
1282 mask = 6; /* Mask bit 2:1 in the SID field */
1283 break;
1284 case 3:
1285 mask = 7; /* Mask bit 2:0 in the SID field */
1286 break;
1287 }
1288 mask = ~mask;
1289
1290 bus_n = VTD_SID_TO_BUS(source_id);
1291 vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
1292 if (vtd_bus) {
1293 devfn = VTD_SID_TO_DEVFN(source_id);
1294 for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
1295 vtd_as = vtd_bus->dev_as[devfn_it];
1296 if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
1297 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
1298 VTD_PCI_FUNC(devfn_it));
1299 vtd_as->context_cache_entry.context_cache_gen = 0;
1300 /*
1301 * Do switch address space when needed, in case if the
1302 * device passthrough bit is switched.
1303 */
1304 vtd_switch_address_space(vtd_as);
1305 /*
1306 * So a device is moving out of (or moving into) a
1307 * domain, a replay() suits here to notify all the
1308 * IOMMU_NOTIFIER_MAP registers about this change.
1309 * This does no harm even if we have no such
1310 * notifier registered - the IOMMU notification
1311 * framework will skip MAP notifications if that
1312 * happened.
1313 */
1314 memory_region_iommu_replay_all(&vtd_as->iommu);
d92fa2dc
LT
1315 }
1316 }
1317 }
1318}
1319
1320/* Context-cache invalidation
1321 * Returns the Context Actual Invalidation Granularity.
1322 * @val: the content of the CCMD_REG
1323 */
1324static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
1325{
1326 uint64_t caig;
1327 uint64_t type = val & VTD_CCMD_CIRG_MASK;
1328
1329 switch (type) {
1330 case VTD_CCMD_DOMAIN_INVL:
1331 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
1332 (uint16_t)VTD_CCMD_DID(val));
1333 /* Fall through */
1334 case VTD_CCMD_GLOBAL_INVL:
1335 VTD_DPRINTF(INV, "global invalidation");
1336 caig = VTD_CCMD_GLOBAL_INVL_A;
1337 vtd_context_global_invalidate(s);
1338 break;
1339
1340 case VTD_CCMD_DEVICE_INVL:
1341 caig = VTD_CCMD_DEVICE_INVL_A;
1342 vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val));
1343 break;
1344
1345 default:
1346 VTD_DPRINTF(GENERAL, "error: invalid granularity");
1347 caig = 0;
1348 }
1349 return caig;
1350}
1351
1352static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
1353{
1354 trace_vtd_iotlb_reset("global invalidation recved");
1355 vtd_reset_iotlb(s);
1356 vtd_iommu_replay_all(s);
1357}
1358
1359static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
1360{
1361 IntelIOMMUNotifierNode *node;
1362 VTDContextEntry ce;
1363 VTDAddressSpace *vtd_as;
1364
1365 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
1366 &domain_id);
1367
1368 QLIST_FOREACH(node, &s->notifiers_list, next) {
1369 vtd_as = node->vtd_as;
1370 if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
1371 vtd_as->devfn, &ce) &&
1372 domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
1373 memory_region_iommu_replay_all(&vtd_as->iommu);
1374 }
1375 }
1376}
1377
1378static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry,
1379 void *private)
1380{
1381 memory_region_notify_iommu((MemoryRegion *)private, *entry);
1382 return 0;
1383}
1384
1385static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
1386 uint16_t domain_id, hwaddr addr,
1387 uint8_t am)
1388{
1389 IntelIOMMUNotifierNode *node;
1390 VTDContextEntry ce;
1391 int ret;
1392
1393 QLIST_FOREACH(node, &(s->notifiers_list), next) {
1394 VTDAddressSpace *vtd_as = node->vtd_as;
1395 ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
1396 vtd_as->devfn, &ce);
1397 if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
1398 vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE,
1399 vtd_page_invalidate_notify_hook,
1400 (void *)&vtd_as->iommu, true);
1401 }
1402 }
1403}
1404
1405static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
1406 hwaddr addr, uint8_t am)
1407{
1408 VTDIOTLBPageInvInfo info;
1409
1410 assert(am <= VTD_MAMV);
1411 info.domain_id = domain_id;
1412 info.addr = addr;
1413 info.mask = ~((1 << am) - 1);
1414 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
1415 vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
1416}
1417
1418/* Flush IOTLB
1419 * Returns the IOTLB Actual Invalidation Granularity.
1420 * @val: the content of the IOTLB_REG
1421 */
1422static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
1423{
1424 uint64_t iaig;
1425 uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK;
1426 uint16_t domain_id;
1427 hwaddr addr;
1428 uint8_t am;
1429
1430 switch (type) {
1431 case VTD_TLB_GLOBAL_FLUSH:
1432 VTD_DPRINTF(INV, "global invalidation");
1433 iaig = VTD_TLB_GLOBAL_FLUSH_A;
1434 vtd_iotlb_global_invalidate(s);
1435 break;
1436
1437 case VTD_TLB_DSI_FLUSH:
1438 domain_id = VTD_TLB_DID(val);
1439 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
1440 domain_id);
1441 iaig = VTD_TLB_DSI_FLUSH_A;
1442 vtd_iotlb_domain_invalidate(s, domain_id);
1443 break;
1444
1445 case VTD_TLB_PSI_FLUSH:
1446 domain_id = VTD_TLB_DID(val);
1447 addr = vtd_get_quad_raw(s, DMAR_IVA_REG);
1448 am = VTD_IVA_AM(addr);
1449 addr = VTD_IVA_ADDR(addr);
1450 VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
1451 " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
1452 if (am > VTD_MAMV) {
1453 VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
1454 "%"PRIu8, (uint8_t)VTD_MAMV);
1455 iaig = 0;
1456 break;
1457 }
1458 iaig = VTD_TLB_PSI_FLUSH_A;
1459 vtd_iotlb_page_invalidate(s, domain_id, addr, am);
1460 break;
1461
1462 default:
1463 VTD_DPRINTF(GENERAL, "error: invalid granularity");
1464 iaig = 0;
1465 }
1466 return iaig;
1467}
1468
1469static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s)
1470{
1471 return s->iq_tail == 0;
1472}
1473
1474static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s)
1475{
1476 return s->qi_enabled && (s->iq_tail == s->iq_head) &&
1477 (s->iq_last_desc_type == VTD_INV_DESC_WAIT);
1478}
1479
1480static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
1481{
1482 uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG);
1483
1484 VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off"));
1485 if (en) {
1486 if (vtd_queued_inv_enable_check(s)) {
1487 s->iq = iqa_val & VTD_IQA_IQA_MASK;
1488 /* 2^(x+8) entries */
1489 s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
1490 s->qi_enabled = true;
1491 VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val);
1492 VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d",
1493 s->iq, s->iq_size);
1494 /* Ok - report back to driver */
1495 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
1496 } else {
1497 VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: "
1498 "tail %"PRIu16, s->iq_tail);
1499 }
1500 } else {
1501 if (vtd_queued_inv_disable_check(s)) {
1502 /* disable Queued Invalidation */
1503 vtd_set_quad_raw(s, DMAR_IQH_REG, 0);
1504 s->iq_head = 0;
1505 s->qi_enabled = false;
1506 /* Ok - report back to driver */
1507 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
1508 } else {
1509 VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: "
1510 "head %"PRIu16 ", tail %"PRIu16
1511 ", last_descriptor %"PRIu8,
1512 s->iq_head, s->iq_tail, s->iq_last_desc_type);
1513 }
1514 }
1515}
1516
1517/* Set Root Table Pointer */
1518static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
1519{
1520 VTD_DPRINTF(CSR, "set Root Table Pointer");
1521
1522 vtd_root_table_setup(s);
1523 /* Ok - report back to driver */
1524 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
1525}
1526
1527/* Set Interrupt Remap Table Pointer */
1528static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
1529{
1530 VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer");
1531
1532 vtd_interrupt_remap_table_setup(s);
1533 /* Ok - report back to driver */
1534 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
1535}
1536
1537/* Handle Translation Enable/Disable */
1538static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
1539{
1540 if (s->dmar_enabled == en) {
1541 return;
1542 }
1543
1544 VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off"));
1545
1546 if (en) {
1547 s->dmar_enabled = true;
1548 /* Ok - report back to driver */
1549 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES);
1550 } else {
1551 s->dmar_enabled = false;
1552
1553 /* Clear the index of Fault Recording Register */
1554 s->next_frcd_reg = 0;
1555 /* Ok - report back to driver */
1556 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
1557 }
1558
1559 vtd_switch_address_space_all(s);
1560}
1561
1562/* Handle Interrupt Remap Enable/Disable */
1563static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
1564{
1565 VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off"));
1566
1567 if (en) {
1568 s->intr_enabled = true;
1569 /* Ok - report back to driver */
1570 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES);
1571 } else {
1572 s->intr_enabled = false;
1573 /* Ok - report back to driver */
1574 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0);
1575 }
1576}
1577
1578/* Handle write to Global Command Register */
1579static void vtd_handle_gcmd_write(IntelIOMMUState *s)
1580{
1581 uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
1582 uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
1583 uint32_t changed = status ^ val;
1584
1585 VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status);
1586 if (changed & VTD_GCMD_TE) {
1587 /* Translation enable/disable */
1588 vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
1589 }
1590 if (val & VTD_GCMD_SRTP) {
1591 /* Set/update the root-table pointer */
1592 vtd_handle_gcmd_srtp(s);
1593 }
1594 if (changed & VTD_GCMD_QIE) {
1595 /* Queued Invalidation Enable */
1596 vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
1597 }
1598 if (val & VTD_GCMD_SIRTP) {
1599 /* Set/update the interrupt remapping root-table pointer */
1600 vtd_handle_gcmd_sirtp(s);
1601 }
1602 if (changed & VTD_GCMD_IRE) {
1603 /* Interrupt remap enable/disable */
1604 vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
1605 }
1606}
1607
1608/* Handle write to Context Command Register */
1609static void vtd_handle_ccmd_write(IntelIOMMUState *s)
1610{
1611 uint64_t ret;
1612 uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG);
1613
1614 /* Context-cache invalidation request */
1615 if (val & VTD_CCMD_ICC) {
1616 if (s->qi_enabled) {
1617 VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
1618 "should not use register-based invalidation");
1619 return;
1620 }
1621 ret = vtd_context_cache_invalidate(s, val);
1622 /* Invalidation completed. Change something to show */
1623 vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL);
1624 ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK,
1625 ret);
1626 VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret);
1627 }
1628}
1629
1630/* Handle write to IOTLB Invalidation Register */
1631static void vtd_handle_iotlb_write(IntelIOMMUState *s)
1632{
1633 uint64_t ret;
1634 uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG);
1635
1636 /* IOTLB invalidation request */
1637 if (val & VTD_TLB_IVT) {
1638 if (s->qi_enabled) {
1639 VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
1640 "should not use register-based invalidation");
1641 return;
1642 }
1643 ret = vtd_iotlb_flush(s, val);
1644 /* Invalidation completed. Change something to show */
1645 vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL);
1646 ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG,
1647 VTD_TLB_FLUSH_GRANU_MASK_A, ret);
1648 VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret);
1649 }
1650}
1651
1652/* Fetch an Invalidation Descriptor from the Invalidation Queue */
1653static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset,
1654 VTDInvDesc *inv_desc)
1655{
1656 dma_addr_t addr = base_addr + offset * sizeof(*inv_desc);
1657 if (dma_memory_read(&address_space_memory, addr, inv_desc,
1658 sizeof(*inv_desc))) {
1659 VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor "
1660 "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset);
1661 inv_desc->lo = 0;
1662 inv_desc->hi = 0;
1663
1664 return false;
1665 }
1666 inv_desc->lo = le64_to_cpu(inv_desc->lo);
1667 inv_desc->hi = le64_to_cpu(inv_desc->hi);
1668 return true;
1669}
1670
1671static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
1672{
1673 if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
1674 (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
1675 trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
1676 return false;
1677 }
1678 if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
1679 /* Status Write */
1680 uint32_t status_data = (uint32_t)(inv_desc->lo >>
1681 VTD_INV_DESC_WAIT_DATA_SHIFT);
1682
1683 assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF));
1684
1685 /* FIXME: need to be masked with HAW? */
1686 dma_addr_t status_addr = inv_desc->hi;
1687 trace_vtd_inv_desc_wait_sw(status_addr, status_data);
1688 status_data = cpu_to_le32(status_data);
1689 if (dma_memory_write(&address_space_memory, status_addr, &status_data,
1690 sizeof(status_data))) {
1691 trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
1692 return false;
1693 }
1694 } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
1695 /* Interrupt flag */
1696 vtd_generate_completion_event(s);
1697 } else {
1698 trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
1699 return false;
1700 }
1701 return true;
1702}
1703
1704static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
1705 VTDInvDesc *inv_desc)
1706{
1707 uint16_t sid, fmask;
1708
1709 if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
1710 trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
1711 return false;
1712 }
1713 switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
1714 case VTD_INV_DESC_CC_DOMAIN:
bc535e59
PX
1715 trace_vtd_inv_desc_cc_domain(
1716 (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
d92fa2dc
LT
1717 /* Fall through */
1718 case VTD_INV_DESC_CC_GLOBAL:
d92fa2dc
LT
1719 vtd_context_global_invalidate(s);
1720 break;
1721
1722 case VTD_INV_DESC_CC_DEVICE:
bc535e59
PX
1723 sid = VTD_INV_DESC_CC_SID(inv_desc->lo);
1724 fmask = VTD_INV_DESC_CC_FM(inv_desc->lo);
1725 vtd_context_device_invalidate(s, sid, fmask);
d92fa2dc
LT
1726 break;
1727
1728 default:
bc535e59 1729 trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
d92fa2dc
LT
1730 return false;
1731 }
1732 return true;
1733}
1734
b5a280c0
LT
1735static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
1736{
1737 uint16_t domain_id;
1738 uint8_t am;
1739 hwaddr addr;
1740
1741 if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
1742 (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
bc535e59 1743 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
b5a280c0
LT
1744 return false;
1745 }
1746
1747 switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
1748 case VTD_INV_DESC_IOTLB_GLOBAL:
bc535e59 1749 trace_vtd_inv_desc_iotlb_global();
b5a280c0
LT
1750 vtd_iotlb_global_invalidate(s);
1751 break;
1752
1753 case VTD_INV_DESC_IOTLB_DOMAIN:
1754 domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
bc535e59 1755 trace_vtd_inv_desc_iotlb_domain(domain_id);
b5a280c0
LT
1756 vtd_iotlb_domain_invalidate(s, domain_id);
1757 break;
1758
1759 case VTD_INV_DESC_IOTLB_PAGE:
1760 domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
1761 addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
1762 am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
bc535e59 1763 trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
b5a280c0 1764 if (am > VTD_MAMV) {
bc535e59 1765 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
b5a280c0
LT
1766 return false;
1767 }
1768 vtd_iotlb_page_invalidate(s, domain_id, addr, am);
1769 break;
1770
1771 default:
bc535e59 1772 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
b5a280c0
LT
1773 return false;
1774 }
1775 return true;
1776}
1777
02a2cbc8
PX
1778static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
1779 VTDInvDesc *inv_desc)
1780{
1781 VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d",
1782 inv_desc->iec.granularity,
1783 inv_desc->iec.index,
1784 inv_desc->iec.index_mask);
1785
1786 vtd_iec_notify_all(s, !inv_desc->iec.granularity,
1787 inv_desc->iec.index,
1788 inv_desc->iec.index_mask);
554f5e16
JW
1789 return true;
1790}
1791
1792static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
1793 VTDInvDesc *inv_desc)
1794{
1795 VTDAddressSpace *vtd_dev_as;
1796 IOMMUTLBEntry entry;
1797 struct VTDBus *vtd_bus;
1798 hwaddr addr;
1799 uint64_t sz;
1800 uint16_t sid;
1801 uint8_t devfn;
1802 bool size;
1803 uint8_t bus_num;
1804
1805 addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
1806 sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
1807 devfn = sid & 0xff;
1808 bus_num = sid >> 8;
1809 size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
1810
1811 if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
1812 (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
1813 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
1814 "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
1815 inv_desc->hi, inv_desc->lo);
1816 return false;
1817 }
1818
1819 vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
1820 if (!vtd_bus) {
1821 goto done;
1822 }
1823
1824 vtd_dev_as = vtd_bus->dev_as[devfn];
1825 if (!vtd_dev_as) {
1826 goto done;
1827 }
1828
04eb6247
JW
1829 /* According to ATS spec table 2.4:
1830 * S = 0, bits 15:12 = xxxx range size: 4K
1831 * S = 1, bits 15:12 = xxx0 range size: 8K
1832 * S = 1, bits 15:12 = xx01 range size: 16K
1833 * S = 1, bits 15:12 = x011 range size: 32K
1834 * S = 1, bits 15:12 = 0111 range size: 64K
1835 * ...
1836 */
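 /* Illustrative decode (example values): with S = 1 and address bits
  * 15:12 = 0111b, cto64(addr >> VTD_PAGE_SHIFT) returns 3, so
  * sz = (4K * 2) << 3 = 64K and addr is aligned down to a 64K boundary,
  * matching the "0111 -> 64K" row in the table above. */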
554f5e16 1837 if (size) {
04eb6247 1838 sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
554f5e16
JW
1839 addr &= ~(sz - 1);
1840 } else {
1841 sz = VTD_PAGE_SIZE;
1842 }
02a2cbc8 1843
554f5e16
JW
1844 entry.target_as = &vtd_dev_as->as;
1845 entry.addr_mask = sz - 1;
1846 entry.iova = addr;
1847 entry.perm = IOMMU_NONE;
1848 entry.translated_addr = 0;
10315b9b 1849 memory_region_notify_iommu(&vtd_dev_as->iommu, entry);
554f5e16
JW
1850
1851done:
02a2cbc8
PX
1852 return true;
1853}
1854
ed7b8fbc
LT
1855static bool vtd_process_inv_desc(IntelIOMMUState *s)
1856{
1857 VTDInvDesc inv_desc;
1858 uint8_t desc_type;
1859
1860 VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head);
1861 if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) {
1862 s->iq_last_desc_type = VTD_INV_DESC_NONE;
1863 return false;
1864 }
1865 desc_type = inv_desc.lo & VTD_INV_DESC_TYPE;
1866 /* FIXME: should this be updated before or after processing the descriptor? */
1867 s->iq_last_desc_type = desc_type;
1868
1869 switch (desc_type) {
1870 case VTD_INV_DESC_CC:
bc535e59 1871 trace_vtd_inv_desc("context-cache", inv_desc.hi, inv_desc.lo);
d92fa2dc
LT
1872 if (!vtd_process_context_cache_desc(s, &inv_desc)) {
1873 return false;
1874 }
ed7b8fbc
LT
1875 break;
1876
1877 case VTD_INV_DESC_IOTLB:
bc535e59 1878 trace_vtd_inv_desc("iotlb", inv_desc.hi, inv_desc.lo);
b5a280c0
LT
1879 if (!vtd_process_iotlb_desc(s, &inv_desc)) {
1880 return false;
1881 }
ed7b8fbc
LT
1882 break;
1883
1884 case VTD_INV_DESC_WAIT:
bc535e59 1885 trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
ed7b8fbc
LT
1886 if (!vtd_process_wait_desc(s, &inv_desc)) {
1887 return false;
1888 }
1889 break;
1890
b7910472 1891 case VTD_INV_DESC_IEC:
bc535e59 1892 trace_vtd_inv_desc("iec", inv_desc.hi, inv_desc.lo);
02a2cbc8
PX
1893 if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
1894 return false;
1895 }
b7910472
PX
1896 break;
1897
554f5e16
JW
1898 case VTD_INV_DESC_DEVICE:
1899 VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
1900 " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
1901 if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
1902 return false;
1903 }
1904 break;
1905
ed7b8fbc 1906 default:
bc535e59 1907 trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
ed7b8fbc
LT
1908 return false;
1909 }
1910 s->iq_head++;
1911 if (s->iq_head == s->iq_size) {
1912 s->iq_head = 0;
1913 }
1914 return true;
1915}
1916
1917/* Try to fetch and process more Invalidation Descriptors */
1918static void vtd_fetch_inv_desc(IntelIOMMUState *s)
1919{
1920 VTD_DPRINTF(INV, "fetch Invalidation Descriptors");
1921 if (s->iq_tail >= s->iq_size) {
1922 /* Detects an invalid Tail pointer */
1923 VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16
1924 " while iq_size is %"PRIu16, s->iq_tail, s->iq_size);
1925 vtd_handle_inv_queue_error(s);
1926 return;
1927 }
1928 while (s->iq_head != s->iq_tail) {
1929 if (!vtd_process_inv_desc(s)) {
1930 /* Invalidation Queue Errors */
1931 vtd_handle_inv_queue_error(s);
1932 break;
1933 }
1934 /* Must update the IQH_REG in time */
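 /* The head value is a descriptor index; shifting by VTD_IQH_QH_SHIFT
  * places it in the QH field (bits 18:4 of IQH_REG in the VT-d spec), so
  * the register reports the byte offset of the next descriptor to fetch
  * (each descriptor is 16 bytes). */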
1935 vtd_set_quad_raw(s, DMAR_IQH_REG,
1936 (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) &
1937 VTD_IQH_QH_MASK);
1938 }
1939}
1940
1941/* Handle write to Invalidation Queue Tail Register */
1942static void vtd_handle_iqt_write(IntelIOMMUState *s)
1943{
1944 uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG);
1945
1946 s->iq_tail = VTD_IQT_QT(val);
1947 VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail);
1948 if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
1949 /* Process Invalidation Queue here */
1950 vtd_fetch_inv_desc(s);
1951 }
1952}
1953
1da12ec4
LT
1954static void vtd_handle_fsts_write(IntelIOMMUState *s)
1955{
1956 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
1957 uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
1958 uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE;
1959
1960 if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) {
1961 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
1962 VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear "
1963 "IP field of FECTL_REG");
1964 }
ed7b8fbc
LT
1965 /* FIXME: when IQE is Clear, should we try to fetch some Invalidation
1966 * Descriptors if there are any when Queued Invalidation is enabled?
1967 */
1da12ec4
LT
1968}
1969
1970static void vtd_handle_fectl_write(IntelIOMMUState *s)
1971{
1972 uint32_t fectl_reg;
1973 /* FIXME: when software clears the IM field, check the IP field. But do we
1974 * need to compare the old value and the new value to conclude that
1975 * software clears the IM field? Or just check if the IM field is zero?
1976 */
1977 fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
1978 if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) {
1979 vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
1980 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
1981 VTD_DPRINTF(FLOG, "IM field is cleared, generate "
1982 "fault event interrupt");
1983 }
1984}
1985
ed7b8fbc
LT
1986static void vtd_handle_ics_write(IntelIOMMUState *s)
1987{
1988 uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG);
1989 uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
1990
1991 if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) {
1992 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
1993 VTD_DPRINTF(INV, "pending completion interrupt condition serviced, "
1994 "clear IP field of IECTL_REG");
1995 }
1996}
1997
1998static void vtd_handle_iectl_write(IntelIOMMUState *s)
1999{
2000 uint32_t iectl_reg;
2001 /* FIXME: when software clears the IM field, check the IP field. But do we
2002 * need to compare the old value and the new value to conclude that
2003 * software clears the IM field? Or just check if the IM field is zero?
2004 */
2005 iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
2006 if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) {
2007 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
2008 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
2009 VTD_DPRINTF(INV, "IM field is cleared, generate "
2010 "invalidation event interrupt");
2011 }
2012}
2013
1da12ec4
LT
2014static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
2015{
2016 IntelIOMMUState *s = opaque;
2017 uint64_t val;
2018
2019 if (addr + size > DMAR_REG_SIZE) {
2020 VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
2021 ", got 0x%"PRIx64 " %d",
2022 (uint64_t)DMAR_REG_SIZE, addr, size);
2023 return (uint64_t)-1;
2024 }
2025
2026 switch (addr) {
2027 /* Root Table Address Register, 64-bit */
2028 case DMAR_RTADDR_REG:
2029 if (size == 4) {
2030 val = s->root & ((1ULL << 32) - 1);
2031 } else {
2032 val = s->root;
2033 }
2034 break;
2035
2036 case DMAR_RTADDR_REG_HI:
2037 assert(size == 4);
2038 val = s->root >> 32;
2039 break;
2040
ed7b8fbc
LT
2041 /* Invalidation Queue Address Register, 64-bit */
2042 case DMAR_IQA_REG:
2043 val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS);
2044 if (size == 4) {
2045 val = val & ((1ULL << 32) - 1);
2046 }
2047 break;
2048
2049 case DMAR_IQA_REG_HI:
2050 assert(size == 4);
2051 val = s->iq >> 32;
2052 break;
2053
1da12ec4
LT
2054 default:
2055 if (size == 4) {
2056 val = vtd_get_long(s, addr);
2057 } else {
2058 val = vtd_get_quad(s, addr);
2059 }
2060 }
2061 VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64,
2062 addr, size, val);
2063 return val;
2064}
2065
2066static void vtd_mem_write(void *opaque, hwaddr addr,
2067 uint64_t val, unsigned size)
2068{
2069 IntelIOMMUState *s = opaque;
2070
2071 if (addr + size > DMAR_REG_SIZE) {
2072 VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
2073 ", got 0x%"PRIx64 " %d",
2074 (uint64_t)DMAR_REG_SIZE, addr, size);
2075 return;
2076 }
2077
2078 switch (addr) {
2079 /* Global Command Register, 32-bit */
2080 case DMAR_GCMD_REG:
2081 VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64
2082 ", size %d, val 0x%"PRIx64, addr, size, val);
2083 vtd_set_long(s, addr, val);
2084 vtd_handle_gcmd_write(s);
2085 break;
2086
2087 /* Context Command Register, 64-bit */
2088 case DMAR_CCMD_REG:
2089 VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64
2090 ", size %d, val 0x%"PRIx64, addr, size, val);
2091 if (size == 4) {
2092 vtd_set_long(s, addr, val);
2093 } else {
2094 vtd_set_quad(s, addr, val);
2095 vtd_handle_ccmd_write(s);
2096 }
2097 break;
2098
2099 case DMAR_CCMD_REG_HI:
2100 VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64
2101 ", size %d, val 0x%"PRIx64, addr, size, val);
2102 assert(size == 4);
2103 vtd_set_long(s, addr, val);
2104 vtd_handle_ccmd_write(s);
2105 break;
2106
2107 /* IOTLB Invalidation Register, 64-bit */
2108 case DMAR_IOTLB_REG:
2109 VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64
2110 ", size %d, val 0x%"PRIx64, addr, size, val);
2111 if (size == 4) {
2112 vtd_set_long(s, addr, val);
2113 } else {
2114 vtd_set_quad(s, addr, val);
2115 vtd_handle_iotlb_write(s);
2116 }
2117 break;
2118
2119 case DMAR_IOTLB_REG_HI:
2120 VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64
2121 ", size %d, val 0x%"PRIx64, addr, size, val);
2122 assert(size == 4);
2123 vtd_set_long(s, addr, val);
2124 vtd_handle_iotlb_write(s);
2125 break;
2126
b5a280c0
LT
2127 /* Invalidate Address Register, 64-bit */
2128 case DMAR_IVA_REG:
2129 VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64
2130 ", size %d, val 0x%"PRIx64, addr, size, val);
2131 if (size == 4) {
2132 vtd_set_long(s, addr, val);
2133 } else {
2134 vtd_set_quad(s, addr, val);
2135 }
2136 break;
2137
2138 case DMAR_IVA_REG_HI:
2139 VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64
2140 ", size %d, val 0x%"PRIx64, addr, size, val);
2141 assert(size == 4);
2142 vtd_set_long(s, addr, val);
2143 break;
2144
1da12ec4
LT
2145 /* Fault Status Register, 32-bit */
2146 case DMAR_FSTS_REG:
2147 VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64
2148 ", size %d, val 0x%"PRIx64, addr, size, val);
2149 assert(size == 4);
2150 vtd_set_long(s, addr, val);
2151 vtd_handle_fsts_write(s);
2152 break;
2153
2154 /* Fault Event Control Register, 32-bit */
2155 case DMAR_FECTL_REG:
2156 VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64
2157 ", size %d, val 0x%"PRIx64, addr, size, val);
2158 assert(size == 4);
2159 vtd_set_long(s, addr, val);
2160 vtd_handle_fectl_write(s);
2161 break;
2162
2163 /* Fault Event Data Register, 32-bit */
2164 case DMAR_FEDATA_REG:
2165 VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64
2166 ", size %d, val 0x%"PRIx64, addr, size, val);
2167 assert(size == 4);
2168 vtd_set_long(s, addr, val);
2169 break;
2170
2171 /* Fault Event Address Register, 32-bit */
2172 case DMAR_FEADDR_REG:
2173 VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64
2174 ", size %d, val 0x%"PRIx64, addr, size, val);
2175 assert(size == 4);
2176 vtd_set_long(s, addr, val);
2177 break;
2178
2179 /* Fault Event Upper Address Register, 32-bit */
2180 case DMAR_FEUADDR_REG:
2181 VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64
2182 ", size %d, val 0x%"PRIx64, addr, size, val);
2183 assert(size == 4);
2184 vtd_set_long(s, addr, val);
2185 break;
2186
2187 /* Protected Memory Enable Register, 32-bit */
2188 case DMAR_PMEN_REG:
2189 VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64
2190 ", size %d, val 0x%"PRIx64, addr, size, val);
2191 assert(size == 4);
2192 vtd_set_long(s, addr, val);
2193 break;
2194
2195 /* Root Table Address Register, 64-bit */
2196 case DMAR_RTADDR_REG:
2197 VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64
2198 ", size %d, val 0x%"PRIx64, addr, size, val);
2199 if (size == 4) {
2200 vtd_set_long(s, addr, val);
2201 } else {
2202 vtd_set_quad(s, addr, val);
2203 }
2204 break;
2205
2206 case DMAR_RTADDR_REG_HI:
2207 VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64
2208 ", size %d, val 0x%"PRIx64, addr, size, val);
2209 assert(size == 4);
2210 vtd_set_long(s, addr, val);
2211 break;
2212
ed7b8fbc
LT
2213 /* Invalidation Queue Tail Register, 64-bit */
2214 case DMAR_IQT_REG:
2215 VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64
2216 ", size %d, val 0x%"PRIx64, addr, size, val);
2217 if (size == 4) {
2218 vtd_set_long(s, addr, val);
2219 } else {
2220 vtd_set_quad(s, addr, val);
2221 }
2222 vtd_handle_iqt_write(s);
2223 break;
2224
2225 case DMAR_IQT_REG_HI:
2226 VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64
2227 ", size %d, val 0x%"PRIx64, addr, size, val);
2228 assert(size == 4);
2229 vtd_set_long(s, addr, val);
2230 /* Bits 63:19 of IQT_REG are RsvdZ, do nothing here */
2231 break;
2232
2233 /* Invalidation Queue Address Register, 64-bit */
2234 case DMAR_IQA_REG:
2235 VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64
2236 ", size %d, val 0x%"PRIx64, addr, size, val);
2237 if (size == 4) {
2238 vtd_set_long(s, addr, val);
2239 } else {
2240 vtd_set_quad(s, addr, val);
2241 }
2242 break;
2243
2244 case DMAR_IQA_REG_HI:
2245 VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64
2246 ", size %d, val 0x%"PRIx64, addr, size, val);
2247 assert(size == 4);
2248 vtd_set_long(s, addr, val);
2249 break;
2250
2251 /* Invalidation Completion Status Register, 32-bit */
2252 case DMAR_ICS_REG:
2253 VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64
2254 ", size %d, val 0x%"PRIx64, addr, size, val);
2255 assert(size == 4);
2256 vtd_set_long(s, addr, val);
2257 vtd_handle_ics_write(s);
2258 break;
2259
2260 /* Invalidation Event Control Register, 32-bit */
2261 case DMAR_IECTL_REG:
2262 VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64
2263 ", size %d, val 0x%"PRIx64, addr, size, val);
2264 assert(size == 4);
2265 vtd_set_long(s, addr, val);
2266 vtd_handle_iectl_write(s);
2267 break;
2268
2269 /* Invalidation Event Data Register, 32-bit */
2270 case DMAR_IEDATA_REG:
2271 VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64
2272 ", size %d, val 0x%"PRIx64, addr, size, val);
2273 assert(size == 4);
2274 vtd_set_long(s, addr, val);
2275 break;
2276
2277 /* Invalidation Event Address Register, 32-bit */
2278 case DMAR_IEADDR_REG:
2279 VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64
2280 ", size %d, val 0x%"PRIx64, addr, size, val);
2281 assert(size == 4);
2282 vtd_set_long(s, addr, val);
2283 break;
2284
2285 /* Invalidation Event Upper Address Register, 32-bit */
2286 case DMAR_IEUADDR_REG:
2287 VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64
2288 ", size %d, val 0x%"PRIx64, addr, size, val);
2289 assert(size == 4);
2290 vtd_set_long(s, addr, val);
2291 break;
2292
1da12ec4
LT
2293 /* Fault Recording Registers, 128-bit */
2294 case DMAR_FRCD_REG_0_0:
2295 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64
2296 ", size %d, val 0x%"PRIx64, addr, size, val);
2297 if (size == 4) {
2298 vtd_set_long(s, addr, val);
2299 } else {
2300 vtd_set_quad(s, addr, val);
2301 }
2302 break;
2303
2304 case DMAR_FRCD_REG_0_1:
2305 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64
2306 ", size %d, val 0x%"PRIx64, addr, size, val);
2307 assert(size == 4);
2308 vtd_set_long(s, addr, val);
2309 break;
2310
2311 case DMAR_FRCD_REG_0_2:
2312 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64
2313 ", size %d, val 0x%"PRIx64, addr, size, val);
2314 if (size == 4) {
2315 vtd_set_long(s, addr, val);
2316 } else {
2317 vtd_set_quad(s, addr, val);
2318 /* May clear bit 127 (Fault), update PPF */
2319 vtd_update_fsts_ppf(s);
2320 }
2321 break;
2322
2323 case DMAR_FRCD_REG_0_3:
2324 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64
2325 ", size %d, val 0x%"PRIx64, addr, size, val);
2326 assert(size == 4);
2327 vtd_set_long(s, addr, val);
2328 /* May clear bit 127 (Fault), update PPF */
2329 vtd_update_fsts_ppf(s);
2330 break;
2331
a5861439
PX
2332 case DMAR_IRTA_REG:
2333 VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64
2334 ", size %d, val 0x%"PRIx64, addr, size, val);
2335 if (size == 4) {
2336 vtd_set_long(s, addr, val);
2337 } else {
2338 vtd_set_quad(s, addr, val);
2339 }
2340 break;
2341
2342 case DMAR_IRTA_REG_HI:
2343 VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64
2344 ", size %d, val 0x%"PRIx64, addr, size, val);
2345 assert(size == 4);
2346 vtd_set_long(s, addr, val);
2347 break;
2348
1da12ec4
LT
2349 default:
2350 VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64
2351 ", size %d, val 0x%"PRIx64, addr, size, val);
2352 if (size == 4) {
2353 vtd_set_long(s, addr, val);
2354 } else {
2355 vtd_set_quad(s, addr, val);
2356 }
2357 }
2358}
2359
2360static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
bf55b7af 2361 IOMMUAccessFlags flag)
1da12ec4
LT
2362{
2363 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
2364 IntelIOMMUState *s = vtd_as->iommu_state;
1da12ec4
LT
2365 IOMMUTLBEntry ret = {
2366 .target_as = &address_space_memory,
2367 .iova = addr,
2368 .translated_addr = 0,
2369 .addr_mask = ~(hwaddr)0,
2370 .perm = IOMMU_NONE,
2371 };
2372
2373 if (!s->dmar_enabled) {
2374 /* DMAR disabled, passthrough, use 4k page */
2375 ret.iova = addr & VTD_PAGE_MASK_4K;
2376 ret.translated_addr = addr & VTD_PAGE_MASK_4K;
2377 ret.addr_mask = ~VTD_PAGE_MASK_4K;
2378 ret.perm = IOMMU_RW;
2379 return ret;
2380 }
2381
7df953bd 2382 vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
bf55b7af 2383 flag & IOMMU_WO, &ret);
1da12ec4
LT
2384 VTD_DPRINTF(MMU,
2385 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
6e905564 2386 " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
d92fa2dc
LT
2387 VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
2388 vtd_as->devfn, addr, ret.translated_addr);
1da12ec4
LT
2389 return ret;
2390}
2391
5bf3d319
PX
2392static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
2393 IOMMUNotifierFlag old,
2394 IOMMUNotifierFlag new)
3cb3b154
AW
2395{
2396 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
dd4d607e
PX
2397 IntelIOMMUState *s = vtd_as->iommu_state;
2398 IntelIOMMUNotifierNode *node = NULL;
2399 IntelIOMMUNotifierNode *next_node = NULL;
3cb3b154 2400
dd4d607e
PX
2401 if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
2402 error_report("We need to set caching-mode=on for intel-iommu to enable "
2403 "device assignment with IOMMU protection.");
a3276f78
PX
2404 exit(1);
2405 }
dd4d607e
PX
2406
2407 if (old == IOMMU_NOTIFIER_NONE) {
2408 node = g_malloc0(sizeof(*node));
2409 node->vtd_as = vtd_as;
2410 QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
2411 return;
2412 }
2413
2414 /* update notifier node with new flags */
2415 QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
2416 if (node->vtd_as == vtd_as) {
2417 if (new == IOMMU_NOTIFIER_NONE) {
2418 QLIST_REMOVE(node, next);
2419 g_free(node);
2420 }
2421 return;
2422 }
2423 }
3cb3b154
AW
2424}
2425
1da12ec4
LT
2426static const VMStateDescription vtd_vmstate = {
2427 .name = "iommu-intel",
8cdcf3c1
PX
2428 .version_id = 1,
2429 .minimum_version_id = 1,
2430 .priority = MIG_PRI_IOMMU,
2431 .fields = (VMStateField[]) {
2432 VMSTATE_UINT64(root, IntelIOMMUState),
2433 VMSTATE_UINT64(intr_root, IntelIOMMUState),
2434 VMSTATE_UINT64(iq, IntelIOMMUState),
2435 VMSTATE_UINT32(intr_size, IntelIOMMUState),
2436 VMSTATE_UINT16(iq_head, IntelIOMMUState),
2437 VMSTATE_UINT16(iq_tail, IntelIOMMUState),
2438 VMSTATE_UINT16(iq_size, IntelIOMMUState),
2439 VMSTATE_UINT16(next_frcd_reg, IntelIOMMUState),
2440 VMSTATE_UINT8_ARRAY(csr, IntelIOMMUState, DMAR_REG_SIZE),
2441 VMSTATE_UINT8(iq_last_desc_type, IntelIOMMUState),
2442 VMSTATE_BOOL(root_extended, IntelIOMMUState),
2443 VMSTATE_BOOL(dmar_enabled, IntelIOMMUState),
2444 VMSTATE_BOOL(qi_enabled, IntelIOMMUState),
2445 VMSTATE_BOOL(intr_enabled, IntelIOMMUState),
2446 VMSTATE_BOOL(intr_eime, IntelIOMMUState),
2447 VMSTATE_END_OF_LIST()
2448 }
1da12ec4
LT
2449};
2450
2451static const MemoryRegionOps vtd_mem_ops = {
2452 .read = vtd_mem_read,
2453 .write = vtd_mem_write,
2454 .endianness = DEVICE_LITTLE_ENDIAN,
2455 .impl = {
2456 .min_access_size = 4,
2457 .max_access_size = 8,
2458 },
2459 .valid = {
2460 .min_access_size = 4,
2461 .max_access_size = 8,
2462 },
2463};
2464
2465static Property vtd_properties[] = {
2466 DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0),
e6b6af05
RK
2467 DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
2468 ON_OFF_AUTO_AUTO),
fb506e70 2469 DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
3b40f0e5 2470 DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
1da12ec4
LT
2471 DEFINE_PROP_END_OF_LIST(),
2472};
2473
651e4cef
PX
2474/* Read IRTE entry with specific index */
2475static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
bc38ee10 2476 VTD_IR_TableEntry *entry, uint16_t sid)
651e4cef 2477{
ede9c94a
PX
2478 static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
2479 {0xffff, 0xfffb, 0xfff9, 0xfff8};
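 /* These masks implement the SQ (source-id qualifier) encodings for
  * SVT_ALL: e.g. sid_q == 3 uses 0xfff8, which ignores the low three bits
  * (the function number) of the source-id, so any function of the
  * requesting device matches. */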
651e4cef 2480 dma_addr_t addr = 0x00;
ede9c94a
PX
2481 uint16_t mask, source_id;
2482 uint8_t bus, bus_max, bus_min;
651e4cef
PX
2483
2484 addr = iommu->intr_root + index * sizeof(*entry);
2485 if (dma_memory_read(&address_space_memory, addr, entry,
2486 sizeof(*entry))) {
2487 VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64
2488 " + %"PRIu16, iommu->intr_root, index);
2489 return -VTD_FR_IR_ROOT_INVAL;
2490 }
2491
bc38ee10 2492 if (!entry->irte.present) {
651e4cef
PX
2493 VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
2494 " entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
2495 index, le64_to_cpu(entry->data[1]),
2496 le64_to_cpu(entry->data[0]));
2497 return -VTD_FR_IR_ENTRY_P;
2498 }
2499
bc38ee10
MT
2500 if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
2501 entry->irte.__reserved_2) {
651e4cef
PX
2502 VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
2503 " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
2504 index, le64_to_cpu(entry->data[1]),
2505 le64_to_cpu(entry->data[0]));
2506 return -VTD_FR_IR_IRTE_RSVD;
2507 }
2508
ede9c94a
PX
2509 if (sid != X86_IOMMU_SID_INVALID) {
2510 /* Validate IRTE SID */
bc38ee10
MT
2511 source_id = le32_to_cpu(entry->irte.source_id);
2512 switch (entry->irte.sid_vtype) {
ede9c94a
PX
2513 case VTD_SVT_NONE:
2514 VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index);
2515 break;
2516
2517 case VTD_SVT_ALL:
bc38ee10 2518 mask = vtd_svt_mask[entry->irte.sid_q];
ede9c94a
PX
2519 if ((source_id & mask) != (sid & mask)) {
2520 VTD_DPRINTF(GENERAL, "SID validation for IRTE index "
2521 "%d failed (reqid 0x%04x sid 0x%04x)", index,
2522 sid, source_id);
2523 return -VTD_FR_IR_SID_ERR;
2524 }
2525 break;
2526
2527 case VTD_SVT_BUS:
2528 bus_max = source_id >> 8;
2529 bus_min = source_id & 0xff;
2530 bus = sid >> 8;
2531 if (bus > bus_max || bus < bus_min) {
2532 VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d "
2533 "failed (bus %d outside %d-%d)", index, bus,
2534 bus_min, bus_max);
2535 return -VTD_FR_IR_SID_ERR;
2536 }
2537 break;
2538
2539 default:
2540 VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index "
bc38ee10 2541 "%d", entry->irte.sid_vtype, index);
ede9c94a
PX
2542 /* Take this as verification failure. */
2543 return -VTD_FR_IR_SID_ERR;
2544 break;
2545 }
2546 }
651e4cef
PX
2547
2548 return 0;
2549}
2550
2551/* Fetch IRQ information of specific IR index */
ede9c94a
PX
2552static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
2553 VTDIrq *irq, uint16_t sid)
651e4cef 2554{
bc38ee10 2555 VTD_IR_TableEntry irte = {};
651e4cef
PX
2556 int ret = 0;
2557
ede9c94a 2558 ret = vtd_irte_get(iommu, index, &irte, sid);
651e4cef
PX
2559 if (ret) {
2560 return ret;
2561 }
2562
bc38ee10
MT
2563 irq->trigger_mode = irte.irte.trigger_mode;
2564 irq->vector = irte.irte.vector;
2565 irq->delivery_mode = irte.irte.delivery_mode;
2566 irq->dest = le32_to_cpu(irte.irte.dest_id);
28589311 2567 if (!iommu->intr_eime) {
651e4cef
PX
2568#define VTD_IR_APIC_DEST_MASK (0xff00ULL)
2569#define VTD_IR_APIC_DEST_SHIFT (8)
28589311
JK
2570 irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >>
2571 VTD_IR_APIC_DEST_SHIFT;
2572 }
bc38ee10
MT
2573 irq->dest_mode = irte.irte.dest_mode;
2574 irq->redir_hint = irte.irte.redir_hint;
651e4cef
PX
2575
2576 VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u,"
2577 "deliver:%u,dest:%u,dest_mode:%u", index,
2578 irq->trigger_mode, irq->vector, irq->delivery_mode,
2579 irq->dest, irq->dest_mode);
2580
2581 return 0;
2582}
2583
2584/* Generate one MSI message from VTDIrq info */
2585static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
2586{
2587 VTD_MSIMessage msg = {};
2588
2589 /* Generate address bits */
2590 msg.dest_mode = irq->dest_mode;
2591 msg.redir_hint = irq->redir_hint;
2592 msg.dest = irq->dest;
32946019 2593 msg.__addr_hi = irq->dest & 0xffffff00;
651e4cef
PX
2594 msg.__addr_head = cpu_to_le32(0xfee);
2595 /* Keep this from original MSI address bits */
2596 msg.__not_used = irq->msi_addr_last_bits;
2597
2598 /* Generate data bits */
2599 msg.vector = irq->vector;
2600 msg.delivery_mode = irq->delivery_mode;
2601 msg.level = 1;
2602 msg.trigger_mode = irq->trigger_mode;
2603
2604 msg_out->address = msg.msi_addr;
2605 msg_out->data = msg.msi_data;
2606}
2607
2608/* Interrupt remapping for MSI/MSI-X entry */
2609static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
2610 MSIMessage *origin,
ede9c94a
PX
2611 MSIMessage *translated,
2612 uint16_t sid)
651e4cef
PX
2613{
2614 int ret = 0;
2615 VTD_IR_MSIAddress addr;
2616 uint16_t index;
09cd058a 2617 VTDIrq irq = {};
651e4cef
PX
2618
2619 assert(origin && translated);
2620
2621 if (!iommu || !iommu->intr_enabled) {
2622 goto do_not_translate;
2623 }
2624
2625 if (origin->address & VTD_MSI_ADDR_HI_MASK) {
2626 VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero"
2627 " during interrupt remapping: 0x%"PRIx32,
2628 (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \
2629 VTD_MSI_ADDR_HI_SHIFT));
2630 return -VTD_FR_IR_REQ_RSVD;
2631 }
2632
2633 addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
1a43713b 2634 if (addr.addr.__head != 0xfee) {
651e4cef
PX
2635 VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
2636 "0x%"PRIx32, addr.data);
2637 return -VTD_FR_IR_REQ_RSVD;
2638 }
2639
2640 /* This is compatible mode. */
bc38ee10 2641 if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
651e4cef
PX
2642 goto do_not_translate;
2643 }
2644
bc38ee10 2645 index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);
651e4cef
PX
2646
2647#define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff)
2648#define VTD_IR_MSI_DATA_RESERVED (0xffff0000)
2649
bc38ee10 2650 if (addr.addr.sub_valid) {
651e4cef
PX
2651 /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */
2652 index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
2653 }
2654
ede9c94a 2655 ret = vtd_remap_irq_get(iommu, index, &irq, sid);
651e4cef
PX
2656 if (ret) {
2657 return ret;
2658 }
2659
bc38ee10 2660 if (addr.addr.sub_valid) {
651e4cef
PX
2661 VTD_DPRINTF(IR, "received MSI interrupt");
2662 if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
2663 VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for "
2664 "interrupt remappable entry: 0x%"PRIx32,
2665 origin->data);
2666 return -VTD_FR_IR_REQ_RSVD;
2667 }
2668 } else {
2669 uint8_t vector = origin->data & 0xff;
dea651a9
FW
2670 uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
2671
651e4cef
PX
2672 VTD_DPRINTF(IR, "received IOAPIC interrupt");
2673 /* IOAPIC entry vector should be aligned with IRTE vector
2674 * (see vt-d spec 5.1.5.1). */
2675 if (vector != irq.vector) {
2676 VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: "
2677 "entry: %d, IRTE: %d, index: %d",
2678 vector, irq.vector, index);
2679 }
dea651a9
FW
2680
2681 /* The Trigger Mode field must match the Trigger Mode in the IRTE.
2682 * (see vt-d spec 5.1.5.1). */
2683 if (trigger_mode != irq.trigger_mode) {
2684 VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: "
2685 "entry: %u, IRTE: %u, index: %d",
2686 trigger_mode, irq.trigger_mode, index);
2687 }
2688
651e4cef
PX
2689 }
2690
2691 /*
2692 * We'd better keep the last two bits, assuming that the guest OS
2693 * might modify them. Keeping them does not hurt, after all.
2694 */
bc38ee10 2695 irq.msi_addr_last_bits = addr.addr.__not_care;
651e4cef
PX
2696
2697 /* Translate VTDIrq to MSI message */
2698 vtd_generate_msi_message(&irq, translated);
2699
2700 VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> "
2701 "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data,
2702 translated->address, translated->data);
2703 return 0;
2704
2705do_not_translate:
2706 memcpy(translated, origin, sizeof(*origin));
2707 return 0;
2708}
2709
8b5ed7df
PX
2710static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
2711 MSIMessage *dst, uint16_t sid)
2712{
ede9c94a
PX
2713 return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
2714 src, dst, sid);
8b5ed7df
PX
2715}
2716
651e4cef
PX
2717static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
2718 uint64_t *data, unsigned size,
2719 MemTxAttrs attrs)
2720{
2721 return MEMTX_OK;
2722}
2723
2724static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
2725 uint64_t value, unsigned size,
2726 MemTxAttrs attrs)
2727{
2728 int ret = 0;
09cd058a 2729 MSIMessage from = {}, to = {};
ede9c94a 2730 uint16_t sid = X86_IOMMU_SID_INVALID;
651e4cef
PX
2731
2732 from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
2733 from.data = (uint32_t) value;
2734
ede9c94a
PX
2735 if (!attrs.unspecified) {
2736 /* We have explicit Source ID */
2737 sid = attrs.requester_id;
2738 }
2739
2740 ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
651e4cef
PX
2741 if (ret) {
2742 /* TODO: report error */
2743 VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64
2744 " data 0x%"PRIx32, from.address, from.data);
2745 /* Drop this interrupt */
2746 return MEMTX_ERROR;
2747 }
2748
2749 VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32
2750 " for device sid 0x%04x",
2751 to.address, to.data, sid);
2752
32946019 2753 apic_get_class()->send_msi(&to);
651e4cef
PX
2754
2755 return MEMTX_OK;
2756}
2757
2758static const MemoryRegionOps vtd_mem_ir_ops = {
2759 .read_with_attrs = vtd_mem_ir_read,
2760 .write_with_attrs = vtd_mem_ir_write,
2761 .endianness = DEVICE_LITTLE_ENDIAN,
2762 .impl = {
2763 .min_access_size = 4,
2764 .max_access_size = 4,
2765 },
2766 .valid = {
2767 .min_access_size = 4,
2768 .max_access_size = 4,
2769 },
2770};
7df953bd
KO
2771
2772VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
2773{
2774 uintptr_t key = (uintptr_t)bus;
2775 VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
2776 VTDAddressSpace *vtd_dev_as;
e0a3c8cc 2777 char name[128];
7df953bd
KO
2778
2779 if (!vtd_bus) {
2d3fc581
JW
2780 uintptr_t *new_key = g_malloc(sizeof(*new_key));
2781 *new_key = (uintptr_t)bus;
7df953bd 2782 /* No corresponding free() */
04af0e18
PX
2783 vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
2784 X86_IOMMU_PCI_DEVFN_MAX);
7df953bd 2785 vtd_bus->bus = bus;
2d3fc581 2786 g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
7df953bd
KO
2787 }
2788
2789 vtd_dev_as = vtd_bus->dev_as[devfn];
2790
2791 if (!vtd_dev_as) {
e0a3c8cc 2792 snprintf(name, sizeof(name), "intel_iommu_devfn_%d", devfn);
7df953bd
KO
2793 vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
2794
2795 vtd_dev_as->bus = bus;
2796 vtd_dev_as->devfn = (uint8_t)devfn;
2797 vtd_dev_as->iommu_state = s;
2798 vtd_dev_as->context_cache_entry.context_cache_gen = 0;
558e0024
PX
2799
2800 /*
2801 * Memory region relationships look like this (address ranges show
2802 * only the lower 32 bits, for brevity):
2803 *
2804 * |-----------------+-------------------+----------|
2805 * | Name | Address range | Priority |
2806 * |-----------------+-------------------+----------+
2807 * | vtd_root | 00000000-ffffffff | 0 |
2808 * | intel_iommu | 00000000-ffffffff | 1 |
2809 * | vtd_sys_alias | 00000000-ffffffff | 1 |
2810 * | intel_iommu_ir | fee00000-feefffff | 64 |
2811 * |-----------------+-------------------+----------|
2812 *
2813 * We enable/disable DMAR by switching enablement for
2814 * vtd_sys_alias and intel_iommu regions. IR region is always
2815 * enabled.
2816 */
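 /* The subregion priorities used below (64 for the interrupt window, 1 for
  * the DMAR and system-alias regions) ensure that the IR range at fee00000
  * always takes precedence over whichever of the other two is currently
  * enabled. */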
7df953bd 2817 memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
558e0024
PX
2818 &s->iommu_ops, "intel_iommu_dmar",
2819 UINT64_MAX);
2820 memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s),
2821 "vtd_sys_alias", get_system_memory(),
2822 0, memory_region_size(get_system_memory()));
651e4cef
PX
2823 memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
2824 &vtd_mem_ir_ops, s, "intel_iommu_ir",
2825 VTD_INTERRUPT_ADDR_SIZE);
558e0024
PX
2826 memory_region_init(&vtd_dev_as->root, OBJECT(s),
2827 "vtd_root", UINT64_MAX);
2828 memory_region_add_subregion_overlap(&vtd_dev_as->root,
2829 VTD_INTERRUPT_ADDR_FIRST,
2830 &vtd_dev_as->iommu_ir, 64);
2831 address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name);
2832 memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
2833 &vtd_dev_as->sys_alias, 1);
2834 memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
2835 &vtd_dev_as->iommu, 1);
2836 vtd_switch_address_space(vtd_dev_as);
7df953bd
KO
2837 }
2838 return vtd_dev_as;
2839}
2840
dd4d607e
PX
2841/* Unmap the whole range in the notifier's scope. */
2842static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
2843{
2844 IOMMUTLBEntry entry;
2845 hwaddr size;
2846 hwaddr start = n->start;
2847 hwaddr end = n->end;
2848
2849 /*
2850 * Note: all the code in this function assumes that the IOVA is no
2851 * wider than VTD_MGAW bits (which the VT-d spec guarantees);
2852 * otherwise we would need to consider 64-bit overflow.
2853 */
2854
2855 if (end > VTD_ADDRESS_SIZE) {
2856 /*
2857 * We don't need to unmap regions bigger than the whole address
2858 * space size that VT-d supports
2859 */
2860 end = VTD_ADDRESS_SIZE;
2861 }
2862
2863 assert(start <= end);
2864 size = end - start;
2865
2866 if (ctpop64(size) != 1) {
2867 /*
2868 * This size cannot form a correct mask. Enlarge it to the
2869 * smallest power of two that covers the range.
2870 */
2871 int n = 64 - clz64(size);
2872 if (n > VTD_MGAW) {
2873 /* should not happen, but in case it happens, limit it */
2874 n = VTD_MGAW;
2875 }
2876 size = 1ULL << n;
2877 }
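 /* Worked example (assumed values): for a 0x3000-byte range, which is not
  * a power of two, 64 - clz64(0x3000) = 14, so size is rounded up to
  * 1 << 14 = 0x4000 and the mask below still covers the whole range. */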
2878
2879 entry.target_as = &address_space_memory;
2880 /* Adjust iova for the size */
2881 entry.iova = n->start & ~(size - 1);
2882 /* This field is meaningless for unmap */
2883 entry.translated_addr = 0;
2884 entry.perm = IOMMU_NONE;
2885 entry.addr_mask = size - 1;
2886
2887 trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
2888 VTD_PCI_SLOT(as->devfn),
2889 VTD_PCI_FUNC(as->devfn),
2890 entry.iova, size);
2891
2892 memory_region_notify_one(n, &entry);
2893}
2894
2895static void vtd_address_space_unmap_all(IntelIOMMUState *s)
2896{
2897 IntelIOMMUNotifierNode *node;
2898 VTDAddressSpace *vtd_as;
2899 IOMMUNotifier *n;
2900
2901 QLIST_FOREACH(node, &s->notifiers_list, next) {
2902 vtd_as = node->vtd_as;
2903 IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
2904 vtd_address_space_unmap(vtd_as, n);
2905 }
2906 }
2907}
2908
f06a696d
PX
2909static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
2910{
2911 memory_region_notify_one((IOMMUNotifier *)private, entry);
2912 return 0;
2913}
2914
2915static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
2916{
2917 VTDAddressSpace *vtd_as = container_of(mr, VTDAddressSpace, iommu);
2918 IntelIOMMUState *s = vtd_as->iommu_state;
2919 uint8_t bus_n = pci_bus_num(vtd_as->bus);
2920 VTDContextEntry ce;
2921
dd4d607e
PX
2922 /*
2923 * The replay can be triggered by either an invalidation or a newly
2924 * created entry. Either way, we first release the existing mappings
2925 * (which means flushing the caches of UNMAP-only notifiers).
2926 */
2927 vtd_address_space_unmap(vtd_as, n);
2928
f06a696d 2929 if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
f06a696d
PX
2930 trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
2931 PCI_FUNC(vtd_as->devfn),
2932 VTD_CONTEXT_ENTRY_DID(ce.hi),
2933 ce.hi, ce.lo);
dd4d607e 2934 vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false);
f06a696d
PX
2935 } else {
2936 trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
2937 PCI_FUNC(vtd_as->devfn));
2938 }
2939
2940 return;
2941}
2942
1da12ec4
LT
2943/* Do the initialization. It will also be called on reset, so pay
2944 * attention when adding new initialization stuff.
2945 */
2946static void vtd_init(IntelIOMMUState *s)
2947{
d54bd7f8
PX
2948 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
2949
1da12ec4
LT
2950 memset(s->csr, 0, DMAR_REG_SIZE);
2951 memset(s->wmask, 0, DMAR_REG_SIZE);
2952 memset(s->w1cmask, 0, DMAR_REG_SIZE);
2953 memset(s->womask, 0, DMAR_REG_SIZE);
2954
2955 s->iommu_ops.translate = vtd_iommu_translate;
5bf3d319 2956 s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed;
f06a696d 2957 s->iommu_ops.replay = vtd_iommu_replay;
1da12ec4
LT
2958 s->root = 0;
2959 s->root_extended = false;
2960 s->dmar_enabled = false;
2961 s->iq_head = 0;
2962 s->iq_tail = 0;
2963 s->iq = 0;
2964 s->iq_size = 0;
2965 s->qi_enabled = false;
2966 s->iq_last_desc_type = VTD_INV_DESC_NONE;
2967 s->next_frcd_reg = 0;
2968 s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
d66b969b 2969 VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
ed7b8fbc 2970 s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
1da12ec4 2971
d54bd7f8 2972 if (x86_iommu->intr_supported) {
e6b6af05
RK
2973 s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
2974 if (s->intr_eim == ON_OFF_AUTO_ON) {
2975 s->ecap |= VTD_ECAP_EIM;
2976 }
2977 assert(s->intr_eim != ON_OFF_AUTO_AUTO);
d54bd7f8
PX
2978 }
2979
554f5e16
JW
2980 if (x86_iommu->dt_supported) {
2981 s->ecap |= VTD_ECAP_DT;
2982 }
2983
dbaabb25
PX
2984 if (x86_iommu->pt_supported) {
2985 s->ecap |= VTD_ECAP_PT;
2986 }
2987
3b40f0e5
ABD
2988 if (s->caching_mode) {
2989 s->cap |= VTD_CAP_CM;
2990 }
2991
d92fa2dc 2992 vtd_reset_context_cache(s);
b5a280c0 2993 vtd_reset_iotlb(s);
d92fa2dc 2994
1da12ec4
LT
2995 /* Define registers with default values and bit semantics */
2996 vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
2997 vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0);
2998 vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0);
2999 vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0);
3000 vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL);
3001 vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0);
3002 vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0);
3003 vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0);
3004 vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL);
3005
3006 /* Advanced Fault Logging not supported */
3007 vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL);
3008 vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0);
3009 vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0);
3010 vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0);
3011
3012 /* Treated as RsvdZ when EIM in ECAP_REG is not supported
3013 * vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0);
3014 */
3015 vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0);
3016
3017 /* Treated as RO for implementations that report the PLMR and PHMR fields
3018 * as Clear in CAP_REG.
3019 * vtd_define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0);
3020 */
3021 vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0);
3022
ed7b8fbc
LT
3023 vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0);
3024 vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0);
3025 vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0);
3026 vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL);
3027 vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0);
3028 vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0);
3029 vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0);
3030 /* Treated as RsvdZ when EIM in ECAP_REG is not supported */
3031 vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0);
3032
1da12ec4
LT
3033 /* IOTLB registers */
3034 vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0xb003ffff00000000ULL, 0);
3035 vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0);
3036 vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL);
3037
3038 /* Fault Recording Registers, 128-bit */
3039 vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0);
3040 vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);
a5861439
PX
3041
3042 /*
28589311 3043 * Interrupt remapping registers.
a5861439 3044 */
28589311 3045 vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
1da12ec4
LT
3046}
3047
3048/* Should not reset address_spaces on reset, because devices will still use
3049 * the address space they got at first (they won't ask the bus again).
3050 */
3051static void vtd_reset(DeviceState *dev)
3052{
3053 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
3054
3055 VTD_DPRINTF(GENERAL, "");
3056 vtd_init(s);
dd4d607e
PX
3057
3058 /*
3059 * When device reset, throw away all mappings and external caches
3060 */
3061 vtd_address_space_unmap_all(s);
1da12ec4
LT
3062}
3063
621d983a
MA
3064static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
3065{
3066 IntelIOMMUState *s = opaque;
3067 VTDAddressSpace *vtd_as;
3068
8e7a0a16 3069 assert(0 <= devfn && devfn < X86_IOMMU_PCI_DEVFN_MAX);
621d983a
MA
3070
3071 vtd_as = vtd_find_add_as(s, bus, devfn);
3072 return &vtd_as->as;
3073}
3074
e6b6af05 3075static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
6333e93c 3076{
e6b6af05
RK
3077 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
3078
6333e93c
RK
3079 /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */
3080 if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
3081 !kvm_irqchip_is_split()) {
3082 error_setg(errp, "Intel Interrupt Remapping cannot work with "
3083 "kernel-irqchip=on, please use 'split|off'.");
3084 return false;
3085 }
e6b6af05
RK
3086 if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) {
3087 error_setg(errp, "eim=on cannot be selected without intremap=on");
3088 return false;
3089 }
3090
3091 if (s->intr_eim == ON_OFF_AUTO_AUTO) {
fb506e70
RK
3092 s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim)
3093 && x86_iommu->intr_supported ?
e6b6af05
RK
3094 ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
3095 }
fb506e70
RK
3096 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
3097 if (!kvm_irqchip_in_kernel()) {
3098 error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
3099 return false;
3100 }
3101 if (!kvm_enable_x2apic()) {
3102 error_setg(errp, "eim=on requires support on the KVM side "
3103 "(X2APIC_API, first shipped in v4.7)");
3104 return false;
3105 }
3106 }
e6b6af05 3107
6333e93c
RK
3108 return true;
3109}
3110
1da12ec4
LT
3111static void vtd_realize(DeviceState *dev, Error **errp)
3112{
ef0e8fc7
EH
3113 MachineState *ms = MACHINE(qdev_get_machine());
3114 MachineClass *mc = MACHINE_GET_CLASS(ms);
3115 PCMachineState *pcms =
3116 PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
3117 PCIBus *bus;
1da12ec4 3118 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
4684a204 3119 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
1da12ec4 3120
ef0e8fc7
EH
3121 if (!pcms) {
3122 error_setg(errp, "Machine-type '%s' not supported by intel-iommu",
3123 mc->name);
3124 return;
3125 }
3126
3127 bus = pcms->bus;
1da12ec4 3128 VTD_DPRINTF(GENERAL, "");
fb9f5926 3129 x86_iommu->type = TYPE_INTEL;
6333e93c 3130
e6b6af05 3131 if (!vtd_decide_config(s, errp)) {
6333e93c
RK
3132 return;
3133 }
3134
dd4d607e 3135 QLIST_INIT(&s->notifiers_list);
7df953bd 3136 memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
1da12ec4
LT
3137 memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
3138 "intel_iommu", DMAR_REG_SIZE);
3139 sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
b5a280c0
LT
3140 /* No corresponding destroy */
3141 s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
3142 g_free, g_free);
7df953bd
KO
3143 s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
3144 g_free, g_free);
1da12ec4 3145 vtd_init(s);
621d983a
MA
3146 sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
3147 pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
cb135f59
PX
3148 /* Pseudo address space under root PCI bus. */
3149 pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
1da12ec4
LT
3150}
3151
3152static void vtd_class_init(ObjectClass *klass, void *data)
3153{
3154 DeviceClass *dc = DEVICE_CLASS(klass);
1c7955c4 3155 X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass);
1da12ec4
LT
3156
3157 dc->reset = vtd_reset;
1da12ec4
LT
3158 dc->vmsd = &vtd_vmstate;
3159 dc->props = vtd_properties;
621d983a 3160 dc->hotpluggable = false;
1c7955c4 3161 x86_class->realize = vtd_realize;
8b5ed7df 3162 x86_class->int_remap = vtd_int_remap;
8ab5700c 3163 /* Supported by the pc-q35-* machine types */
e4f4fb1e 3164 dc->user_creatable = true;
1da12ec4
LT
3165}
3166
3167static const TypeInfo vtd_info = {
3168 .name = TYPE_INTEL_IOMMU_DEVICE,
1c7955c4 3169 .parent = TYPE_X86_IOMMU_DEVICE,
1da12ec4
LT
3170 .instance_size = sizeof(IntelIOMMUState),
3171 .class_init = vtd_class_init,
3172};
3173
3174static void vtd_register_types(void)
3175{
3176 VTD_DPRINTF(GENERAL, "");
3177 type_register_static(&vtd_info);
3178}
3179
3180type_init(vtd_register_types)