[mirror_qemu.git] / hw / i386 / intel_iommu.c (blob 327a46cd19175ab13c0fc0abe776c45da2acbce8)
1 /*
2 * QEMU emulation of an Intel IOMMU (VT-d)
3 * (DMA Remapping device)
4 *
5 * Copyright (C) 2013 Knut Omang, Oracle <knut.omang@oracle.com>
6 * Copyright (C) 2014 Le Tan, <tamlokveer@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include "qemu/osdep.h"
23 #include "qemu/error-report.h"
24 #include "qapi/error.h"
25 #include "hw/sysbus.h"
26 #include "exec/address-spaces.h"
27 #include "intel_iommu_internal.h"
28 #include "hw/pci/pci.h"
29 #include "hw/pci/pci_bus.h"
30 #include "hw/i386/pc.h"
31 #include "hw/i386/apic-msidef.h"
32 #include "hw/boards.h"
33 #include "hw/i386/x86-iommu.h"
34 #include "hw/pci-host/q35.h"
35 #include "sysemu/kvm.h"
36 #include "hw/i386/apic_internal.h"
37 #include "kvm_i386.h"
38 #include "trace.h"
39
40 /*#define DEBUG_INTEL_IOMMU*/
41 #ifdef DEBUG_INTEL_IOMMU
42 enum {
43 DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
44 DEBUG_CACHE, DEBUG_IR,
45 };
46 #define VTD_DBGBIT(x) (1 << DEBUG_##x)
47 static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
48
49 #define VTD_DPRINTF(what, fmt, ...) do { \
50 if (vtd_dbgflags & VTD_DBGBIT(what)) { \
51 fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
52 ## __VA_ARGS__); } \
53 } while (0)
54 #else
55 #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
56 #endif
57
58 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
59 uint64_t wmask, uint64_t w1cmask)
60 {
61 stq_le_p(&s->csr[addr], val);
62 stq_le_p(&s->wmask[addr], wmask);
63 stq_le_p(&s->w1cmask[addr], w1cmask);
64 }
65
66 static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask)
67 {
68 stq_le_p(&s->womask[addr], mask);
69 }
70
71 static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
72 uint32_t wmask, uint32_t w1cmask)
73 {
74 stl_le_p(&s->csr[addr], val);
75 stl_le_p(&s->wmask[addr], wmask);
76 stl_le_p(&s->w1cmask[addr], w1cmask);
77 }
78
79 static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask)
80 {
81 stl_le_p(&s->womask[addr], mask);
82 }
83
84 /* "External" get/set operations */
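/*
 * In the set helpers below, wmask marks the bits software is allowed to
 * write, while w1cmask marks the "write 1 to clear" bits: writing 1 to
 * such a bit clears it and writing 0 leaves it unchanged.
 */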
85 static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
86 {
87 uint64_t oldval = ldq_le_p(&s->csr[addr]);
88 uint64_t wmask = ldq_le_p(&s->wmask[addr]);
89 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
90 stq_le_p(&s->csr[addr],
91 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
92 }
93
94 static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
95 {
96 uint32_t oldval = ldl_le_p(&s->csr[addr]);
97 uint32_t wmask = ldl_le_p(&s->wmask[addr]);
98 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
99 stl_le_p(&s->csr[addr],
100 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
101 }
102
103 static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr)
104 {
105 uint64_t val = ldq_le_p(&s->csr[addr]);
106 uint64_t womask = ldq_le_p(&s->womask[addr]);
107 return val & ~womask;
108 }
109
110 static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr)
111 {
112 uint32_t val = ldl_le_p(&s->csr[addr]);
113 uint32_t womask = ldl_le_p(&s->womask[addr]);
114 return val & ~womask;
115 }
116
117 /* "Internal" get/set operations */
118 static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr)
119 {
120 return ldq_le_p(&s->csr[addr]);
121 }
122
123 static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr)
124 {
125 return ldl_le_p(&s->csr[addr]);
126 }
127
128 static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val)
129 {
130 stq_le_p(&s->csr[addr], val);
131 }
132
133 static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
134 uint32_t clear, uint32_t mask)
135 {
136 uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask;
137 stl_le_p(&s->csr[addr], new_val);
138 return new_val;
139 }
140
141 static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
142 uint64_t clear, uint64_t mask)
143 {
144 uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask;
145 stq_le_p(&s->csr[addr], new_val);
146 return new_val;
147 }
148
149 /* GHashTable functions */
150 static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
151 {
152 return *((const uint64_t *)v1) == *((const uint64_t *)v2);
153 }
154
155 static guint vtd_uint64_hash(gconstpointer v)
156 {
157 return (guint)*(const uint64_t *)v;
158 }
159
160 static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
161 gpointer user_data)
162 {
163 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
164 uint16_t domain_id = *(uint16_t *)user_data;
165 return entry->domain_id == domain_id;
166 }
167
168 /* The shift of an addr for a certain level of paging structure */
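/* e.g. with 9 bits per level and 4K base pages: level 1 -> shift 12,
 * level 2 -> 21, level 3 -> 30, level 4 -> 39.
 */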
169 static inline uint32_t vtd_slpt_level_shift(uint32_t level)
170 {
171 assert(level != 0);
172 return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
173 }
174
175 static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
176 {
177 return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
178 }
179
180 static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
181 gpointer user_data)
182 {
183 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
184 VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
185 uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
186 uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
187 return (entry->domain_id == info->domain_id) &&
188 (((entry->gfn & info->mask) == gfn) ||
189 (entry->gfn == gfn_tlb));
190 }
191
192 /* Reset the context cache generation of every VTDAddressSpace to zero, and set
193 * the context cache generation of IntelIOMMUState to 1.
194 */
195 static void vtd_reset_context_cache(IntelIOMMUState *s)
196 {
197 VTDAddressSpace *vtd_as;
198 VTDBus *vtd_bus;
199 GHashTableIter bus_it;
200 uint32_t devfn_it;
201
202 g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
203
204 VTD_DPRINTF(CACHE, "global context_cache_gen=1");
205 while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
206 for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
207 vtd_as = vtd_bus->dev_as[devfn_it];
208 if (!vtd_as) {
209 continue;
210 }
211 vtd_as->context_cache_entry.context_cache_gen = 0;
212 }
213 }
214 s->context_cache_gen = 1;
215 }
216
217 static void vtd_reset_iotlb(IntelIOMMUState *s)
218 {
219 assert(s->iotlb);
220 g_hash_table_remove_all(s->iotlb);
221 }
222
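/* Compose the IOTLB hash key: the gfn in the low bits, with the source-id
 * and the page-table level folded in above it.
 */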
223 static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
224 uint32_t level)
225 {
226 return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
227 ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
228 }
229
230 static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
231 {
232 return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
233 }
234
235 static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
236 hwaddr addr)
237 {
238 VTDIOTLBEntry *entry;
239 uint64_t key;
240 int level;
241
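    /* Probe once per possible mapping size (4K, 2M, 1G): a hit may have
     * been cached at any of these levels.
     */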
242 for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
243 key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
244 source_id, level);
245 entry = g_hash_table_lookup(s->iotlb, &key);
246 if (entry) {
247 goto out;
248 }
249 }
250
251 out:
252 return entry;
253 }
254
255 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
256 uint16_t domain_id, hwaddr addr, uint64_t slpte,
257 bool read_flags, bool write_flags,
258 uint32_t level)
259 {
260 VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
261 uint64_t *key = g_malloc(sizeof(*key));
262 uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
263
264 trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
265 if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
266 trace_vtd_iotlb_reset("iotlb exceeds size limit");
267 vtd_reset_iotlb(s);
268 }
269
270 entry->gfn = gfn;
271 entry->domain_id = domain_id;
272 entry->slpte = slpte;
273 entry->read_flags = read_flags;
274 entry->write_flags = write_flags;
275 entry->mask = vtd_slpt_level_page_mask(level);
276 *key = vtd_get_iotlb_key(gfn, source_id, level);
277 g_hash_table_replace(s->iotlb, key, entry);
278 }
279
280 /* Given the register offsets of both the message address and data, generate an
281 * interrupt via MSI.
282 */
283 static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
284 hwaddr mesg_data_reg)
285 {
286 MSIMessage msi;
287
288 assert(mesg_data_reg < DMAR_REG_SIZE);
289 assert(mesg_addr_reg < DMAR_REG_SIZE);
290
291 msi.address = vtd_get_long_raw(s, mesg_addr_reg);
292 msi.data = vtd_get_long_raw(s, mesg_data_reg);
293
294 VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32,
295 msi.address, msi.data);
296 apic_get_class()->send_msi(&msi);
297 }
298
299 /* Generate a fault event to software via MSI if conditions are met.
300 * Notice that the value of FSTS_REG being passed to it should be the one
301 * before any update.
302 */
303 static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
304 {
305 if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
306 pre_fsts & VTD_FSTS_IQE) {
307 VTD_DPRINTF(FLOG, "there are previous interrupt conditions "
308 "to be serviced by software, fault event is not generated "
309 "(FSTS_REG 0x%"PRIx32 ")", pre_fsts);
310 return;
311 }
312 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
313 if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
314 VTD_DPRINTF(FLOG, "Interrupt Mask set, fault event is not generated");
315 } else {
316 vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
317 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
318 }
319 }
320
321 /* Check if the Fault (F) field of the Fault Recording Register referenced by
322 * @index is Set.
323 */
324 static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index)
325 {
326 /* Each reg is 128-bit */
327 hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
328 addr += 8; /* Access the high 64-bit half */
329
330 assert(index < DMAR_FRCD_REG_NR);
331
332 return vtd_get_quad_raw(s, addr) & VTD_FRCD_F;
333 }
334
335 /* Update the PPF field of Fault Status Register.
336 * Should be called whenever the F field of any fault recording register
337 * is changed.
338 */
339 static void vtd_update_fsts_ppf(IntelIOMMUState *s)
340 {
341 uint32_t i;
342 uint32_t ppf_mask = 0;
343
344 for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
345 if (vtd_is_frcd_set(s, i)) {
346 ppf_mask = VTD_FSTS_PPF;
347 break;
348 }
349 }
350 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
351 VTD_DPRINTF(FLOG, "set PPF of FSTS_REG to %d", ppf_mask ? 1 : 0);
352 }
353
354 static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
355 {
356 /* Each reg is 128-bit */
357 hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
358 addr += 8; /* Access the high 64-bit half */
359
360 assert(index < DMAR_FRCD_REG_NR);
361
362 vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F);
363 vtd_update_fsts_ppf(s);
364 }
365
366 /* Must not update the F field now; it is set later */
367 static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
368 uint16_t source_id, hwaddr addr,
369 VTDFaultReason fault, bool is_write)
370 {
371 uint64_t hi = 0, lo;
372 hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
373
374 assert(index < DMAR_FRCD_REG_NR);
375
376 lo = VTD_FRCD_FI(addr);
377 hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
378 if (!is_write) {
379 hi |= VTD_FRCD_T;
380 }
381 vtd_set_quad_raw(s, frcd_reg_addr, lo);
382 vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
383 VTD_DPRINTF(FLOG, "record to FRCD_REG #%"PRIu16 ": hi 0x%"PRIx64
384 ", lo 0x%"PRIx64, index, hi, lo);
385 }
386
387 /* Try to collapse multiple pending faults from the same requester */
388 static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
389 {
390 uint32_t i;
391 uint64_t frcd_reg;
392 hwaddr addr = DMAR_FRCD_REG_OFFSET + 8; /* The high 64-bit half */
393
394 for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
395 frcd_reg = vtd_get_quad_raw(s, addr);
396 VTD_DPRINTF(FLOG, "frcd_reg #%d 0x%"PRIx64, i, frcd_reg);
397 if ((frcd_reg & VTD_FRCD_F) &&
398 ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
399 return true;
400 }
401 addr += 16; /* 128-bit for each */
402 }
403 return false;
404 }
405
406 /* Log and report a DMAR (address translation) fault to software */
407 static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
408 hwaddr addr, VTDFaultReason fault,
409 bool is_write)
410 {
411 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
412
413 assert(fault < VTD_FR_MAX);
414
415 if (fault == VTD_FR_RESERVED_ERR) {
416 /* This is not a normal fault reason case. Drop it. */
417 return;
418 }
419 VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64
420 ", is_write %d", source_id, fault, addr, is_write);
421 if (fsts_reg & VTD_FSTS_PFO) {
422 VTD_DPRINTF(FLOG, "new fault is not recorded due to "
423 "Primary Fault Overflow");
424 return;
425 }
426 if (vtd_try_collapse_fault(s, source_id)) {
427 VTD_DPRINTF(FLOG, "new fault is not recorded due to "
428 "compression of faults");
429 return;
430 }
431 if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
432 VTD_DPRINTF(FLOG, "Primary Fault Overflow and "
433 "new fault is not recorded, set PFO field");
434 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
435 return;
436 }
437
438 vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
439
440 if (fsts_reg & VTD_FSTS_PPF) {
441 VTD_DPRINTF(FLOG, "there are pending faults already, "
442 "fault event is not generated");
443 vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
444 s->next_frcd_reg++;
445 if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
446 s->next_frcd_reg = 0;
447 }
448 } else {
449 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK,
450 VTD_FSTS_FRI(s->next_frcd_reg));
451 vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg); /* Will set PPF */
452 s->next_frcd_reg++;
453 if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
454 s->next_frcd_reg = 0;
455 }
456 /* This case actually causes the PPF to be Set.
457 * So generate a fault event (interrupt).
458 */
459 vtd_generate_fault_event(s, fsts_reg);
460 }
461 }
462
463 /* Handle the Invalidation Queue Error (IQE) condition of the queued
464 * invalidation interface.
465 */
466 static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
467 {
468 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
469
470 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE);
471 vtd_generate_fault_event(s, fsts_reg);
472 }
473
474 /* Set the IWC field and try to generate an invalidation completion interrupt */
475 static void vtd_generate_completion_event(IntelIOMMUState *s)
476 {
477 if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
478 trace_vtd_inv_desc_wait_irq("One pending, skip current");
479 return;
480 }
481 vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
482 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
483 if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
484 trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, "
485 "new event not generated");
486 return;
487 } else {
488 /* Generate the interrupt event */
489 trace_vtd_inv_desc_wait_irq("Generating complete event");
490 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
491 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
492 }
493 }
494
495 static inline bool vtd_root_entry_present(VTDRootEntry *root)
496 {
497 return root->val & VTD_ROOT_ENTRY_P;
498 }
499
500 static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
501 VTDRootEntry *re)
502 {
503 dma_addr_t addr;
504
505 addr = s->root + index * sizeof(*re);
506 if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
507 trace_vtd_re_invalid(re->rsvd, re->val);
508 re->val = 0;
509 return -VTD_FR_ROOT_TABLE_INV;
510 }
511 re->val = le64_to_cpu(re->val);
512 return 0;
513 }
514
515 static inline bool vtd_context_entry_present(VTDContextEntry *context)
516 {
517 return context->lo & VTD_CONTEXT_ENTRY_P;
518 }
519
520 static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
521 VTDContextEntry *ce)
522 {
523 dma_addr_t addr;
524
525 /* we have checked that the root entry is present */
526 addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
527 if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
528 trace_vtd_re_invalid(root->rsvd, root->val);
529 return -VTD_FR_CONTEXT_TABLE_INV;
530 }
531 ce->lo = le64_to_cpu(ce->lo);
532 ce->hi = le64_to_cpu(ce->hi);
533 return 0;
534 }
535
536 static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce)
537 {
538 return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
539 }
540
541 static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
542 {
543 return slpte & VTD_SL_PT_BASE_ADDR_MASK;
544 }
545
546 /* Whether the pte indicates the address of the page frame */
547 static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
548 {
549 return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
550 }
551
552 /* Get the content of an slpte located at @base_addr[@index] */
553 static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
554 {
555 uint64_t slpte;
556
557 assert(index < VTD_SL_PT_ENTRY_NR);
558
559 if (dma_memory_read(&address_space_memory,
560 base_addr + index * sizeof(slpte), &slpte,
561 sizeof(slpte))) {
562 slpte = (uint64_t)-1;
563 return slpte;
564 }
565 slpte = le64_to_cpu(slpte);
566 return slpte;
567 }
568
569 /* Given an iova and the level of paging structure, return the offset
570 * within the current level.
571 */
572 static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
573 {
574 return (iova >> vtd_slpt_level_shift(level)) &
575 ((1ULL << VTD_SL_LEVEL_BITS) - 1);
576 }
577
578 /* Check Capability Register to see if the @level of page-table is supported */
579 static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
580 {
581 return VTD_CAP_SAGAW_MASK & s->cap &
582 (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
583 }
584
585 /* Get the page-table level that hardware should use for the second-level
586 * page-table walk from the Address Width field of context-entry.
587 */
588 static inline uint32_t vtd_get_level_from_context_entry(VTDContextEntry *ce)
589 {
590 return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW);
591 }
592
593 static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce)
594 {
595 return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
596 }
597
598 static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
599 {
600 uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
601 return 1ULL << MIN(ce_agaw, VTD_MGAW);
602 }
603
604 /* Return true if IOVA passes range check, otherwise false. */
605 static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
606 {
607 /*
608 * Check if @iova is above 2^X-1, where X is the minimum of MGAW
609 * in CAP_REG and AW in context-entry.
610 */
611 return !(iova & ~(vtd_iova_limit(ce) - 1));
612 }
613
614 static const uint64_t vtd_paging_entry_rsvd_field[] = {
615 [0] = ~0ULL,
616 /* For non-large pages */
617 [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
618 [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
619 [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
620 [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
621 /* For large page */
622 [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
623 [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
624 [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
625 [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM),
626 };
627
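/* Return whether the second-level paging entry has non-zero bits in the
 * positions the table above marks as reserved; large pages use the entries
 * offset by 4.
 */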
628 static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
629 {
630 if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) {
631 /* Maybe large page */
632 return slpte & vtd_paging_entry_rsvd_field[level + 4];
633 } else {
634 return slpte & vtd_paging_entry_rsvd_field[level];
635 }
636 }
637
638 /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
639 * of the translation, which can be used for deciding the size of a large page.
640 */
641 static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
642 uint64_t *slptep, uint32_t *slpte_level,
643 bool *reads, bool *writes)
644 {
645 dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
646 uint32_t level = vtd_get_level_from_context_entry(ce);
647 uint32_t offset;
648 uint64_t slpte;
649 uint64_t access_right_check;
650
651 if (!vtd_iova_range_check(iova, ce)) {
652 VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
653 return -VTD_FR_ADDR_BEYOND_MGAW;
654 }
655
656 /* FIXME: what is the Atomics request here? */
657 access_right_check = is_write ? VTD_SL_W : VTD_SL_R;
658
659 while (true) {
660 offset = vtd_iova_level_offset(iova, level);
661 slpte = vtd_get_slpte(addr, offset);
662
663 if (slpte == (uint64_t)-1) {
664 VTD_DPRINTF(GENERAL, "error: fail to access second-level paging "
665 "entry at level %"PRIu32 " for iova 0x%"PRIx64,
666 level, iova);
667 if (level == vtd_get_level_from_context_entry(ce)) {
668 /* Invalid programming of context-entry */
669 return -VTD_FR_CONTEXT_ENTRY_INV;
670 } else {
671 return -VTD_FR_PAGING_ENTRY_INV;
672 }
673 }
674 *reads = (*reads) && (slpte & VTD_SL_R);
675 *writes = (*writes) && (slpte & VTD_SL_W);
676 if (!(slpte & access_right_check)) {
677 VTD_DPRINTF(GENERAL, "error: lack of %s permission for "
678 "iova 0x%"PRIx64 " slpte 0x%"PRIx64,
679 (is_write ? "write" : "read"), iova, slpte);
680 return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
681 }
682 if (vtd_slpte_nonzero_rsvd(slpte, level)) {
683 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in second "
684 "level paging entry level %"PRIu32 " slpte 0x%"PRIx64,
685 level, slpte);
686 return -VTD_FR_PAGING_ENTRY_RSVD;
687 }
688
689 if (vtd_is_last_slpte(slpte, level)) {
690 *slptep = slpte;
691 *slpte_level = level;
692 return 0;
693 }
694 addr = vtd_get_slpte_addr(slpte);
695 level--;
696 }
697 }
698
699 typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
700
701 /**
702 * vtd_page_walk_level - walk over specific level for IOVA range
703 *
704 * @addr: base GPA addr to start the walk
705 * @start: IOVA range start address
706 * @end: IOVA range end address (start <= addr < end)
707 * @hook_fn: hook function to be called on each detected page
708 * @private: private data to be passed into hook func
709 * @read: whether parent level has read permission
710 * @write: whether parent level has write permission
711 * @notify_unmap: whether we should notify invalid entries
712 */
713 static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
714 uint64_t end, vtd_page_walk_hook hook_fn,
715 void *private, uint32_t level,
716 bool read, bool write, bool notify_unmap)
717 {
718 bool read_cur, write_cur, entry_valid;
719 uint32_t offset;
720 uint64_t slpte;
721 uint64_t subpage_size, subpage_mask;
722 IOMMUTLBEntry entry;
723 uint64_t iova = start;
724 uint64_t iova_next;
725 int ret = 0;
726
727 trace_vtd_page_walk_level(addr, level, start, end);
728
729 subpage_size = 1ULL << vtd_slpt_level_shift(level);
730 subpage_mask = vtd_slpt_level_page_mask(level);
731
732 while (iova < end) {
733 iova_next = (iova & subpage_mask) + subpage_size;
734
735 offset = vtd_iova_level_offset(iova, level);
736 slpte = vtd_get_slpte(addr, offset);
737
738 if (slpte == (uint64_t)-1) {
739 trace_vtd_page_walk_skip_read(iova, iova_next);
740 goto next;
741 }
742
743 if (vtd_slpte_nonzero_rsvd(slpte, level)) {
744 trace_vtd_page_walk_skip_reserve(iova, iova_next);
745 goto next;
746 }
747
748 /* Permissions are stacked with parents' */
749 read_cur = read && (slpte & VTD_SL_R);
750 write_cur = write && (slpte & VTD_SL_W);
751
752 /*
753 * As long as we have either read/write permission, this is a
754 * valid entry. The rule works for both page entries and page
755 * table entries.
756 */
757 entry_valid = read_cur | write_cur;
758
759 if (vtd_is_last_slpte(slpte, level)) {
760 entry.target_as = &address_space_memory;
761 entry.iova = iova & subpage_mask;
762 /* NOTE: this is only meaningful if entry_valid == true */
763 entry.translated_addr = vtd_get_slpte_addr(slpte);
764 entry.addr_mask = ~subpage_mask;
765 entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
766 if (!entry_valid && !notify_unmap) {
767 trace_vtd_page_walk_skip_perm(iova, iova_next);
768 goto next;
769 }
770 trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr,
771 entry.addr_mask, entry.perm);
772 if (hook_fn) {
773 ret = hook_fn(&entry, private);
774 if (ret < 0) {
775 return ret;
776 }
777 }
778 } else {
779 if (!entry_valid) {
780 trace_vtd_page_walk_skip_perm(iova, iova_next);
781 goto next;
782 }
783 ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova,
784 MIN(iova_next, end), hook_fn, private,
785 level - 1, read_cur, write_cur,
786 notify_unmap);
787 if (ret < 0) {
788 return ret;
789 }
790 }
791
792 next:
793 iova = iova_next;
794 }
795
796 return 0;
797 }
798
799 /**
800 * vtd_page_walk - walk specific IOVA range, and call the hook
801 *
802 * @ce: context entry to walk upon
803 * @start: IOVA address to start the walk
804 * @end: IOVA range end address (start <= addr < end)
805 * @hook_fn: the hook to be called for each detected area
806 * @private: private data for the hook function
807 */
808 static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
809 vtd_page_walk_hook hook_fn, void *private,
810 bool notify_unmap)
811 {
812 dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
813 uint32_t level = vtd_get_level_from_context_entry(ce);
814
815 if (!vtd_iova_range_check(start, ce)) {
816 return -VTD_FR_ADDR_BEYOND_MGAW;
817 }
818
819 if (!vtd_iova_range_check(end, ce)) {
820 /* Fix end so that it reaches the maximum */
821 end = vtd_iova_limit(ce);
822 }
823
824 return vtd_page_walk_level(addr, start, end, hook_fn, private,
825 level, true, true, notify_unmap);
826 }
827
828 /* Map a device to its corresponding domain (context-entry) */
829 static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
830 uint8_t devfn, VTDContextEntry *ce)
831 {
832 VTDRootEntry re;
833 int ret_fr;
834
835 ret_fr = vtd_get_root_entry(s, bus_num, &re);
836 if (ret_fr) {
837 return ret_fr;
838 }
839
840 if (!vtd_root_entry_present(&re)) {
841 /* Not an error - it's okay if we don't have a root entry. */
842 trace_vtd_re_not_present(bus_num);
843 return -VTD_FR_ROOT_ENTRY_P;
844 } else if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) {
845 trace_vtd_re_invalid(re.rsvd, re.val);
846 return -VTD_FR_ROOT_ENTRY_RSVD;
847 }
848
849 ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce);
850 if (ret_fr) {
851 return ret_fr;
852 }
853
854 if (!vtd_context_entry_present(ce)) {
855 /* Not an error - it's okay if we don't have a context entry. */
856 trace_vtd_ce_not_present(bus_num, devfn);
857 return -VTD_FR_CONTEXT_ENTRY_P;
858 } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
859 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
860 trace_vtd_ce_invalid(ce->hi, ce->lo);
861 return -VTD_FR_CONTEXT_ENTRY_RSVD;
862 }
863 /* Check if the programming of context-entry is valid */
864 if (!vtd_is_level_supported(s, vtd_get_level_from_context_entry(ce))) {
865 trace_vtd_ce_invalid(ce->hi, ce->lo);
866 return -VTD_FR_CONTEXT_ENTRY_INV;
867 } else {
868 switch (ce->lo & VTD_CONTEXT_ENTRY_TT) {
869 case VTD_CONTEXT_TT_MULTI_LEVEL:
870 /* fall through */
871 case VTD_CONTEXT_TT_DEV_IOTLB:
872 break;
873 default:
874 trace_vtd_ce_invalid(ce->hi, ce->lo);
875 return -VTD_FR_CONTEXT_ENTRY_INV;
876 }
877 }
878 return 0;
879 }
880
881 static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
882 {
883 return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
884 }
885
886 static const bool vtd_qualified_faults[] = {
887 [VTD_FR_RESERVED] = false,
888 [VTD_FR_ROOT_ENTRY_P] = false,
889 [VTD_FR_CONTEXT_ENTRY_P] = true,
890 [VTD_FR_CONTEXT_ENTRY_INV] = true,
891 [VTD_FR_ADDR_BEYOND_MGAW] = true,
892 [VTD_FR_WRITE] = true,
893 [VTD_FR_READ] = true,
894 [VTD_FR_PAGING_ENTRY_INV] = true,
895 [VTD_FR_ROOT_TABLE_INV] = false,
896 [VTD_FR_CONTEXT_TABLE_INV] = false,
897 [VTD_FR_ROOT_ENTRY_RSVD] = false,
898 [VTD_FR_PAGING_ENTRY_RSVD] = true,
899 [VTD_FR_CONTEXT_ENTRY_TT] = true,
900 [VTD_FR_RESERVED_ERR] = false,
901 [VTD_FR_MAX] = false,
902 };
903
904 /* Check whether a fault condition is "qualified", i.e. reported to software
905 * only if the FPD field in the context-entry used to process the faulting
906 * request is 0.
907 */
908 static inline bool vtd_is_qualified_fault(VTDFaultReason fault)
909 {
910 return vtd_qualified_faults[fault];
911 }
912
913 static inline bool vtd_is_interrupt_addr(hwaddr addr)
914 {
915 return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
916 }
917
918 /* Map a device to its context-entry, then do a paging-structures walk to do an
919 * IOMMU translation.
920 *
921 * Called from RCU critical section.
922 *
923 * @bus_num: The bus number
924 * @devfn: The devfn, which is the combined device and function number
925 * @is_write: The access is a write operation
926 * @entry: IOMMUTLBEntry that contains the addr to be translated and the result
927 */
928 static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
929 uint8_t devfn, hwaddr addr, bool is_write,
930 IOMMUTLBEntry *entry)
931 {
932 IntelIOMMUState *s = vtd_as->iommu_state;
933 VTDContextEntry ce;
934 uint8_t bus_num = pci_bus_num(bus);
935 VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
936 uint64_t slpte, page_mask;
937 uint32_t level;
938 uint16_t source_id = vtd_make_source_id(bus_num, devfn);
939 int ret_fr;
940 bool is_fpd_set = false;
941 bool reads = true;
942 bool writes = true;
943 VTDIOTLBEntry *iotlb_entry;
944
945 /*
946 * We have a standalone memory region for interrupt addresses; we
947 * should never receive translation requests in this region.
948 */
949 assert(!vtd_is_interrupt_addr(addr));
950
951 /* Try to fetch slpte from the IOTLB */
952 iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
953 if (iotlb_entry) {
954 trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
955 iotlb_entry->domain_id);
956 slpte = iotlb_entry->slpte;
957 reads = iotlb_entry->read_flags;
958 writes = iotlb_entry->write_flags;
959 page_mask = iotlb_entry->mask;
960 goto out;
961 }
962 /* Try to fetch context-entry from cache first */
963 if (cc_entry->context_cache_gen == s->context_cache_gen) {
964 trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
965 cc_entry->context_entry.lo,
966 cc_entry->context_cache_gen);
967 ce = cc_entry->context_entry;
968 is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
969 } else {
970 ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
971 is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
972 if (ret_fr) {
973 ret_fr = -ret_fr;
974 if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
975 trace_vtd_fault_disabled();
976 } else {
977 vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
978 }
979 return;
980 }
981 /* Update context-cache */
982 trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
983 cc_entry->context_cache_gen,
984 s->context_cache_gen);
985 cc_entry->context_entry = ce;
986 cc_entry->context_cache_gen = s->context_cache_gen;
987 }
988
989 ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
990 &reads, &writes);
991 if (ret_fr) {
992 ret_fr = -ret_fr;
993 if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
994 trace_vtd_fault_disabled();
995 } else {
996 vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
997 }
998 return;
999 }
1000
1001 page_mask = vtd_slpt_level_page_mask(level);
1002 vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
1003 reads, writes, level);
1004 out:
1005 entry->iova = addr & page_mask;
1006 entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
1007 entry->addr_mask = ~page_mask;
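    /* Compose the access flags: bit 0 for read, bit 1 for write */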
1008 entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0);
1009 }
1010
1011 static void vtd_root_table_setup(IntelIOMMUState *s)
1012 {
1013 s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
1014 s->root_extended = s->root & VTD_RTADDR_RTT;
1015 s->root &= VTD_RTADDR_ADDR_MASK;
1016
1017 VTD_DPRINTF(CSR, "root_table addr 0x%"PRIx64 " %s", s->root,
1018 (s->root_extended ? "(extended)" : ""));
1019 }
1020
1021 static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
1022 uint32_t index, uint32_t mask)
1023 {
1024 x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
1025 }
1026
1027 static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
1028 {
1029 uint64_t value = 0;
1030 value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
1031 s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
1032 s->intr_root = value & VTD_IRTA_ADDR_MASK;
1033 s->intr_eime = value & VTD_IRTA_EIME;
1034
1035 /* Notify global invalidation */
1036 vtd_iec_notify_all(s, true, 0, 0);
1037
1038 VTD_DPRINTF(CSR, "int remap table addr 0x%"PRIx64 " size %"PRIu32,
1039 s->intr_root, s->intr_size);
1040 }
1041
1042 static void vtd_iommu_replay_all(IntelIOMMUState *s)
1043 {
1044 IntelIOMMUNotifierNode *node;
1045
1046 QLIST_FOREACH(node, &s->notifiers_list, next) {
1047 memory_region_iommu_replay_all(&node->vtd_as->iommu);
1048 }
1049 }
1050
1051 static void vtd_context_global_invalidate(IntelIOMMUState *s)
1052 {
1053 trace_vtd_inv_desc_cc_global();
1054 s->context_cache_gen++;
1055 if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
1056 vtd_reset_context_cache(s);
1057 }
1058 /*
1059 * From VT-d spec 6.5.2.1, a global context entry invalidation
1060 * should be followed by an IOTLB global invalidation, so we should
1061 * be safe even without this. However, let's replay the region as
1062 * well to be safer, and revisit this when we need finer tuning for
1063 * the VT-d emulation code.
1064 */
1065 vtd_iommu_replay_all(s);
1066 }
1067
1068
1069 /* Find the VTD address spaces (VTDBus) currently associated with a given bus number.
1070 */
1071 static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
1072 {
1073 VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
1074 if (!vtd_bus) {
1075 /* Iterate over the registered buses to find the one
1076 * which currently holds this bus number, and update the bus_num lookup table.
1077 */
1078 GHashTableIter iter;
1079
1080 g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
1081 while (g_hash_table_iter_next (&iter, NULL, (void**)&vtd_bus)) {
1082 if (pci_bus_num(vtd_bus->bus) == bus_num) {
1083 s->vtd_as_by_bus_num[bus_num] = vtd_bus;
1084 return vtd_bus;
1085 }
1086 }
1087 }
1088 return vtd_bus;
1089 }
1090
1091 /* Do a context-cache device-selective invalidation.
1092 * @func_mask: FM field after shifting
1093 */
1094 static void vtd_context_device_invalidate(IntelIOMMUState *s,
1095 uint16_t source_id,
1096 uint16_t func_mask)
1097 {
1098 uint16_t mask;
1099 VTDBus *vtd_bus;
1100 VTDAddressSpace *vtd_as;
1101 uint8_t bus_n, devfn;
1102 uint16_t devfn_it;
1103
1104 trace_vtd_inv_desc_cc_devices(source_id, func_mask);
1105
1106 switch (func_mask & 3) {
1107 case 0:
1108 mask = 0; /* No bits in the SID field masked */
1109 break;
1110 case 1:
1111 mask = 4; /* Mask bit 2 in the SID field */
1112 break;
1113 case 2:
1114 mask = 6; /* Mask bit 2:1 in the SID field */
1115 break;
1116 case 3:
1117 mask = 7; /* Mask bit 2:0 in the SID field */
1118 break;
1119 }
1120 mask = ~mask;
1121
1122 bus_n = VTD_SID_TO_BUS(source_id);
1123 vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
1124 if (vtd_bus) {
1125 devfn = VTD_SID_TO_DEVFN(source_id);
1126 for (devfn_it = 0; devfn_it < X86_IOMMU_PCI_DEVFN_MAX; ++devfn_it) {
1127 vtd_as = vtd_bus->dev_as[devfn_it];
1128 if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
1129 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
1130 VTD_PCI_FUNC(devfn_it));
1131 vtd_as->context_cache_entry.context_cache_gen = 0;
1132 /*
1133 * A device is moving out of (or into) a domain, so a
1134 * replay() suits here to notify all the registered
1135 * IOMMU_NOTIFIER_MAP notifiers about this change.
1136 * This does no harm even if no such notifier is
1137 * registered - the IOMMU notification framework
1138 * will simply skip MAP notifications in that
1139 * case.
1140 */
1141 memory_region_iommu_replay_all(&vtd_as->iommu);
1142 }
1143 }
1144 }
1145 }
1146
1147 /* Context-cache invalidation
1148 * Returns the Context Actual Invalidation Granularity.
1149 * @val: the content of the CCMD_REG
1150 */
1151 static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
1152 {
1153 uint64_t caig;
1154 uint64_t type = val & VTD_CCMD_CIRG_MASK;
1155
1156 switch (type) {
1157 case VTD_CCMD_DOMAIN_INVL:
1158 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
1159 (uint16_t)VTD_CCMD_DID(val));
1160 /* Fall through */
1161 case VTD_CCMD_GLOBAL_INVL:
1162 VTD_DPRINTF(INV, "global invalidation");
1163 caig = VTD_CCMD_GLOBAL_INVL_A;
1164 vtd_context_global_invalidate(s);
1165 break;
1166
1167 case VTD_CCMD_DEVICE_INVL:
1168 caig = VTD_CCMD_DEVICE_INVL_A;
1169 vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val));
1170 break;
1171
1172 default:
1173 VTD_DPRINTF(GENERAL, "error: invalid granularity");
1174 caig = 0;
1175 }
1176 return caig;
1177 }
1178
1179 static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
1180 {
1181 trace_vtd_iotlb_reset("global invalidation recved");
1182 vtd_reset_iotlb(s);
1183 vtd_iommu_replay_all(s);
1184 }
1185
1186 static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
1187 {
1188 IntelIOMMUNotifierNode *node;
1189 VTDContextEntry ce;
1190 VTDAddressSpace *vtd_as;
1191
1192 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
1193 &domain_id);
1194
1195 QLIST_FOREACH(node, &s->notifiers_list, next) {
1196 vtd_as = node->vtd_as;
1197 if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
1198 vtd_as->devfn, &ce) &&
1199 domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
1200 memory_region_iommu_replay_all(&vtd_as->iommu);
1201 }
1202 }
1203 }
1204
1205 static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry,
1206 void *private)
1207 {
1208 memory_region_notify_iommu((MemoryRegion *)private, *entry);
1209 return 0;
1210 }
1211
1212 static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
1213 uint16_t domain_id, hwaddr addr,
1214 uint8_t am)
1215 {
1216 IntelIOMMUNotifierNode *node;
1217 VTDContextEntry ce;
1218 int ret;
1219
1220 QLIST_FOREACH(node, &(s->notifiers_list), next) {
1221 VTDAddressSpace *vtd_as = node->vtd_as;
1222 ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
1223 vtd_as->devfn, &ce);
1224 if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
1225 vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE,
1226 vtd_page_invalidate_notify_hook,
1227 (void *)&vtd_as->iommu, true);
1228 }
1229 }
1230 }
1231
1232 static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
1233 hwaddr addr, uint8_t am)
1234 {
1235 VTDIOTLBPageInvInfo info;
1236
1237 assert(am <= VTD_MAMV);
1238 info.domain_id = domain_id;
1239 info.addr = addr;
1240 info.mask = ~((1 << am) - 1);
1241 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
1242 vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
1243 }
1244
1245 /* Flush IOTLB
1246 * Returns the IOTLB Actual Invalidation Granularity.
1247 * @val: the content of the IOTLB_REG
1248 */
1249 static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
1250 {
1251 uint64_t iaig;
1252 uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK;
1253 uint16_t domain_id;
1254 hwaddr addr;
1255 uint8_t am;
1256
1257 switch (type) {
1258 case VTD_TLB_GLOBAL_FLUSH:
1259 VTD_DPRINTF(INV, "global invalidation");
1260 iaig = VTD_TLB_GLOBAL_FLUSH_A;
1261 vtd_iotlb_global_invalidate(s);
1262 break;
1263
1264 case VTD_TLB_DSI_FLUSH:
1265 domain_id = VTD_TLB_DID(val);
1266 VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
1267 domain_id);
1268 iaig = VTD_TLB_DSI_FLUSH_A;
1269 vtd_iotlb_domain_invalidate(s, domain_id);
1270 break;
1271
1272 case VTD_TLB_PSI_FLUSH:
1273 domain_id = VTD_TLB_DID(val);
1274 addr = vtd_get_quad_raw(s, DMAR_IVA_REG);
1275 am = VTD_IVA_AM(addr);
1276 addr = VTD_IVA_ADDR(addr);
1277 VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
1278 " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
1279 if (am > VTD_MAMV) {
1280 VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
1281 "%"PRIu8, (uint8_t)VTD_MAMV);
1282 iaig = 0;
1283 break;
1284 }
1285 iaig = VTD_TLB_PSI_FLUSH_A;
1286 vtd_iotlb_page_invalidate(s, domain_id, addr, am);
1287 break;
1288
1289 default:
1290 VTD_DPRINTF(GENERAL, "error: invalid granularity");
1291 iaig = 0;
1292 }
1293 return iaig;
1294 }
1295
1296 static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s)
1297 {
1298 return s->iq_tail == 0;
1299 }
1300
1301 static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s)
1302 {
1303 return s->qi_enabled && (s->iq_tail == s->iq_head) &&
1304 (s->iq_last_desc_type == VTD_INV_DESC_WAIT);
1305 }
1306
1307 static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
1308 {
1309 uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG);
1310
1311 VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off"));
1312 if (en) {
1313 if (vtd_queued_inv_enable_check(s)) {
1314 s->iq = iqa_val & VTD_IQA_IQA_MASK;
1315 /* 2^(x+8) entries */
1316 s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
1317 s->qi_enabled = true;
1318 VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val);
1319 VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d",
1320 s->iq, s->iq_size);
1321 /* Ok - report back to driver */
1322 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
1323 } else {
1324 VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: "
1325 "tail %"PRIu16, s->iq_tail);
1326 }
1327 } else {
1328 if (vtd_queued_inv_disable_check(s)) {
1329 /* disable Queued Invalidation */
1330 vtd_set_quad_raw(s, DMAR_IQH_REG, 0);
1331 s->iq_head = 0;
1332 s->qi_enabled = false;
1333 /* Ok - report back to driver */
1334 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
1335 } else {
1336 VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: "
1337 "head %"PRIu16 ", tail %"PRIu16
1338 ", last_descriptor %"PRIu8,
1339 s->iq_head, s->iq_tail, s->iq_last_desc_type);
1340 }
1341 }
1342 }
1343
1344 /* Set Root Table Pointer */
1345 static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
1346 {
1347 VTD_DPRINTF(CSR, "set Root Table Pointer");
1348
1349 vtd_root_table_setup(s);
1350 /* Ok - report back to driver */
1351 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
1352 }
1353
1354 /* Set Interrupt Remap Table Pointer */
1355 static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
1356 {
1357 VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer");
1358
1359 vtd_interrupt_remap_table_setup(s);
1360 /* Ok - report back to driver */
1361 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
1362 }
1363
1364 static void vtd_switch_address_space(VTDAddressSpace *as)
1365 {
1366 assert(as);
1367
1368 trace_vtd_switch_address_space(pci_bus_num(as->bus),
1369 VTD_PCI_SLOT(as->devfn),
1370 VTD_PCI_FUNC(as->devfn),
1371 as->iommu_state->dmar_enabled);
1372
1373 /* Turn one region off first, then turn the other on */
1374 if (as->iommu_state->dmar_enabled) {
1375 memory_region_set_enabled(&as->sys_alias, false);
1376 memory_region_set_enabled(&as->iommu, true);
1377 } else {
1378 memory_region_set_enabled(&as->iommu, false);
1379 memory_region_set_enabled(&as->sys_alias, true);
1380 }
1381 }
1382
1383 static void vtd_switch_address_space_all(IntelIOMMUState *s)
1384 {
1385 GHashTableIter iter;
1386 VTDBus *vtd_bus;
1387 int i;
1388
1389 g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
1390 while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
1391 for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) {
1392 if (!vtd_bus->dev_as[i]) {
1393 continue;
1394 }
1395 vtd_switch_address_space(vtd_bus->dev_as[i]);
1396 }
1397 }
1398 }
1399
1400 /* Handle Translation Enable/Disable */
1401 static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
1402 {
1403 if (s->dmar_enabled == en) {
1404 return;
1405 }
1406
1407 VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off"));
1408
1409 if (en) {
1410 s->dmar_enabled = true;
1411 /* Ok - report back to driver */
1412 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES);
1413 } else {
1414 s->dmar_enabled = false;
1415
1416 /* Clear the index of Fault Recording Register */
1417 s->next_frcd_reg = 0;
1418 /* Ok - report back to driver */
1419 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
1420 }
1421
1422 vtd_switch_address_space_all(s);
1423 }
1424
1425 /* Handle Interrupt Remap Enable/Disable */
1426 static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
1427 {
1428 VTD_DPRINTF(CSR, "Interrupt Remap Enable %s", (en ? "on" : "off"));
1429
1430 if (en) {
1431 s->intr_enabled = true;
1432 /* Ok - report back to driver */
1433 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES);
1434 } else {
1435 s->intr_enabled = false;
1436 /* Ok - report back to driver */
1437 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0);
1438 }
1439 }
1440
1441 /* Handle write to Global Command Register */
1442 static void vtd_handle_gcmd_write(IntelIOMMUState *s)
1443 {
1444 uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
1445 uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
1446 uint32_t changed = status ^ val;
1447
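    /* The one-shot commands (SRTP/SIRTP) act on any write with the bit set;
     * the level-sensitive bits (TE/QIE/IRE) are handled only when toggled.
     */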
1448 VTD_DPRINTF(CSR, "value 0x%"PRIx32 " status 0x%"PRIx32, val, status);
1449 if (changed & VTD_GCMD_TE) {
1450 /* Translation enable/disable */
1451 vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
1452 }
1453 if (val & VTD_GCMD_SRTP) {
1454 /* Set/update the root-table pointer */
1455 vtd_handle_gcmd_srtp(s);
1456 }
1457 if (changed & VTD_GCMD_QIE) {
1458 /* Queued Invalidation Enable */
1459 vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
1460 }
1461 if (val & VTD_GCMD_SIRTP) {
1462 /* Set/update the interrupt remapping root-table pointer */
1463 vtd_handle_gcmd_sirtp(s);
1464 }
1465 if (changed & VTD_GCMD_IRE) {
1466 /* Interrupt remap enable/disable */
1467 vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
1468 }
1469 }
1470
1471 /* Handle write to Context Command Register */
1472 static void vtd_handle_ccmd_write(IntelIOMMUState *s)
1473 {
1474 uint64_t ret;
1475 uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG);
1476
1477 /* Context-cache invalidation request */
1478 if (val & VTD_CCMD_ICC) {
1479 if (s->qi_enabled) {
1480 VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
1481 "should not use register-based invalidation");
1482 return;
1483 }
1484 ret = vtd_context_cache_invalidate(s, val);
1485 /* Invalidation completed. Clear ICC and report the actual granularity back */
1486 vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL);
1487 ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK,
1488 ret);
1489 VTD_DPRINTF(INV, "CCMD_REG write-back val: 0x%"PRIx64, ret);
1490 }
1491 }
1492
1493 /* Handle write to IOTLB Invalidation Register */
1494 static void vtd_handle_iotlb_write(IntelIOMMUState *s)
1495 {
1496 uint64_t ret;
1497 uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG);
1498
1499 /* IOTLB invalidation request */
1500 if (val & VTD_TLB_IVT) {
1501 if (s->qi_enabled) {
1502 VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
1503 "should not use register-based invalidation");
1504 return;
1505 }
1506 ret = vtd_iotlb_flush(s, val);
1507 /* Invalidation completed. Clear IVT and report the actual granularity back */
1508 vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL);
1509 ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG,
1510 VTD_TLB_FLUSH_GRANU_MASK_A, ret);
1511 VTD_DPRINTF(INV, "IOTLB_REG write-back val: 0x%"PRIx64, ret);
1512 }
1513 }
1514
1515 /* Fetch an Invalidation Descriptor from the Invalidation Queue */
1516 static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset,
1517 VTDInvDesc *inv_desc)
1518 {
1519 dma_addr_t addr = base_addr + offset * sizeof(*inv_desc);
1520 if (dma_memory_read(&address_space_memory, addr, inv_desc,
1521 sizeof(*inv_desc))) {
1522 VTD_DPRINTF(GENERAL, "error: fail to fetch Invalidation Descriptor "
1523 "base_addr 0x%"PRIx64 " offset %"PRIu32, base_addr, offset);
1524 inv_desc->lo = 0;
1525 inv_desc->hi = 0;
1526
1527 return false;
1528 }
1529 inv_desc->lo = le64_to_cpu(inv_desc->lo);
1530 inv_desc->hi = le64_to_cpu(inv_desc->hi);
1531 return true;
1532 }
1533
1534 static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
1535 {
1536 if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
1537 (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
1538 trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
1539 return false;
1540 }
1541 if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
1542 /* Status Write */
1543 uint32_t status_data = (uint32_t)(inv_desc->lo >>
1544 VTD_INV_DESC_WAIT_DATA_SHIFT);
1545
1546 assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF));
1547
1548 /* FIXME: need to be masked with HAW? */
1549 dma_addr_t status_addr = inv_desc->hi;
1550 trace_vtd_inv_desc_wait_sw(status_addr, status_data);
1551 status_data = cpu_to_le32(status_data);
1552 if (dma_memory_write(&address_space_memory, status_addr, &status_data,
1553 sizeof(status_data))) {
1554 trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
1555 return false;
1556 }
1557 } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
1558 /* Interrupt flag */
1559 vtd_generate_completion_event(s);
1560 } else {
1561 trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
1562 return false;
1563 }
1564 return true;
1565 }
1566
1567 static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
1568 VTDInvDesc *inv_desc)
1569 {
1570 uint16_t sid, fmask;
1571
1572 if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
1573 trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
1574 return false;
1575 }
1576 switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
1577 case VTD_INV_DESC_CC_DOMAIN:
1578 trace_vtd_inv_desc_cc_domain(
1579 (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
1580 /* Fall through */
1581 case VTD_INV_DESC_CC_GLOBAL:
1582 vtd_context_global_invalidate(s);
1583 break;
1584
1585 case VTD_INV_DESC_CC_DEVICE:
1586 sid = VTD_INV_DESC_CC_SID(inv_desc->lo);
1587 fmask = VTD_INV_DESC_CC_FM(inv_desc->lo);
1588 vtd_context_device_invalidate(s, sid, fmask);
1589 break;
1590
1591 default:
1592 trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
1593 return false;
1594 }
1595 return true;
1596 }
1597
1598 static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
1599 {
1600 uint16_t domain_id;
1601 uint8_t am;
1602 hwaddr addr;
1603
1604 if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
1605 (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
1606 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
1607 return false;
1608 }
1609
1610 switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
1611 case VTD_INV_DESC_IOTLB_GLOBAL:
1612 trace_vtd_inv_desc_iotlb_global();
1613 vtd_iotlb_global_invalidate(s);
1614 break;
1615
1616 case VTD_INV_DESC_IOTLB_DOMAIN:
1617 domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
1618 trace_vtd_inv_desc_iotlb_domain(domain_id);
1619 vtd_iotlb_domain_invalidate(s, domain_id);
1620 break;
1621
1622 case VTD_INV_DESC_IOTLB_PAGE:
1623 domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
1624 addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
1625 am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
1626 trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
1627 if (am > VTD_MAMV) {
1628 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
1629 return false;
1630 }
1631 vtd_iotlb_page_invalidate(s, domain_id, addr, am);
1632 break;
1633
1634 default:
1635 trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
1636 return false;
1637 }
1638 return true;
1639 }
1640
1641 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
1642 VTDInvDesc *inv_desc)
1643 {
1644 VTD_DPRINTF(INV, "inv ir glob %d index %d mask %d",
1645 inv_desc->iec.granularity,
1646 inv_desc->iec.index,
1647 inv_desc->iec.index_mask);
1648
1649 vtd_iec_notify_all(s, !inv_desc->iec.granularity,
1650 inv_desc->iec.index,
1651 inv_desc->iec.index_mask);
1652 return true;
1653 }
1654
1655 static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
1656 VTDInvDesc *inv_desc)
1657 {
1658 VTDAddressSpace *vtd_dev_as;
1659 IOMMUTLBEntry entry;
1660 struct VTDBus *vtd_bus;
1661 hwaddr addr;
1662 uint64_t sz;
1663 uint16_t sid;
1664 uint8_t devfn;
1665 bool size;
1666 uint8_t bus_num;
1667
1668 addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
1669 sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
1670 devfn = sid & 0xff;
1671 bus_num = sid >> 8;
1672 size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
1673
1674 if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
1675 (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
1676 VTD_DPRINTF(GENERAL, "error: non-zero reserved field in Device "
1677 "IOTLB Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
1678 inv_desc->hi, inv_desc->lo);
1679 return false;
1680 }
1681
1682 vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
1683 if (!vtd_bus) {
1684 goto done;
1685 }
1686
1687 vtd_dev_as = vtd_bus->dev_as[devfn];
1688 if (!vtd_dev_as) {
1689 goto done;
1690 }
1691
1692 /* According to ATS spec table 2.4:
1693 * S = 0, bits 15:12 = xxxx range size: 4K
1694 * S = 1, bits 15:12 = xxx0 range size: 8K
1695 * S = 1, bits 15:12 = xx01 range size: 16K
1696 * S = 1, bits 15:12 = x011 range size: 32K
1697 * S = 1, bits 15:12 = 0111 range size: 64K
1698 * ...
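 * For S = 1 the number of trailing 1 bits starting at bit 12 encodes
 * log2(size / 8K); e.g. bits 15:12 = 0111 gives 8K << 3 = 64K.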
1699 */
1700 if (size) {
1701 sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
1702 addr &= ~(sz - 1);
1703 } else {
1704 sz = VTD_PAGE_SIZE;
1705 }
1706
1707 entry.target_as = &vtd_dev_as->as;
1708 entry.addr_mask = sz - 1;
1709 entry.iova = addr;
1710 entry.perm = IOMMU_NONE;
1711 entry.translated_addr = 0;
1712 memory_region_notify_iommu(&vtd_dev_as->iommu, entry);
1713
1714 done:
1715 return true;
1716 }
1717
1718 static bool vtd_process_inv_desc(IntelIOMMUState *s)
1719 {
1720 VTDInvDesc inv_desc;
1721 uint8_t desc_type;
1722
1723 VTD_DPRINTF(INV, "iq head %"PRIu16, s->iq_head);
1724 if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) {
1725 s->iq_last_desc_type = VTD_INV_DESC_NONE;
1726 return false;
1727 }
1728 desc_type = inv_desc.lo & VTD_INV_DESC_TYPE;
1729 /* FIXME: should update at first or at last? */
1730 s->iq_last_desc_type = desc_type;
1731
1732 switch (desc_type) {
1733 case VTD_INV_DESC_CC:
1734 trace_vtd_inv_desc("context-cache", inv_desc.hi, inv_desc.lo);
1735 if (!vtd_process_context_cache_desc(s, &inv_desc)) {
1736 return false;
1737 }
1738 break;
1739
1740 case VTD_INV_DESC_IOTLB:
1741 trace_vtd_inv_desc("iotlb", inv_desc.hi, inv_desc.lo);
1742 if (!vtd_process_iotlb_desc(s, &inv_desc)) {
1743 return false;
1744 }
1745 break;
1746
1747 case VTD_INV_DESC_WAIT:
1748 trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
1749 if (!vtd_process_wait_desc(s, &inv_desc)) {
1750 return false;
1751 }
1752 break;
1753
1754 case VTD_INV_DESC_IEC:
1755 trace_vtd_inv_desc("iec", inv_desc.hi, inv_desc.lo);
1756 if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
1757 return false;
1758 }
1759 break;
1760
1761 case VTD_INV_DESC_DEVICE:
1762 VTD_DPRINTF(INV, "Device IOTLB Invalidation Descriptor hi 0x%"PRIx64
1763 " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
1764 if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
1765 return false;
1766 }
1767 break;
1768
1769 default:
1770 trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
1771 return false;
1772 }
1773 s->iq_head++;
1774 if (s->iq_head == s->iq_size) {
1775 s->iq_head = 0;
1776 }
1777 return true;
1778 }
1779
1780 /* Try to fetch and process more Invalidation Descriptors */
1781 static void vtd_fetch_inv_desc(IntelIOMMUState *s)
1782 {
1783 VTD_DPRINTF(INV, "fetch Invalidation Descriptors");
1784 if (s->iq_tail >= s->iq_size) {
1785 /* Detects an invalid Tail pointer */
1786 VTD_DPRINTF(GENERAL, "error: iq_tail is %"PRIu16
1787 " while iq_size is %"PRIu16, s->iq_tail, s->iq_size);
1788 vtd_handle_inv_queue_error(s);
1789 return;
1790 }
1791 while (s->iq_head != s->iq_tail) {
1792 if (!vtd_process_inv_desc(s)) {
1793 /* Invalidation Queue Errors */
1794 vtd_handle_inv_queue_error(s);
1795 break;
1796 }
1797 /* Must update IQH_REG in time so that software sees the queue progress */
1798 vtd_set_quad_raw(s, DMAR_IQH_REG,
1799 (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) &
1800 VTD_IQH_QH_MASK);
1801 }
1802 }
1803
1804 /* Handle write to Invalidation Queue Tail Register */
1805 static void vtd_handle_iqt_write(IntelIOMMUState *s)
1806 {
1807 uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG);
1808
1809 s->iq_tail = VTD_IQT_QT(val);
1810 VTD_DPRINTF(INV, "set iq tail %"PRIu16, s->iq_tail);
1811 if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
1812 /* Process Invalidation Queue here */
1813 vtd_fetch_inv_desc(s);
1814 }
1815 }
1816
1817 static void vtd_handle_fsts_write(IntelIOMMUState *s)
1818 {
1819 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
1820 uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
1821 uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE;
1822
1823 if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) {
1824 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
1825 VTD_DPRINTF(FLOG, "all pending interrupt conditions serviced, clear "
1826 "IP field of FECTL_REG");
1827 }
1828 /* FIXME: when IQE is Clear, should we try to fetch some Invalidation
1829 * Descriptors if there are any when Queued Invalidation is enabled?
1830 */
1831 }
1832
1833 static void vtd_handle_fectl_write(IntelIOMMUState *s)
1834 {
1835 uint32_t fectl_reg;
1836 /* FIXME: when software clears the IM field, check the IP field. But do we
1837 * need to compare the old value and the new value to conclude that
1838 * software clears the IM field? Or just check if the IM field is zero?
1839 */
1840 fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
1841 if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) {
1842 vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
1843 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
1844 VTD_DPRINTF(FLOG, "IM field is cleared, generate "
1845 "fault event interrupt");
1846 }
1847 }
1848
1849 static void vtd_handle_ics_write(IntelIOMMUState *s)
1850 {
1851 uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG);
1852 uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
1853
1854 if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) {
1855 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
1856 VTD_DPRINTF(INV, "pending completion interrupt condition serviced, "
1857 "clear IP field of IECTL_REG");
1858 }
1859 }
1860
1861 static void vtd_handle_iectl_write(IntelIOMMUState *s)
1862 {
1863 uint32_t iectl_reg;
1864 /* FIXME: when software clears the IM field, check the IP field. But do we
1865 * need to compare the old value and the new value to conclude that
1866 * software clears the IM field? Or just check if the IM field is zero?
1867 */
1868 iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);
1869 if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) {
1870 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
1871 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
1872 VTD_DPRINTF(INV, "IM field is cleared, generate "
1873 "invalidation event interrupt");
1874 }
1875 }
1876
1877 static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
1878 {
1879 IntelIOMMUState *s = opaque;
1880 uint64_t val;
1881
1882 if (addr + size > DMAR_REG_SIZE) {
1883 VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
1884 ", got 0x%"PRIx64 " %d",
1885 (uint64_t)DMAR_REG_SIZE, addr, size);
1886 return (uint64_t)-1;
1887 }
1888
1889 switch (addr) {
1890 /* Root Table Address Register, 64-bit */
1891 case DMAR_RTADDR_REG:
1892 if (size == 4) {
1893 val = s->root & ((1ULL << 32) - 1);
1894 } else {
1895 val = s->root;
1896 }
1897 break;
1898
1899 case DMAR_RTADDR_REG_HI:
1900 assert(size == 4);
1901 val = s->root >> 32;
1902 break;
1903
1904 /* Invalidation Queue Address Register, 64-bit */
1905 case DMAR_IQA_REG:
1906 val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS);
1907 if (size == 4) {
1908 val = val & ((1ULL << 32) - 1);
1909 }
1910 break;
1911
1912 case DMAR_IQA_REG_HI:
1913 assert(size == 4);
1914 val = s->iq >> 32;
1915 break;
1916
1917 default:
1918 if (size == 4) {
1919 val = vtd_get_long(s, addr);
1920 } else {
1921 val = vtd_get_quad(s, addr);
1922 }
1923 }
1924 VTD_DPRINTF(CSR, "addr 0x%"PRIx64 " size %d val 0x%"PRIx64,
1925 addr, size, val);
1926 return val;
1927 }
1928
1929 static void vtd_mem_write(void *opaque, hwaddr addr,
1930 uint64_t val, unsigned size)
1931 {
1932 IntelIOMMUState *s = opaque;
1933
1934 if (addr + size > DMAR_REG_SIZE) {
1935 VTD_DPRINTF(GENERAL, "error: addr outside region: max 0x%"PRIx64
1936 ", got 0x%"PRIx64 " %d",
1937 (uint64_t)DMAR_REG_SIZE, addr, size);
1938 return;
1939 }
1940
1941 switch (addr) {
1942 /* Global Command Register, 32-bit */
1943 case DMAR_GCMD_REG:
1944 VTD_DPRINTF(CSR, "DMAR_GCMD_REG write addr 0x%"PRIx64
1945 ", size %d, val 0x%"PRIx64, addr, size, val);
1946 vtd_set_long(s, addr, val);
1947 vtd_handle_gcmd_write(s);
1948 break;
1949
1950 /* Context Command Register, 64-bit */
1951 case DMAR_CCMD_REG:
1952 VTD_DPRINTF(CSR, "DMAR_CCMD_REG write addr 0x%"PRIx64
1953 ", size %d, val 0x%"PRIx64, addr, size, val);
1954 if (size == 4) {
1955 vtd_set_long(s, addr, val);
1956 } else {
1957 vtd_set_quad(s, addr, val);
1958 vtd_handle_ccmd_write(s);
1959 }
1960 break;
1961
1962 case DMAR_CCMD_REG_HI:
1963 VTD_DPRINTF(CSR, "DMAR_CCMD_REG_HI write addr 0x%"PRIx64
1964 ", size %d, val 0x%"PRIx64, addr, size, val);
1965 assert(size == 4);
1966 vtd_set_long(s, addr, val);
1967 vtd_handle_ccmd_write(s);
1968 break;
1969
1970 /* IOTLB Invalidation Register, 64-bit */
1971 case DMAR_IOTLB_REG:
1972 VTD_DPRINTF(INV, "DMAR_IOTLB_REG write addr 0x%"PRIx64
1973 ", size %d, val 0x%"PRIx64, addr, size, val);
1974 if (size == 4) {
1975 vtd_set_long(s, addr, val);
1976 } else {
1977 vtd_set_quad(s, addr, val);
1978 vtd_handle_iotlb_write(s);
1979 }
1980 break;
1981
1982 case DMAR_IOTLB_REG_HI:
1983 VTD_DPRINTF(INV, "DMAR_IOTLB_REG_HI write addr 0x%"PRIx64
1984 ", size %d, val 0x%"PRIx64, addr, size, val);
1985 assert(size == 4);
1986 vtd_set_long(s, addr, val);
1987 vtd_handle_iotlb_write(s);
1988 break;
1989
1990 /* Invalidate Address Register, 64-bit */
1991 case DMAR_IVA_REG:
1992 VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64
1993 ", size %d, val 0x%"PRIx64, addr, size, val);
1994 if (size == 4) {
1995 vtd_set_long(s, addr, val);
1996 } else {
1997 vtd_set_quad(s, addr, val);
1998 }
1999 break;
2000
2001 case DMAR_IVA_REG_HI:
2002 VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64
2003 ", size %d, val 0x%"PRIx64, addr, size, val);
2004 assert(size == 4);
2005 vtd_set_long(s, addr, val);
2006 break;
2007
2008 /* Fault Status Register, 32-bit */
2009 case DMAR_FSTS_REG:
2010 VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64
2011 ", size %d, val 0x%"PRIx64, addr, size, val);
2012 assert(size == 4);
2013 vtd_set_long(s, addr, val);
2014 vtd_handle_fsts_write(s);
2015 break;
2016
2017 /* Fault Event Control Register, 32-bit */
2018 case DMAR_FECTL_REG:
2019 VTD_DPRINTF(FLOG, "DMAR_FECTL_REG write addr 0x%"PRIx64
2020 ", size %d, val 0x%"PRIx64, addr, size, val);
2021 assert(size == 4);
2022 vtd_set_long(s, addr, val);
2023 vtd_handle_fectl_write(s);
2024 break;
2025
2026 /* Fault Event Data Register, 32-bit */
2027 case DMAR_FEDATA_REG:
2028 VTD_DPRINTF(FLOG, "DMAR_FEDATA_REG write addr 0x%"PRIx64
2029 ", size %d, val 0x%"PRIx64, addr, size, val);
2030 assert(size == 4);
2031 vtd_set_long(s, addr, val);
2032 break;
2033
2034 /* Fault Event Address Register, 32-bit */
2035 case DMAR_FEADDR_REG:
2036 VTD_DPRINTF(FLOG, "DMAR_FEADDR_REG write addr 0x%"PRIx64
2037 ", size %d, val 0x%"PRIx64, addr, size, val);
2038 assert(size == 4);
2039 vtd_set_long(s, addr, val);
2040 break;
2041
2042 /* Fault Event Upper Address Register, 32-bit */
2043 case DMAR_FEUADDR_REG:
2044 VTD_DPRINTF(FLOG, "DMAR_FEUADDR_REG write addr 0x%"PRIx64
2045 ", size %d, val 0x%"PRIx64, addr, size, val);
2046 assert(size == 4);
2047 vtd_set_long(s, addr, val);
2048 break;
2049
2050 /* Protected Memory Enable Register, 32-bit */
2051 case DMAR_PMEN_REG:
2052 VTD_DPRINTF(CSR, "DMAR_PMEN_REG write addr 0x%"PRIx64
2053 ", size %d, val 0x%"PRIx64, addr, size, val);
2054 assert(size == 4);
2055 vtd_set_long(s, addr, val);
2056 break;
2057
2058 /* Root Table Address Register, 64-bit */
2059 case DMAR_RTADDR_REG:
2060 VTD_DPRINTF(CSR, "DMAR_RTADDR_REG write addr 0x%"PRIx64
2061 ", size %d, val 0x%"PRIx64, addr, size, val);
2062 if (size == 4) {
2063 vtd_set_long(s, addr, val);
2064 } else {
2065 vtd_set_quad(s, addr, val);
2066 }
2067 break;
2068
2069 case DMAR_RTADDR_REG_HI:
2070 VTD_DPRINTF(CSR, "DMAR_RTADDR_REG_HI write addr 0x%"PRIx64
2071 ", size %d, val 0x%"PRIx64, addr, size, val);
2072 assert(size == 4);
2073 vtd_set_long(s, addr, val);
2074 break;
2075
2076 /* Invalidation Queue Tail Register, 64-bit */
2077 case DMAR_IQT_REG:
2078 VTD_DPRINTF(INV, "DMAR_IQT_REG write addr 0x%"PRIx64
2079 ", size %d, val 0x%"PRIx64, addr, size, val);
2080 if (size == 4) {
2081 vtd_set_long(s, addr, val);
2082 } else {
2083 vtd_set_quad(s, addr, val);
2084 }
2085 vtd_handle_iqt_write(s);
2086 break;
2087
2088 case DMAR_IQT_REG_HI:
2089 VTD_DPRINTF(INV, "DMAR_IQT_REG_HI write addr 0x%"PRIx64
2090 ", size %d, val 0x%"PRIx64, addr, size, val);
2091 assert(size == 4);
2092 vtd_set_long(s, addr, val);
2093 /* 19:63 of IQT_REG is RsvdZ, do nothing here */
2094 break;
2095
2096 /* Invalidation Queue Address Register, 64-bit */
2097 case DMAR_IQA_REG:
2098 VTD_DPRINTF(INV, "DMAR_IQA_REG write addr 0x%"PRIx64
2099 ", size %d, val 0x%"PRIx64, addr, size, val);
2100 if (size == 4) {
2101 vtd_set_long(s, addr, val);
2102 } else {
2103 vtd_set_quad(s, addr, val);
2104 }
2105 break;
2106
2107 case DMAR_IQA_REG_HI:
2108 VTD_DPRINTF(INV, "DMAR_IQA_REG_HI write addr 0x%"PRIx64
2109 ", size %d, val 0x%"PRIx64, addr, size, val);
2110 assert(size == 4);
2111 vtd_set_long(s, addr, val);
2112 break;
2113
2114 /* Invalidation Completion Status Register, 32-bit */
2115 case DMAR_ICS_REG:
2116 VTD_DPRINTF(INV, "DMAR_ICS_REG write addr 0x%"PRIx64
2117 ", size %d, val 0x%"PRIx64, addr, size, val);
2118 assert(size == 4);
2119 vtd_set_long(s, addr, val);
2120 vtd_handle_ics_write(s);
2121 break;
2122
2123 /* Invalidation Event Control Register, 32-bit */
2124 case DMAR_IECTL_REG:
2125 VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64
2126 ", size %d, val 0x%"PRIx64, addr, size, val);
2127 assert(size == 4);
2128 vtd_set_long(s, addr, val);
2129 vtd_handle_iectl_write(s);
2130 break;
2131
2132 /* Invalidation Event Data Register, 32-bit */
2133 case DMAR_IEDATA_REG:
2134 VTD_DPRINTF(INV, "DMAR_IEDATA_REG write addr 0x%"PRIx64
2135 ", size %d, val 0x%"PRIx64, addr, size, val);
2136 assert(size == 4);
2137 vtd_set_long(s, addr, val);
2138 break;
2139
2140 /* Invalidation Event Address Register, 32-bit */
2141 case DMAR_IEADDR_REG:
2142 VTD_DPRINTF(INV, "DMAR_IEADDR_REG write addr 0x%"PRIx64
2143 ", size %d, val 0x%"PRIx64, addr, size, val);
2144 assert(size == 4);
2145 vtd_set_long(s, addr, val);
2146 break;
2147
2148 /* Invalidation Event Upper Address Register, 32-bit */
2149 case DMAR_IEUADDR_REG:
2150 VTD_DPRINTF(INV, "DMAR_IEUADDR_REG write addr 0x%"PRIx64
2151 ", size %d, val 0x%"PRIx64, addr, size, val);
2152 assert(size == 4);
2153 vtd_set_long(s, addr, val);
2154 break;
2155
2156 /* Fault Recording Registers, 128-bit */
2157 case DMAR_FRCD_REG_0_0:
2158 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_0 write addr 0x%"PRIx64
2159 ", size %d, val 0x%"PRIx64, addr, size, val);
2160 if (size == 4) {
2161 vtd_set_long(s, addr, val);
2162 } else {
2163 vtd_set_quad(s, addr, val);
2164 }
2165 break;
2166
2167 case DMAR_FRCD_REG_0_1:
2168 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_1 write addr 0x%"PRIx64
2169 ", size %d, val 0x%"PRIx64, addr, size, val);
2170 assert(size == 4);
2171 vtd_set_long(s, addr, val);
2172 break;
2173
2174 case DMAR_FRCD_REG_0_2:
2175 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_2 write addr 0x%"PRIx64
2176 ", size %d, val 0x%"PRIx64, addr, size, val);
2177 if (size == 4) {
2178 vtd_set_long(s, addr, val);
2179 } else {
2180 vtd_set_quad(s, addr, val);
2181 /* May clear bit 127 (Fault), update PPF */
2182 vtd_update_fsts_ppf(s);
2183 }
2184 break;
2185
2186 case DMAR_FRCD_REG_0_3:
2187 VTD_DPRINTF(FLOG, "DMAR_FRCD_REG_0_3 write addr 0x%"PRIx64
2188 ", size %d, val 0x%"PRIx64, addr, size, val);
2189 assert(size == 4);
2190 vtd_set_long(s, addr, val);
2191 /* May clear bit 127 (Fault), update PPF */
2192 vtd_update_fsts_ppf(s);
2193 break;
2194
2195 case DMAR_IRTA_REG:
2196 VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64
2197 ", size %d, val 0x%"PRIx64, addr, size, val);
2198 if (size == 4) {
2199 vtd_set_long(s, addr, val);
2200 } else {
2201 vtd_set_quad(s, addr, val);
2202 }
2203 break;
2204
2205 case DMAR_IRTA_REG_HI:
2206 VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64
2207 ", size %d, val 0x%"PRIx64, addr, size, val);
2208 assert(size == 4);
2209 vtd_set_long(s, addr, val);
2210 break;
2211
2212 default:
2213 VTD_DPRINTF(GENERAL, "error: unhandled reg write addr 0x%"PRIx64
2214 ", size %d, val 0x%"PRIx64, addr, size, val);
2215 if (size == 4) {
2216 vtd_set_long(s, addr, val);
2217 } else {
2218 vtd_set_quad(s, addr, val);
2219 }
2220 }
2221 }
2222
2223 static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
2224 bool is_write)
2225 {
2226 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
2227 IntelIOMMUState *s = vtd_as->iommu_state;
2228 IOMMUTLBEntry ret = {
2229 .target_as = &address_space_memory,
2230 .iova = addr,
2231 .translated_addr = 0,
2232 .addr_mask = ~(hwaddr)0,
2233 .perm = IOMMU_NONE,
2234 };
2235
2236 if (!s->dmar_enabled) {
2237 /* DMAR disabled, passthrough, use 4K page */
2238 ret.iova = addr & VTD_PAGE_MASK_4K;
2239 ret.translated_addr = addr & VTD_PAGE_MASK_4K;
2240 ret.addr_mask = ~VTD_PAGE_MASK_4K;
2241 ret.perm = IOMMU_RW;
2242 return ret;
2243 }
2244
2245 vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr,
2246 is_write, &ret);
2247 VTD_DPRINTF(MMU,
2248 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
2249 " iova 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->bus),
2250 VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn),
2251 vtd_as->devfn, addr, ret.translated_addr);
2252 return ret;
2253 }
2254
2255 static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
2256 IOMMUNotifierFlag old,
2257 IOMMUNotifierFlag new)
2258 {
2259 VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
2260 IntelIOMMUState *s = vtd_as->iommu_state;
2261 IntelIOMMUNotifierNode *node = NULL;
2262 IntelIOMMUNotifierNode *next_node = NULL;
2263
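    /*
     * MAP notifiers (e.g. for vfio device assignment) rely on Caching
     * Mode: with CM=1 the guest must issue invalidations even for
     * not-present to present transitions, which is how we learn about
     * newly created mappings.
     */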
2264 if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
2265 error_report("We need to set caching-mode=on for intel-iommu to enable "
2266 "device assignment with IOMMU protection.");
2267 exit(1);
2268 }
2269
2270 if (old == IOMMU_NOTIFIER_NONE) {
2271 node = g_malloc0(sizeof(*node));
2272 node->vtd_as = vtd_as;
2273 QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
2274 return;
2275 }
2276
2277 /* update notifier node with new flags */
2278 QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
2279 if (node->vtd_as == vtd_as) {
2280 if (new == IOMMU_NOTIFIER_NONE) {
2281 QLIST_REMOVE(node, next);
2282 g_free(node);
2283 }
2284 return;
2285 }
2286 }
2287 }
2288
2289 static const VMStateDescription vtd_vmstate = {
2290 .name = "iommu-intel",
2291 .version_id = 1,
2292 .minimum_version_id = 1,
2293 .priority = MIG_PRI_IOMMU,
2294 .fields = (VMStateField[]) {
2295 VMSTATE_UINT64(root, IntelIOMMUState),
2296 VMSTATE_UINT64(intr_root, IntelIOMMUState),
2297 VMSTATE_UINT64(iq, IntelIOMMUState),
2298 VMSTATE_UINT32(intr_size, IntelIOMMUState),
2299 VMSTATE_UINT16(iq_head, IntelIOMMUState),
2300 VMSTATE_UINT16(iq_tail, IntelIOMMUState),
2301 VMSTATE_UINT16(iq_size, IntelIOMMUState),
2302 VMSTATE_UINT16(next_frcd_reg, IntelIOMMUState),
2303 VMSTATE_UINT8_ARRAY(csr, IntelIOMMUState, DMAR_REG_SIZE),
2304 VMSTATE_UINT8(iq_last_desc_type, IntelIOMMUState),
2305 VMSTATE_BOOL(root_extended, IntelIOMMUState),
2306 VMSTATE_BOOL(dmar_enabled, IntelIOMMUState),
2307 VMSTATE_BOOL(qi_enabled, IntelIOMMUState),
2308 VMSTATE_BOOL(intr_enabled, IntelIOMMUState),
2309 VMSTATE_BOOL(intr_eime, IntelIOMMUState),
2310 VMSTATE_END_OF_LIST()
2311 }
2312 };
2313
2314 static const MemoryRegionOps vtd_mem_ops = {
2315 .read = vtd_mem_read,
2316 .write = vtd_mem_write,
2317 .endianness = DEVICE_LITTLE_ENDIAN,
2318 .impl = {
2319 .min_access_size = 4,
2320 .max_access_size = 8,
2321 },
2322 .valid = {
2323 .min_access_size = 4,
2324 .max_access_size = 8,
2325 },
2326 };
2327
2328 static Property vtd_properties[] = {
2329 DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0),
2330 DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
2331 ON_OFF_AUTO_AUTO),
2332 DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
2333 DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
2334 DEFINE_PROP_END_OF_LIST(),
2335 };
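
/*
 * Illustrative usage (assuming a q35 machine type):
 *   -device intel-iommu,caching-mode=on,eim=on
 */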
2336
2337 /* Read IRTE entry with specific index */
2338 static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
2339 VTD_IR_TableEntry *entry, uint16_t sid)
2340 {
2341 static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
2342 {0xffff, 0xfffb, 0xfff9, 0xfff8};
2343 dma_addr_t addr = 0x00;
2344 uint16_t mask, source_id;
2345 uint8_t bus, bus_max, bus_min;
2346
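    /*
     * Each IRTE is a 128-bit (16-byte) entry; intr_root holds the table
     * base that software programmed through DMAR_IRTA_REG.
     */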
2347 addr = iommu->intr_root + index * sizeof(*entry);
2348 if (dma_memory_read(&address_space_memory, addr, entry,
2349 sizeof(*entry))) {
2350 VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64
2351 " + %"PRIu16, iommu->intr_root, index);
2352 return -VTD_FR_IR_ROOT_INVAL;
2353 }
2354
2355 if (!entry->irte.present) {
2356 VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
2357 " entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
2358 index, le64_to_cpu(entry->data[1]),
2359 le64_to_cpu(entry->data[0]));
2360 return -VTD_FR_IR_ENTRY_P;
2361 }
2362
2363 if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
2364 entry->irte.__reserved_2) {
2365 VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
2366 " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
2367 index, le64_to_cpu(entry->data[1]),
2368 le64_to_cpu(entry->data[0]));
2369 return -VTD_FR_IR_IRTE_RSVD;
2370 }
2371
2372 if (sid != X86_IOMMU_SID_INVALID) {
2373 /* Validate IRTE SID */
2374 source_id = le32_to_cpu(entry->irte.source_id);
2375 switch (entry->irte.sid_vtype) {
2376 case VTD_SVT_NONE:
2377 VTD_DPRINTF(IR, "No SID validation for IRTE index %d", index);
2378 break;
2379
2380 case VTD_SVT_ALL:
2381 mask = vtd_svt_mask[entry->irte.sid_q];
2382 if ((source_id & mask) != (sid & mask)) {
2383 VTD_DPRINTF(GENERAL, "SID validation for IRTE index "
2384 "%d failed (reqid 0x%04x sid 0x%04x)", index,
2385 sid, source_id);
2386 return -VTD_FR_IR_SID_ERR;
2387 }
2388 break;
2389
2390 case VTD_SVT_BUS:
2391 bus_max = source_id >> 8;
2392 bus_min = source_id & 0xff;
2393 bus = sid >> 8;
2394 if (bus > bus_max || bus < bus_min) {
2395 VTD_DPRINTF(GENERAL, "SID validation for IRTE index %d "
2396 "failed (bus %d outside %d-%d)", index, bus,
2397 bus_min, bus_max);
2398 return -VTD_FR_IR_SID_ERR;
2399 }
2400 break;
2401
2402 default:
2403 VTD_DPRINTF(GENERAL, "Invalid SVT bits (0x%x) in IRTE index "
2404 "%d", entry->irte.sid_vtype, index);
2405 /* Take this as verification failure. */
2406 return -VTD_FR_IR_SID_ERR;
2407 break;
2408 }
2409 }
2410
2411 return 0;
2412 }
2413
2414 /* Fetch IRQ information of specific IR index */
2415 static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
2416 VTDIrq *irq, uint16_t sid)
2417 {
2418 VTD_IR_TableEntry irte = {};
2419 int ret = 0;
2420
2421 ret = vtd_irte_get(iommu, index, &irte, sid);
2422 if (ret) {
2423 return ret;
2424 }
2425
2426 irq->trigger_mode = irte.irte.trigger_mode;
2427 irq->vector = irte.irte.vector;
2428 irq->delivery_mode = irte.irte.delivery_mode;
2429 irq->dest = le32_to_cpu(irte.irte.dest_id);
2430 if (!iommu->intr_eime) {
2431 #define VTD_IR_APIC_DEST_MASK (0xff00ULL)
2432 #define VTD_IR_APIC_DEST_SHIFT (8)
2433 irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >>
2434 VTD_IR_APIC_DEST_SHIFT;
2435 }
2436 irq->dest_mode = irte.irte.dest_mode;
2437 irq->redir_hint = irte.irte.redir_hint;
2438
2439 VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u,"
2440 "deliver:%u,dest:%u,dest_mode:%u", index,
2441 irq->trigger_mode, irq->vector, irq->delivery_mode,
2442 irq->dest, irq->dest_mode);
2443
2444 return 0;
2445 }
2446
2447 /* Generate one MSI message from VTDIrq info */
2448 static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
2449 {
2450 VTD_MSIMessage msg = {};
2451
2452 /* Generate address bits */
2453 msg.dest_mode = irq->dest_mode;
2454 msg.redir_hint = irq->redir_hint;
2455 msg.dest = irq->dest;
2456 msg.__addr_hi = irq->dest & 0xffffff00;
2457 msg.__addr_head = cpu_to_le32(0xfee);
2458 /* Keep this from original MSI address bits */
2459 msg.__not_used = irq->msi_addr_last_bits;
2460
2461 /* Generate data bits */
2462 msg.vector = irq->vector;
2463 msg.delivery_mode = irq->delivery_mode;
2464 msg.level = 1;
2465 msg.trigger_mode = irq->trigger_mode;
2466
2467 msg_out->address = msg.msi_addr;
2468 msg_out->data = msg.msi_data;
2469 }
2470
2471 /* Interrupt remapping for MSI/MSI-X entry */
2472 static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
2473 MSIMessage *origin,
2474 MSIMessage *translated,
2475 uint16_t sid)
2476 {
2477 int ret = 0;
2478 VTD_IR_MSIAddress addr;
2479 uint16_t index;
2480 VTDIrq irq = {};
2481
2482 assert(origin && translated);
2483
2484 if (!iommu || !iommu->intr_enabled) {
2485 goto do_not_translate;
2486 }
2487
2488 if (origin->address & VTD_MSI_ADDR_HI_MASK) {
2489 VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero"
2490 " during interrupt remapping: 0x%"PRIx32,
2491 (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \
2492 VTD_MSI_ADDR_HI_SHIFT));
2493 return -VTD_FR_IR_REQ_RSVD;
2494 }
2495
2496 addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
2497 if (addr.addr.__head != 0xfee) {
2498 VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
2499 "0x%"PRIx32, addr.data);
2500 return -VTD_FR_IR_REQ_RSVD;
2501 }
2502
2503 /* This is compatibility format (not remappable), do not translate. */
2504 if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
2505 goto do_not_translate;
2506 }
2507
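    /*
     * The 16-bit interrupt index is split across the remappable MSI
     * address: index_l carries bits 14:0 and index_h carries bit 15.
     */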
2508 index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);
2509
2510 #define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff)
2511 #define VTD_IR_MSI_DATA_RESERVED (0xffff0000)
2512
2513 if (addr.addr.sub_valid) {
2514 /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */
2515 index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
2516 }
2517
2518 ret = vtd_remap_irq_get(iommu, index, &irq, sid);
2519 if (ret) {
2520 return ret;
2521 }
2522
2523 if (addr.addr.sub_valid) {
2524 VTD_DPRINTF(IR, "received MSI interrupt");
2525 if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
2526 VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for "
2527 "interrupt remappable entry: 0x%"PRIx32,
2528 origin->data);
2529 return -VTD_FR_IR_REQ_RSVD;
2530 }
2531 } else {
2532 uint8_t vector = origin->data & 0xff;
2533 uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
2534
2535 VTD_DPRINTF(IR, "received IOAPIC interrupt");
2536 /* IOAPIC entry vector should be aligned with IRTE vector
2537 * (see vt-d spec 5.1.5.1). */
2538 if (vector != irq.vector) {
2539 VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: "
2540 "entry: %d, IRTE: %d, index: %d",
2541 vector, irq.vector, index);
2542 }
2543
2544 /* The Trigger Mode field must match the Trigger Mode in the IRTE.
2545 * (see vt-d spec 5.1.5.1). */
2546 if (trigger_mode != irq.trigger_mode) {
2547 VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: "
2548 "entry: %u, IRTE: %u, index: %d",
2549 trigger_mode, irq.trigger_mode, index);
2550 }
2551
2552 }
2553
2554 /*
2555 * We had better keep the last two bits, assuming that the guest OS
2556 * might modify them. Keeping them does not hurt after all.
2557 */
2558 irq.msi_addr_last_bits = addr.addr.__not_care;
2559
2560 /* Translate VTDIrq to MSI message */
2561 vtd_generate_msi_message(&irq, translated);
2562
2563 VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> "
2564 "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data,
2565 translated->address, translated->data);
2566 return 0;
2567
2568 do_not_translate:
2569 memcpy(translated, origin, sizeof(*origin));
2570 return 0;
2571 }
2572
2573 static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
2574 MSIMessage *dst, uint16_t sid)
2575 {
2576 return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
2577 src, dst, sid);
2578 }
2579
2580 static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
2581 uint64_t *data, unsigned size,
2582 MemTxAttrs attrs)
2583 {
2584 return MEMTX_OK;
2585 }
2586
2587 static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
2588 uint64_t value, unsigned size,
2589 MemTxAttrs attrs)
2590 {
2591 int ret = 0;
2592 MSIMessage from = {}, to = {};
2593 uint16_t sid = X86_IOMMU_SID_INVALID;
2594
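    /*
     * addr is an offset into the intel_iommu_ir region, which is mapped
     * at VTD_INTERRUPT_ADDR_FIRST (the 0xFEExxxxx interrupt window), so
     * add the base back to reconstruct the original MSI address.
     */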
2595 from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
2596 from.data = (uint32_t) value;
2597
2598 if (!attrs.unspecified) {
2599 /* We have explicit Source ID */
2600 sid = attrs.requester_id;
2601 }
2602
2603 ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
2604 if (ret) {
2605 /* TODO: report error */
2606 VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64
2607 " data 0x%"PRIx32, from.address, from.data);
2608 /* Drop this interrupt */
2609 return MEMTX_ERROR;
2610 }
2611
2612 VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32
2613 " for device sid 0x%04x",
2614 to.address, to.data, sid);
2615
2616 apic_get_class()->send_msi(&to);
2617
2618 return MEMTX_OK;
2619 }
2620
2621 static const MemoryRegionOps vtd_mem_ir_ops = {
2622 .read_with_attrs = vtd_mem_ir_read,
2623 .write_with_attrs = vtd_mem_ir_write,
2624 .endianness = DEVICE_LITTLE_ENDIAN,
2625 .impl = {
2626 .min_access_size = 4,
2627 .max_access_size = 4,
2628 },
2629 .valid = {
2630 .min_access_size = 4,
2631 .max_access_size = 4,
2632 },
2633 };
2634
2635 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
2636 {
2637 uintptr_t key = (uintptr_t)bus;
2638 VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
2639 VTDAddressSpace *vtd_dev_as;
2640 char name[128];
2641
2642 if (!vtd_bus) {
2643 uintptr_t *new_key = g_malloc(sizeof(*new_key));
2644 *new_key = (uintptr_t)bus;
2645 /* No corresponding free() */
2646 vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
2647 X86_IOMMU_PCI_DEVFN_MAX);
2648 vtd_bus->bus = bus;
2649 g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
2650 }
2651
2652 vtd_dev_as = vtd_bus->dev_as[devfn];
2653
2654 if (!vtd_dev_as) {
2655 snprintf(name, sizeof(name), "intel_iommu_devfn_%d", devfn);
2656 vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
2657
2658 vtd_dev_as->bus = bus;
2659 vtd_dev_as->devfn = (uint8_t)devfn;
2660 vtd_dev_as->iommu_state = s;
2661 vtd_dev_as->context_cache_entry.context_cache_gen = 0;
2662
2663 /*
2664 * The memory region relationships look like this (address ranges
2665 * show only the lower 32 bits for brevity):
2666 *
2667 * |-----------------+-------------------+----------|
2668 * | Name | Address range | Priority |
2669 * |-----------------+-------------------+----------+
2670 * | vtd_root | 00000000-ffffffff | 0 |
2671 * | intel_iommu | 00000000-ffffffff | 1 |
2672 * | vtd_sys_alias | 00000000-ffffffff | 1 |
2673 * | intel_iommu_ir | fee00000-feefffff | 64 |
2674 * |-----------------+-------------------+----------|
2675 *
2676 * We enable/disable DMAR by switching enablement for
2677 * vtd_sys_alias and intel_iommu regions. IR region is always
2678 * enabled.
2679 */
2680 memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
2681 &s->iommu_ops, "intel_iommu_dmar",
2682 UINT64_MAX);
2683 memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s),
2684 "vtd_sys_alias", get_system_memory(),
2685 0, memory_region_size(get_system_memory()));
2686 memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
2687 &vtd_mem_ir_ops, s, "intel_iommu_ir",
2688 VTD_INTERRUPT_ADDR_SIZE);
2689 memory_region_init(&vtd_dev_as->root, OBJECT(s),
2690 "vtd_root", UINT64_MAX);
2691 memory_region_add_subregion_overlap(&vtd_dev_as->root,
2692 VTD_INTERRUPT_ADDR_FIRST,
2693 &vtd_dev_as->iommu_ir, 64);
2694 address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name);
2695 memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
2696 &vtd_dev_as->sys_alias, 1);
2697 memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
2698 &vtd_dev_as->iommu, 1);
2699 vtd_switch_address_space(vtd_dev_as);
2700 }
2701 return vtd_dev_as;
2702 }
2703
2704 /* Unmap the whole range in the notifier's scope. */
2705 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
2706 {
2707 IOMMUTLBEntry entry;
2708 hwaddr size;
2709 hwaddr start = n->start;
2710 hwaddr end = n->end;
2711
2712 /*
2713 * Note: all the code in this function assumes that the IOVA is no
2714 * wider than VTD_MGAW bits (as restricted by the VT-d spec);
2715 * otherwise we would need to consider 64-bit overflow.
2716 */
2717
2718 if (end > VTD_ADDRESS_SIZE) {
2719 /*
2720 * No need to unmap regions that are bigger than the whole
2721 * address space size supported by VT-d
2722 */
2723 end = VTD_ADDRESS_SIZE;
2724 }
2725
2726 assert(start <= end);
2727 size = end - start;
2728
2729 if (ctpop64(size) != 1) {
2730 /*
2731 * This size cannot form a correct mask. Let's enlarge it to
2732 * suit the minimum available mask.
2733 */
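        /*
         * Illustrative example: a 0x3000-byte range gives
         * 64 - clz64(0x3000) = 14, so the size is rounded up to
         * 1ULL << 14 = 0x4000.
         */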
2734 int n = 64 - clz64(size);
2735 if (n > VTD_MGAW) {
2736 /* should not happen, but in case it happens, limit it */
2737 n = VTD_MGAW;
2738 }
2739 size = 1ULL << n;
2740 }
2741
2742 entry.target_as = &address_space_memory;
2743 /* Adjust iova for the size */
2744 entry.iova = n->start & ~(size - 1);
2745 /* This field is meaningless for unmap */
2746 entry.translated_addr = 0;
2747 entry.perm = IOMMU_NONE;
2748 entry.addr_mask = size - 1;
2749
2750 trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
2751 VTD_PCI_SLOT(as->devfn),
2752 VTD_PCI_FUNC(as->devfn),
2753 entry.iova, size);
2754
2755 memory_region_notify_one(n, &entry);
2756 }
2757
2758 static void vtd_address_space_unmap_all(IntelIOMMUState *s)
2759 {
2760 IntelIOMMUNotifierNode *node;
2761 VTDAddressSpace *vtd_as;
2762 IOMMUNotifier *n;
2763
2764 QLIST_FOREACH(node, &s->notifiers_list, next) {
2765 vtd_as = node->vtd_as;
2766 IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
2767 vtd_address_space_unmap(vtd_as, n);
2768 }
2769 }
2770 }
2771
2772 static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
2773 {
2774 memory_region_notify_one((IOMMUNotifier *)private, entry);
2775 return 0;
2776 }
2777
2778 static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
2779 {
2780 VTDAddressSpace *vtd_as = container_of(mr, VTDAddressSpace, iommu);
2781 IntelIOMMUState *s = vtd_as->iommu_state;
2782 uint8_t bus_n = pci_bus_num(vtd_as->bus);
2783 VTDContextEntry ce;
2784
2785 /*
2786 * The replay can be triggered by either an invalidation or a newly
2787 * created entry. No matter what, we release existing mappings
2788 * (it means flushing caches for UNMAP-only notifiers).
2789 */
2790 vtd_address_space_unmap(vtd_as, n);
2791
2792 if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
2793 trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
2794 PCI_FUNC(vtd_as->devfn),
2795 VTD_CONTEXT_ENTRY_DID(ce.hi),
2796 ce.hi, ce.lo);
2797 vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false);
2798 } else {
2799 trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
2800 PCI_FUNC(vtd_as->devfn));
2801 }
2802
2803 return;
2804 }
2805
2806 /* Do the initialization. It will also be called on reset, so pay
2807 * attention when adding new initialization code.
2808 */
2809 static void vtd_init(IntelIOMMUState *s)
2810 {
2811 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
2812
2813 memset(s->csr, 0, DMAR_REG_SIZE);
2814 memset(s->wmask, 0, DMAR_REG_SIZE);
2815 memset(s->w1cmask, 0, DMAR_REG_SIZE);
2816 memset(s->womask, 0, DMAR_REG_SIZE);
2817
2818 s->iommu_ops.translate = vtd_iommu_translate;
2819 s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed;
2820 s->iommu_ops.replay = vtd_iommu_replay;
2821 s->root = 0;
2822 s->root_extended = false;
2823 s->dmar_enabled = false;
2824 s->iq_head = 0;
2825 s->iq_tail = 0;
2826 s->iq = 0;
2827 s->iq_size = 0;
2828 s->qi_enabled = false;
2829 s->iq_last_desc_type = VTD_INV_DESC_NONE;
2830 s->next_frcd_reg = 0;
2831 s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
2832 VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
2833 s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
2834
2835 if (x86_iommu->intr_supported) {
2836 s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
2837 if (s->intr_eim == ON_OFF_AUTO_ON) {
2838 s->ecap |= VTD_ECAP_EIM;
2839 }
2840 assert(s->intr_eim != ON_OFF_AUTO_AUTO);
2841 }
2842
2843 if (x86_iommu->dt_supported) {
2844 s->ecap |= VTD_ECAP_DT;
2845 }
2846
2847 if (s->caching_mode) {
2848 s->cap |= VTD_CAP_CM;
2849 }
2850
2851 vtd_reset_context_cache(s);
2852 vtd_reset_iotlb(s);
2853
2854 /* Define registers with default values and bit semantics */
2855 vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
2856 vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0);
2857 vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0);
2858 vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0);
2859 vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL);
2860 vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0);
2861 vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0);
2862 vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0);
2863 vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL);
2864
2865 /* Advanced Fault Logging not supported */
2866 vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL);
2867 vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0);
2868 vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0);
2869 vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0);
2870
2871 /* Treated as RsvdZ when EIM in ECAP_REG is not supported
2872 * vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0xffffffffUL, 0);
2873 */
2874 vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0);
2875
2876 /* Treated as RO for implementations that report the PLMR and PHMR fields
2877 * as Clear in CAP_REG.
2878 * vtd_define_long(s, DMAR_PMEN_REG, 0, 0x80000000UL, 0);
2879 */
2880 vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0);
2881
2882 vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0);
2883 vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0);
2884 vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0);
2885 vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL);
2886 vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0);
2887 vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0);
2888 vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0);
2889 /* Treated as RsvdZ when EIM in ECAP_REG is not supported */
2890 vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0);
2891
2892 /* IOTLB registers */
2893 vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0xb003ffff00000000ULL, 0);
2894 vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0);
2895 vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL);
2896
2897 /* Fault Recording Registers, 128-bit */
2898 vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0);
2899 vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);
2900
2901 /*
2902 * Interrupt remapping registers.
2903 */
2904 vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
2905 }
2906
2907 /* Should not reset address_spaces on reset because devices will still use
2908 * the address spaces they got at first (they won't ask the bus again).
2909 */
2910 static void vtd_reset(DeviceState *dev)
2911 {
2912 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
2913
2914 VTD_DPRINTF(GENERAL, "");
2915 vtd_init(s);
2916
2917 /*
2918 * On device reset, throw away all mappings and external caches
2919 */
2920 vtd_address_space_unmap_all(s);
2921 }
2922
2923 static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
2924 {
2925 IntelIOMMUState *s = opaque;
2926 VTDAddressSpace *vtd_as;
2927
2928 assert(0 <= devfn && devfn < X86_IOMMU_PCI_DEVFN_MAX);
2929
2930 vtd_as = vtd_find_add_as(s, bus, devfn);
2931 return &vtd_as->as;
2932 }
2933
2934 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
2935 {
2936 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
2937
2938 /* Currently Intel IOMMU IR only supports "kernel-irqchip={off|split}" */
2939 if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
2940 !kvm_irqchip_is_split()) {
2941 error_setg(errp, "Intel Interrupt Remapping cannot work with "
2942 "kernel-irqchip=on, please use 'split|off'.");
2943 return false;
2944 }
2945 if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) {
2946 error_setg(errp, "eim=on cannot be selected without intremap=on");
2947 return false;
2948 }
2949
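    /*
     * Resolve eim=auto: enable EIM only when interrupt remapping is
     * supported and either the in-kernel irqchip is in use or the
     * x-buggy-eim workaround was requested.
     */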
2950 if (s->intr_eim == ON_OFF_AUTO_AUTO) {
2951 s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim)
2952 && x86_iommu->intr_supported ?
2953 ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
2954 }
2955 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
2956 if (!kvm_irqchip_in_kernel()) {
2957 error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
2958 return false;
2959 }
2960 if (!kvm_enable_x2apic()) {
2961 error_setg(errp, "eim=on requires support on the KVM side"
2962 "(X2APIC_API, first shipped in v4.7)");
2963 return false;
2964 }
2965 }
2966
2967 return true;
2968 }
2969
2970 static void vtd_realize(DeviceState *dev, Error **errp)
2971 {
2972 PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
2973 PCIBus *bus = pcms->bus;
2974 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
2975 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
2976
2977 VTD_DPRINTF(GENERAL, "");
2978 x86_iommu->type = TYPE_INTEL;
2979
2980 if (!vtd_decide_config(s, errp)) {
2981 return;
2982 }
2983
2984 QLIST_INIT(&s->notifiers_list);
2985 memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
2986 memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
2987 "intel_iommu", DMAR_REG_SIZE);
2988 sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
2989 /* No corresponding destroy */
2990 s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
2991 g_free, g_free);
2992 s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
2993 g_free, g_free);
2994 vtd_init(s);
2995 sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
2996 pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
2997 /* Pseudo address space under root PCI bus. */
2998 pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
2999 }
3000
3001 static void vtd_class_init(ObjectClass *klass, void *data)
3002 {
3003 DeviceClass *dc = DEVICE_CLASS(klass);
3004 X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass);
3005
3006 dc->reset = vtd_reset;
3007 dc->vmsd = &vtd_vmstate;
3008 dc->props = vtd_properties;
3009 dc->hotpluggable = false;
3010 x86_class->realize = vtd_realize;
3011 x86_class->int_remap = vtd_int_remap;
3012 /* Supported by the pc-q35-* machine types */
3013 dc->user_creatable = true;
3014 }
3015
3016 static const TypeInfo vtd_info = {
3017 .name = TYPE_INTEL_IOMMU_DEVICE,
3018 .parent = TYPE_X86_IOMMU_DEVICE,
3019 .instance_size = sizeof(IntelIOMMUState),
3020 .class_init = vtd_class_init,
3021 };
3022
3023 static void vtd_register_types(void)
3024 {
3025 VTD_DPRINTF(GENERAL, "");
3026 type_register_static(&vtd_info);
3027 }
3028
3029 type_init(vtd_register_types)