/*
 * QEMU Xen emulation: Grant table support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/grant_table.h"
35 #define TYPE_XEN_GNTTAB "xen-gnttab"
36 OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState
, XEN_GNTTAB
)
38 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
40 static struct gnttab_backend_ops emu_gnttab_backend_ops
;
42 struct XenGnttabState
{
54 /* Theoretically, v2 support could be added here. */
57 MemoryRegion gnt_frames
;
58 MemoryRegion
*gnt_aliases
;
59 uint64_t *gnt_frame_gpas
;
64 struct XenGnttabState
*xen_gnttab_singleton
;
66 static void xen_gnttab_realize(DeviceState
*dev
, Error
**errp
)
68 XenGnttabState
*s
= XEN_GNTTAB(dev
);
71 if (xen_mode
!= XEN_EMULATE
) {
72 error_setg(errp
, "Xen grant table support is for Xen emulation");
75 s
->max_frames
= kvm_xen_get_gnttab_max_frames();
76 memory_region_init_ram(&s
->gnt_frames
, OBJECT(dev
), "xen:grant_table",
77 XEN_PAGE_SIZE
* s
->max_frames
, &error_abort
);
78 memory_region_set_enabled(&s
->gnt_frames
, true);
79 s
->entries
.v1
= memory_region_get_ram_ptr(&s
->gnt_frames
);
81 /* Create individual page-sizes aliases for overlays */
82 s
->gnt_aliases
= (void *)g_new0(MemoryRegion
, s
->max_frames
);
83 s
->gnt_frame_gpas
= (void *)g_new(uint64_t, s
->max_frames
);
84 for (i
= 0; i
< s
->max_frames
; i
++) {
85 memory_region_init_alias(&s
->gnt_aliases
[i
], OBJECT(dev
),
87 i
* XEN_PAGE_SIZE
, XEN_PAGE_SIZE
);
88 s
->gnt_frame_gpas
[i
] = INVALID_GPA
;
92 memset(s
->entries
.v1
, 0, XEN_PAGE_SIZE
* s
->max_frames
);
93 s
->entries
.v1
[GNTTAB_RESERVED_XENSTORE
].flags
= GTF_permit_access
;
94 s
->entries
.v1
[GNTTAB_RESERVED_XENSTORE
].frame
= XEN_SPECIAL_PFN(XENSTORE
);
96 qemu_mutex_init(&s
->gnt_lock
);
98 xen_gnttab_singleton
= s
;
100 s
->map_track
= g_new0(uint8_t, s
->max_frames
* ENTRIES_PER_FRAME_V1
);
102 xen_gnttab_ops
= &emu_gnttab_backend_ops
;
105 static int xen_gnttab_post_load(void *opaque
, int version_id
)
107 XenGnttabState
*s
= XEN_GNTTAB(opaque
);
110 for (i
= 0; i
< s
->nr_frames
; i
++) {
111 if (s
->gnt_frame_gpas
[i
] != INVALID_GPA
) {
112 xen_overlay_do_map_page(&s
->gnt_aliases
[i
], s
->gnt_frame_gpas
[i
]);
118 static bool xen_gnttab_is_needed(void *opaque
)
120 return xen_mode
== XEN_EMULATE
;
123 static const VMStateDescription xen_gnttab_vmstate
= {
124 .name
= "xen_gnttab",
126 .minimum_version_id
= 1,
127 .needed
= xen_gnttab_is_needed
,
128 .post_load
= xen_gnttab_post_load
,
129 .fields
= (VMStateField
[]) {
130 VMSTATE_UINT32(nr_frames
, XenGnttabState
),
131 VMSTATE_VARRAY_UINT32(gnt_frame_gpas
, XenGnttabState
, nr_frames
, 0,
132 vmstate_info_uint64
, uint64_t),
133 VMSTATE_END_OF_LIST()
137 static void xen_gnttab_class_init(ObjectClass
*klass
, void *data
)
139 DeviceClass
*dc
= DEVICE_CLASS(klass
);
141 dc
->realize
= xen_gnttab_realize
;
142 dc
->vmsd
= &xen_gnttab_vmstate
;
145 static const TypeInfo xen_gnttab_info
= {
146 .name
= TYPE_XEN_GNTTAB
,
147 .parent
= TYPE_SYS_BUS_DEVICE
,
148 .instance_size
= sizeof(XenGnttabState
),
149 .class_init
= xen_gnttab_class_init
,
152 void xen_gnttab_create(void)
154 xen_gnttab_singleton
= XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB
,
158 static void xen_gnttab_register_types(void)
160 type_register_static(&xen_gnttab_info
);
163 type_init(xen_gnttab_register_types
)
165 int xen_gnttab_map_page(uint64_t idx
, uint64_t gfn
)
167 XenGnttabState
*s
= xen_gnttab_singleton
;
168 uint64_t gpa
= gfn
<< XEN_PAGE_SHIFT
;
174 if (idx
>= s
->max_frames
) {
178 QEMU_IOTHREAD_LOCK_GUARD();
179 QEMU_LOCK_GUARD(&s
->gnt_lock
);
181 xen_overlay_do_map_page(&s
->gnt_aliases
[idx
], gpa
);
183 s
->gnt_frame_gpas
[idx
] = gpa
;
185 if (s
->nr_frames
<= idx
) {
186 s
->nr_frames
= idx
+ 1;
192 int xen_gnttab_set_version_op(struct gnttab_set_version
*set
)
196 switch (set
->version
) {
202 /* Behave as before set_version was introduced. */
214 int xen_gnttab_get_version_op(struct gnttab_get_version
*get
)
216 if (get
->dom
!= DOMID_SELF
&& get
->dom
!= xen_domid
) {
224 int xen_gnttab_query_size_op(struct gnttab_query_size
*size
)
226 XenGnttabState
*s
= xen_gnttab_singleton
;
232 if (size
->dom
!= DOMID_SELF
&& size
->dom
!= xen_domid
) {
233 size
->status
= GNTST_bad_domain
;
237 size
->status
= GNTST_okay
;
238 size
->nr_frames
= s
->nr_frames
;
239 size
->max_nr_frames
= s
->max_frames
;
243 /* Track per-open refs, to allow close() to clean up. */
245 MemoryRegionSection mrs
;
251 static void gnt_unref(XenGnttabState
*s
, grant_ref_t ref
,
252 MemoryRegionSection
*mrs
, int prot
)
254 if (mrs
&& mrs
->mr
) {
255 if (prot
& PROT_WRITE
) {
256 memory_region_set_dirty(mrs
->mr
, mrs
->offset_within_region
,
259 memory_region_unref(mrs
->mr
);
262 assert(s
->map_track
[ref
] != 0);
264 if (--s
->map_track
[ref
] == 0) {
265 grant_entry_v1_t
*gnt_p
= &s
->entries
.v1
[ref
];
266 qatomic_and(&gnt_p
->flags
, (uint16_t)~(GTF_reading
| GTF_writing
));
270 static uint64_t gnt_ref(XenGnttabState
*s
, grant_ref_t ref
, int prot
)
272 uint16_t mask
= GTF_type_mask
| GTF_sub_page
;
273 grant_entry_v1_t gnt
, *gnt_p
;
276 if (ref
>= s
->max_frames
* ENTRIES_PER_FRAME_V1
||
277 s
->map_track
[ref
] == UINT8_MAX
) {
281 if (prot
& PROT_WRITE
) {
282 mask
|= GTF_readonly
;
285 gnt_p
= &s
->entries
.v1
[ref
];
288 * The guest can legitimately be changing the GTF_readonly flag. Allow
289 * that, but don't let a malicious guest cause a livelock.
291 for (retries
= 0; retries
< 5; retries
++) {
294 /* Read the entry before an atomic operation on its flags */
295 gnt
= *(volatile grant_entry_v1_t
*)gnt_p
;
297 if ((gnt
.flags
& mask
) != GTF_permit_access
||
298 gnt
.domid
!= DOMID_QEMU
) {
302 new_flags
= gnt
.flags
| GTF_reading
;
303 if (prot
& PROT_WRITE
) {
304 new_flags
|= GTF_writing
;
307 if (qatomic_cmpxchg(&gnt_p
->flags
, gnt
.flags
, new_flags
) == gnt
.flags
) {
308 return (uint64_t)gnt
.frame
<< XEN_PAGE_SHIFT
;
315 struct xengntdev_handle
{
316 GHashTable
*active_maps
;
/* No-op: the emulated backend has no per-handle grant limit to set. */
static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}
325 static void *xen_be_gnttab_map_refs(struct xengntdev_handle
*xgt
,
326 uint32_t count
, uint32_t domid
,
327 uint32_t *refs
, int prot
)
329 XenGnttabState
*s
= xen_gnttab_singleton
;
330 struct active_ref
*act
;
337 if (domid
!= xen_domid
) {
342 if (!count
|| count
> 4096) {
348 * Making a contiguous mapping from potentially discontiguous grant
349 * references would be... distinctly non-trivial. We don't support it.
350 * Even changing the API to return an array of pointers, one per page,
351 * wouldn't be simple to use in PV backends because some structures
352 * actually cross page boundaries (e.g. 32-bit blkif_response ring
353 * entries are 12 bytes).
360 QEMU_LOCK_GUARD(&s
->gnt_lock
);
362 act
= g_hash_table_lookup(xgt
->active_maps
, GINT_TO_POINTER(refs
[0]));
364 if ((prot
& PROT_WRITE
) && !(act
->prot
& PROT_WRITE
)) {
365 if (gnt_ref(s
, refs
[0], prot
) == INVALID_GPA
) {
368 act
->prot
|= PROT_WRITE
;
372 uint64_t gpa
= gnt_ref(s
, refs
[0], prot
);
373 if (gpa
== INVALID_GPA
) {
378 act
= g_new0(struct active_ref
, 1);
381 act
->mrs
= memory_region_find(get_system_memory(), gpa
, XEN_PAGE_SIZE
);
384 !int128_lt(act
->mrs
.size
, int128_make64(XEN_PAGE_SIZE
)) &&
385 memory_region_get_ram_addr(act
->mrs
.mr
) != RAM_ADDR_INVALID
) {
386 act
->virtaddr
= qemu_map_ram_ptr(act
->mrs
.mr
->ram_block
,
387 act
->mrs
.offset_within_region
);
389 if (!act
->virtaddr
) {
390 gnt_unref(s
, refs
[0], &act
->mrs
, 0);
396 s
->map_track
[refs
[0]]++;
397 g_hash_table_insert(xgt
->active_maps
, GINT_TO_POINTER(refs
[0]), act
);
400 return act
->virtaddr
;
403 static gboolean
do_unmap(gpointer key
, gpointer value
, gpointer user_data
)
405 XenGnttabState
*s
= user_data
;
406 grant_ref_t gref
= GPOINTER_TO_INT(key
);
407 struct active_ref
*act
= value
;
409 gnt_unref(s
, gref
, &act
->mrs
, act
->prot
);
414 static int xen_be_gnttab_unmap(struct xengntdev_handle
*xgt
,
415 void *start_address
, uint32_t *refs
,
418 XenGnttabState
*s
= xen_gnttab_singleton
;
419 struct active_ref
*act
;
429 QEMU_LOCK_GUARD(&s
->gnt_lock
);
431 act
= g_hash_table_lookup(xgt
->active_maps
, GINT_TO_POINTER(refs
[0]));
436 if (act
->virtaddr
!= start_address
) {
440 if (!--act
->refcnt
) {
441 do_unmap(GINT_TO_POINTER(refs
[0]), act
, s
);
442 g_hash_table_remove(xgt
->active_maps
, GINT_TO_POINTER(refs
[0]));
449 * This looks a bit like the one for true Xen in xen-operations.c but
450 * in emulation we don't support multi-page mappings. And under Xen we
451 * *want* the multi-page mappings so we have fewer bounces through the
452 * kernel and the hypervisor. So the code paths end up being similar,
455 static int xen_be_gnttab_copy(struct xengntdev_handle
*xgt
, bool to_domain
,
456 uint32_t domid
, XenGrantCopySegment
*segs
,
457 uint32_t nr_segs
, Error
**errp
)
459 int prot
= to_domain
? PROT_WRITE
: PROT_READ
;
462 for (i
= 0; i
< nr_segs
; i
++) {
463 XenGrantCopySegment
*seg
= &segs
[i
];
465 uint32_t ref
= to_domain
? seg
->dest
.foreign
.ref
:
466 seg
->source
.foreign
.ref
;
468 page
= xen_be_gnttab_map_refs(xgt
, 1, domid
, &ref
, prot
);
471 error_setg_errno(errp
, errno
,
472 "xen_be_gnttab_map_refs failed");
478 memcpy(page
+ seg
->dest
.foreign
.offset
, seg
->source
.virt
,
481 memcpy(seg
->dest
.virt
, page
+ seg
->source
.foreign
.offset
,
485 if (xen_be_gnttab_unmap(xgt
, page
, &ref
, 1)) {
487 error_setg_errno(errp
, errno
, "xen_be_gnttab_unmap failed");
496 static struct xengntdev_handle
*xen_be_gnttab_open(void)
498 struct xengntdev_handle
*xgt
= g_new0(struct xengntdev_handle
, 1);
500 xgt
->active_maps
= g_hash_table_new(g_direct_hash
, g_direct_equal
);
504 static int xen_be_gnttab_close(struct xengntdev_handle
*xgt
)
506 XenGnttabState
*s
= xen_gnttab_singleton
;
512 g_hash_table_foreach_remove(xgt
->active_maps
, do_unmap
, s
);
513 g_hash_table_destroy(xgt
->active_maps
);
518 static struct gnttab_backend_ops emu_gnttab_backend_ops
= {
519 .open
= xen_be_gnttab_open
,
520 .close
= xen_be_gnttab_close
,
521 .grant_copy
= xen_be_gnttab_copy
,
522 .set_max_grants
= xen_be_gnttab_set_max_grants
,
523 .map_refs
= xen_be_gnttab_map_refs
,
524 .unmap
= xen_be_gnttab_unmap
,
527 int xen_gnttab_reset(void)
529 XenGnttabState
*s
= xen_gnttab_singleton
;
535 QEMU_LOCK_GUARD(&s
->gnt_lock
);
539 memset(s
->entries
.v1
, 0, XEN_PAGE_SIZE
* s
->max_frames
);
541 s
->entries
.v1
[GNTTAB_RESERVED_XENSTORE
].flags
= GTF_permit_access
;
542 s
->entries
.v1
[GNTTAB_RESERVED_XENSTORE
].frame
= XEN_SPECIAL_PFN(XENSTORE
);
544 memset(s
->map_track
, 0, s
->max_frames
* ENTRIES_PER_FRAME_V1
);