]>
Commit | Line | Data |
---|---|---|
a28b0fc0 DW |
1 | /* |
2 | * QEMU Xen emulation: Grant table support | |
3 | * | |
4 | * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. | |
5 | * | |
6 | * Authors: David Woodhouse <dwmw2@infradead.org> | |
7 | * | |
8 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
9 | * See the COPYING file in the top-level directory. | |
10 | */ | |
11 | ||
12 | #include "qemu/osdep.h" | |
13 | #include "qemu/host-utils.h" | |
14 | #include "qemu/module.h" | |
15 | #include "qemu/lockable.h" | |
16 | #include "qemu/main-loop.h" | |
17 | #include "qapi/error.h" | |
18 | #include "qom/object.h" | |
19 | #include "exec/target_page.h" | |
20 | #include "exec/address-spaces.h" | |
21 | #include "migration/vmstate.h" | |
22 | ||
23 | #include "hw/sysbus.h" | |
24 | #include "hw/xen/xen.h" | |
b08d88e3 | 25 | #include "hw/xen/xen_backend_ops.h" |
a28b0fc0 DW |
26 | #include "xen_overlay.h" |
27 | #include "xen_gnttab.h" | |
28 | ||
29 | #include "sysemu/kvm.h" | |
30 | #include "sysemu/kvm_xen.h" | |
31 | ||
32 | #include "hw/xen/interface/memory.h" | |
33 | #include "hw/xen/interface/grant_table.h" | |
34 | ||
35 | #define TYPE_XEN_GNTTAB "xen-gnttab" | |
36 | OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB) | |
37 | ||
e33cb789 DW |
38 | #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t)) |
39 | ||
b08d88e3 DW |
40 | static struct gnttab_backend_ops emu_gnttab_backend_ops; |
41 | ||
a28b0fc0 DW |
struct XenGnttabState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    /* Protects the entries, map_track counts and frame GPA bookkeeping. */
    QemuMutex gnt_lock;

    /* Number of table frames currently exposed to the guest. */
    uint32_t nr_frames;
    /* Upper limit on table frames, fixed at realize time. */
    uint32_t max_frames;

    union {
        grant_entry_v1_t *v1;
        /* Theoretically, v2 support could be added here. */
    } entries;

    /* Backing RAM for the whole grant table (max_frames pages). */
    MemoryRegion gnt_frames;
    /* One page-sized alias per frame, used to overlay frames into the guest. */
    MemoryRegion *gnt_aliases;
    /* Guest physical address of each frame; INVALID_GPA while unmapped. */
    uint64_t *gnt_frame_gpas;

    /* Per-grant-ref count of outstanding local (backend) mappings. */
    uint8_t *map_track;
};

/* Singleton device instance, set at realize / xen_gnttab_create(). */
struct XenGnttabState *xen_gnttab_singleton;
65 | ||
/*
 * Realize the emulated grant table device: allocate the backing RAM for
 * the table, create per-frame aliases for guest overlay mapping, reset the
 * table contents, and install the emulated grant table backend ops.
 */
static void xen_gnttab_realize(DeviceState *dev, Error **errp)
{
    XenGnttabState *s = XEN_GNTTAB(dev);
    int i;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen grant table support is for Xen emulation");
        return;
    }
    /* The frame limit comes from the KVM Xen shim configuration. */
    s->max_frames = kvm_xen_get_gnttab_max_frames();
    memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
                           XEN_PAGE_SIZE * s->max_frames, &error_abort);
    memory_region_set_enabled(&s->gnt_frames, true);
    s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);

    /* Create individual page-sizes aliases for overlays */
    s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
    s->gnt_frame_gpas = (void *)g_new(uint64_t, s->max_frames);
    for (i = 0; i < s->max_frames; i++) {
        memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
                                 NULL, &s->gnt_frames,
                                 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
        /* Nothing is mapped into the guest yet. */
        s->gnt_frame_gpas[i] = INVALID_GPA;
    }

    s->nr_frames = 0;
    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
    /* The xenstore ring page is always pre-granted to the guest. */
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    qemu_mutex_init(&s->gnt_lock);

    xen_gnttab_singleton = s;

    /* One map_track byte per v1 grant entry across all frames. */
    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);

    xen_gnttab_ops = &emu_gnttab_backend_ops;
}
104 | ||
105 | static int xen_gnttab_post_load(void *opaque, int version_id) | |
106 | { | |
107 | XenGnttabState *s = XEN_GNTTAB(opaque); | |
108 | uint32_t i; | |
109 | ||
110 | for (i = 0; i < s->nr_frames; i++) { | |
111 | if (s->gnt_frame_gpas[i] != INVALID_GPA) { | |
112 | xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]); | |
113 | } | |
114 | } | |
115 | return 0; | |
a28b0fc0 DW |
116 | } |
117 | ||
/* The vmstate is only migrated when Xen emulation is active. */
static bool xen_gnttab_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}
122 | ||
static const VMStateDescription xen_gnttab_vmstate = {
    .name = "xen_gnttab",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_gnttab_is_needed,
    .post_load = xen_gnttab_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(nr_frames, XenGnttabState),
        /* One GPA per in-use frame; post_load re-creates the mappings. */
        VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
                              vmstate_info_uint64, uint64_t),
        VMSTATE_END_OF_LIST()
    }
};
136 | ||
137 | static void xen_gnttab_class_init(ObjectClass *klass, void *data) | |
138 | { | |
139 | DeviceClass *dc = DEVICE_CLASS(klass); | |
140 | ||
141 | dc->realize = xen_gnttab_realize; | |
142 | dc->vmsd = &xen_gnttab_vmstate; | |
143 | } | |
144 | ||
/* QOM type registration data for the emulated grant table device. */
static const TypeInfo xen_gnttab_info = {
    .name = TYPE_XEN_GNTTAB,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenGnttabState),
    .class_init = xen_gnttab_class_init,
};
151 | ||
152 | void xen_gnttab_create(void) | |
153 | { | |
154 | xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB, | |
155 | -1, NULL)); | |
156 | } | |
157 | ||
/* Register the device type with QOM at module init time. */
static void xen_gnttab_register_types(void)
{
    type_register_static(&xen_gnttab_info);
}

type_init(xen_gnttab_register_types)
164 | ||
165 | int xen_gnttab_map_page(uint64_t idx, uint64_t gfn) | |
166 | { | |
e33cb789 DW |
167 | XenGnttabState *s = xen_gnttab_singleton; |
168 | uint64_t gpa = gfn << XEN_PAGE_SHIFT; | |
169 | ||
170 | if (!s) { | |
171 | return -ENOTSUP; | |
172 | } | |
173 | ||
174 | if (idx >= s->max_frames) { | |
175 | return -EINVAL; | |
176 | } | |
177 | ||
178 | QEMU_IOTHREAD_LOCK_GUARD(); | |
179 | QEMU_LOCK_GUARD(&s->gnt_lock); | |
180 | ||
181 | xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa); | |
182 | ||
183 | s->gnt_frame_gpas[idx] = gpa; | |
184 | ||
185 | if (s->nr_frames <= idx) { | |
186 | s->nr_frames = idx + 1; | |
187 | } | |
188 | ||
189 | return 0; | |
a28b0fc0 DW |
190 | } |
191 | ||
28b7ae94 DW |
192 | int xen_gnttab_set_version_op(struct gnttab_set_version *set) |
193 | { | |
194 | int ret; | |
195 | ||
196 | switch (set->version) { | |
197 | case 1: | |
198 | ret = 0; | |
199 | break; | |
200 | ||
201 | case 2: | |
202 | /* Behave as before set_version was introduced. */ | |
203 | ret = -ENOSYS; | |
204 | break; | |
205 | ||
206 | default: | |
207 | ret = -EINVAL; | |
208 | } | |
209 | ||
210 | set->version = 1; | |
211 | return ret; | |
212 | } | |
213 | ||
214 | int xen_gnttab_get_version_op(struct gnttab_get_version *get) | |
215 | { | |
216 | if (get->dom != DOMID_SELF && get->dom != xen_domid) { | |
217 | return -ESRCH; | |
218 | } | |
219 | ||
220 | get->version = 1; | |
221 | return 0; | |
222 | } | |
b46f9745 DW |
223 | |
224 | int xen_gnttab_query_size_op(struct gnttab_query_size *size) | |
225 | { | |
226 | XenGnttabState *s = xen_gnttab_singleton; | |
227 | ||
228 | if (!s) { | |
229 | return -ENOTSUP; | |
230 | } | |
231 | ||
232 | if (size->dom != DOMID_SELF && size->dom != xen_domid) { | |
233 | size->status = GNTST_bad_domain; | |
234 | return 0; | |
235 | } | |
236 | ||
237 | size->status = GNTST_okay; | |
238 | size->nr_frames = s->nr_frames; | |
239 | size->max_nr_frames = s->max_frames; | |
240 | return 0; | |
241 | } | |
b08d88e3 DW |
242 | |
/* Track per-open refs, to allow close() to clean up. */
struct active_ref {
    MemoryRegionSection mrs;    /* region backing the granted page */
    void *virtaddr;             /* host pointer handed back to the backend */
    uint32_t refcnt;            /* map calls outstanding on this handle */
    int prot;                   /* union of PROT_* requested so far */
};
250 | ||
/*
 * Drop one local mapping of @ref. Releases the MemoryRegion reference
 * (marking the page dirty first if it was mapped writable) and, when the
 * last tracked mapping goes away, atomically clears the GTF_reading and
 * GTF_writing status flags so the guest can see the grant is idle.
 */
static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    if (mrs && mrs->mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mrs->mr);
        mrs->mr = NULL;
    }
    /* Every unref must be balanced by a prior map_track increment. */
    assert(s->map_track[ref] != 0);

    if (--s->map_track[ref] == 0) {
        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
        /* Atomic because the guest may be updating flags concurrently. */
        qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
    }
}
269 | ||
/*
 * Validate grant @ref and mark it in use: sets GTF_reading (and
 * GTF_writing when PROT_WRITE is requested) via cmpxchg on the flags.
 * Returns the guest physical address of the granted frame, or INVALID_GPA
 * if the ref is out of range, its map_track count is saturated, the entry
 * is not a plain GTF_permit_access grant to DOMID_QEMU, the grant is
 * read-only while write access was requested, or the guest keeps
 * flipping the flags (livelock guard).
 */
static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    uint16_t mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t gnt, *gnt_p;
    int retries = 0;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (prot & PROT_WRITE) {
        /* A read-only grant must not be mapped writable. */
        mask |= GTF_readonly;
    }

    gnt_p = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (retries = 0; retries < 5; retries++) {
        uint16_t new_flags;

        /* Read the entry before an atomic operation on its flags */
        gnt = *(volatile grant_entry_v1_t *)gnt_p;

        if ((gnt.flags & mask) != GTF_permit_access ||
            gnt.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        new_flags = gnt.flags | GTF_reading;
        if (prot & PROT_WRITE) {
            new_flags |= GTF_writing;
        }

        /* Success only if the flags did not change under our feet. */
        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}
314 | ||
/* State for one open gntdev handle: grant_ref_t -> struct active_ref. */
struct xengntdev_handle {
    GHashTable *active_maps;
};
318 | ||
/* No-op for the emulated backend; no per-handle reservation is needed. */
static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}
324 | ||
325 | static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt, | |
326 | uint32_t count, uint32_t domid, | |
327 | uint32_t *refs, int prot) | |
328 | { | |
329 | XenGnttabState *s = xen_gnttab_singleton; | |
330 | struct active_ref *act; | |
331 | ||
332 | if (!s) { | |
333 | errno = ENOTSUP; | |
334 | return NULL; | |
335 | } | |
336 | ||
337 | if (domid != xen_domid) { | |
338 | errno = EINVAL; | |
339 | return NULL; | |
340 | } | |
341 | ||
342 | if (!count || count > 4096) { | |
343 | errno = EINVAL; | |
344 | return NULL; | |
345 | } | |
346 | ||
347 | /* | |
348 | * Making a contiguous mapping from potentially discontiguous grant | |
349 | * references would be... distinctly non-trivial. We don't support it. | |
350 | * Even changing the API to return an array of pointers, one per page, | |
351 | * wouldn't be simple to use in PV backends because some structures | |
352 | * actually cross page boundaries (e.g. 32-bit blkif_response ring | |
353 | * entries are 12 bytes). | |
354 | */ | |
355 | if (count != 1) { | |
356 | errno = EINVAL; | |
357 | return NULL; | |
358 | } | |
359 | ||
360 | QEMU_LOCK_GUARD(&s->gnt_lock); | |
361 | ||
362 | act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0])); | |
363 | if (act) { | |
364 | if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) { | |
365 | if (gnt_ref(s, refs[0], prot) == INVALID_GPA) { | |
366 | return NULL; | |
367 | } | |
368 | act->prot |= PROT_WRITE; | |
369 | } | |
370 | act->refcnt++; | |
371 | } else { | |
372 | uint64_t gpa = gnt_ref(s, refs[0], prot); | |
373 | if (gpa == INVALID_GPA) { | |
374 | errno = EINVAL; | |
375 | return NULL; | |
376 | } | |
377 | ||
378 | act = g_new0(struct active_ref, 1); | |
379 | act->prot = prot; | |
380 | act->refcnt = 1; | |
381 | act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE); | |
382 | ||
383 | if (act->mrs.mr && | |
384 | !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) && | |
385 | memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) { | |
386 | act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block, | |
387 | act->mrs.offset_within_region); | |
388 | } | |
389 | if (!act->virtaddr) { | |
390 | gnt_unref(s, refs[0], &act->mrs, 0); | |
391 | g_free(act); | |
392 | errno = EINVAL; | |
393 | return NULL; | |
394 | } | |
395 | ||
396 | s->map_track[refs[0]]++; | |
397 | g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act); | |
398 | } | |
399 | ||
400 | return act->virtaddr; | |
401 | } | |
402 | ||
/*
 * GHRFunc callback: release one active_ref and free it. Always returns
 * true so g_hash_table_foreach_remove() drops the entry from the table.
 */
static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
{
    XenGnttabState *s = user_data;
    grant_ref_t gref = GPOINTER_TO_INT(key);
    struct active_ref *act = value;

    gnt_unref(s, gref, &act->mrs, act->prot);
    g_free(act);
    return true;
}
413 | ||
414 | static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt, | |
415 | void *start_address, uint32_t *refs, | |
416 | uint32_t count) | |
417 | { | |
418 | XenGnttabState *s = xen_gnttab_singleton; | |
419 | struct active_ref *act; | |
420 | ||
421 | if (!s) { | |
422 | return -ENOTSUP; | |
423 | } | |
424 | ||
425 | if (count != 1) { | |
426 | return -EINVAL; | |
427 | } | |
428 | ||
429 | QEMU_LOCK_GUARD(&s->gnt_lock); | |
430 | ||
431 | act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0])); | |
432 | if (!act) { | |
433 | return -ENOENT; | |
434 | } | |
435 | ||
436 | if (act->virtaddr != start_address) { | |
437 | return -EINVAL; | |
438 | } | |
439 | ||
440 | if (!--act->refcnt) { | |
441 | do_unmap(GINT_TO_POINTER(refs[0]), act, s); | |
442 | g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0])); | |
443 | } | |
444 | ||
445 | return 0; | |
446 | } | |
447 | ||
/*
 * This looks a bit like the one for true Xen in xen-operations.c but
 * in emulation we don't support multi-page mappings. And under Xen we
 * *want* the multi-page mappings so we have fewer bounces through the
 * kernel and the hypervisor. So the code paths end up being similar,
 * but different.
 */
/*
 * Copy each segment between local memory and a granted guest page by
 * mapping the ref, memcpy()ing, and unmapping again. Direction is given
 * by @to_domain. Returns 0 on success or -errno from the failing map or
 * unmap (which set errno on failure).
 */
static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
                              uint32_t domid, XenGrantCopySegment *segs,
                              uint32_t nr_segs, Error **errp)
{
    int prot = to_domain ? PROT_WRITE : PROT_READ;
    unsigned int i;

    for (i = 0; i < nr_segs; i++) {
        XenGrantCopySegment *seg = &segs[i];
        void *page;
        /* The grant ref lives on whichever side is "foreign". */
        uint32_t ref = to_domain ? seg->dest.foreign.ref :
            seg->source.foreign.ref;

        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
        if (!page) {
            if (errp) {
                error_setg_errno(errp, errno,
                                 "xen_be_gnttab_map_refs failed");
            }
            return -errno;
        }

        if (to_domain) {
            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
                   seg->len);
        } else {
            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
                   seg->len);
        }

        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
            if (errp) {
                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
            }
            return -errno;
        }
    }

    return 0;
}
495 | ||
496 | static struct xengntdev_handle *xen_be_gnttab_open(void) | |
497 | { | |
498 | struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1); | |
499 | ||
500 | xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal); | |
501 | return xgt; | |
502 | } | |
503 | ||
504 | static int xen_be_gnttab_close(struct xengntdev_handle *xgt) | |
505 | { | |
506 | XenGnttabState *s = xen_gnttab_singleton; | |
507 | ||
508 | if (!s) { | |
509 | return -ENOTSUP; | |
510 | } | |
511 | ||
512 | g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s); | |
513 | g_hash_table_destroy(xgt->active_maps); | |
514 | g_free(xgt); | |
515 | return 0; | |
516 | } | |
517 | ||
/* Backend ops installed into xen_gnttab_ops at realize time. */
static struct gnttab_backend_ops emu_gnttab_backend_ops = {
    .open = xen_be_gnttab_open,
    .close = xen_be_gnttab_close,
    .grant_copy = xen_be_gnttab_copy,
    .set_max_grants = xen_be_gnttab_set_max_grants,
    .map_refs = xen_be_gnttab_map_refs,
    .unmap = xen_be_gnttab_unmap,
};
526 | ||
de26b261 DW |
527 | int xen_gnttab_reset(void) |
528 | { | |
529 | XenGnttabState *s = xen_gnttab_singleton; | |
530 | ||
531 | if (!s) { | |
532 | return -ENOTSUP; | |
533 | } | |
534 | ||
535 | QEMU_LOCK_GUARD(&s->gnt_lock); | |
536 | ||
537 | s->nr_frames = 0; | |
538 | ||
539 | memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames); | |
540 | ||
541 | s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; | |
542 | s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); | |
543 | ||
de26b261 DW |
544 | return 0; |
545 | } |