hw/virtio/virtio-mem.c
1 /*
2 * Virtio MEM device
3 *
4 * Copyright (C) 2020 Red Hat, Inc.
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "qemu/iov.h"
15 #include "qemu/cutils.h"
16 #include "qemu/error-report.h"
17 #include "qemu/units.h"
18 #include "sysemu/numa.h"
19 #include "sysemu/sysemu.h"
20 #include "sysemu/reset.h"
21 #include "sysemu/runstate.h"
22 #include "hw/virtio/virtio.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "hw/virtio/virtio-mem.h"
25 #include "qapi/error.h"
26 #include "qapi/visitor.h"
27 #include "exec/ram_addr.h"
28 #include "migration/misc.h"
29 #include "hw/boards.h"
30 #include "hw/qdev-properties.h"
31 #include CONFIG_DEVICES
32 #include "trace.h"
33
34 static const VMStateDescription vmstate_virtio_mem_device_early;
35
36 /*
37 * We only had legacy x86 guests that did not support
38 * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE. Other targets don't have legacy guests.
39 */
40 #if defined(TARGET_X86_64) || defined(TARGET_I386)
41 #define VIRTIO_MEM_HAS_LEGACY_GUESTS
42 #endif
43
44 /*
45 * Let's not allow blocks smaller than 1 MiB, for example, to keep the tracking
46 * bitmap small.
47 */
48 #define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
49
50 static uint32_t virtio_mem_default_thp_size(void)
51 {
52 uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
53
54 #if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
55 default_thp_size = 2 * MiB;
56 #elif defined(__aarch64__)
57 if (qemu_real_host_page_size() == 4 * KiB) {
58 default_thp_size = 2 * MiB;
59 } else if (qemu_real_host_page_size() == 16 * KiB) {
60 default_thp_size = 32 * MiB;
61 } else if (qemu_real_host_page_size() == 64 * KiB) {
62 default_thp_size = 512 * MiB;
63 }
64 #endif
65
66 return default_thp_size;
67 }
68
69 /*
70 * We want to have a reasonable default block size such that
71 * 1. We avoid splitting THPs when unplugging memory, which degrades
72 * performance.
73 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
74 * blocks.
75 *
76 * The actual THP size might differ between Linux kernels, so we try to probe
77 * it. In the future (if we ever run into issues regarding 2.), we might want
78 * to disable THP in case we fail to properly probe the THP size, or if the
79 * block size is configured smaller than the THP size.
80 */
81 static uint32_t thp_size;
82
83 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
84 static uint32_t virtio_mem_thp_size(void)
85 {
86 gchar *content = NULL;
87 const char *endptr;
88 uint64_t tmp;
89
90 if (thp_size) {
91 return thp_size;
92 }
93
94 /*
95 * Try to probe the actual THP size, falling back to (sane but possibly
96 * incorrect) default sizes.
97 */
98 if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
99 !qemu_strtou64(content, &endptr, 0, &tmp) &&
100 (!endptr || *endptr == '\n')) {
101 /* Sanity-check the value and fall back to something reasonable. */
102 if (!tmp || !is_power_of_2(tmp)) {
103 warn_report("Read unsupported THP size: %" PRIx64, tmp);
104 } else {
105 thp_size = tmp;
106 }
107 }
108
109 if (!thp_size) {
110 thp_size = virtio_mem_default_thp_size();
111 warn_report("Could not detect THP size, falling back to %" PRIx64
112 " MiB.", thp_size / MiB);
113 }
114
115 g_free(content);
116 return thp_size;
117 }
118
119 static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
120 {
121 const uint64_t page_size = qemu_ram_pagesize(rb);
122
123 /* We can have hugetlbfs with a page size smaller than the THP size. */
124 if (page_size == qemu_real_host_page_size()) {
125 return MAX(page_size, virtio_mem_thp_size());
126 }
127 return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
128 }
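/*
 * A few worked examples of the default block size selection above
 * (illustrative values only):
 *
 *   - anonymous RAM, 4 KiB host pages, 2 MiB THP detected:
 *       page size matches the host page size       -> MAX(4 KiB, 2 MiB) = 2 MiB
 *   - hugetlbfs backend with 2 MiB pages:
 *       page size differs from the host page size  -> MAX(2 MiB, 1 MiB) = 2 MiB
 *   - hugetlbfs backend with 1 GiB pages:
 *       page size differs from the host page size  -> MAX(1 GiB, 1 MiB) = 1 GiB
 */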
129
130 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
131 static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
132 {
133 /*
134 * We only have a guaranteed shared zeropage on ordinary MAP_PRIVATE
135 * anonymous RAM. In any other case, reading unplugged *can* populate a
136 * fresh page, consuming actual memory.
137 */
138 return !qemu_ram_is_shared(rb) && qemu_ram_get_fd(rb) < 0 &&
139 qemu_ram_pagesize(rb) == qemu_real_host_page_size();
140 }
141 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
142
143 /*
144 * Size the usable region bigger than the requested size, if possible.
145 * Especially, Linux guests will only add (aligned) memory blocks if they
146 * fully fit into the usable region, but plug+online only a subset of the
147 * pages. The memory block size mostly corresponds to the section size.
148 *
149 * This allows, e.g., adding 20MB with a section size of 128MB on x86_64, or
150 * with a section size of 512MB on arm64 (as long as the start address is
151 * properly aligned, similar to ordinary DIMMs).
152 *
153 * We can change this at any time and maybe even make it configurable if
154 * necessary (as the section size can change). But it's more likely that the
155 * section size will get smaller rather than bigger over time.
156 */
157 #if defined(TARGET_X86_64) || defined(TARGET_I386)
158 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
159 #elif defined(TARGET_ARM)
160 #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
161 #else
162 #error VIRTIO_MEM_USABLE_EXTENT not defined
163 #endif
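/*
 * For illustration: on x86_64 the extent is 2 * 128 MiB = 256 MiB. With a
 * 4 GiB memdev, a 2 MiB block size and requested_size = 20 MiB, the usable
 * region becomes MIN(4 GiB, 20 MiB + 256 MiB) = 276 MiB, so a Linux guest
 * can add a fully aligned 128 MiB memory block and online just the plugged
 * 20 MiB within it.
 */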
164
165 static bool virtio_mem_is_busy(void)
166 {
167 /*
168 * Postcopy cannot handle concurrent discards and we don't want to migrate
169 * pages on-demand with stale content when plugging new blocks.
170 *
171 * For precopy, we don't want unplugged blocks in our migration stream, and
172 * when plugging new blocks, the page content might differ between source
173 * and destination (observable by the guest when not initializing pages
174 * after plugging them) until we're running on the destination (as we didn't
175 * migrate these blocks when they were unplugged).
176 */
177 return migration_in_incoming_postcopy() || !migration_is_idle();
178 }
179
180 typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
181 uint64_t offset, uint64_t size);
182
183 static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
184 virtio_mem_range_cb cb)
185 {
186 unsigned long first_zero_bit, last_zero_bit;
187 uint64_t offset, size;
188 int ret = 0;
189
190 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
191 while (first_zero_bit < vmem->bitmap_size) {
192 offset = first_zero_bit * vmem->block_size;
193 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
194 first_zero_bit + 1) - 1;
195 size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
196
197 ret = cb(vmem, arg, offset, size);
198 if (ret) {
199 break;
200 }
201 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
202 last_zero_bit + 2);
203 }
204 return ret;
205 }
206
207 static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
208 virtio_mem_range_cb cb)
209 {
210 unsigned long first_bit, last_bit;
211 uint64_t offset, size;
212 int ret = 0;
213
214 first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
215 while (first_bit < vmem->bitmap_size) {
216 offset = first_bit * vmem->block_size;
217 last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
218 first_bit + 1) - 1;
219 size = (last_bit - first_bit + 1) * vmem->block_size;
220
221 ret = cb(vmem, arg, offset, size);
222 if (ret) {
223 break;
224 }
225 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
226 last_bit + 2);
227 }
228 return ret;
229 }
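/*
 * A worked example of the two range iterators above, assuming a 2 MiB block
 * size, bitmap_size = 8 and a bitmap of 0b00111001 (bit 0 = first block,
 * set = plugged):
 *
 *   virtio_mem_for_each_plugged_range() invokes cb() with
 *     offset = 0 MiB,  size = 2 MiB   (bit 0)
 *     offset = 6 MiB,  size = 6 MiB   (bits 3..5)
 *   virtio_mem_for_each_unplugged_range() invokes cb() with
 *     offset = 2 MiB,  size = 4 MiB   (bits 1..2)
 *     offset = 12 MiB, size = 4 MiB   (bits 6..7)
 *
 * Continuing the search at "last_bit + 2" is fine because the bit at
 * "last_bit + 1" is already known to be in the opposite state.
 */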
230
231 /*
232 * Adjust the memory section to cover the intersection with the given range.
233 *
234 * Returns false if the intersection is empty, otherwise returns true.
235 */
236 static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
237 uint64_t offset, uint64_t size)
238 {
239 uint64_t start = MAX(s->offset_within_region, offset);
240 uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
241 offset + size);
242
243 if (end <= start) {
244 return false;
245 }
246
247 s->offset_within_address_space += start - s->offset_within_region;
248 s->offset_within_region = start;
249 s->size = int128_make64(end - start);
250 return true;
251 }
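/*
 * Example (illustrative offsets): a section covering region offsets
 * [256 MiB, 384 MiB) intersected with offset = 320 MiB, size = 128 MiB is
 * adjusted to cover [320 MiB, 384 MiB): both offsets advance by 64 MiB and
 * the section size shrinks to 64 MiB.
 */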
252
253 typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
254
255 static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
256 MemoryRegionSection *s,
257 void *arg,
258 virtio_mem_section_cb cb)
259 {
260 unsigned long first_bit, last_bit;
261 uint64_t offset, size;
262 int ret = 0;
263
264 first_bit = s->offset_within_region / vmem->block_size;
265 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
266 while (first_bit < vmem->bitmap_size) {
267 MemoryRegionSection tmp = *s;
268
269 offset = first_bit * vmem->block_size;
270 last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
271 first_bit + 1) - 1;
272 size = (last_bit - first_bit + 1) * vmem->block_size;
273
274 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
275 break;
276 }
277 ret = cb(&tmp, arg);
278 if (ret) {
279 break;
280 }
281 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
282 last_bit + 2);
283 }
284 return ret;
285 }
286
287 static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
288 MemoryRegionSection *s,
289 void *arg,
290 virtio_mem_section_cb cb)
291 {
292 unsigned long first_bit, last_bit;
293 uint64_t offset, size;
294 int ret = 0;
295
296 first_bit = s->offset_within_region / vmem->block_size;
297 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
298 while (first_bit < vmem->bitmap_size) {
299 MemoryRegionSection tmp = *s;
300
301 offset = first_bit * vmem->block_size;
302 last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
303 first_bit + 1) - 1;
304 size = (last_bit - first_bit + 1) * vmem->block_size;
305
306 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
307 break;
308 }
309 ret = cb(&tmp, arg);
310 if (ret) {
311 break;
312 }
313 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
314 last_bit + 2);
315 }
316 return ret;
317 }
318
319 static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
320 {
321 RamDiscardListener *rdl = arg;
322
323 return rdl->notify_populate(rdl, s);
324 }
325
326 static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
327 {
328 RamDiscardListener *rdl = arg;
329
330 rdl->notify_discard(rdl, s);
331 return 0;
332 }
333
334 static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
335 uint64_t size)
336 {
337 RamDiscardListener *rdl;
338
339 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
340 MemoryRegionSection tmp = *rdl->section;
341
342 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
343 continue;
344 }
345 rdl->notify_discard(rdl, &tmp);
346 }
347 }
348
349 static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
350 uint64_t size)
351 {
352 RamDiscardListener *rdl, *rdl2;
353 int ret = 0;
354
355 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
356 MemoryRegionSection tmp = *rdl->section;
357
358 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
359 continue;
360 }
361 ret = rdl->notify_populate(rdl, &tmp);
362 if (ret) {
363 break;
364 }
365 }
366
367 if (ret) {
368 /* Notify all already-notified listeners. */
369 QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
370 MemoryRegionSection tmp = *rdl2->section;
371
372 if (rdl2 == rdl) {
373 break;
374 }
375 if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
376 continue;
377 }
378 rdl2->notify_discard(rdl2, &tmp);
379 }
380 }
381 return ret;
382 }
383
384 static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
385 {
386 RamDiscardListener *rdl;
387
388 if (!vmem->size) {
389 return;
390 }
391
392 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
393 if (rdl->double_discard_supported) {
394 rdl->notify_discard(rdl, rdl->section);
395 } else {
396 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
397 virtio_mem_notify_discard_cb);
398 }
399 }
400 }
401
402 static bool virtio_mem_is_range_plugged(const VirtIOMEM *vmem,
403 uint64_t start_gpa, uint64_t size)
404 {
405 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
406 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
407 unsigned long found_bit;
408
409 /* We fake a shorter bitmap to avoid searching too far. */
410 found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
411 return found_bit > last_bit;
412 }
413
414 static bool virtio_mem_is_range_unplugged(const VirtIOMEM *vmem,
415 uint64_t start_gpa, uint64_t size)
416 {
417 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
418 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
419 unsigned long found_bit;
420
421 /* We fake a shorter bitmap to avoid searching too far. */
422 found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
423 return found_bit > last_bit;
424 }
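/*
 * Example: with a 2 MiB block size, a request covering [addr + 4 MiB,
 * addr + 8 MiB) maps to first_bit = 2 and last_bit = 3. The range is
 * considered plugged only if find_next_zero_bit() on the faked 4-bit bitmap
 * finds no zero bit, i.e., returns an index > 3; the unplugged check works
 * the same way with find_next_bit().
 */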
425
426 static void virtio_mem_set_range_plugged(VirtIOMEM *vmem, uint64_t start_gpa,
427 uint64_t size)
428 {
429 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
430 const unsigned long nbits = size / vmem->block_size;
431
432 bitmap_set(vmem->bitmap, bit, nbits);
433 }
434
435 static void virtio_mem_set_range_unplugged(VirtIOMEM *vmem, uint64_t start_gpa,
436 uint64_t size)
437 {
438 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
439 const unsigned long nbits = size / vmem->block_size;
440
441 bitmap_clear(vmem->bitmap, bit, nbits);
442 }
443
444 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
445 struct virtio_mem_resp *resp)
446 {
447 VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
448 VirtQueue *vq = vmem->vq;
449
450 trace_virtio_mem_send_response(le16_to_cpu(resp->type));
451 iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));
452
453 virtqueue_push(vq, elem, sizeof(*resp));
454 virtio_notify(vdev, vq);
455 }
456
457 static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
458 VirtQueueElement *elem,
459 uint16_t type)
460 {
461 struct virtio_mem_resp resp = {
462 .type = cpu_to_le16(type),
463 };
464
465 virtio_mem_send_response(vmem, elem, &resp);
466 }
467
468 static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
469 uint64_t size)
470 {
471 if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
472 return false;
473 }
474 if (gpa + size < gpa || !size) {
475 return false;
476 }
477 if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
478 return false;
479 }
480 if (gpa + size > vmem->addr + vmem->usable_region_size) {
481 return false;
482 }
483 return true;
484 }
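/*
 * Example: with usable_region_size = 256 MiB and a 2 MiB block size, a
 * request at gpa = vmem->addr + 254 MiB is accepted for size = 2 MiB but
 * rejected for size = 4 MiB, as the latter would end 2 MiB beyond the
 * usable region.
 */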
485
486 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
487 uint64_t size, bool plug)
488 {
489 const uint64_t offset = start_gpa - vmem->addr;
490 RAMBlock *rb = vmem->memdev->mr.ram_block;
491 int ret = 0;
492
493 if (virtio_mem_is_busy()) {
494 return -EBUSY;
495 }
496
497 if (!plug) {
498 if (ram_block_discard_range(rb, offset, size)) {
499 return -EBUSY;
500 }
501 virtio_mem_notify_unplug(vmem, offset, size);
502 virtio_mem_set_range_unplugged(vmem, start_gpa, size);
503 return 0;
504 }
505
506 if (vmem->prealloc) {
507 void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
508 int fd = memory_region_get_fd(&vmem->memdev->mr);
509 Error *local_err = NULL;
510
511 qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
512 if (local_err) {
513 static bool warned;
514
515 /*
516 * Warn only once; we don't want to fill the log with these
517 * warnings.
518 */
519 if (!warned) {
520 warn_report_err(local_err);
521 warned = true;
522 } else {
523 error_free(local_err);
524 }
525 ret = -EBUSY;
526 }
527 }
528
529 if (!ret) {
530 ret = virtio_mem_notify_plug(vmem, offset, size);
531 }
532 if (ret) {
533 /* Could be preallocated memory or memory populated by a notifier. */
534 ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
535 return -EBUSY;
536 }
537
538 virtio_mem_set_range_plugged(vmem, start_gpa, size);
539 return 0;
540 }
541
542 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
543 uint16_t nb_blocks, bool plug)
544 {
545 const uint64_t size = nb_blocks * vmem->block_size;
546 int ret;
547
548 if (!virtio_mem_valid_range(vmem, gpa, size)) {
549 return VIRTIO_MEM_RESP_ERROR;
550 }
551
552 if (plug && (vmem->size + size > vmem->requested_size)) {
553 return VIRTIO_MEM_RESP_NACK;
554 }
555
556 /* Test whether all blocks really are in the opposite state. */
557 if ((plug && !virtio_mem_is_range_unplugged(vmem, gpa, size)) ||
558 (!plug && !virtio_mem_is_range_plugged(vmem, gpa, size))) {
559 return VIRTIO_MEM_RESP_ERROR;
560 }
561
562 ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
563 if (ret) {
564 return VIRTIO_MEM_RESP_BUSY;
565 }
566 if (plug) {
567 vmem->size += size;
568 } else {
569 vmem->size -= size;
570 }
571 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
572 return VIRTIO_MEM_RESP_ACK;
573 }
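/*
 * Rough request flow as handled above, using example values: a guest that
 * wants to plug 64 MiB at gpa = addr + 128 MiB with a 2 MiB block size sends
 * VIRTIO_MEM_REQ_PLUG with addr = gpa and nb_blocks = 32. It gets
 * VIRTIO_MEM_RESP_NACK if that would exceed requested_size,
 * VIRTIO_MEM_RESP_ERROR if the range is invalid or not completely unplugged,
 * VIRTIO_MEM_RESP_BUSY if migration is active or discarding/preallocation
 * fails, and VIRTIO_MEM_RESP_ACK otherwise, with the bitmap updated and
 * "size" grown by 64 MiB.
 */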
574
575 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
576 struct virtio_mem_req *req)
577 {
578 const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
579 const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
580 uint16_t type;
581
582 trace_virtio_mem_plug_request(gpa, nb_blocks);
583 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
584 virtio_mem_send_response_simple(vmem, elem, type);
585 }
586
587 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
588 struct virtio_mem_req *req)
589 {
590 const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
591 const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
592 uint16_t type;
593
594 trace_virtio_mem_unplug_request(gpa, nb_blocks);
595 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
596 virtio_mem_send_response_simple(vmem, elem, type);
597 }
598
599 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
600 uint64_t requested_size,
601 bool can_shrink)
602 {
603 uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
604 requested_size + VIRTIO_MEM_USABLE_EXTENT);
605
606 /* The usable region size always has to be a multiple of the block size. */
607 newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);
608
609 if (!requested_size) {
610 newsize = 0;
611 }
612
613 if (newsize < vmem->usable_region_size && !can_shrink) {
614 return;
615 }
616
617 trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
618 vmem->usable_region_size = newsize;
619 }
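/*
 * Note that growing requested_size (e.g., 0 -> 1 GiB) immediately grows the
 * usable region to 1 GiB + VIRTIO_MEM_USABLE_EXTENT (capped at the memdev
 * size), while lowering it at runtime uses can_shrink == false (see the
 * property setter below): the usable region only shrinks again on
 * unplug-all, reset or realize.
 */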
620
621 static int virtio_mem_unplug_all(VirtIOMEM *vmem)
622 {
623 RAMBlock *rb = vmem->memdev->mr.ram_block;
624
625 if (vmem->size) {
626 if (virtio_mem_is_busy()) {
627 return -EBUSY;
628 }
629 if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
630 return -EBUSY;
631 }
632 virtio_mem_notify_unplug_all(vmem);
633
634 bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
635 vmem->size = 0;
636 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
637 }
638
639 trace_virtio_mem_unplugged_all();
640 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
641 return 0;
642 }
643
644 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
645 VirtQueueElement *elem)
646 {
647 trace_virtio_mem_unplug_all_request();
648 if (virtio_mem_unplug_all(vmem)) {
649 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
650 } else {
651 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
652 }
653 }
654
655 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
656 struct virtio_mem_req *req)
657 {
658 const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
659 const uint64_t gpa = le64_to_cpu(req->u.state.addr);
660 const uint64_t size = nb_blocks * vmem->block_size;
661 struct virtio_mem_resp resp = {
662 .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
663 };
664
665 trace_virtio_mem_state_request(gpa, nb_blocks);
666 if (!virtio_mem_valid_range(vmem, gpa, size)) {
667 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
668 return;
669 }
670
671 if (virtio_mem_is_range_plugged(vmem, gpa, size)) {
672 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
673 } else if (virtio_mem_is_range_unplugged(vmem, gpa, size)) {
674 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
675 } else {
676 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
677 }
678 trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
679 virtio_mem_send_response(vmem, elem, &resp);
680 }
681
682 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
683 {
684 const int len = sizeof(struct virtio_mem_req);
685 VirtIOMEM *vmem = VIRTIO_MEM(vdev);
686 VirtQueueElement *elem;
687 struct virtio_mem_req req;
688 uint16_t type;
689
690 while (true) {
691 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
692 if (!elem) {
693 return;
694 }
695
696 if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
697 virtio_error(vdev, "virtio-mem protocol violation: invalid request"
698 " size: %d", len);
699 virtqueue_detach_element(vq, elem, 0);
700 g_free(elem);
701 return;
702 }
703
704 if (iov_size(elem->in_sg, elem->in_num) <
705 sizeof(struct virtio_mem_resp)) {
706 virtio_error(vdev, "virtio-mem protocol violation: not enough space"
707 " for response: %zu",
708 iov_size(elem->in_sg, elem->in_num));
709 virtqueue_detach_element(vq, elem, 0);
710 g_free(elem);
711 return;
712 }
713
714 type = le16_to_cpu(req.type);
715 switch (type) {
716 case VIRTIO_MEM_REQ_PLUG:
717 virtio_mem_plug_request(vmem, elem, &req);
718 break;
719 case VIRTIO_MEM_REQ_UNPLUG:
720 virtio_mem_unplug_request(vmem, elem, &req);
721 break;
722 case VIRTIO_MEM_REQ_UNPLUG_ALL:
723 virtio_mem_unplug_all_request(vmem, elem);
724 break;
725 case VIRTIO_MEM_REQ_STATE:
726 virtio_mem_state_request(vmem, elem, &req);
727 break;
728 default:
729 virtio_error(vdev, "virtio-mem protocol violation: unknown request"
730 " type: %d", type);
731 virtqueue_detach_element(vq, elem, 0);
732 g_free(elem);
733 return;
734 }
735
736 g_free(elem);
737 }
738 }
739
740 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
741 {
742 VirtIOMEM *vmem = VIRTIO_MEM(vdev);
743 struct virtio_mem_config *config = (void *) config_data;
744
745 config->block_size = cpu_to_le64(vmem->block_size);
746 config->node_id = cpu_to_le16(vmem->node);
747 config->requested_size = cpu_to_le64(vmem->requested_size);
748 config->plugged_size = cpu_to_le64(vmem->size);
749 config->addr = cpu_to_le64(vmem->addr);
750 config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
751 config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
752 }
753
754 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
755 Error **errp)
756 {
757 MachineState *ms = MACHINE(qdev_get_machine());
758 VirtIOMEM *vmem = VIRTIO_MEM(vdev);
759
760 if (ms->numa_state) {
761 #if defined(CONFIG_ACPI)
762 virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
763 #endif
764 }
765 assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
766 if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
767 virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
768 }
769 return features;
770 }
771
772 static int virtio_mem_validate_features(VirtIODevice *vdev)
773 {
774 if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
775 !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
776 return -EFAULT;
777 }
778 return 0;
779 }
780
781 static void virtio_mem_system_reset(void *opaque)
782 {
783 VirtIOMEM *vmem = VIRTIO_MEM(opaque);
784
785 /*
786 * During usual resets, we will unplug all memory and shrink the usable
787 * region size. This is, however, not possible in all scenarios. Then,
788 * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL).
789 */
790 virtio_mem_unplug_all(vmem);
791 }
792
793 static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
794 {
795 MachineState *ms = MACHINE(qdev_get_machine());
796 int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
797 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
798 VirtIOMEM *vmem = VIRTIO_MEM(dev);
799 uint64_t page_size;
800 RAMBlock *rb;
801 int ret;
802
803 if (!vmem->memdev) {
804 error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
805 return;
806 } else if (host_memory_backend_is_mapped(vmem->memdev)) {
807 error_setg(errp, "'%s' property specifies a busy memdev: %s",
808 VIRTIO_MEM_MEMDEV_PROP,
809 object_get_canonical_path_component(OBJECT(vmem->memdev)));
810 return;
811 } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
812 memory_region_is_rom(&vmem->memdev->mr) ||
813 !vmem->memdev->mr.ram_block) {
814 error_setg(errp, "'%s' property specifies an unsupported memdev",
815 VIRTIO_MEM_MEMDEV_PROP);
816 return;
817 } else if (vmem->memdev->prealloc) {
818 error_setg(errp, "'%s' property specifies a memdev with preallocation"
819 " enabled: %s. Instead, specify 'prealloc=on' for the"
820 " virtio-mem device. ", VIRTIO_MEM_MEMDEV_PROP,
821 object_get_canonical_path_component(OBJECT(vmem->memdev)));
822 return;
823 }
824
825 if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
826 (!nb_numa_nodes && vmem->node)) {
827 error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
828 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
829 vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
830 return;
831 }
832
833 if (enable_mlock) {
834 error_setg(errp, "Incompatible with mlock");
835 return;
836 }
837
838 rb = vmem->memdev->mr.ram_block;
839 page_size = qemu_ram_pagesize(rb);
840
841 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
842 switch (vmem->unplugged_inaccessible) {
843 case ON_OFF_AUTO_AUTO:
844 if (virtio_mem_has_shared_zeropage(rb)) {
845 vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
846 } else {
847 vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
848 }
849 break;
850 case ON_OFF_AUTO_OFF:
851 if (!virtio_mem_has_shared_zeropage(rb)) {
852 warn_report("'%s' property set to 'off' with a memdev that does"
853 " not support the shared zeropage.",
854 VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
855 }
856 break;
857 default:
858 break;
859 }
860 #else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
861 vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
862 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
863
864 /*
865 * If the block size wasn't configured by the user, use a sane default. This
866 * allows using hugetlbfs backends of any page size without manual
867 * intervention.
868 */
869 if (!vmem->block_size) {
870 vmem->block_size = virtio_mem_default_block_size(rb);
871 }
872
873 if (vmem->block_size < page_size) {
874 error_setg(errp, "'%s' property has to be at least the page size (0x%"
875 PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
876 return;
877 } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
878 warn_report("'%s' property is smaller than the default block size (%"
879 PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
880 virtio_mem_default_block_size(rb) / MiB);
881 }
882 if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
883 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
884 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
885 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
886 return;
887 } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
888 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
889 ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
890 vmem->block_size);
891 return;
892 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
893 vmem->block_size)) {
894 error_setg(errp, "'%s' property memdev size has to be multiples of"
895 "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
896 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
897 return;
898 }
899
900 if (ram_block_coordinated_discard_require(true)) {
901 error_setg(errp, "Discarding RAM is disabled");
902 return;
903 }
904
905 /*
906 * We don't know at this point whether shared RAM is migrated using
907 * QEMU or migrated using the file content. "x-ignore-shared" will be
908 * configured after realizing the device. So in case we have an
909 * incoming migration, simply always skip the discard step.
910 *
911 * Otherwise, make sure that we start with a clean slate: either the
912 * memory backend might get reused or the shared file might still have
913 * memory allocated.
914 */
915 if (!runstate_check(RUN_STATE_INMIGRATE)) {
916 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
917 if (ret) {
918 error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
919 ram_block_coordinated_discard_require(false);
920 return;
921 }
922 }
923
924 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
925
926 vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
927 vmem->block_size;
928 vmem->bitmap = bitmap_new(vmem->bitmap_size);
929
930 virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config));
931 vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
932
933 host_memory_backend_set_mapped(vmem->memdev, true);
934 vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
935 if (vmem->early_migration) {
936 vmstate_register(VMSTATE_IF(vmem), VMSTATE_INSTANCE_ID_ANY,
937 &vmstate_virtio_mem_device_early, vmem);
938 }
939 qemu_register_reset(virtio_mem_system_reset, vmem);
940
941 /*
942 * Set ourselves as RamDiscardManager before the plug handler maps the
943 * memory region and exposes it via an address space.
944 */
945 memory_region_set_ram_discard_manager(&vmem->memdev->mr,
946 RAM_DISCARD_MANAGER(vmem));
947 }
948
949 static void virtio_mem_device_unrealize(DeviceState *dev)
950 {
951 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
952 VirtIOMEM *vmem = VIRTIO_MEM(dev);
953
954 /*
955 * The unplug handler unmapped the memory region; it cannot be
956 * found via an address space anymore. Unset ourselves.
957 */
958 memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
959 qemu_unregister_reset(virtio_mem_system_reset, vmem);
960 if (vmem->early_migration) {
961 vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
962 vmem);
963 }
964 vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
965 host_memory_backend_set_mapped(vmem->memdev, false);
966 virtio_del_queue(vdev, 0);
967 virtio_cleanup(vdev);
968 g_free(vmem->bitmap);
969 ram_block_coordinated_discard_require(false);
970 }
971
972 static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
973 uint64_t offset, uint64_t size)
974 {
975 RAMBlock *rb = vmem->memdev->mr.ram_block;
976
977 return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
978 }
979
980 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
981 {
982 /* Make sure all memory is really discarded after migration. */
983 return virtio_mem_for_each_unplugged_range(vmem, NULL,
984 virtio_mem_discard_range_cb);
985 }
986
987 static int virtio_mem_post_load(void *opaque, int version_id)
988 {
989 VirtIOMEM *vmem = VIRTIO_MEM(opaque);
990 RamDiscardListener *rdl;
991 int ret;
992
993 /*
994 * We started out with all memory discarded and our memory region is mapped
995 * into an address space. Replay, now that we updated the bitmap.
996 */
997 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
998 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
999 virtio_mem_notify_populate_cb);
1000 if (ret) {
1001 return ret;
1002 }
1003 }
1004
1005 /*
1006 * If shared RAM is migrated using the file content and not using QEMU,
1007 * don't mess with preallocation and postcopy.
1008 */
1009 if (migrate_ram_is_ignored(vmem->memdev->mr.ram_block)) {
1010 return 0;
1011 }
1012
1013 if (vmem->prealloc && !vmem->early_migration) {
1014 warn_report("Proper preallocation with migration requires a newer QEMU machine");
1015 }
1016
1017 if (migration_in_incoming_postcopy()) {
1018 return 0;
1019 }
1020
1021 return virtio_mem_restore_unplugged(vmem);
1022 }
1023
1024 static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
1025 uint64_t offset, uint64_t size)
1026 {
1027 void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
1028 int fd = memory_region_get_fd(&vmem->memdev->mr);
1029 Error *local_err = NULL;
1030
1031 qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err);
1032 if (local_err) {
1033 error_report_err(local_err);
1034 return -ENOMEM;
1035 }
1036 return 0;
1037 }
1038
1039 static int virtio_mem_post_load_early(void *opaque, int version_id)
1040 {
1041 VirtIOMEM *vmem = VIRTIO_MEM(opaque);
1042 RAMBlock *rb = vmem->memdev->mr.ram_block;
1043 int ret;
1044
1045 if (!vmem->prealloc) {
1046 return 0;
1047 }
1048
1049 /*
1050 * If shared RAM is migrated using the file content and not using QEMU,
1051 * don't mess with preallocation and postcopy.
1052 */
1053 if (migrate_ram_is_ignored(rb)) {
1054 return 0;
1055 }
1056
1057 /*
1058 * We restored the bitmap and verified that the basic properties
1059 * match on source and destination, so we can go ahead and preallocate
1060 * memory for all plugged memory blocks, before actual RAM migration starts
1061 * touching this memory.
1062 */
1063 ret = virtio_mem_for_each_plugged_range(vmem, NULL,
1064 virtio_mem_prealloc_range_cb);
1065 if (ret) {
1066 return ret;
1067 }
1068
1069 /*
1070 * This is tricky: postcopy wants to start with a clean slate. On
1071 * POSTCOPY_INCOMING_ADVISE, postcopy code discards all (ordinarily
1072 * preallocated) RAM such that postcopy will work as expected later.
1073 *
1074 * However, we run after POSTCOPY_INCOMING_ADVISE -- but before actual
1075 * RAM migration. So let's discard all memory again. This looks like an
1076 * expensive NOP, but actually serves a purpose: we made sure that we
1077 * were able to allocate all required backend memory once. We cannot
1078 * guarantee that the backend memory we will free will remain free
1079 * until we need it during postcopy, but at least we can catch the
1080 * obvious setup issues this way.
1081 */
1082 if (migration_incoming_postcopy_advised()) {
1083 if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
1084 return -EBUSY;
1085 }
1086 }
1087 return 0;
1088 }
1089
1090 typedef struct VirtIOMEMMigSanityChecks {
1091 VirtIOMEM *parent;
1092 uint64_t addr;
1093 uint64_t region_size;
1094 uint64_t block_size;
1095 uint32_t node;
1096 } VirtIOMEMMigSanityChecks;
1097
1098 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
1099 {
1100 VirtIOMEMMigSanityChecks *tmp = opaque;
1101 VirtIOMEM *vmem = tmp->parent;
1102
1103 tmp->addr = vmem->addr;
1104 tmp->region_size = memory_region_size(&vmem->memdev->mr);
1105 tmp->block_size = vmem->block_size;
1106 tmp->node = vmem->node;
1107 return 0;
1108 }
1109
1110 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
1111 {
1112 VirtIOMEMMigSanityChecks *tmp = opaque;
1113 VirtIOMEM *vmem = tmp->parent;
1114 const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);
1115
1116 if (tmp->addr != vmem->addr) {
1117 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
1118 VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
1119 return -EINVAL;
1120 }
1121 /*
1122 * Note: Preparation for resizable memory regions. The maximum size
1123 * of the memory region must not change during migration.
1124 */
1125 if (tmp->region_size != new_region_size) {
1126 error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
1127 PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
1128 new_region_size);
1129 return -EINVAL;
1130 }
1131 if (tmp->block_size != vmem->block_size) {
1132 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
1133 VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
1134 vmem->block_size);
1135 return -EINVAL;
1136 }
1137 if (tmp->node != vmem->node) {
1138 error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
1139 VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
1140 return -EINVAL;
1141 }
1142 return 0;
1143 }
1144
1145 static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
1146 .name = "virtio-mem-device/sanity-checks",
1147 .pre_save = virtio_mem_mig_sanity_checks_pre_save,
1148 .post_load = virtio_mem_mig_sanity_checks_post_load,
1149 .fields = (VMStateField[]) {
1150 VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
1151 VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
1152 VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
1153 VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
1154 VMSTATE_END_OF_LIST(),
1155 },
1156 };
1157
1158 static bool virtio_mem_vmstate_field_exists(void *opaque, int version_id)
1159 {
1160 const VirtIOMEM *vmem = VIRTIO_MEM(opaque);
1161
1162 /* With early migration, these fields were already migrated. */
1163 return !vmem->early_migration;
1164 }
1165
1166 static const VMStateDescription vmstate_virtio_mem_device = {
1167 .name = "virtio-mem-device",
1168 .minimum_version_id = 1,
1169 .version_id = 1,
1170 .priority = MIG_PRI_VIRTIO_MEM,
1171 .post_load = virtio_mem_post_load,
1172 .fields = (VMStateField[]) {
1173 VMSTATE_WITH_TMP_TEST(VirtIOMEM, virtio_mem_vmstate_field_exists,
1174 VirtIOMEMMigSanityChecks,
1175 vmstate_virtio_mem_sanity_checks),
1176 VMSTATE_UINT64(usable_region_size, VirtIOMEM),
1177 VMSTATE_UINT64_TEST(size, VirtIOMEM, virtio_mem_vmstate_field_exists),
1178 VMSTATE_UINT64(requested_size, VirtIOMEM),
1179 VMSTATE_BITMAP_TEST(bitmap, VirtIOMEM, virtio_mem_vmstate_field_exists,
1180 0, bitmap_size),
1181 VMSTATE_END_OF_LIST()
1182 },
1183 };
1184
1185 /*
1186 * Transfer properties that are immutable while migration is active early,
1187 * such that we have this information around before migrating any RAM
1188 * content.
1189 *
1190 * Note that virtio_mem_is_busy() makes sure these properties can no longer
1191 * change on the migration source until migration completed.
1192 *
1193 * With QEMU compat machines, we transmit these properties later, via
1194 * vmstate_virtio_mem_device instead -- see virtio_mem_vmstate_field_exists().
1195 */
1196 static const VMStateDescription vmstate_virtio_mem_device_early = {
1197 .name = "virtio-mem-device-early",
1198 .minimum_version_id = 1,
1199 .version_id = 1,
1200 .early_setup = true,
1201 .post_load = virtio_mem_post_load_early,
1202 .fields = (VMStateField[]) {
1203 VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
1204 vmstate_virtio_mem_sanity_checks),
1205 VMSTATE_UINT64(size, VirtIOMEM),
1206 VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
1207 VMSTATE_END_OF_LIST()
1208 },
1209 };
1210
1211 static const VMStateDescription vmstate_virtio_mem = {
1212 .name = "virtio-mem",
1213 .minimum_version_id = 1,
1214 .version_id = 1,
1215 .fields = (VMStateField[]) {
1216 VMSTATE_VIRTIO_DEVICE,
1217 VMSTATE_END_OF_LIST()
1218 },
1219 };
1220
1221 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
1222 VirtioMEMDeviceInfo *vi)
1223 {
1224 vi->memaddr = vmem->addr;
1225 vi->node = vmem->node;
1226 vi->requested_size = vmem->requested_size;
1227 vi->size = vmem->size;
1228 vi->max_size = memory_region_size(&vmem->memdev->mr);
1229 vi->block_size = vmem->block_size;
1230 vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
1231 }
1232
1233 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
1234 {
1235 if (!vmem->memdev) {
1236 error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
1237 return NULL;
1238 }
1239
1240 return &vmem->memdev->mr;
1241 }
1242
1243 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
1244 Notifier *notifier)
1245 {
1246 notifier_list_add(&vmem->size_change_notifiers, notifier);
1247 }
1248
1249 static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
1250 Notifier *notifier)
1251 {
1252 notifier_remove(notifier);
1253 }
1254
1255 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
1256 void *opaque, Error **errp)
1257 {
1258 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1259 uint64_t value = vmem->size;
1260
1261 visit_type_size(v, name, &value, errp);
1262 }
1263
1264 static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
1265 const char *name, void *opaque,
1266 Error **errp)
1267 {
1268 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1269 uint64_t value = vmem->requested_size;
1270
1271 visit_type_size(v, name, &value, errp);
1272 }
1273
1274 static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
1275 const char *name, void *opaque,
1276 Error **errp)
1277 {
1278 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1279 uint64_t value;
1280
1281 if (!visit_type_size(v, name, &value, errp)) {
1282 return;
1283 }
1284
1285 /*
1286 * The block size and memory backend are not fixed until the device is
1287 * realized. realize() will verify these properties then.
1288 */
1289 if (DEVICE(obj)->realized) {
1290 if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
1291 error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64
1292 ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
1293 vmem->block_size);
1294 return;
1295 } else if (value > memory_region_size(&vmem->memdev->mr)) {
1296 error_setg(errp, "'%s' cannot exceed the memory backend size"
1297 "(0x%" PRIx64 ")", name,
1298 memory_region_size(&vmem->memdev->mr));
1299 return;
1300 }
1301
1302 if (value != vmem->requested_size) {
1303 virtio_mem_resize_usable_region(vmem, value, false);
1304 vmem->requested_size = value;
1305 }
1306 /*
1307 * Trigger a config update so the guest gets notified. We trigger
1308 * even if the size didn't change (especially helpful for debugging).
1309 */
1310 virtio_notify_config(VIRTIO_DEVICE(vmem));
1311 } else {
1312 vmem->requested_size = value;
1313 }
1314 }
1315
1316 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
1317 void *opaque, Error **errp)
1318 {
1319 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1320 uint64_t value = vmem->block_size;
1321
1322 /*
1323 * If not configured by the user (and we're not realized yet), use the
1324 * default block size we would use with the current memory backend.
1325 */
1326 if (!value) {
1327 if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
1328 value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
1329 } else {
1330 value = virtio_mem_thp_size();
1331 }
1332 }
1333
1334 visit_type_size(v, name, &value, errp);
1335 }
1336
1337 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
1338 void *opaque, Error **errp)
1339 {
1340 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1341 uint64_t value;
1342
1343 if (DEVICE(obj)->realized) {
1344 error_setg(errp, "'%s' cannot be changed", name);
1345 return;
1346 }
1347
1348 if (!visit_type_size(v, name, &value, errp)) {
1349 return;
1350 }
1351
1352 if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
1353 error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
1354 VIRTIO_MEM_MIN_BLOCK_SIZE);
1355 return;
1356 } else if (!is_power_of_2(value)) {
1357 error_setg(errp, "'%s' property has to be a power of two", name);
1358 return;
1359 }
1360 vmem->block_size = value;
1361 }
1362
1363 static void virtio_mem_instance_init(Object *obj)
1364 {
1365 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1366
1367 notifier_list_init(&vmem->size_change_notifiers);
1368 QLIST_INIT(&vmem->rdl_list);
1369
1370 object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
1371 NULL, NULL, NULL);
1372 object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
1373 virtio_mem_get_requested_size,
1374 virtio_mem_set_requested_size, NULL, NULL);
1375 object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
1376 virtio_mem_get_block_size, virtio_mem_set_block_size,
1377 NULL, NULL);
1378 }
1379
1380 static Property virtio_mem_properties[] = {
1381 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
1382 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
1383 DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
1384 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
1385 TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1386 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
1387 DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
1388 unplugged_inaccessible, ON_OFF_AUTO_ON),
1389 #endif
1390 DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM,
1391 early_migration, true),
1392 DEFINE_PROP_END_OF_LIST(),
1393 };
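/*
 * Illustrative command-line usage (the virtio-mem-pci proxy device and the
 * exact machine setup are outside the scope of this file):
 *
 *   qemu-system-x86_64 ... -m 4G,maxmem=20G \
 *     -object memory-backend-ram,id=vmem0,size=16G \
 *     -device virtio-mem-pci,id=vm0,memdev=vmem0,node=0,requested-size=1G
 *
 * The plugged amount can then be changed at runtime, e.g., from the HMP
 * monitor: "qom-set vm0 requested-size 4G".
 */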
1394
1395 static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
1396 const MemoryRegion *mr)
1397 {
1398 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1399
1400 g_assert(mr == &vmem->memdev->mr);
1401 return vmem->block_size;
1402 }
1403
1404 static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
1405 const MemoryRegionSection *s)
1406 {
1407 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1408 uint64_t start_gpa = vmem->addr + s->offset_within_region;
1409 uint64_t end_gpa = start_gpa + int128_get64(s->size);
1410
1411 g_assert(s->mr == &vmem->memdev->mr);
1412
1413 start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
1414 end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
1415
1416 if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
1417 return false;
1418 }
1419
1420 return virtio_mem_is_range_plugged(vmem, start_gpa, end_gpa - start_gpa);
1421 }
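/*
 * Example: with a 2 MiB block size, a section covering [addr + 3 MiB,
 * addr + 5 MiB) is widened to [addr + 2 MiB, addr + 6 MiB) before checking,
 * so the result is conservative: the section only counts as populated if
 * every block it touches is plugged.
 */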
1422
1423 struct VirtIOMEMReplayData {
1424 void *fn;
1425 void *opaque;
1426 };
1427
1428 static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
1429 {
1430 struct VirtIOMEMReplayData *data = arg;
1431
1432 return ((ReplayRamPopulate)data->fn)(s, data->opaque);
1433 }
1434
1435 static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
1436 MemoryRegionSection *s,
1437 ReplayRamPopulate replay_fn,
1438 void *opaque)
1439 {
1440 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1441 struct VirtIOMEMReplayData data = {
1442 .fn = replay_fn,
1443 .opaque = opaque,
1444 };
1445
1446 g_assert(s->mr == &vmem->memdev->mr);
1447 return virtio_mem_for_each_plugged_section(vmem, s, &data,
1448 virtio_mem_rdm_replay_populated_cb);
1449 }
1450
1451 static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
1452 void *arg)
1453 {
1454 struct VirtIOMEMReplayData *data = arg;
1455
1456 ((ReplayRamDiscard)data->fn)(s, data->opaque);
1457 return 0;
1458 }
1459
1460 static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
1461 MemoryRegionSection *s,
1462 ReplayRamDiscard replay_fn,
1463 void *opaque)
1464 {
1465 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1466 struct VirtIOMEMReplayData data = {
1467 .fn = replay_fn,
1468 .opaque = opaque,
1469 };
1470
1471 g_assert(s->mr == &vmem->memdev->mr);
1472 virtio_mem_for_each_unplugged_section(vmem, s, &data,
1473 virtio_mem_rdm_replay_discarded_cb);
1474 }
1475
1476 static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
1477 RamDiscardListener *rdl,
1478 MemoryRegionSection *s)
1479 {
1480 VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1481 int ret;
1482
1483 g_assert(s->mr == &vmem->memdev->mr);
1484 rdl->section = memory_region_section_new_copy(s);
1485
1486 QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
1487 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1488 virtio_mem_notify_populate_cb);
1489 if (ret) {
1490 error_report("%s: Replaying plugged ranges failed: %s", __func__,
1491 strerror(-ret));
1492 }
1493 }
1494
1495 static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
1496 RamDiscardListener *rdl)
1497 {
1498 VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1499
1500 g_assert(rdl->section->mr == &vmem->memdev->mr);
1501 if (vmem->size) {
1502 if (rdl->double_discard_supported) {
1503 rdl->notify_discard(rdl, rdl->section);
1504 } else {
1505 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1506 virtio_mem_notify_discard_cb);
1507 }
1508 }
1509
1510 memory_region_section_free_copy(rdl->section);
1511 rdl->section = NULL;
1512 QLIST_REMOVE(rdl, next);
1513 }
1514
1515 static void virtio_mem_unplug_request_check(VirtIOMEM *vmem, Error **errp)
1516 {
1517 if (vmem->unplugged_inaccessible == ON_OFF_AUTO_OFF) {
1518 /*
1519 * We could allow it with a usable region size of 0, but let's just
1520 * not care about that legacy setting.
1521 */
1522 error_setg(errp, "virtio-mem device cannot get unplugged while"
1523 " '" VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "' != 'on'");
1524 return;
1525 }
1526
1527 if (vmem->size) {
1528 error_setg(errp, "virtio-mem device cannot get unplugged while"
1529 " '" VIRTIO_MEM_SIZE_PROP "' != '0'");
1530 return;
1531 }
1532 if (vmem->requested_size) {
1533 error_setg(errp, "virtio-mem device cannot get unplugged while"
1534 " '" VIRTIO_MEM_REQUESTED_SIZE_PROP "' != '0'");
1535 return;
1536 }
1537 }
1538
1539 static void virtio_mem_class_init(ObjectClass *klass, void *data)
1540 {
1541 DeviceClass *dc = DEVICE_CLASS(klass);
1542 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1543 VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
1544 RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
1545
1546 device_class_set_props(dc, virtio_mem_properties);
1547 dc->vmsd = &vmstate_virtio_mem;
1548
1549 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1550 vdc->realize = virtio_mem_device_realize;
1551 vdc->unrealize = virtio_mem_device_unrealize;
1552 vdc->get_config = virtio_mem_get_config;
1553 vdc->get_features = virtio_mem_get_features;
1554 vdc->validate_features = virtio_mem_validate_features;
1555 vdc->vmsd = &vmstate_virtio_mem_device;
1556
1557 vmc->fill_device_info = virtio_mem_fill_device_info;
1558 vmc->get_memory_region = virtio_mem_get_memory_region;
1559 vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
1560 vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
1561 vmc->unplug_request_check = virtio_mem_unplug_request_check;
1562
1563 rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
1564 rdmc->is_populated = virtio_mem_rdm_is_populated;
1565 rdmc->replay_populated = virtio_mem_rdm_replay_populated;
1566 rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
1567 rdmc->register_listener = virtio_mem_rdm_register_listener;
1568 rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
1569 }
1570
1571 static const TypeInfo virtio_mem_info = {
1572 .name = TYPE_VIRTIO_MEM,
1573 .parent = TYPE_VIRTIO_DEVICE,
1574 .instance_size = sizeof(VirtIOMEM),
1575 .instance_init = virtio_mem_instance_init,
1576 .class_init = virtio_mem_class_init,
1577 .class_size = sizeof(VirtIOMEMClass),
1578 .interfaces = (InterfaceInfo[]) {
1579 { TYPE_RAM_DISCARD_MANAGER },
1580 { }
1581 },
1582 };
1583
1584 static void virtio_register_types(void)
1585 {
1586 type_register_static(&virtio_mem_info);
1587 }
1588
1589 type_init(virtio_register_types)