#include "qemu/timer.h"
#include "hw/virtio/virtio.h"
#include "hw/mem/pc-dimm.h"
+#include "hw/qdev-properties.h"
#include "sysemu/balloon.h"
#include "hw/virtio/virtio-balloon.h"
#include "exec/address-spaces.h"
return pbp->base_gpa == base_gpa;
}
+/*
+ * Tell callers whether balloon operations should currently be skipped.
+ *
+ * Returns true when either ram_block_discard_is_disabled() or
+ * migration_in_incoming_postcopy() returns true, false otherwise.
+ */
+static bool virtio_balloon_inhibited(void)
+{
+ /* Postcopy cannot deal with concurrent discards, so it's special. */
+ return ram_block_discard_is_disabled() || migration_in_incoming_postcopy();
+}
+
static void balloon_inflate_page(VirtIOBalloon *balloon,
MemoryRegion *mr, hwaddr mr_offset,
PartiallyBalloonedPage *pbp)
VirtIOBalloon *s = opaque;
int i;
- visit_start_struct(v, name, NULL, 0, &err);
- if (err) {
+ if (!visit_start_struct(v, name, NULL, 0, &err)) {
goto out;
}
- visit_type_int(v, "last-update", &s->stats_last_update, &err);
- if (err) {
+ if (!visit_type_int(v, "last-update", &s->stats_last_update, &err)) {
goto out_end;
}
- visit_start_struct(v, "stats", NULL, 0, &err);
- if (err) {
+ if (!visit_start_struct(v, "stats", NULL, 0, &err)) {
goto out_end;
}
for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
- visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err);
- if (err) {
+ if (!visit_type_uint64(v, balloon_stat_names[i], &s->stats[i], &err)) {
goto out_nested;
}
}
Error **errp)
{
VirtIOBalloon *s = opaque;
- Error *local_err = NULL;
int64_t value;
- visit_type_int(v, name, &value, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!visit_type_int(v, name, &value, errp)) {
return;
}
balloon_stats_change_timer(s, 0);
}
+/*
+ * Virtqueue handler for page reports coming from the guest.
+ *
+ * Pops every pending element from @vq. For each element, every buffer in
+ * in_sg is looked up as a host RAM range and discarded with
+ * ram_block_discard_range(), unless discarding is inhibited or a non-zero
+ * poison value is set on the device. Each element is then pushed back to the
+ * queue and the guest is notified.
+ *
+ * @vdev: the virtio device owning the queue; cast to VirtIOBalloon.
+ * @vq: the virtqueue to drain.
+ */
+static void virtio_balloon_handle_report(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+ VirtQueueElement *elem;
+
+ while ((elem = virtqueue_pop(vq, sizeof(VirtQueueElement)))) {
+ unsigned int i;
+
+ /*
+ * When we discard the page it has the effect of removing the page
+ * from the hypervisor itself and causing it to be zeroed when it
+ * is returned to us. So we must not discard the page if it is
+ * accessible by another device or process, or if the guest is
+ * expecting it to retain a non-zero value.
+ */
+ if (virtio_balloon_inhibited() || dev->poison_val) {
+ goto skip_element;
+ }
+
+ for (i = 0; i < elem->in_num; i++) {
+ void *addr = elem->in_sg[i].iov_base;
+ size_t size = elem->in_sg[i].iov_len;
+ ram_addr_t ram_offset;
+ RAMBlock *rb;
+
+ /*
+ * There is no need to check the memory section to see if
+ * it is ram/readonly/romd like there is for handle_output
+ * below. If the region is not meant to be written to then
+ * address_space_map will have allocated a bounce buffer
+ * and it will be freed in address_space_unmap and trigger
+ * an unassigned_mem_write before failing to copy over the
+ * buffer. If more than one bad descriptor is provided it
+ * will return NULL after the first bounce buffer and fail
+ * to map any resources.
+ */
+ rb = qemu_ram_block_from_host(addr, false, &ram_offset);
+ if (!rb) {
+ trace_virtio_balloon_bad_addr(elem->in_addr[i]);
+ continue;
+ }
+
+ /*
+ * For now we will simply ignore unaligned memory regions, or
+ * regions that overrun the end of the RAMBlock.
+ *
+ * OR-ing ram_offset and size lets a single alignment test cover
+ * both values against the RAMBlock page size.
+ */
+ if (!QEMU_IS_ALIGNED(ram_offset | size, qemu_ram_pagesize(rb)) ||
+ (ram_offset + size) > qemu_ram_get_used_length(rb)) {
+ continue;
+ }
+
+ /*
+ * NOTE(review): any result returned by ram_block_discard_range()
+ * is not examined here -- confirm that best-effort is intended.
+ */
+ ram_block_discard_range(rb, ram_offset, size);
+ }
+
+skip_element:
+ /*
+ * Reached for skipped and processed elements alike: hand the element
+ * back with 0 as the length argument, notify, then free it.
+ */
+ virtqueue_push(vq, elem, 0);
+ virtio_notify(vdev, vq);
+ g_free(elem);
+ }
+}
+
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
trace_virtio_balloon_handle_output(memory_region_name(section.mr),
pa);
- if (!qemu_balloon_is_inhibited()) {
+ if (!virtio_balloon_inhibited()) {
if (vq == s->ivq) {
balloon_inflate_page(s, section.mr,
section.offset_within_region, &pbp);
{
VirtIODevice *vdev = VIRTIO_DEVICE(s);
- s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
- virtio_notify_config(vdev);
+ if (s->free_page_report_status != FREE_PAGE_REPORT_S_DONE) {
+ /* See virtio_balloon_free_page_stop() */
+ qemu_mutex_lock(&s->free_page_lock);
+ s->free_page_report_status = FREE_PAGE_REPORT_S_DONE;
+ qemu_mutex_unlock(&s->free_page_lock);
+ virtio_notify_config(vdev);
+ }
}
static int
case PRECOPY_NOTIFY_SETUP:
precopy_enable_free_page_optimization();
break;
- case PRECOPY_NOTIFY_COMPLETE:
- case PRECOPY_NOTIFY_CLEANUP:
case PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC:
virtio_balloon_free_page_stop(dev);
break;
case PRECOPY_NOTIFY_AFTER_BITMAP_SYNC:
if (vdev->vm_running) {
virtio_balloon_free_page_start(dev);
- } else {
- virtio_balloon_free_page_done(dev);
+ break;
}
+ /*
+ * Set S_DONE before migrating the vmstate, so the guest will reuse
+ * all hinted pages once running on the destination. Fall through.
+ */
+ case PRECOPY_NOTIFY_CLEANUP:
+ /*
+ * Especially, if something goes wrong during precopy or if migration
+ * is canceled, we have to properly communicate S_DONE to the VM.
+ */
+ virtio_balloon_free_page_done(dev);
+ break;
+ case PRECOPY_NOTIFY_COMPLETE:
break;
default:
virtio_error(vdev, "%s: %d reason unknown", __func__, pnd->reason);
config.num_pages = cpu_to_le32(dev->num_pages);
config.actual = cpu_to_le32(dev->actual);
+ config.poison_val = cpu_to_le32(dev->poison_val);
if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
config.free_page_report_cmd_id =
return size;
}
+/*
+ * Tell whether VIRTIO_BALLOON_F_PAGE_POISON is set for the device, as
+ * reported by virtio_vdev_has_feature().
+ *
+ * @opaque: the VirtIOBalloon instance, taken as void * so the function can
+ *          also be used as the .needed callback of a VMStateDescription.
+ */
+static bool virtio_balloon_page_poison_support(void *opaque)
+{
+ VirtIOBalloon *s = opaque;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+ return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+}
+
static void virtio_balloon_set_config(VirtIODevice *vdev,
const uint8_t *config_data)
{
qapi_event_send_balloon_change(vm_ram_size -
((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
}
+ dev->poison_val = 0;
+ if (virtio_balloon_page_poison_support(dev)) {
+ dev->poison_val = le32_to_cpu(config.poison_val);
+ }
trace_virtio_balloon_set_config(dev->actual, oldactual);
}
}
};
+/*
+ * VMState description carrying the 32-bit poison_val field of VirtIOBalloon.
+ * Its .needed callback is virtio_balloon_page_poison_support().
+ *
+ * NOTE(review): the name is spelled "vitio-..." rather than "virtio-...".
+ * It is a runtime string, so it is deliberately left untouched; presumably
+ * renaming it would change how this section is identified in a migration
+ * stream -- confirm compatibility impact before correcting the spelling.
+ */
+static const VMStateDescription vmstate_virtio_balloon_page_poison = {
+ .name = "vitio-balloon-device/page-poison",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = virtio_balloon_page_poison_support,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(poison_val, VirtIOBalloon),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_virtio_balloon_device = {
.name = "virtio-balloon-device",
.version_id = 1,
},
.subsections = (const VMStateDescription * []) {
&vmstate_virtio_balloon_free_page_report,
+ &vmstate_virtio_balloon_page_poison,
NULL
}
};
return;
}
+ if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT) &&
+ !s->iothread) {
+ error_setg(errp, "'free-page-hint' requires 'iothread' to be set");
+ virtio_cleanup(vdev);
+ return;
+ }
+
s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE,
virtio_balloon_handle_free_page_vq);
- s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
- s->free_page_report_cmd_id =
- VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
- s->free_page_report_notify.notify =
- virtio_balloon_free_page_report_notify;
precopy_add_notifier(&s->free_page_report_notify);
- if (s->iothread) {
- object_ref(OBJECT(s->iothread));
- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
- virtio_ballloon_get_free_page_hints, s);
- qemu_mutex_init(&s->free_page_lock);
- qemu_cond_init(&s->free_page_cond);
- s->block_iothread = false;
- } else {
- /* Simply disable this feature if the iothread wasn't created. */
- s->host_features &= ~(1 << VIRTIO_BALLOON_F_FREE_PAGE_HINT);
- virtio_error(vdev, "iothread is missing");
- }
+
+ object_ref(OBJECT(s->iothread));
+ s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
+ virtio_ballloon_get_free_page_hints, s);
}
+
+ if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) {
+ s->reporting_vq = virtio_add_queue(vdev, 32,
+ virtio_balloon_handle_report);
+ }
+
reset_stats(s);
}
+/*
+ * Tear down the balloon device.
+ *
+ * If a free-page bottom half exists, it is deleted, the iothread reference
+ * is dropped, free page hinting is stopped and the precopy notifier is
+ * removed. Afterwards the stats timer is destroyed, the balloon handler is
+ * unregistered, the inflate/deflate/stats queues and any optional
+ * free-page/reporting queues are deleted, and virtio_cleanup() is called.
+ */
-static void virtio_balloon_device_unrealize(DeviceState *dev, Error **errp)
+static void virtio_balloon_device_unrealize(DeviceState *dev)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOBalloon *s = VIRTIO_BALLOON(dev);
- if (virtio_balloon_free_page_support(s)) {
+ if (s->free_page_bh) {
qemu_bh_delete(s->free_page_bh);
+ object_unref(OBJECT(s->iothread));
virtio_balloon_free_page_stop(s);
precopy_remove_notifier(&s->free_page_report_notify);
}
balloon_stats_destroy_timer(s);
qemu_remove_balloon_handler(s);
+
+ virtio_delete_queue(s->ivq);
+ virtio_delete_queue(s->dvq);
+ virtio_delete_queue(s->svq);
+ if (s->free_page_vq) {
+ virtio_delete_queue(s->free_page_vq);
+ }
+ if (s->reporting_vq) {
+ virtio_delete_queue(s->reporting_vq);
+ }
virtio_cleanup(vdev);
}
g_free(s->stats_vq_elem);
s->stats_vq_elem = NULL;
}
+
+ s->poison_val = 0;
}
static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status)
{
VirtIOBalloon *s = VIRTIO_BALLOON(obj);
+ qemu_mutex_init(&s->free_page_lock);
+ qemu_cond_init(&s->free_page_cond);
+ s->free_page_report_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN;
+ s->free_page_report_notify.notify = virtio_balloon_free_page_report_notify;
+
object_property_add(obj, "guest-stats", "guest statistics",
- balloon_stats_get_all, NULL, NULL, s, NULL);
+ balloon_stats_get_all, NULL, NULL, s);
object_property_add(obj, "guest-stats-polling-interval", "int",
balloon_stats_get_poll_interval,
balloon_stats_set_poll_interval,
- NULL, s, NULL);
+ NULL, s);
}
static const VMStateDescription vmstate_virtio_balloon = {
VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false),
DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features,
VIRTIO_BALLOON_F_FREE_PAGE_HINT, false),
+ DEFINE_PROP_BIT("page-poison", VirtIOBalloon, host_features,
+ VIRTIO_BALLOON_F_PAGE_POISON, true),
+ DEFINE_PROP_BIT("free-page-reporting", VirtIOBalloon, host_features,
+ VIRTIO_BALLOON_F_REPORTING, false),
/* QEMU 4.0 accidentally changed the config size even when free-page-hint
* is disabled, resulting in QEMU 3.1 migration incompatibility. This
* property retains this quirk for QEMU 4.1 machine types.
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
- dc->props = virtio_balloon_properties;
+ device_class_set_props(dc, virtio_balloon_properties);
dc->vmsd = &vmstate_virtio_balloon;
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
vdc->realize = virtio_balloon_device_realize;