fi
if test "$virtio_blk_data_plane" = "yes" ; then
- echo "CONFIG_VIRTIO_BLK_DATA_PLANE=y" >> $config_host_mak
+ echo 'CONFIG_VIRTIO_BLK_DATA_PLANE=$(CONFIG_VIRTIO)' >> $config_host_mak
fi
# USB host support
# Per-target files
# virtio has to be here due to weird dependency between PCI and virtio-net.
# need to fix this properly
-obj-$(CONFIG_VIRTIO) += dataplane/
-obj-$(CONFIG_VIRTIO) += virtio.o virtio-blk.o virtio-balloon.o virtio-net.o
-obj-$(CONFIG_VIRTIO) += virtio-serial-bus.o virtio-scsi.o
-obj-$(CONFIG_SOFTMMU) += vhost_net.o
-obj-$(CONFIG_VHOST_NET) += vhost.o
obj-$(CONFIG_VGA) += vga.o
# Inter-VM PCI shared memory & VFIO PCI device assignment
common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
common-obj-$(CONFIG_XEN_BACKEND) += xen_disk.o
common-obj-$(CONFIG_ECC) += ecc.o
+
+obj-$(CONFIG_VIRTIO) += virtio-blk.o
+obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/
--- /dev/null
+obj-y += ioq.o virtio-blk.o
--- /dev/null
+/*
+ * Linux AIO request queue
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "ioq.h"
+
+void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs)
+{
+ int rc;
+
+ ioq->fd = fd;
+ ioq->max_reqs = max_reqs;
+
+ memset(&ioq->io_ctx, 0, sizeof ioq->io_ctx);
+ rc = io_setup(max_reqs, &ioq->io_ctx);
+ if (rc != 0) {
+ fprintf(stderr, "ioq io_setup failed %d\n", rc);
+ exit(1);
+ }
+
+ rc = event_notifier_init(&ioq->io_notifier, 0);
+ if (rc != 0) {
+ fprintf(stderr, "ioq io event notifier creation failed %d\n", rc);
+ exit(1);
+ }
+
+ ioq->freelist = g_malloc0(sizeof ioq->freelist[0] * max_reqs);
+ ioq->freelist_idx = 0;
+
+ ioq->queue = g_malloc0(sizeof ioq->queue[0] * max_reqs);
+ ioq->queue_idx = 0;
+}
+
+void ioq_cleanup(IOQueue *ioq)
+{
+ g_free(ioq->freelist);
+ g_free(ioq->queue);
+
+ event_notifier_cleanup(&ioq->io_notifier);
+ io_destroy(ioq->io_ctx);
+}
+
+EventNotifier *ioq_get_notifier(IOQueue *ioq)
+{
+ return &ioq->io_notifier;
+}
+
+struct iocb *ioq_get_iocb(IOQueue *ioq)
+{
+ /* Underflow cannot happen since ioq is sized for max_reqs */
+ assert(ioq->freelist_idx != 0);
+
+ struct iocb *iocb = ioq->freelist[--ioq->freelist_idx];
+ ioq->queue[ioq->queue_idx++] = iocb;
+ return iocb;
+}
+
+void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb)
+{
+ /* Overflow cannot happen since ioq is sized for max_reqs */
+ assert(ioq->freelist_idx != ioq->max_reqs);
+
+ ioq->freelist[ioq->freelist_idx++] = iocb;
+}
+
+struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
+ unsigned int count, long long offset)
+{
+ struct iocb *iocb = ioq_get_iocb(ioq);
+
+ if (read) {
+ io_prep_preadv(iocb, ioq->fd, iov, count, offset);
+ } else {
+ io_prep_pwritev(iocb, ioq->fd, iov, count, offset);
+ }
+ io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier));
+ return iocb;
+}
+
+int ioq_submit(IOQueue *ioq)
+{
+ int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
+ ioq->queue_idx = 0; /* reset */
+ return rc;
+}
+
+int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
+ void *opaque)
+{
+ struct io_event events[ioq->max_reqs];
+ int nevents, i;
+
+ do {
+ nevents = io_getevents(ioq->io_ctx, 0, ioq->max_reqs, events, NULL);
+ } while (nevents < 0 && errno == EINTR);
+ if (nevents < 0) {
+ return nevents;
+ }
+
+ for (i = 0; i < nevents; i++) {
+ ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res;
+
+ completion(events[i].obj, ret, opaque);
+ ioq_put_iocb(ioq, events[i].obj);
+ }
+ return nevents;
+}
--- /dev/null
+/*
+ * Linux AIO request queue
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef IOQ_H
+#define IOQ_H
+
+#include <libaio.h>
+#include "qemu/event_notifier.h"
+
+typedef struct {
+ int fd; /* file descriptor */
+ unsigned int max_reqs; /* max length of freelist and queue */
+
+ io_context_t io_ctx; /* Linux AIO context */
+ EventNotifier io_notifier; /* Linux AIO eventfd */
+
+ /* Requests can complete in any order so a free list is necessary to manage
+ * available iocbs.
+ */
+ struct iocb **freelist; /* free iocbs */
+ unsigned int freelist_idx;
+
+ /* Multiple requests are queued up before submitting them all in one go */
+ struct iocb **queue; /* queued iocbs */
+ unsigned int queue_idx;
+} IOQueue;
+
+void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs);
+void ioq_cleanup(IOQueue *ioq);
+EventNotifier *ioq_get_notifier(IOQueue *ioq);
+struct iocb *ioq_get_iocb(IOQueue *ioq);
+void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb);
+struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
+ unsigned int count, long long offset);
+int ioq_submit(IOQueue *ioq);
+
+static inline unsigned int ioq_num_queued(IOQueue *ioq)
+{
+ return ioq->queue_idx;
+}
+
+typedef void IOQueueCompletion(struct iocb *iocb, ssize_t ret, void *opaque);
+int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
+ void *opaque);
+
+#endif /* IOQ_H */
--- /dev/null
+/*
+ * Dedicated thread for virtio-blk I/O processing
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu/iov.h"
+#include "qemu/thread.h"
+#include "qemu/error-report.h"
+#include "hw/virtio/dataplane/vring.h"
+#include "ioq.h"
+#include "migration/migration.h"
+#include "block/block.h"
+#include "hw/virtio/virtio-blk.h"
+#include "virtio-blk.h"
+#include "block/aio.h"
+
+enum {
+ SEG_MAX = 126, /* maximum number of I/O segments */
+ VRING_MAX = SEG_MAX + 2, /* maximum number of vring descriptors */
+ REQ_MAX = VRING_MAX, /* maximum number of requests in the vring,
+ * is VRING_MAX / 2 with traditional and
+ * VRING_MAX with indirect descriptors */
+};
+
+typedef struct {
+ struct iocb iocb; /* Linux AIO control block */
+ QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */
+ unsigned int head; /* vring descriptor index */
+ struct iovec *bounce_iov; /* used if guest buffers are unaligned */
+ QEMUIOVector *read_qiov; /* for read completion /w bounce buffer */
+} VirtIOBlockRequest;
+
+struct VirtIOBlockDataPlane {
+ bool started;
+ bool stopping;
+ QEMUBH *start_bh;
+ QemuThread thread;
+
+ VirtIOBlkConf *blk;
+ int fd; /* image file descriptor */
+
+ VirtIODevice *vdev;
+ Vring vring; /* virtqueue vring */
+ EventNotifier *guest_notifier; /* irq */
+
+ /* Note that these EventNotifiers are assigned by value. This is
+ * fine as long as you do not call event_notifier_cleanup on them
+ * (because you don't own the file descriptor or handle; you just
+ * use it).
+ */
+ AioContext *ctx;
+ EventNotifier io_notifier; /* Linux AIO completion */
+ EventNotifier host_notifier; /* doorbell */
+
+ IOQueue ioqueue; /* Linux AIO queue (should really be per
+ dataplane thread) */
+ VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the
+ queue */
+
+ unsigned int num_reqs;
+
+ Error *migration_blocker;
+};
+
+/* Raise an interrupt to signal guest, if necessary */
+static void notify_guest(VirtIOBlockDataPlane *s)
+{
+ if (!vring_should_notify(s->vdev, &s->vring)) {
+ return;
+ }
+
+ event_notifier_set(s->guest_notifier);
+}
+
+static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
+{
+ VirtIOBlockDataPlane *s = opaque;
+ VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+ struct virtio_blk_inhdr hdr;
+ int len;
+
+ if (likely(ret >= 0)) {
+ hdr.status = VIRTIO_BLK_S_OK;
+ len = ret;
+ } else {
+ hdr.status = VIRTIO_BLK_S_IOERR;
+ len = 0;
+ }
+
+ trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
+
+ if (req->read_qiov) {
+ assert(req->bounce_iov);
+ qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len);
+ qemu_iovec_destroy(req->read_qiov);
+ g_slice_free(QEMUIOVector, req->read_qiov);
+ }
+
+ if (req->bounce_iov) {
+ qemu_vfree(req->bounce_iov->iov_base);
+ g_slice_free(struct iovec, req->bounce_iov);
+ }
+
+ qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
+ qemu_iovec_destroy(req->inhdr);
+ g_slice_free(QEMUIOVector, req->inhdr);
+
+ /* According to the virtio specification len should be the number of bytes
+ * written to, but for virtio-blk it seems to be the number of bytes
+ * transferred plus the status bytes.
+ */
+ vring_push(&s->vring, req->head, len + sizeof(hdr));
+
+ s->num_reqs--;
+}
+
+static void complete_request_early(VirtIOBlockDataPlane *s, unsigned int head,
+ QEMUIOVector *inhdr, unsigned char status)
+{
+ struct virtio_blk_inhdr hdr = {
+ .status = status,
+ };
+
+ qemu_iovec_from_buf(inhdr, 0, &hdr, sizeof(hdr));
+ qemu_iovec_destroy(inhdr);
+ g_slice_free(QEMUIOVector, inhdr);
+
+ vring_push(&s->vring, head, sizeof(hdr));
+ notify_guest(s);
+}
+
+/* Get disk serial number */
+static void do_get_id_cmd(VirtIOBlockDataPlane *s,
+ struct iovec *iov, unsigned int iov_cnt,
+ unsigned int head, QEMUIOVector *inhdr)
+{
+ char id[VIRTIO_BLK_ID_BYTES];
+
+ /* Serial number not NUL-terminated when shorter than buffer */
+ strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id));
+ iov_from_buf(iov, iov_cnt, 0, id, sizeof(id));
+ complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
+}
+
+static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
+ struct iovec *iov, unsigned int iov_cnt,
+ long long offset, unsigned int head,
+ QEMUIOVector *inhdr)
+{
+ struct iocb *iocb;
+ QEMUIOVector qiov;
+ struct iovec *bounce_iov = NULL;
+ QEMUIOVector *read_qiov = NULL;
+
+ qemu_iovec_init_external(&qiov, iov, iov_cnt);
+ if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
+ void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
+
+ if (read) {
+ /* Need to copy back from bounce buffer on completion */
+ read_qiov = g_slice_new(QEMUIOVector);
+ qemu_iovec_init(read_qiov, iov_cnt);
+ qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
+ } else {
+ qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
+ }
+
+ /* Redirect I/O to aligned bounce buffer */
+ bounce_iov = g_slice_new(struct iovec);
+ bounce_iov->iov_base = bounce_buffer;
+ bounce_iov->iov_len = qiov.size;
+ iov = bounce_iov;
+ iov_cnt = 1;
+ }
+
+ iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
+
+ /* Fill in virtio block metadata needed for completion */
+ VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
+ req->head = head;
+ req->inhdr = inhdr;
+ req->bounce_iov = bounce_iov;
+ req->read_qiov = read_qiov;
+ return 0;
+}
+
+static int process_request(IOQueue *ioq, struct iovec iov[],
+ unsigned int out_num, unsigned int in_num,
+ unsigned int head)
+{
+ VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue);
+ struct iovec *in_iov = &iov[out_num];
+ struct virtio_blk_outhdr outhdr;
+ QEMUIOVector *inhdr;
+ size_t in_size;
+
+ /* Copy in outhdr */
+ if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
+ sizeof(outhdr)) != sizeof(outhdr))) {
+ error_report("virtio-blk request outhdr too short");
+ return -EFAULT;
+ }
+ iov_discard_front(&iov, &out_num, sizeof(outhdr));
+
+ /* Grab inhdr for later */
+ in_size = iov_size(in_iov, in_num);
+ if (in_size < sizeof(struct virtio_blk_inhdr)) {
+ error_report("virtio_blk request inhdr too short");
+ return -EFAULT;
+ }
+ inhdr = g_slice_new(QEMUIOVector);
+ qemu_iovec_init(inhdr, 1);
+ qemu_iovec_concat_iov(inhdr, in_iov, in_num,
+ in_size - sizeof(struct virtio_blk_inhdr),
+ sizeof(struct virtio_blk_inhdr));
+ iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
+
+ /* TODO Linux sets the barrier bit even when not advertised! */
+ outhdr.type &= ~VIRTIO_BLK_T_BARRIER;
+
+ switch (outhdr.type) {
+ case VIRTIO_BLK_T_IN:
+ do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, head, inhdr);
+ return 0;
+
+ case VIRTIO_BLK_T_OUT:
+ do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, head, inhdr);
+ return 0;
+
+ case VIRTIO_BLK_T_SCSI_CMD:
+ /* TODO support SCSI commands */
+ complete_request_early(s, head, inhdr, VIRTIO_BLK_S_UNSUPP);
+ return 0;
+
+ case VIRTIO_BLK_T_FLUSH:
+ /* TODO fdsync not supported by Linux AIO, do it synchronously here! */
+ if (qemu_fdatasync(s->fd) < 0) {
+ complete_request_early(s, head, inhdr, VIRTIO_BLK_S_IOERR);
+ } else {
+ complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
+ }
+ return 0;
+
+ case VIRTIO_BLK_T_GET_ID:
+ do_get_id_cmd(s, in_iov, in_num, head, inhdr);
+ return 0;
+
+ default:
+ error_report("virtio-blk unsupported request type %#x", outhdr.type);
+ qemu_iovec_destroy(inhdr);
+ g_slice_free(QEMUIOVector, inhdr);
+ return -EFAULT;
+ }
+}
+
+static int flush_true(EventNotifier *e)
+{
+ return true;
+}
+
+static void handle_notify(EventNotifier *e)
+{
+ VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
+ host_notifier);
+
+ /* There is one array of iovecs into which all new requests are extracted
+ * from the vring. Requests are read from the vring and the translated
+ * descriptors are written to the iovecs array. The iovecs do not have to
+ * persist across handle_notify() calls because the kernel copies the
+ * iovecs on io_submit().
+ *
+ * Handling io_submit() EAGAIN may require storing the requests across
+ * handle_notify() calls until the kernel has sufficient resources to
+ * accept more I/O. This is not implemented yet.
+ */
+ struct iovec iovec[VRING_MAX];
+ struct iovec *end = &iovec[VRING_MAX];
+ struct iovec *iov = iovec;
+
+ /* When a request is read from the vring, the index of the first descriptor
+ * (aka head) is returned so that the completed request can be pushed onto
+ * the vring later.
+ *
+ * The number of hypervisor read-only iovecs is out_num. The number of
+ * hypervisor write-only iovecs is in_num.
+ */
+ int head;
+ unsigned int out_num = 0, in_num = 0;
+ unsigned int num_queued;
+
+ event_notifier_test_and_clear(&s->host_notifier);
+ for (;;) {
+ /* Disable guest->host notifies to avoid unnecessary vmexits */
+ vring_disable_notification(s->vdev, &s->vring);
+
+ for (;;) {
+ head = vring_pop(s->vdev, &s->vring, iov, end, &out_num, &in_num);
+ if (head < 0) {
+ break; /* no more requests */
+ }
+
+ trace_virtio_blk_data_plane_process_request(s, out_num, in_num,
+ head);
+
+ if (process_request(&s->ioqueue, iov, out_num, in_num, head) < 0) {
+ vring_set_broken(&s->vring);
+ break;
+ }
+ iov += out_num + in_num;
+ }
+
+ if (likely(head == -EAGAIN)) { /* vring emptied */
+ /* Re-enable guest->host notifies and stop processing the vring.
+ * But if the guest has snuck in more descriptors, keep processing.
+ */
+ if (vring_enable_notification(s->vdev, &s->vring)) {
+ break;
+ }
+ } else { /* head == -ENOBUFS or fatal error, iovecs[] is depleted */
+ /* Since there are no iovecs[] left, stop processing for now. Do
+ * not re-enable guest->host notifies since the I/O completion
+ * handler knows to check for more vring descriptors anyway.
+ */
+ break;
+ }
+ }
+
+ num_queued = ioq_num_queued(&s->ioqueue);
+ if (num_queued > 0) {
+ s->num_reqs += num_queued;
+
+ int rc = ioq_submit(&s->ioqueue);
+ if (unlikely(rc < 0)) {
+ fprintf(stderr, "ioq_submit failed %d\n", rc);
+ exit(1);
+ }
+ }
+}
+
+static int flush_io(EventNotifier *e)
+{
+ VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
+ io_notifier);
+
+ return s->num_reqs > 0;
+}
+
+static void handle_io(EventNotifier *e)
+{
+ VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
+ io_notifier);
+
+ event_notifier_test_and_clear(&s->io_notifier);
+ if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
+ notify_guest(s);
+ }
+
+ /* If there were more requests than iovecs, the vring will not be empty yet
+ * so check again. There should now be enough resources to process more
+ * requests.
+ */
+ if (unlikely(vring_more_avail(&s->vring))) {
+ handle_notify(&s->host_notifier);
+ }
+}
+
+static void *data_plane_thread(void *opaque)
+{
+ VirtIOBlockDataPlane *s = opaque;
+
+ do {
+ aio_poll(s->ctx, true);
+ } while (!s->stopping || s->num_reqs > 0);
+ return NULL;
+}
+
+static void start_data_plane_bh(void *opaque)
+{
+ VirtIOBlockDataPlane *s = opaque;
+
+ qemu_bh_delete(s->start_bh);
+ s->start_bh = NULL;
+ qemu_thread_create(&s->thread, data_plane_thread,
+ s, QEMU_THREAD_JOINABLE);
+}
+
+bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
+ VirtIOBlockDataPlane **dataplane)
+{
+ VirtIOBlockDataPlane *s;
+ int fd;
+
+ *dataplane = NULL;
+
+ if (!blk->data_plane) {
+ return true;
+ }
+
+ if (blk->scsi) {
+ error_report("device is incompatible with x-data-plane, use scsi=off");
+ return false;
+ }
+
+ if (blk->config_wce) {
+ error_report("device is incompatible with x-data-plane, "
+ "use config-wce=off");
+ return false;
+ }
+
+ fd = raw_get_aio_fd(blk->conf.bs);
+ if (fd < 0) {
+ error_report("drive is incompatible with x-data-plane, "
+ "use format=raw,cache=none,aio=native");
+ return false;
+ }
+
+ s = g_new0(VirtIOBlockDataPlane, 1);
+ s->vdev = vdev;
+ s->fd = fd;
+ s->blk = blk;
+
+ /* Prevent block operations that conflict with data plane thread */
+ bdrv_set_in_use(blk->conf.bs, 1);
+
+ error_setg(&s->migration_blocker,
+ "x-data-plane does not support migration");
+ migrate_add_blocker(s->migration_blocker);
+
+ *dataplane = s;
+ return true;
+}
+
+void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
+{
+ if (!s) {
+ return;
+ }
+
+ virtio_blk_data_plane_stop(s);
+ migrate_del_blocker(s->migration_blocker);
+ error_free(s->migration_blocker);
+ bdrv_set_in_use(s->blk->conf.bs, 0);
+ g_free(s);
+}
+
+void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
+{
+ VirtQueue *vq;
+ int i;
+
+ if (s->started) {
+ return;
+ }
+
+ vq = virtio_get_queue(s->vdev, 0);
+ if (!vring_setup(&s->vring, s->vdev, 0)) {
+ return;
+ }
+
+ s->ctx = aio_context_new();
+
+ /* Set up guest notifier (irq) */
+ if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1,
+ true) != 0) {
+ fprintf(stderr, "virtio-blk failed to set guest notifier, "
+ "ensure -enable-kvm is set\n");
+ exit(1);
+ }
+ s->guest_notifier = virtio_queue_get_guest_notifier(vq);
+
+ /* Set up virtqueue notify */
+ if (s->vdev->binding->set_host_notifier(s->vdev->binding_opaque,
+ 0, true) != 0) {
+ fprintf(stderr, "virtio-blk failed to set host notifier\n");
+ exit(1);
+ }
+ s->host_notifier = *virtio_queue_get_host_notifier(vq);
+ aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, flush_true);
+
+ /* Set up ioqueue */
+ ioq_init(&s->ioqueue, s->fd, REQ_MAX);
+ for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
+ ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
+ }
+ s->io_notifier = *ioq_get_notifier(&s->ioqueue);
+ aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, flush_io);
+
+ s->started = true;
+ trace_virtio_blk_data_plane_start(s);
+
+ /* Kick right away to begin processing requests already in vring */
+ event_notifier_set(virtio_queue_get_host_notifier(vq));
+
+ /* Spawn thread in BH so it inherits iothread cpusets */
+ s->start_bh = qemu_bh_new(start_data_plane_bh, s);
+ qemu_bh_schedule(s->start_bh);
+}
+
+void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
+{
+ if (!s->started || s->stopping) {
+ return;
+ }
+ s->stopping = true;
+ trace_virtio_blk_data_plane_stop(s);
+
+ /* Stop thread or cancel pending thread creation BH */
+ if (s->start_bh) {
+ qemu_bh_delete(s->start_bh);
+ s->start_bh = NULL;
+ } else {
+ aio_notify(s->ctx);
+ qemu_thread_join(&s->thread);
+ }
+
+ aio_set_event_notifier(s->ctx, &s->io_notifier, NULL, NULL);
+ ioq_cleanup(&s->ioqueue);
+
+ aio_set_event_notifier(s->ctx, &s->host_notifier, NULL, NULL);
+ s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false);
+
+ aio_context_unref(s->ctx);
+
+ /* Clean up guest notifier (irq) */
+ s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, false);
+
+ vring_teardown(&s->vring);
+ s->started = false;
+ s->stopping = false;
+}
--- /dev/null
+/*
+ * Dedicated thread for virtio-blk I/O processing
+ *
+ * Copyright 2012 IBM, Corp.
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_DATAPLANE_VIRTIO_BLK_H
+#define HW_DATAPLANE_VIRTIO_BLK_H
+
+#include "hw/virtio/virtio.h"
+
+typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane;
+
+bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
+ VirtIOBlockDataPlane **dataplane);
+void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s);
+void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s);
+void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s);
+void virtio_blk_data_plane_drain(VirtIOBlockDataPlane *s);
+
+#endif /* HW_DATAPLANE_VIRTIO_BLK_H */
--- /dev/null
+/*
+ * Virtio Block Device
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "hw/block/block.h"
+#include "sysemu/blockdev.h"
+#include "hw/virtio/virtio-blk.h"
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+# include "dataplane/virtio-blk.h"
+#endif
+#include "block/scsi.h"
+#ifdef __linux__
+# include <scsi/sg.h>
+#endif
+#include "hw/virtio/virtio-bus.h"
+
+typedef struct VirtIOBlockReq
+{
+ VirtIOBlock *dev;
+ VirtQueueElement elem;
+ struct virtio_blk_inhdr *in;
+ struct virtio_blk_outhdr *out;
+ struct virtio_scsi_inhdr *scsi;
+ QEMUIOVector qiov;
+ struct VirtIOBlockReq *next;
+ BlockAcctCookie acct;
+} VirtIOBlockReq;
+
+static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
+{
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+ trace_virtio_blk_req_complete(req, status);
+
+ stb_p(&req->in->status, status);
+ virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+ virtio_notify(vdev, s->vq);
+}
+
+static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
+ bool is_read)
+{
+ BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error);
+ VirtIOBlock *s = req->dev;
+
+ if (action == BDRV_ACTION_STOP) {
+ req->next = s->rq;
+ s->rq = req;
+ } else if (action == BDRV_ACTION_REPORT) {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+ bdrv_acct_done(s->bs, &req->acct);
+ g_free(req);
+ }
+
+ bdrv_error_action(s->bs, action, is_read, error);
+ return action != BDRV_ACTION_IGNORE;
+}
+
+static void virtio_blk_rw_complete(void *opaque, int ret)
+{
+ VirtIOBlockReq *req = opaque;
+
+ trace_virtio_blk_rw_complete(req, ret);
+
+ if (ret) {
+ bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
+ if (virtio_blk_handle_rw_error(req, -ret, is_read))
+ return;
+ }
+
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ bdrv_acct_done(req->dev->bs, &req->acct);
+ g_free(req);
+}
+
+static void virtio_blk_flush_complete(void *opaque, int ret)
+{
+ VirtIOBlockReq *req = opaque;
+
+ if (ret) {
+ if (virtio_blk_handle_rw_error(req, -ret, 0)) {
+ return;
+ }
+ }
+
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ bdrv_acct_done(req->dev->bs, &req->acct);
+ g_free(req);
+}
+
+static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
+{
+ VirtIOBlockReq *req = g_malloc(sizeof(*req));
+ req->dev = s;
+ req->qiov.size = 0;
+ req->next = NULL;
+ return req;
+}
+
+static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
+{
+ VirtIOBlockReq *req = virtio_blk_alloc_request(s);
+
+ if (req != NULL) {
+ if (!virtqueue_pop(s->vq, &req->elem)) {
+ g_free(req);
+ return NULL;
+ }
+ }
+
+ return req;
+}
+
+static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+{
+#ifdef __linux__
+ int ret;
+ int i;
+#endif
+ int status = VIRTIO_BLK_S_OK;
+
+ /*
+ * We require at least one output segment each for the virtio_blk_outhdr
+ * and the SCSI command block.
+ *
+ * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
+ * and the sense buffer pointer in the input segments.
+ */
+ if (req->elem.out_num < 2 || req->elem.in_num < 3) {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+ g_free(req);
+ return;
+ }
+
+ /*
+ * The scsi inhdr is placed in the second-to-last input segment, just
+ * before the regular inhdr.
+ */
+ req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
+
+ if (!req->dev->blk.scsi) {
+ status = VIRTIO_BLK_S_UNSUPP;
+ goto fail;
+ }
+
+ /*
+ * No support for bidirection commands yet.
+ */
+ if (req->elem.out_num > 2 && req->elem.in_num > 3) {
+ status = VIRTIO_BLK_S_UNSUPP;
+ goto fail;
+ }
+
+#ifdef __linux__
+ struct sg_io_hdr hdr;
+ memset(&hdr, 0, sizeof(struct sg_io_hdr));
+ hdr.interface_id = 'S';
+ hdr.cmd_len = req->elem.out_sg[1].iov_len;
+ hdr.cmdp = req->elem.out_sg[1].iov_base;
+ hdr.dxfer_len = 0;
+
+ if (req->elem.out_num > 2) {
+ /*
+ * If there are more than the minimally required 2 output segments
+ * there is write payload starting from the third iovec.
+ */
+ hdr.dxfer_direction = SG_DXFER_TO_DEV;
+ hdr.iovec_count = req->elem.out_num - 2;
+
+ for (i = 0; i < hdr.iovec_count; i++)
+ hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
+
+ hdr.dxferp = req->elem.out_sg + 2;
+
+ } else if (req->elem.in_num > 3) {
+ /*
+ * If we have more than 3 input segments the guest wants to actually
+ * read data.
+ */
+ hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+ hdr.iovec_count = req->elem.in_num - 3;
+ for (i = 0; i < hdr.iovec_count; i++)
+ hdr.dxfer_len += req->elem.in_sg[i].iov_len;
+
+ hdr.dxferp = req->elem.in_sg;
+ } else {
+ /*
+ * Some SCSI commands don't actually transfer any data.
+ */
+ hdr.dxfer_direction = SG_DXFER_NONE;
+ }
+
+ hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
+ hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
+
+ ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
+ if (ret) {
+ status = VIRTIO_BLK_S_UNSUPP;
+ goto fail;
+ }
+
+ /*
+ * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
+ * clear the masked_status field [hence status gets cleared too, see
+ * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
+ * status has occurred. However they do set DRIVER_SENSE in driver_status
+ * field. Also a (sb_len_wr > 0) indicates there is a sense buffer.
+ */
+ if (hdr.status == 0 && hdr.sb_len_wr > 0) {
+ hdr.status = CHECK_CONDITION;
+ }
+
+ stl_p(&req->scsi->errors,
+ hdr.status | (hdr.msg_status << 8) |
+ (hdr.host_status << 16) | (hdr.driver_status << 24));
+ stl_p(&req->scsi->residual, hdr.resid);
+ stl_p(&req->scsi->sense_len, hdr.sb_len_wr);
+ stl_p(&req->scsi->data_len, hdr.dxfer_len);
+
+ virtio_blk_req_complete(req, status);
+ g_free(req);
+ return;
+#else
+ abort();
+#endif
+
+fail:
+ /* Just put anything nonzero so that the ioctl fails in the guest. */
+ stl_p(&req->scsi->errors, 255);
+ virtio_blk_req_complete(req, status);
+ g_free(req);
+}
+
+typedef struct MultiReqBuffer {
+ BlockRequest blkreq[32];
+ unsigned int num_writes;
+} MultiReqBuffer;
+
+static void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
+{
+ int i, ret;
+
+ if (!mrb->num_writes) {
+ return;
+ }
+
+ ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes);
+ if (ret != 0) {
+ for (i = 0; i < mrb->num_writes; i++) {
+ if (mrb->blkreq[i].error) {
+ virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO);
+ }
+ }
+ }
+
+ mrb->num_writes = 0;
+}
+
+static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
+{
+ bdrv_acct_start(req->dev->bs, &req->acct, 0, BDRV_ACCT_FLUSH);
+
+ /*
+ * Make sure all outstanding writes are posted to the backing device.
+ */
+ virtio_submit_multiwrite(req->dev->bs, mrb);
+ bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
+}
+
+static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
+{
+ BlockRequest *blkreq;
+ uint64_t sector;
+
+ sector = ldq_p(&req->out->sector);
+
+ bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
+
+ trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
+
+ if (sector & req->dev->sector_mask) {
+ virtio_blk_rw_complete(req, -EIO);
+ return;
+ }
+ if (req->qiov.size % req->dev->conf->logical_block_size) {
+ virtio_blk_rw_complete(req, -EIO);
+ return;
+ }
+
+ if (mrb->num_writes == 32) {
+ virtio_submit_multiwrite(req->dev->bs, mrb);
+ }
+
+ blkreq = &mrb->blkreq[mrb->num_writes];
+ blkreq->sector = sector;
+ blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE;
+ blkreq->qiov = &req->qiov;
+ blkreq->cb = virtio_blk_rw_complete;
+ blkreq->opaque = req;
+ blkreq->error = 0;
+
+ mrb->num_writes++;
+}
+
+static void virtio_blk_handle_read(VirtIOBlockReq *req)
+{
+ uint64_t sector;
+
+ sector = ldq_p(&req->out->sector);
+
+ bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
+
+ trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
+
+ if (sector & req->dev->sector_mask) {
+ virtio_blk_rw_complete(req, -EIO);
+ return;
+ }
+ if (req->qiov.size % req->dev->conf->logical_block_size) {
+ virtio_blk_rw_complete(req, -EIO);
+ return;
+ }
+ bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
+ req->qiov.size / BDRV_SECTOR_SIZE,
+ virtio_blk_rw_complete, req);
+}
+
+static void virtio_blk_handle_request(VirtIOBlockReq *req,
+ MultiReqBuffer *mrb)
+{
+ uint32_t type;
+
+ if (req->elem.out_num < 1 || req->elem.in_num < 1) {
+ error_report("virtio-blk missing headers");
+ exit(1);
+ }
+
+ if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
+ req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
+ error_report("virtio-blk header not in correct element");
+ exit(1);
+ }
+
+ req->out = (void *)req->elem.out_sg[0].iov_base;
+ req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
+
+ type = ldl_p(&req->out->type);
+
+ if (type & VIRTIO_BLK_T_FLUSH) {
+ virtio_blk_handle_flush(req, mrb);
+ } else if (type & VIRTIO_BLK_T_SCSI_CMD) {
+ virtio_blk_handle_scsi(req);
+ } else if (type & VIRTIO_BLK_T_GET_ID) {
+ VirtIOBlock *s = req->dev;
+
+ /*
+ * NB: per existing s/n string convention the string is
+ * terminated by '\0' only when shorter than buffer.
+ */
+ strncpy(req->elem.in_sg[0].iov_base,
+ s->blk.serial ? s->blk.serial : "",
+ MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ g_free(req);
+ } else if (type & VIRTIO_BLK_T_OUT) {
+ qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
+ req->elem.out_num - 1);
+ virtio_blk_handle_write(req, mrb);
+ } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
+ /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
+ qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
+ req->elem.in_num - 1);
+ virtio_blk_handle_read(req);
+ } else {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+ g_free(req);
+ }
+}
+
+static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOBlock *s = VIRTIO_BLK(vdev);
+ VirtIOBlockReq *req;
+ MultiReqBuffer mrb = {
+ .num_writes = 0,
+ };
+
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+ /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
+ * dataplane here instead of waiting for .set_status().
+ */
+ if (s->dataplane) {
+ virtio_blk_data_plane_start(s->dataplane);
+ return;
+ }
+#endif
+
+ while ((req = virtio_blk_get_request(s))) {
+ virtio_blk_handle_request(req, &mrb);
+ }
+
+ virtio_submit_multiwrite(s->bs, &mrb);
+
+ /*
+ * FIXME: Want to check for completions before returning to guest mode,
+ * so cached reads and writes are reported as quickly as possible. But
+ * that should be done in the generic block layer.
+ */
+}
+
+static void virtio_blk_dma_restart_bh(void *opaque)
+{
+ VirtIOBlock *s = opaque;
+ VirtIOBlockReq *req = s->rq;
+ MultiReqBuffer mrb = {
+ .num_writes = 0,
+ };
+
+ qemu_bh_delete(s->bh);
+ s->bh = NULL;
+
+ s->rq = NULL;
+
+ while (req) {
+ virtio_blk_handle_request(req, &mrb);
+ req = req->next;
+ }
+
+ virtio_submit_multiwrite(s->bs, &mrb);
+}
+
+static void virtio_blk_dma_restart_cb(void *opaque, int running,
+ RunState state)
+{
+ VirtIOBlock *s = opaque;
+
+ if (!running) {
+ return;
+ }
+
+ if (!s->bh) {
+ s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
+ qemu_bh_schedule(s->bh);
+ }
+}
+
+static void virtio_blk_reset(VirtIODevice *vdev)
+{
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+ VirtIOBlock *s = VIRTIO_BLK(vdev);
+
+ if (s->dataplane) {
+ virtio_blk_data_plane_stop(s->dataplane);
+ }
+#endif
+
+ /*
+ * This should cancel pending requests, but can't do nicely until there
+ * are per-device request lists.
+ */
+ bdrv_drain_all();
+}
+
+/* coalesce internal state, copy to pci i/o region 0
+ */
+static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
+{
+ VirtIOBlock *s = VIRTIO_BLK(vdev);
+ struct virtio_blk_config blkcfg;
+ uint64_t capacity;
+ int blk_size = s->conf->logical_block_size;
+
+ bdrv_get_geometry(s->bs, &capacity);
+ memset(&blkcfg, 0, sizeof(blkcfg));
+ stq_raw(&blkcfg.capacity, capacity);
+ stl_raw(&blkcfg.seg_max, 128 - 2);
+ stw_raw(&blkcfg.cylinders, s->conf->cyls);
+ stl_raw(&blkcfg.blk_size, blk_size);
+ stw_raw(&blkcfg.min_io_size, s->conf->min_io_size / blk_size);
+ stw_raw(&blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
+ blkcfg.heads = s->conf->heads;
+ /*
+ * We must ensure that the block device capacity is a multiple of
+ * the logical block size. If that is not the case, lets use
+ * sector_mask to adopt the geometry to have a correct picture.
+ * For those devices where the capacity is ok for the given geometry
+ * we dont touch the sector value of the geometry, since some devices
+ * (like s390 dasd) need a specific value. Here the capacity is already
+ * cyls*heads*secs*blk_size and the sector value is not block size
+ * divided by 512 - instead it is the amount of blk_size blocks
+ * per track (cylinder).
+ */
+ if (bdrv_getlength(s->bs) / s->conf->heads / s->conf->secs % blk_size) {
+ blkcfg.sectors = s->conf->secs & ~s->sector_mask;
+ } else {
+ blkcfg.sectors = s->conf->secs;
+ }
+ blkcfg.size_max = 0;
+ blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
+ blkcfg.alignment_offset = 0;
+ blkcfg.wce = bdrv_enable_write_cache(s->bs);
+ memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
+}
+
+static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
+{
+ VirtIOBlock *s = VIRTIO_BLK(vdev);
+ struct virtio_blk_config blkcfg;
+
+ memcpy(&blkcfg, config, sizeof(blkcfg));
+ bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0);
+}
+
+static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
+{
+ VirtIOBlock *s = VIRTIO_BLK(vdev);
+
+ features |= (1 << VIRTIO_BLK_F_SEG_MAX);
+ features |= (1 << VIRTIO_BLK_F_GEOMETRY);
+ features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
+ features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
+ features |= (1 << VIRTIO_BLK_F_SCSI);
+
+ if (s->blk.config_wce) {
+ features |= (1 << VIRTIO_BLK_F_CONFIG_WCE);
+ }
+ if (bdrv_enable_write_cache(s->bs))
+ features |= (1 << VIRTIO_BLK_F_WCE);
+
+ if (bdrv_is_read_only(s->bs))
+ features |= 1 << VIRTIO_BLK_F_RO;
+
+ return features;
+}
+
+static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
+{
+ VirtIOBlock *s = VIRTIO_BLK(vdev);
+ uint32_t features;
+
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+ if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER |
+ VIRTIO_CONFIG_S_DRIVER_OK))) {
+ virtio_blk_data_plane_stop(s->dataplane);
+ }
+#endif
+
+ if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ return;
+ }
+
+ features = vdev->guest_features;
+ bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
+}
+
+static void virtio_blk_save(QEMUFile *f, void *opaque)
+{
+ VirtIOBlock *s = opaque;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ VirtIOBlockReq *req = s->rq;
+
+ virtio_save(vdev, f);
+
+ while (req) {
+ qemu_put_sbyte(f, 1);
+ qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
+ req = req->next;
+ }
+ qemu_put_sbyte(f, 0);
+}
+
+static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
+{
+ VirtIOBlock *s = opaque;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ int ret;
+
+ if (version_id != 2)
+ return -EINVAL;
+
+ ret = virtio_load(vdev, f);
+ if (ret) {
+ return ret;
+ }
+
+ while (qemu_get_sbyte(f)) {
+ VirtIOBlockReq *req = virtio_blk_alloc_request(s);
+ qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
+ req->next = s->rq;
+ s->rq = req;
+
+ virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
+ req->elem.in_num, 1);
+ virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
+ req->elem.out_num, 0);
+ }
+
+ return 0;
+}
+
+static void virtio_blk_resize(void *opaque)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+
+ virtio_notify_config(vdev);
+}
+
+static const BlockDevOps virtio_block_ops = {
+ .resize_cb = virtio_blk_resize,
+};
+
+void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk)
+{
+ VirtIOBlock *s = VIRTIO_BLK(dev);
+ memcpy(&(s->blk), blk, sizeof(struct VirtIOBlkConf));
+}
+
+static int virtio_blk_device_init(VirtIODevice *vdev)
+{
+ DeviceState *qdev = DEVICE(vdev);
+ VirtIOBlock *s = VIRTIO_BLK(vdev);
+ VirtIOBlkConf *blk = &(s->blk);
+ static int virtio_blk_id;
+
+ if (!blk->conf.bs) {
+ error_report("drive property not set");
+ return -1;
+ }
+ if (!bdrv_is_inserted(blk->conf.bs)) {
+ error_report("Device needs media, but drive is empty");
+ return -1;
+ }
+
+ blkconf_serial(&blk->conf, &blk->serial);
+ if (blkconf_geometry(&blk->conf, NULL, 65535, 255, 255) < 0) {
+ return -1;
+ }
+
+ virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
+ sizeof(struct virtio_blk_config));
+
+ vdev->get_config = virtio_blk_update_config;
+ vdev->set_config = virtio_blk_set_config;
+ vdev->get_features = virtio_blk_get_features;
+ vdev->set_status = virtio_blk_set_status;
+ vdev->reset = virtio_blk_reset;
+ s->bs = blk->conf.bs;
+ s->conf = &blk->conf;
+ memcpy(&(s->blk), blk, sizeof(struct VirtIOBlkConf));
+ s->rq = NULL;
+ s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
+
+ s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+ if (!virtio_blk_data_plane_create(vdev, blk, &s->dataplane)) {
+ virtio_common_cleanup(vdev);
+ return -1;
+ }
+#endif
+
+ s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
+ register_savevm(qdev, "virtio-blk", virtio_blk_id++, 2,
+ virtio_blk_save, virtio_blk_load, s);
+ bdrv_set_dev_ops(s->bs, &virtio_block_ops, s);
+ bdrv_set_buffer_alignment(s->bs, s->conf->logical_block_size);
+
+ bdrv_iostatus_enable(s->bs);
+
+ add_boot_device_path(s->conf->bootindex, qdev, "/disk@0,0");
+ return 0;
+}
+
+static int virtio_blk_device_exit(DeviceState *dev)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VirtIOBlock *s = VIRTIO_BLK(dev);
+#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
+ virtio_blk_data_plane_destroy(s->dataplane);
+ s->dataplane = NULL;
+#endif
+ qemu_del_vm_change_state_handler(s->change);
+ unregister_savevm(dev, "virtio-blk", s);
+ blockdev_mark_auto_del(s->bs);
+ virtio_common_cleanup(vdev);
+ return 0;
+}
+
+static Property virtio_blk_properties[] = {
+ DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlock, blk),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_blk_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+ dc->exit = virtio_blk_device_exit;
+ dc->props = virtio_blk_properties;
+ vdc->init = virtio_blk_device_init;
+ vdc->get_config = virtio_blk_update_config;
+ vdc->set_config = virtio_blk_set_config;
+ vdc->get_features = virtio_blk_get_features;
+ vdc->set_status = virtio_blk_set_status;
+ vdc->reset = virtio_blk_reset;
+}
+
+static const TypeInfo virtio_device_info = {
+ .name = TYPE_VIRTIO_BLK,
+ .parent = TYPE_VIRTIO_DEVICE,
+ .instance_size = sizeof(VirtIOBlock),
+ .class_init = virtio_blk_class_init,
+};
+
+static void virtio_register_types(void)
+{
+ type_register_static(&virtio_device_info);
+}
+
+type_init(virtio_register_types)
common-obj-$(CONFIG_XILINX) += xilinx_uartlite.o
common-obj-$(CONFIG_XEN_BACKEND) += xen_console.o
common-obj-$(CONFIG_CADENCE) += cadence_uart.o
+
+obj-$(CONFIG_VIRTIO) += virtio-serial-bus.o
--- /dev/null
+/*
+ * A bus for connecting virtio serial and console ports
+ *
+ * Copyright (C) 2009, 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ * Amit Shah <amit.shah@redhat.com>
+ *
+ * Some earlier parts are:
+ * Copyright IBM, Corp. 2008
+ * authored by
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/iov.h"
+#include "monitor/monitor.h"
+#include "qemu/queue.h"
+#include "hw/sysbus.h"
+#include "trace.h"
+#include "hw/virtio/virtio-serial.h"
+
+static VirtIOSerialPort *find_port_by_id(VirtIOSerial *vser, uint32_t id)
+{
+ VirtIOSerialPort *port;
+
+ if (id == VIRTIO_CONSOLE_BAD_ID) {
+ return NULL;
+ }
+
+ QTAILQ_FOREACH(port, &vser->ports, next) {
+ if (port->id == id)
+ return port;
+ }
+ return NULL;
+}
+
+static VirtIOSerialPort *find_port_by_vq(VirtIOSerial *vser, VirtQueue *vq)
+{
+ VirtIOSerialPort *port;
+
+ QTAILQ_FOREACH(port, &vser->ports, next) {
+ if (port->ivq == vq || port->ovq == vq)
+ return port;
+ }
+ return NULL;
+}
+
+static bool use_multiport(VirtIOSerial *vser)
+{
+ return vser->vdev.guest_features & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
+}
+
+static size_t write_to_port(VirtIOSerialPort *port,
+ const uint8_t *buf, size_t size)
+{
+ VirtQueueElement elem;
+ VirtQueue *vq;
+ size_t offset;
+
+ vq = port->ivq;
+ if (!virtio_queue_ready(vq)) {
+ return 0;
+ }
+
+ offset = 0;
+ while (offset < size) {
+ size_t len;
+
+ if (!virtqueue_pop(vq, &elem)) {
+ break;
+ }
+
+ len = iov_from_buf(elem.in_sg, elem.in_num, 0,
+ buf + offset, size - offset);
+ offset += len;
+
+ virtqueue_push(vq, &elem, len);
+ }
+
+ virtio_notify(&port->vser->vdev, vq);
+ return offset;
+}
+
+static void discard_vq_data(VirtQueue *vq, VirtIODevice *vdev)
+{
+ VirtQueueElement elem;
+
+ if (!virtio_queue_ready(vq)) {
+ return;
+ }
+ while (virtqueue_pop(vq, &elem)) {
+ virtqueue_push(vq, &elem, 0);
+ }
+ virtio_notify(vdev, vq);
+}
+
+static void do_flush_queued_data(VirtIOSerialPort *port, VirtQueue *vq,
+ VirtIODevice *vdev)
+{
+ VirtIOSerialPortClass *vsc;
+
+ assert(port);
+ assert(virtio_queue_ready(vq));
+
+ vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+
+ while (!port->throttled) {
+ unsigned int i;
+
+ /* Pop an elem only if we haven't left off a previous one mid-way */
+ if (!port->elem.out_num) {
+ if (!virtqueue_pop(vq, &port->elem)) {
+ break;
+ }
+ port->iov_idx = 0;
+ port->iov_offset = 0;
+ }
+
+ for (i = port->iov_idx; i < port->elem.out_num; i++) {
+ size_t buf_size;
+ ssize_t ret;
+
+ buf_size = port->elem.out_sg[i].iov_len - port->iov_offset;
+ ret = vsc->have_data(port,
+ port->elem.out_sg[i].iov_base
+ + port->iov_offset,
+ buf_size);
+ if (port->throttled) {
+ port->iov_idx = i;
+ if (ret > 0) {
+ port->iov_offset += ret;
+ }
+ break;
+ }
+ port->iov_offset = 0;
+ }
+ if (port->throttled) {
+ break;
+ }
+ virtqueue_push(vq, &port->elem, 0);
+ port->elem.out_num = 0;
+ }
+ virtio_notify(vdev, vq);
+}
+
+static void flush_queued_data(VirtIOSerialPort *port)
+{
+ assert(port);
+
+ if (!virtio_queue_ready(port->ovq)) {
+ return;
+ }
+ do_flush_queued_data(port, port->ovq, &port->vser->vdev);
+}
+
+static size_t send_control_msg(VirtIOSerial *vser, void *buf, size_t len)
+{
+ VirtQueueElement elem;
+ VirtQueue *vq;
+
+ vq = vser->c_ivq;
+ if (!virtio_queue_ready(vq)) {
+ return 0;
+ }
+ if (!virtqueue_pop(vq, &elem)) {
+ return 0;
+ }
+
+ memcpy(elem.in_sg[0].iov_base, buf, len);
+
+ virtqueue_push(vq, &elem, len);
+ virtio_notify(&vser->vdev, vq);
+ return len;
+}
+
+static size_t send_control_event(VirtIOSerial *vser, uint32_t port_id,
+ uint16_t event, uint16_t value)
+{
+ struct virtio_console_control cpkt;
+
+ stl_p(&cpkt.id, port_id);
+ stw_p(&cpkt.event, event);
+ stw_p(&cpkt.value, value);
+
+ trace_virtio_serial_send_control_event(port_id, event, value);
+ return send_control_msg(vser, &cpkt, sizeof(cpkt));
+}
+
+/* Functions for use inside qemu to open and read from/write to ports */
+int virtio_serial_open(VirtIOSerialPort *port)
+{
+ /* Don't allow opening an already-open port */
+ if (port->host_connected) {
+ return 0;
+ }
+ /* Send port open notification to the guest */
+ port->host_connected = true;
+ send_control_event(port->vser, port->id, VIRTIO_CONSOLE_PORT_OPEN, 1);
+
+ return 0;
+}
+
+int virtio_serial_close(VirtIOSerialPort *port)
+{
+ port->host_connected = false;
+ /*
+ * If there's any data the guest sent which the app didn't
+ * consume, reset the throttling flag and discard the data.
+ */
+ port->throttled = false;
+ discard_vq_data(port->ovq, &port->vser->vdev);
+
+ send_control_event(port->vser, port->id, VIRTIO_CONSOLE_PORT_OPEN, 0);
+
+ return 0;
+}
+
+/* Individual ports/apps call this function to write to the guest. */
+ssize_t virtio_serial_write(VirtIOSerialPort *port, const uint8_t *buf,
+ size_t size)
+{
+ if (!port || !port->host_connected || !port->guest_connected) {
+ return 0;
+ }
+ return write_to_port(port, buf, size);
+}
+
+/*
+ * Readiness of the guest to accept data on a port.
+ * Returns max. data the guest can receive
+ */
+size_t virtio_serial_guest_ready(VirtIOSerialPort *port)
+{
+ VirtQueue *vq = port->ivq;
+ unsigned int bytes;
+
+ if (!virtio_queue_ready(vq) ||
+ !(port->vser->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) ||
+ virtio_queue_empty(vq)) {
+ return 0;
+ }
+ if (use_multiport(port->vser) && !port->guest_connected) {
+ return 0;
+ }
+ virtqueue_get_avail_bytes(vq, &bytes, NULL, 4096, 0);
+ return bytes;
+}
+
+static void flush_queued_data_bh(void *opaque)
+{
+ VirtIOSerialPort *port = opaque;
+
+ flush_queued_data(port);
+}
+
+void virtio_serial_throttle_port(VirtIOSerialPort *port, bool throttle)
+{
+ if (!port) {
+ return;
+ }
+
+ trace_virtio_serial_throttle_port(port->id, throttle);
+ port->throttled = throttle;
+ if (throttle) {
+ return;
+ }
+ qemu_bh_schedule(port->bh);
+}
+
+/* Guest wants to notify us of some event */
+static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len)
+{
+ struct VirtIOSerialPort *port;
+ VirtIOSerialPortClass *vsc;
+ struct virtio_console_control cpkt, *gcpkt;
+ uint8_t *buffer;
+ size_t buffer_len;
+
+ gcpkt = buf;
+
+ if (len < sizeof(cpkt)) {
+ /* The guest sent an invalid control packet */
+ return;
+ }
+
+ cpkt.event = lduw_p(&gcpkt->event);
+ cpkt.value = lduw_p(&gcpkt->value);
+
+ trace_virtio_serial_handle_control_message(cpkt.event, cpkt.value);
+
+ if (cpkt.event == VIRTIO_CONSOLE_DEVICE_READY) {
+ if (!cpkt.value) {
+ error_report("virtio-serial-bus: Guest failure in adding device %s",
+ vser->bus.qbus.name);
+ return;
+ }
+ /*
+ * The device is up, we can now tell the device about all the
+ * ports we have here.
+ */
+ QTAILQ_FOREACH(port, &vser->ports, next) {
+ send_control_event(vser, port->id, VIRTIO_CONSOLE_PORT_ADD, 1);
+ }
+ return;
+ }
+
+ port = find_port_by_id(vser, ldl_p(&gcpkt->id));
+ if (!port) {
+ error_report("virtio-serial-bus: Unexpected port id %u for device %s",
+ ldl_p(&gcpkt->id), vser->bus.qbus.name);
+ return;
+ }
+
+ trace_virtio_serial_handle_control_message_port(port->id);
+
+ vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+
+ switch(cpkt.event) {
+ case VIRTIO_CONSOLE_PORT_READY:
+ if (!cpkt.value) {
+ error_report("virtio-serial-bus: Guest failure in adding port %u for device %s",
+ port->id, vser->bus.qbus.name);
+ break;
+ }
+ /*
+ * Now that we know the guest asked for the port name, we're
+ * sure the guest has initialised whatever state is necessary
+ * for this port. Now's a good time to let the guest know if
+ * this port is a console port so that the guest can hook it
+ * up to hvc.
+ */
+ if (vsc->is_console) {
+ send_control_event(vser, port->id, VIRTIO_CONSOLE_CONSOLE_PORT, 1);
+ }
+
+ if (port->name) {
+ stl_p(&cpkt.id, port->id);
+ stw_p(&cpkt.event, VIRTIO_CONSOLE_PORT_NAME);
+ stw_p(&cpkt.value, 1);
+
+ buffer_len = sizeof(cpkt) + strlen(port->name) + 1;
+ buffer = g_malloc(buffer_len);
+
+ memcpy(buffer, &cpkt, sizeof(cpkt));
+ memcpy(buffer + sizeof(cpkt), port->name, strlen(port->name));
+ buffer[buffer_len - 1] = 0;
+
+ send_control_msg(vser, buffer, buffer_len);
+ g_free(buffer);
+ }
+
+ if (port->host_connected) {
+ send_control_event(vser, port->id, VIRTIO_CONSOLE_PORT_OPEN, 1);
+ }
+
+ /*
+ * When the guest has asked us for this information it means
+ * the guest is all setup and has its virtqueues
+ * initialised. If some app is interested in knowing about
+ * this event, let it know.
+ */
+ if (vsc->guest_ready) {
+ vsc->guest_ready(port);
+ }
+ break;
+
+ case VIRTIO_CONSOLE_PORT_OPEN:
+ port->guest_connected = cpkt.value;
+ if (vsc->set_guest_connected) {
+ /* Send the guest opened notification if an app is interested */
+ vsc->set_guest_connected(port, cpkt.value);
+ }
+ break;
+ }
+}
+
+static void control_in(VirtIODevice *vdev, VirtQueue *vq)
+{
+}
+
+static void control_out(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtQueueElement elem;
+ VirtIOSerial *vser;
+ uint8_t *buf;
+ size_t len;
+
+ vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
+
+ len = 0;
+ buf = NULL;
+ while (virtqueue_pop(vq, &elem)) {
+ size_t cur_len;
+
+ cur_len = iov_size(elem.out_sg, elem.out_num);
+ /*
+ * Allocate a new buf only if we didn't have one previously or
+ * if the size of the buf differs
+ */
+ if (cur_len > len) {
+ g_free(buf);
+
+ buf = g_malloc(cur_len);
+ len = cur_len;
+ }
+ iov_to_buf(elem.out_sg, elem.out_num, 0, buf, cur_len);
+
+ handle_control_message(vser, buf, cur_len);
+ virtqueue_push(vq, &elem, 0);
+ }
+ g_free(buf);
+ virtio_notify(vdev, vq);
+}
+
+/* Guest wrote something to some port. */
+static void handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOSerial *vser;
+ VirtIOSerialPort *port;
+
+ vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
+ port = find_port_by_vq(vser, vq);
+
+ if (!port || !port->host_connected) {
+ discard_vq_data(vq, vdev);
+ return;
+ }
+
+ if (!port->throttled) {
+ do_flush_queued_data(port, vq, vdev);
+ return;
+ }
+}
+
+static void handle_input(VirtIODevice *vdev, VirtQueue *vq)
+{
+}
+
+static uint32_t get_features(VirtIODevice *vdev, uint32_t features)
+{
+ VirtIOSerial *vser;
+
+ vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
+
+ if (vser->bus.max_nr_ports > 1) {
+ features |= (1 << VIRTIO_CONSOLE_F_MULTIPORT);
+ }
+ return features;
+}
+
+/* Guest requested config info */
+static void get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+ VirtIOSerial *vser;
+
+ vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
+ memcpy(config_data, &vser->config, sizeof(struct virtio_console_config));
+}
+
+static void set_config(VirtIODevice *vdev, const uint8_t *config_data)
+{
+ struct virtio_console_config config;
+
+ memcpy(&config, config_data, sizeof(config));
+}
+
+static void guest_reset(VirtIOSerial *vser)
+{
+ VirtIOSerialPort *port;
+ VirtIOSerialPortClass *vsc;
+
+ QTAILQ_FOREACH(port, &vser->ports, next) {
+ vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+ if (port->guest_connected) {
+ port->guest_connected = false;
+ if (vsc->set_guest_connected) {
+ vsc->set_guest_connected(port, false);
+ }
+ }
+ }
+}
+
+static void set_status(VirtIODevice *vdev, uint8_t status)
+{
+ VirtIOSerial *vser;
+ VirtIOSerialPort *port;
+
+ vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
+ port = find_port_by_id(vser, 0);
+
+ if (port && !use_multiport(port->vser)
+ && (status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ /*
+ * Non-multiport guests won't be able to tell us guest
+ * open/close status. Such guests can only have a port at id
+ * 0, so set guest_connected for such ports as soon as guest
+ * is up.
+ */
+ port->guest_connected = true;
+ }
+ if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ guest_reset(vser);
+ }
+}
+
+static void vser_reset(VirtIODevice *vdev)
+{
+ VirtIOSerial *vser;
+
+ vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
+ guest_reset(vser);
+}
+
+static void virtio_serial_save(QEMUFile *f, void *opaque)
+{
+ VirtIOSerial *s = opaque;
+ VirtIOSerialPort *port;
+ uint32_t nr_active_ports;
+ unsigned int i, max_nr_ports;
+
+ /* The virtio device */
+ virtio_save(&s->vdev, f);
+
+ /* The config space */
+ qemu_put_be16s(f, &s->config.cols);
+ qemu_put_be16s(f, &s->config.rows);
+
+ qemu_put_be32s(f, &s->config.max_nr_ports);
+
+ /* The ports map */
+ max_nr_ports = tswap32(s->config.max_nr_ports);
+ for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
+ qemu_put_be32s(f, &s->ports_map[i]);
+ }
+
+ /* Ports */
+
+ nr_active_ports = 0;
+ QTAILQ_FOREACH(port, &s->ports, next) {
+ nr_active_ports++;
+ }
+
+ qemu_put_be32s(f, &nr_active_ports);
+
+ /*
+ * Items in struct VirtIOSerialPort.
+ */
+ QTAILQ_FOREACH(port, &s->ports, next) {
+ uint32_t elem_popped;
+
+ qemu_put_be32s(f, &port->id);
+ qemu_put_byte(f, port->guest_connected);
+ qemu_put_byte(f, port->host_connected);
+
+ elem_popped = 0;
+ if (port->elem.out_num) {
+ elem_popped = 1;
+ }
+ qemu_put_be32s(f, &elem_popped);
+ if (elem_popped) {
+ qemu_put_be32s(f, &port->iov_idx);
+ qemu_put_be64s(f, &port->iov_offset);
+
+ qemu_put_buffer(f, (unsigned char *)&port->elem,
+ sizeof(port->elem));
+ }
+ }
+}
+
+static void virtio_serial_post_load_timer_cb(void *opaque)
+{
+ uint32_t i;
+ VirtIOSerial *s = opaque;
+ VirtIOSerialPort *port;
+ uint8_t host_connected;
+ VirtIOSerialPortClass *vsc;
+
+ if (!s->post_load) {
+ return;
+ }
+ for (i = 0 ; i < s->post_load->nr_active_ports; ++i) {
+ port = s->post_load->connected[i].port;
+ host_connected = s->post_load->connected[i].host_connected;
+ if (host_connected != port->host_connected) {
+ /*
+ * We have to let the guest know of the host connection
+ * status change
+ */
+ send_control_event(s, port->id, VIRTIO_CONSOLE_PORT_OPEN,
+ port->host_connected);
+ }
+ vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+ if (vsc->set_guest_connected) {
+ vsc->set_guest_connected(port, port->guest_connected);
+ }
+ }
+ g_free(s->post_load->connected);
+ qemu_free_timer(s->post_load->timer);
+ g_free(s->post_load);
+ s->post_load = NULL;
+}
+
+static int fetch_active_ports_list(QEMUFile *f, int version_id,
+ VirtIOSerial *s, uint32_t nr_active_ports)
+{
+ uint32_t i;
+
+ s->post_load = g_malloc0(sizeof(*s->post_load));
+ s->post_load->nr_active_ports = nr_active_ports;
+ s->post_load->connected =
+ g_malloc0(sizeof(*s->post_load->connected) * nr_active_ports);
+
+ s->post_load->timer = qemu_new_timer_ns(vm_clock,
+ virtio_serial_post_load_timer_cb,
+ s);
+
+ /* Items in struct VirtIOSerialPort */
+ for (i = 0; i < nr_active_ports; i++) {
+ VirtIOSerialPort *port;
+ uint32_t id;
+
+ id = qemu_get_be32(f);
+ port = find_port_by_id(s, id);
+ if (!port) {
+ return -EINVAL;
+ }
+
+ port->guest_connected = qemu_get_byte(f);
+ s->post_load->connected[i].port = port;
+ s->post_load->connected[i].host_connected = qemu_get_byte(f);
+
+ if (version_id > 2) {
+ uint32_t elem_popped;
+
+ qemu_get_be32s(f, &elem_popped);
+ if (elem_popped) {
+ qemu_get_be32s(f, &port->iov_idx);
+ qemu_get_be64s(f, &port->iov_offset);
+
+ qemu_get_buffer(f, (unsigned char *)&port->elem,
+ sizeof(port->elem));
+ virtqueue_map_sg(port->elem.in_sg, port->elem.in_addr,
+ port->elem.in_num, 1);
+ virtqueue_map_sg(port->elem.out_sg, port->elem.out_addr,
+ port->elem.out_num, 1);
+
+ /*
+ * Port was throttled on source machine. Let's
+ * unthrottle it here so data starts flowing again.
+ */
+ virtio_serial_throttle_port(port, false);
+ }
+ }
+ }
+ qemu_mod_timer(s->post_load->timer, 1);
+ return 0;
+}
+
+static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id)
+{
+ VirtIOSerial *s = opaque;
+ uint32_t max_nr_ports, nr_active_ports, ports_map;
+ unsigned int i;
+ int ret;
+
+ if (version_id > 3) {
+ return -EINVAL;
+ }
+
+ /* The virtio device */
+ ret = virtio_load(&s->vdev, f);
+ if (ret) {
+ return ret;
+ }
+
+ if (version_id < 2) {
+ return 0;
+ }
+
+ /* The config space */
+ qemu_get_be16s(f, &s->config.cols);
+ qemu_get_be16s(f, &s->config.rows);
+
+ qemu_get_be32s(f, &max_nr_ports);
+ tswap32s(&max_nr_ports);
+ if (max_nr_ports > tswap32(s->config.max_nr_ports)) {
+ /* Source could have had more ports than us. Fail migration. */
+ return -EINVAL;
+ }
+
+ for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
+ qemu_get_be32s(f, &ports_map);
+
+ if (ports_map != s->ports_map[i]) {
+ /*
+ * Ports active on source and destination don't
+ * match. Fail migration.
+ */
+ return -EINVAL;
+ }
+ }
+
+ qemu_get_be32s(f, &nr_active_ports);
+
+ if (nr_active_ports) {
+ ret = fetch_active_ports_list(f, version_id, s, nr_active_ports);
+ if (ret) {
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static void virtser_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent);
+
+static Property virtser_props[] = {
+ DEFINE_PROP_UINT32("nr", VirtIOSerialPort, id, VIRTIO_CONSOLE_BAD_ID),
+ DEFINE_PROP_STRING("name", VirtIOSerialPort, name),
+ DEFINE_PROP_END_OF_LIST()
+};
+
+#define TYPE_VIRTIO_SERIAL_BUS "virtio-serial-bus"
+#define VIRTIO_SERIAL_BUS(obj) \
+ OBJECT_CHECK(VirtIOSerialBus, (obj), TYPE_VIRTIO_SERIAL_BUS)
+
+static void virtser_bus_class_init(ObjectClass *klass, void *data)
+{
+ BusClass *k = BUS_CLASS(klass);
+ k->print_dev = virtser_bus_dev_print;
+}
+
+static const TypeInfo virtser_bus_info = {
+ .name = TYPE_VIRTIO_SERIAL_BUS,
+ .parent = TYPE_BUS,
+ .instance_size = sizeof(VirtIOSerialBus),
+ .class_init = virtser_bus_class_init,
+};
+
+static void virtser_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent)
+{
+ VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, qdev);
+
+ monitor_printf(mon, "%*sport %d, guest %s, host %s, throttle %s\n",
+ indent, "", port->id,
+ port->guest_connected ? "on" : "off",
+ port->host_connected ? "on" : "off",
+ port->throttled ? "on" : "off");
+}
+
+/* This function is only used if a port id is not provided by the user */
+static uint32_t find_free_port_id(VirtIOSerial *vser)
+{
+ unsigned int i, max_nr_ports;
+
+ max_nr_ports = tswap32(vser->config.max_nr_ports);
+ for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
+ uint32_t map, bit;
+
+ map = vser->ports_map[i];
+ bit = ffs(~map);
+ if (bit) {
+ return (bit - 1) + i * 32;
+ }
+ }
+ return VIRTIO_CONSOLE_BAD_ID;
+}
+
+static void mark_port_added(VirtIOSerial *vser, uint32_t port_id)
+{
+ unsigned int i;
+
+ i = port_id / 32;
+ vser->ports_map[i] |= 1U << (port_id % 32);
+}
+
+static void add_port(VirtIOSerial *vser, uint32_t port_id)
+{
+ mark_port_added(vser, port_id);
+ send_control_event(vser, port_id, VIRTIO_CONSOLE_PORT_ADD, 1);
+}
+
+static void remove_port(VirtIOSerial *vser, uint32_t port_id)
+{
+ VirtIOSerialPort *port;
+ unsigned int i;
+
+ i = port_id / 32;
+ vser->ports_map[i] &= ~(1U << (port_id % 32));
+
+ port = find_port_by_id(vser, port_id);
+ /*
+ * This function is only called from qdev's unplug callback; if we
+ * get a NULL port here, we're in trouble.
+ */
+ assert(port);
+
+ /* Flush out any unconsumed buffers first */
+ discard_vq_data(port->ovq, &port->vser->vdev);
+
+ send_control_event(vser, port->id, VIRTIO_CONSOLE_PORT_REMOVE, 1);
+}
+
+static int virtser_port_qdev_init(DeviceState *qdev)
+{
+ VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, qdev);
+ VirtIOSerialPortClass *vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+ VirtIOSerialBus *bus = DO_UPCAST(VirtIOSerialBus, qbus, qdev->parent_bus);
+ int ret, max_nr_ports;
+ bool plugging_port0;
+
+ port->vser = bus->vser;
+ port->bh = qemu_bh_new(flush_queued_data_bh, port);
+
+ assert(vsc->have_data);
+
+ /*
+ * Is the first console port we're seeing? If so, put it up at
+ * location 0. This is done for backward compatibility (old
+ * kernel, new qemu).
+ */
+ plugging_port0 = vsc->is_console && !find_port_by_id(port->vser, 0);
+
+ if (find_port_by_id(port->vser, port->id)) {
+ error_report("virtio-serial-bus: A port already exists at id %u",
+ port->id);
+ return -1;
+ }
+
+ if (port->id == VIRTIO_CONSOLE_BAD_ID) {
+ if (plugging_port0) {
+ port->id = 0;
+ } else {
+ port->id = find_free_port_id(port->vser);
+ if (port->id == VIRTIO_CONSOLE_BAD_ID) {
+ error_report("virtio-serial-bus: Maximum port limit for this device reached");
+ return -1;
+ }
+ }
+ }
+
+ max_nr_ports = tswap32(port->vser->config.max_nr_ports);
+ if (port->id >= max_nr_ports) {
+ error_report("virtio-serial-bus: Out-of-range port id specified, max. allowed: %u",
+ max_nr_ports - 1);
+ return -1;
+ }
+
+ ret = vsc->init(port);
+ if (ret) {
+ return ret;
+ }
+
+ port->elem.out_num = 0;
+
+ QTAILQ_INSERT_TAIL(&port->vser->ports, port, next);
+ port->ivq = port->vser->ivqs[port->id];
+ port->ovq = port->vser->ovqs[port->id];
+
+ add_port(port->vser, port->id);
+
+ /* Send an update to the guest about this new port added */
+ virtio_notify_config(&port->vser->vdev);
+
+ return ret;
+}
+
+static int virtser_port_qdev_exit(DeviceState *qdev)
+{
+ VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, qdev);
+ VirtIOSerialPortClass *vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
+ VirtIOSerial *vser = port->vser;
+
+ qemu_bh_delete(port->bh);
+ remove_port(port->vser, port->id);
+
+ QTAILQ_REMOVE(&vser->ports, port, next);
+
+ if (vsc->exit) {
+ vsc->exit(port);
+ }
+ return 0;
+}
+
+VirtIODevice *virtio_serial_init(DeviceState *dev, virtio_serial_conf *conf)
+{
+ VirtIOSerial *vser;
+ VirtIODevice *vdev;
+ uint32_t i, max_supported_ports;
+
+ if (!conf->max_virtserial_ports)
+ return NULL;
+
+ /* Each port takes 2 queues, and one pair is for the control queue */
+ max_supported_ports = VIRTIO_PCI_QUEUE_MAX / 2 - 1;
+
+ if (conf->max_virtserial_ports > max_supported_ports) {
+ error_report("maximum ports supported: %u", max_supported_ports);
+ return NULL;
+ }
+
+ vdev = virtio_common_init("virtio-serial", VIRTIO_ID_CONSOLE,
+ sizeof(struct virtio_console_config),
+ sizeof(VirtIOSerial));
+
+ vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
+
+ /* Spawn a new virtio-serial bus on which the ports will ride as devices */
+ qbus_create_inplace(&vser->bus.qbus, TYPE_VIRTIO_SERIAL_BUS, dev, NULL);
+ vser->bus.qbus.allow_hotplug = 1;
+ vser->bus.vser = vser;
+ QTAILQ_INIT(&vser->ports);
+
+ vser->bus.max_nr_ports = conf->max_virtserial_ports;
+ vser->ivqs = g_malloc(conf->max_virtserial_ports * sizeof(VirtQueue *));
+ vser->ovqs = g_malloc(conf->max_virtserial_ports * sizeof(VirtQueue *));
+
+ /* Add a queue for host to guest transfers for port 0 (backward compat) */
+ vser->ivqs[0] = virtio_add_queue(vdev, 128, handle_input);
+ /* Add a queue for guest to host transfers for port 0 (backward compat) */
+ vser->ovqs[0] = virtio_add_queue(vdev, 128, handle_output);
+
+ /* TODO: host to guest notifications can get dropped
+ * if the queue fills up. Implement queueing in host,
+ * this might also make it possible to reduce the control
+ * queue size: as guest preposts buffers there,
+ * this will save 4Kbyte of guest memory per entry. */
+
+ /* control queue: host to guest */
+ vser->c_ivq = virtio_add_queue(vdev, 32, control_in);
+ /* control queue: guest to host */
+ vser->c_ovq = virtio_add_queue(vdev, 32, control_out);
+
+ for (i = 1; i < vser->bus.max_nr_ports; i++) {
+ /* Add a per-port queue for host to guest transfers */
+ vser->ivqs[i] = virtio_add_queue(vdev, 128, handle_input);
+ /* Add a per-per queue for guest to host transfers */
+ vser->ovqs[i] = virtio_add_queue(vdev, 128, handle_output);
+ }
+
+ vser->config.max_nr_ports = tswap32(conf->max_virtserial_ports);
+ vser->ports_map = g_malloc0(((conf->max_virtserial_ports + 31) / 32)
+ * sizeof(vser->ports_map[0]));
+ /*
+ * Reserve location 0 for a console port for backward compat
+ * (old kernel, new qemu)
+ */
+ mark_port_added(vser, 0);
+
+ vser->vdev.get_features = get_features;
+ vser->vdev.get_config = get_config;
+ vser->vdev.set_config = set_config;
+ vser->vdev.set_status = set_status;
+ vser->vdev.reset = vser_reset;
+
+ vser->qdev = dev;
+
+ vser->post_load = NULL;
+
+ /*
+ * Register for the savevm section with the virtio-console name
+ * to preserve backward compat
+ */
+ register_savevm(dev, "virtio-console", -1, 3, virtio_serial_save,
+ virtio_serial_load, vser);
+
+ return vdev;
+}
+
+void virtio_serial_exit(VirtIODevice *vdev)
+{
+ VirtIOSerial *vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
+
+ unregister_savevm(vser->qdev, "virtio-console", vser);
+
+ g_free(vser->ivqs);
+ g_free(vser->ovqs);
+ g_free(vser->ports_map);
+ if (vser->post_load) {
+ g_free(vser->post_load->connected);
+ qemu_del_timer(vser->post_load->timer);
+ qemu_free_timer(vser->post_load->timer);
+ g_free(vser->post_load);
+ }
+ virtio_cleanup(vdev);
+}
+
+static void virtio_serial_port_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *k = DEVICE_CLASS(klass);
+ k->init = virtser_port_qdev_init;
+ k->bus_type = TYPE_VIRTIO_SERIAL_BUS;
+ k->exit = virtser_port_qdev_exit;
+ k->unplug = qdev_simple_unplug_cb;
+ k->props = virtser_props;
+}
+
+static const TypeInfo virtio_serial_port_type_info = {
+ .name = TYPE_VIRTIO_SERIAL_PORT,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(VirtIOSerialPort),
+ .abstract = true,
+ .class_size = sizeof(VirtIOSerialPortClass),
+ .class_init = virtio_serial_port_class_init,
+};
+
+static void virtio_serial_register_types(void)
+{
+ type_register_static(&virtser_bus_info);
+ type_register_static(&virtio_serial_port_type_info);
+}
+
+type_init(virtio_serial_register_types)
+++ /dev/null
-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += hostmem.o vring.o ioq.o virtio-blk.o
+++ /dev/null
-/*
- * Thread-safe guest to host memory mapping
- *
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "exec/address-spaces.h"
-#include "hw/virtio/dataplane/hostmem.h"
-
-static int hostmem_lookup_cmp(const void *phys_, const void *region_)
-{
- hwaddr phys = *(const hwaddr *)phys_;
- const HostMemRegion *region = region_;
-
- if (phys < region->guest_addr) {
- return -1;
- } else if (phys >= region->guest_addr + region->size) {
- return 1;
- } else {
- return 0;
- }
-}
-
-/**
- * Map guest physical address to host pointer
- */
-void *hostmem_lookup(HostMem *hostmem, hwaddr phys, hwaddr len, bool is_write)
-{
- HostMemRegion *region;
- void *host_addr = NULL;
- hwaddr offset_within_region;
-
- qemu_mutex_lock(&hostmem->current_regions_lock);
- region = bsearch(&phys, hostmem->current_regions,
- hostmem->num_current_regions,
- sizeof(hostmem->current_regions[0]),
- hostmem_lookup_cmp);
- if (!region) {
- goto out;
- }
- if (is_write && region->readonly) {
- goto out;
- }
- offset_within_region = phys - region->guest_addr;
- if (len <= region->size - offset_within_region) {
- host_addr = region->host_addr + offset_within_region;
- }
-out:
- qemu_mutex_unlock(&hostmem->current_regions_lock);
-
- return host_addr;
-}
-
-/**
- * Install new regions list
- */
-static void hostmem_listener_commit(MemoryListener *listener)
-{
- HostMem *hostmem = container_of(listener, HostMem, listener);
-
- qemu_mutex_lock(&hostmem->current_regions_lock);
- g_free(hostmem->current_regions);
- hostmem->current_regions = hostmem->new_regions;
- hostmem->num_current_regions = hostmem->num_new_regions;
- qemu_mutex_unlock(&hostmem->current_regions_lock);
-
- /* Reset new regions list */
- hostmem->new_regions = NULL;
- hostmem->num_new_regions = 0;
-}
-
-/**
- * Add a MemoryRegionSection to the new regions list
- */
-static void hostmem_append_new_region(HostMem *hostmem,
- MemoryRegionSection *section)
-{
- void *ram_ptr = memory_region_get_ram_ptr(section->mr);
- size_t num = hostmem->num_new_regions;
- size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
-
- hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
- hostmem->new_regions[num] = (HostMemRegion){
- .host_addr = ram_ptr + section->offset_within_region,
- .guest_addr = section->offset_within_address_space,
- .size = section->size,
- .readonly = section->readonly,
- };
- hostmem->num_new_regions++;
-}
-
-static void hostmem_listener_append_region(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- HostMem *hostmem = container_of(listener, HostMem, listener);
-
- /* Ignore non-RAM regions, we may not be able to map them */
- if (!memory_region_is_ram(section->mr)) {
- return;
- }
-
- /* Ignore regions with dirty logging, we cannot mark them dirty */
- if (memory_region_is_logging(section->mr)) {
- return;
- }
-
- hostmem_append_new_region(hostmem, section);
-}
-
-/* We don't implement most MemoryListener callbacks, use these nop stubs */
-static void hostmem_listener_dummy(MemoryListener *listener)
-{
-}
-
-static void hostmem_listener_section_dummy(MemoryListener *listener,
- MemoryRegionSection *section)
-{
-}
-
-static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
- MemoryRegionSection *section,
- bool match_data, uint64_t data,
- EventNotifier *e)
-{
-}
-
-static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
- MemoryRegionSection *section,
- hwaddr addr, hwaddr len)
-{
-}
-
-void hostmem_init(HostMem *hostmem)
-{
- memset(hostmem, 0, sizeof(*hostmem));
-
- qemu_mutex_init(&hostmem->current_regions_lock);
-
- hostmem->listener = (MemoryListener){
- .begin = hostmem_listener_dummy,
- .commit = hostmem_listener_commit,
- .region_add = hostmem_listener_append_region,
- .region_del = hostmem_listener_section_dummy,
- .region_nop = hostmem_listener_append_region,
- .log_start = hostmem_listener_section_dummy,
- .log_stop = hostmem_listener_section_dummy,
- .log_sync = hostmem_listener_section_dummy,
- .log_global_start = hostmem_listener_dummy,
- .log_global_stop = hostmem_listener_dummy,
- .eventfd_add = hostmem_listener_eventfd_dummy,
- .eventfd_del = hostmem_listener_eventfd_dummy,
- .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
- .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
- .priority = 10,
- };
-
- memory_listener_register(&hostmem->listener, &address_space_memory);
- if (hostmem->num_new_regions > 0) {
- hostmem_listener_commit(&hostmem->listener);
- }
-}
-
-void hostmem_finalize(HostMem *hostmem)
-{
- memory_listener_unregister(&hostmem->listener);
- g_free(hostmem->new_regions);
- g_free(hostmem->current_regions);
- qemu_mutex_destroy(&hostmem->current_regions_lock);
-}
+++ /dev/null
-/*
- * Linux AIO request queue
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "ioq.h"
-
-void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs)
-{
- int rc;
-
- ioq->fd = fd;
- ioq->max_reqs = max_reqs;
-
- memset(&ioq->io_ctx, 0, sizeof ioq->io_ctx);
- rc = io_setup(max_reqs, &ioq->io_ctx);
- if (rc != 0) {
- fprintf(stderr, "ioq io_setup failed %d\n", rc);
- exit(1);
- }
-
- rc = event_notifier_init(&ioq->io_notifier, 0);
- if (rc != 0) {
- fprintf(stderr, "ioq io event notifier creation failed %d\n", rc);
- exit(1);
- }
-
- ioq->freelist = g_malloc0(sizeof ioq->freelist[0] * max_reqs);
- ioq->freelist_idx = 0;
-
- ioq->queue = g_malloc0(sizeof ioq->queue[0] * max_reqs);
- ioq->queue_idx = 0;
-}
-
-void ioq_cleanup(IOQueue *ioq)
-{
- g_free(ioq->freelist);
- g_free(ioq->queue);
-
- event_notifier_cleanup(&ioq->io_notifier);
- io_destroy(ioq->io_ctx);
-}
-
-EventNotifier *ioq_get_notifier(IOQueue *ioq)
-{
- return &ioq->io_notifier;
-}
-
-struct iocb *ioq_get_iocb(IOQueue *ioq)
-{
- /* Underflow cannot happen since ioq is sized for max_reqs */
- assert(ioq->freelist_idx != 0);
-
- struct iocb *iocb = ioq->freelist[--ioq->freelist_idx];
- ioq->queue[ioq->queue_idx++] = iocb;
- return iocb;
-}
-
-void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb)
-{
- /* Overflow cannot happen since ioq is sized for max_reqs */
- assert(ioq->freelist_idx != ioq->max_reqs);
-
- ioq->freelist[ioq->freelist_idx++] = iocb;
-}
-
-struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
- unsigned int count, long long offset)
-{
- struct iocb *iocb = ioq_get_iocb(ioq);
-
- if (read) {
- io_prep_preadv(iocb, ioq->fd, iov, count, offset);
- } else {
- io_prep_pwritev(iocb, ioq->fd, iov, count, offset);
- }
- io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier));
- return iocb;
-}
-
-int ioq_submit(IOQueue *ioq)
-{
- int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
- ioq->queue_idx = 0; /* reset */
- return rc;
-}
-
-int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
- void *opaque)
-{
- struct io_event events[ioq->max_reqs];
- int nevents, i;
-
- do {
- nevents = io_getevents(ioq->io_ctx, 0, ioq->max_reqs, events, NULL);
- } while (nevents < 0 && errno == EINTR);
- if (nevents < 0) {
- return nevents;
- }
-
- for (i = 0; i < nevents; i++) {
- ssize_t ret = ((uint64_t)events[i].res2 << 32) | events[i].res;
-
- completion(events[i].obj, ret, opaque);
- ioq_put_iocb(ioq, events[i].obj);
- }
- return nevents;
-}
+++ /dev/null
-/*
- * Linux AIO request queue
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef IOQ_H
-#define IOQ_H
-
-#include <libaio.h>
-#include "qemu/event_notifier.h"
-
-typedef struct {
- int fd; /* file descriptor */
- unsigned int max_reqs; /* max length of freelist and queue */
-
- io_context_t io_ctx; /* Linux AIO context */
- EventNotifier io_notifier; /* Linux AIO eventfd */
-
- /* Requests can complete in any order so a free list is necessary to manage
- * available iocbs.
- */
- struct iocb **freelist; /* free iocbs */
- unsigned int freelist_idx;
-
- /* Multiple requests are queued up before submitting them all in one go */
- struct iocb **queue; /* queued iocbs */
- unsigned int queue_idx;
-} IOQueue;
-
-void ioq_init(IOQueue *ioq, int fd, unsigned int max_reqs);
-void ioq_cleanup(IOQueue *ioq);
-EventNotifier *ioq_get_notifier(IOQueue *ioq);
-struct iocb *ioq_get_iocb(IOQueue *ioq);
-void ioq_put_iocb(IOQueue *ioq, struct iocb *iocb);
-struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov,
- unsigned int count, long long offset);
-int ioq_submit(IOQueue *ioq);
-
-static inline unsigned int ioq_num_queued(IOQueue *ioq)
-{
- return ioq->queue_idx;
-}
-
-typedef void IOQueueCompletion(struct iocb *iocb, ssize_t ret, void *opaque);
-int ioq_run_completion(IOQueue *ioq, IOQueueCompletion *completion,
- void *opaque);
-
-#endif /* IOQ_H */
+++ /dev/null
-/*
- * Dedicated thread for virtio-blk I/O processing
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "trace.h"
-#include "qemu/iov.h"
-#include "qemu/thread.h"
-#include "qemu/error-report.h"
-#include "hw/virtio/dataplane/vring.h"
-#include "ioq.h"
-#include "migration/migration.h"
-#include "block/block.h"
-#include "hw/virtio/virtio-blk.h"
-#include "virtio-blk.h"
-#include "block/aio.h"
-
-enum {
- SEG_MAX = 126, /* maximum number of I/O segments */
- VRING_MAX = SEG_MAX + 2, /* maximum number of vring descriptors */
- REQ_MAX = VRING_MAX, /* maximum number of requests in the vring,
- * is VRING_MAX / 2 with traditional and
- * VRING_MAX with indirect descriptors */
-};
-
-typedef struct {
- struct iocb iocb; /* Linux AIO control block */
- QEMUIOVector *inhdr; /* iovecs for virtio_blk_inhdr */
- unsigned int head; /* vring descriptor index */
- struct iovec *bounce_iov; /* used if guest buffers are unaligned */
- QEMUIOVector *read_qiov; /* for read completion /w bounce buffer */
-} VirtIOBlockRequest;
-
-struct VirtIOBlockDataPlane {
- bool started;
- bool stopping;
- QEMUBH *start_bh;
- QemuThread thread;
-
- VirtIOBlkConf *blk;
- int fd; /* image file descriptor */
-
- VirtIODevice *vdev;
- Vring vring; /* virtqueue vring */
- EventNotifier *guest_notifier; /* irq */
-
- /* Note that these EventNotifiers are assigned by value. This is
- * fine as long as you do not call event_notifier_cleanup on them
- * (because you don't own the file descriptor or handle; you just
- * use it).
- */
- AioContext *ctx;
- EventNotifier io_notifier; /* Linux AIO completion */
- EventNotifier host_notifier; /* doorbell */
-
- IOQueue ioqueue; /* Linux AIO queue (should really be per
- dataplane thread) */
- VirtIOBlockRequest requests[REQ_MAX]; /* pool of requests, managed by the
- queue */
-
- unsigned int num_reqs;
-
- Error *migration_blocker;
-};
-
-/* Raise an interrupt to signal guest, if necessary */
-static void notify_guest(VirtIOBlockDataPlane *s)
-{
- if (!vring_should_notify(s->vdev, &s->vring)) {
- return;
- }
-
- event_notifier_set(s->guest_notifier);
-}
-
-static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
-{
- VirtIOBlockDataPlane *s = opaque;
- VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
- struct virtio_blk_inhdr hdr;
- int len;
-
- if (likely(ret >= 0)) {
- hdr.status = VIRTIO_BLK_S_OK;
- len = ret;
- } else {
- hdr.status = VIRTIO_BLK_S_IOERR;
- len = 0;
- }
-
- trace_virtio_blk_data_plane_complete_request(s, req->head, ret);
-
- if (req->read_qiov) {
- assert(req->bounce_iov);
- qemu_iovec_from_buf(req->read_qiov, 0, req->bounce_iov->iov_base, len);
- qemu_iovec_destroy(req->read_qiov);
- g_slice_free(QEMUIOVector, req->read_qiov);
- }
-
- if (req->bounce_iov) {
- qemu_vfree(req->bounce_iov->iov_base);
- g_slice_free(struct iovec, req->bounce_iov);
- }
-
- qemu_iovec_from_buf(req->inhdr, 0, &hdr, sizeof(hdr));
- qemu_iovec_destroy(req->inhdr);
- g_slice_free(QEMUIOVector, req->inhdr);
-
- /* According to the virtio specification len should be the number of bytes
- * written to, but for virtio-blk it seems to be the number of bytes
- * transferred plus the status bytes.
- */
- vring_push(&s->vring, req->head, len + sizeof(hdr));
-
- s->num_reqs--;
-}
-
-static void complete_request_early(VirtIOBlockDataPlane *s, unsigned int head,
- QEMUIOVector *inhdr, unsigned char status)
-{
- struct virtio_blk_inhdr hdr = {
- .status = status,
- };
-
- qemu_iovec_from_buf(inhdr, 0, &hdr, sizeof(hdr));
- qemu_iovec_destroy(inhdr);
- g_slice_free(QEMUIOVector, inhdr);
-
- vring_push(&s->vring, head, sizeof(hdr));
- notify_guest(s);
-}
-
-/* Get disk serial number */
-static void do_get_id_cmd(VirtIOBlockDataPlane *s,
- struct iovec *iov, unsigned int iov_cnt,
- unsigned int head, QEMUIOVector *inhdr)
-{
- char id[VIRTIO_BLK_ID_BYTES];
-
- /* Serial number not NUL-terminated when shorter than buffer */
- strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id));
- iov_from_buf(iov, iov_cnt, 0, id, sizeof(id));
- complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
-}
-
-static int do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
- struct iovec *iov, unsigned int iov_cnt,
- long long offset, unsigned int head,
- QEMUIOVector *inhdr)
-{
- struct iocb *iocb;
- QEMUIOVector qiov;
- struct iovec *bounce_iov = NULL;
- QEMUIOVector *read_qiov = NULL;
-
- qemu_iovec_init_external(&qiov, iov, iov_cnt);
- if (!bdrv_qiov_is_aligned(s->blk->conf.bs, &qiov)) {
- void *bounce_buffer = qemu_blockalign(s->blk->conf.bs, qiov.size);
-
- if (read) {
- /* Need to copy back from bounce buffer on completion */
- read_qiov = g_slice_new(QEMUIOVector);
- qemu_iovec_init(read_qiov, iov_cnt);
- qemu_iovec_concat_iov(read_qiov, iov, iov_cnt, 0, qiov.size);
- } else {
- qemu_iovec_to_buf(&qiov, 0, bounce_buffer, qiov.size);
- }
-
- /* Redirect I/O to aligned bounce buffer */
- bounce_iov = g_slice_new(struct iovec);
- bounce_iov->iov_base = bounce_buffer;
- bounce_iov->iov_len = qiov.size;
- iov = bounce_iov;
- iov_cnt = 1;
- }
-
- iocb = ioq_rdwr(&s->ioqueue, read, iov, iov_cnt, offset);
-
- /* Fill in virtio block metadata needed for completion */
- VirtIOBlockRequest *req = container_of(iocb, VirtIOBlockRequest, iocb);
- req->head = head;
- req->inhdr = inhdr;
- req->bounce_iov = bounce_iov;
- req->read_qiov = read_qiov;
- return 0;
-}
-
-static int process_request(IOQueue *ioq, struct iovec iov[],
- unsigned int out_num, unsigned int in_num,
- unsigned int head)
-{
- VirtIOBlockDataPlane *s = container_of(ioq, VirtIOBlockDataPlane, ioqueue);
- struct iovec *in_iov = &iov[out_num];
- struct virtio_blk_outhdr outhdr;
- QEMUIOVector *inhdr;
- size_t in_size;
-
- /* Copy in outhdr */
- if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
- sizeof(outhdr)) != sizeof(outhdr))) {
- error_report("virtio-blk request outhdr too short");
- return -EFAULT;
- }
- iov_discard_front(&iov, &out_num, sizeof(outhdr));
-
- /* Grab inhdr for later */
- in_size = iov_size(in_iov, in_num);
- if (in_size < sizeof(struct virtio_blk_inhdr)) {
- error_report("virtio_blk request inhdr too short");
- return -EFAULT;
- }
- inhdr = g_slice_new(QEMUIOVector);
- qemu_iovec_init(inhdr, 1);
- qemu_iovec_concat_iov(inhdr, in_iov, in_num,
- in_size - sizeof(struct virtio_blk_inhdr),
- sizeof(struct virtio_blk_inhdr));
- iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
-
- /* TODO Linux sets the barrier bit even when not advertised! */
- outhdr.type &= ~VIRTIO_BLK_T_BARRIER;
-
- switch (outhdr.type) {
- case VIRTIO_BLK_T_IN:
- do_rdwr_cmd(s, true, in_iov, in_num, outhdr.sector * 512, head, inhdr);
- return 0;
-
- case VIRTIO_BLK_T_OUT:
- do_rdwr_cmd(s, false, iov, out_num, outhdr.sector * 512, head, inhdr);
- return 0;
-
- case VIRTIO_BLK_T_SCSI_CMD:
- /* TODO support SCSI commands */
- complete_request_early(s, head, inhdr, VIRTIO_BLK_S_UNSUPP);
- return 0;
-
- case VIRTIO_BLK_T_FLUSH:
- /* TODO fdsync not supported by Linux AIO, do it synchronously here! */
- if (qemu_fdatasync(s->fd) < 0) {
- complete_request_early(s, head, inhdr, VIRTIO_BLK_S_IOERR);
- } else {
- complete_request_early(s, head, inhdr, VIRTIO_BLK_S_OK);
- }
- return 0;
-
- case VIRTIO_BLK_T_GET_ID:
- do_get_id_cmd(s, in_iov, in_num, head, inhdr);
- return 0;
-
- default:
- error_report("virtio-blk unsupported request type %#x", outhdr.type);
- qemu_iovec_destroy(inhdr);
- g_slice_free(QEMUIOVector, inhdr);
- return -EFAULT;
- }
-}
-
-static int flush_true(EventNotifier *e)
-{
- return true;
-}
-
-static void handle_notify(EventNotifier *e)
-{
- VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
- host_notifier);
-
- /* There is one array of iovecs into which all new requests are extracted
- * from the vring. Requests are read from the vring and the translated
- * descriptors are written to the iovecs array. The iovecs do not have to
- * persist across handle_notify() calls because the kernel copies the
- * iovecs on io_submit().
- *
- * Handling io_submit() EAGAIN may require storing the requests across
- * handle_notify() calls until the kernel has sufficient resources to
- * accept more I/O. This is not implemented yet.
- */
- struct iovec iovec[VRING_MAX];
- struct iovec *end = &iovec[VRING_MAX];
- struct iovec *iov = iovec;
-
- /* When a request is read from the vring, the index of the first descriptor
- * (aka head) is returned so that the completed request can be pushed onto
- * the vring later.
- *
- * The number of hypervisor read-only iovecs is out_num. The number of
- * hypervisor write-only iovecs is in_num.
- */
- int head;
- unsigned int out_num = 0, in_num = 0;
- unsigned int num_queued;
-
- event_notifier_test_and_clear(&s->host_notifier);
- for (;;) {
- /* Disable guest->host notifies to avoid unnecessary vmexits */
- vring_disable_notification(s->vdev, &s->vring);
-
- for (;;) {
- head = vring_pop(s->vdev, &s->vring, iov, end, &out_num, &in_num);
- if (head < 0) {
- break; /* no more requests */
- }
-
- trace_virtio_blk_data_plane_process_request(s, out_num, in_num,
- head);
-
- if (process_request(&s->ioqueue, iov, out_num, in_num, head) < 0) {
- vring_set_broken(&s->vring);
- break;
- }
- iov += out_num + in_num;
- }
-
- if (likely(head == -EAGAIN)) { /* vring emptied */
- /* Re-enable guest->host notifies and stop processing the vring.
- * But if the guest has snuck in more descriptors, keep processing.
- */
- if (vring_enable_notification(s->vdev, &s->vring)) {
- break;
- }
- } else { /* head == -ENOBUFS or fatal error, iovecs[] is depleted */
- /* Since there are no iovecs[] left, stop processing for now. Do
- * not re-enable guest->host notifies since the I/O completion
- * handler knows to check for more vring descriptors anyway.
- */
- break;
- }
- }
-
- num_queued = ioq_num_queued(&s->ioqueue);
- if (num_queued > 0) {
- s->num_reqs += num_queued;
-
- int rc = ioq_submit(&s->ioqueue);
- if (unlikely(rc < 0)) {
- fprintf(stderr, "ioq_submit failed %d\n", rc);
- exit(1);
- }
- }
-}
-
-static int flush_io(EventNotifier *e)
-{
- VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
- io_notifier);
-
- return s->num_reqs > 0;
-}
-
-static void handle_io(EventNotifier *e)
-{
- VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
- io_notifier);
-
- event_notifier_test_and_clear(&s->io_notifier);
- if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
- notify_guest(s);
- }
-
- /* If there were more requests than iovecs, the vring will not be empty yet
- * so check again. There should now be enough resources to process more
- * requests.
- */
- if (unlikely(vring_more_avail(&s->vring))) {
- handle_notify(&s->host_notifier);
- }
-}
-
-static void *data_plane_thread(void *opaque)
-{
- VirtIOBlockDataPlane *s = opaque;
-
- do {
- aio_poll(s->ctx, true);
- } while (!s->stopping || s->num_reqs > 0);
- return NULL;
-}
-
-static void start_data_plane_bh(void *opaque)
-{
- VirtIOBlockDataPlane *s = opaque;
-
- qemu_bh_delete(s->start_bh);
- s->start_bh = NULL;
- qemu_thread_create(&s->thread, data_plane_thread,
- s, QEMU_THREAD_JOINABLE);
-}
-
-bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
- VirtIOBlockDataPlane **dataplane)
-{
- VirtIOBlockDataPlane *s;
- int fd;
-
- *dataplane = NULL;
-
- if (!blk->data_plane) {
- return true;
- }
-
- if (blk->scsi) {
- error_report("device is incompatible with x-data-plane, use scsi=off");
- return false;
- }
-
- if (blk->config_wce) {
- error_report("device is incompatible with x-data-plane, "
- "use config-wce=off");
- return false;
- }
-
- fd = raw_get_aio_fd(blk->conf.bs);
- if (fd < 0) {
- error_report("drive is incompatible with x-data-plane, "
- "use format=raw,cache=none,aio=native");
- return false;
- }
-
- s = g_new0(VirtIOBlockDataPlane, 1);
- s->vdev = vdev;
- s->fd = fd;
- s->blk = blk;
-
- /* Prevent block operations that conflict with data plane thread */
- bdrv_set_in_use(blk->conf.bs, 1);
-
- error_setg(&s->migration_blocker,
- "x-data-plane does not support migration");
- migrate_add_blocker(s->migration_blocker);
-
- *dataplane = s;
- return true;
-}
-
-void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
-{
- if (!s) {
- return;
- }
-
- virtio_blk_data_plane_stop(s);
- migrate_del_blocker(s->migration_blocker);
- error_free(s->migration_blocker);
- bdrv_set_in_use(s->blk->conf.bs, 0);
- g_free(s);
-}
-
-void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
-{
- VirtQueue *vq;
- int i;
-
- if (s->started) {
- return;
- }
-
- vq = virtio_get_queue(s->vdev, 0);
- if (!vring_setup(&s->vring, s->vdev, 0)) {
- return;
- }
-
- s->ctx = aio_context_new();
-
- /* Set up guest notifier (irq) */
- if (s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1,
- true) != 0) {
- fprintf(stderr, "virtio-blk failed to set guest notifier, "
- "ensure -enable-kvm is set\n");
- exit(1);
- }
- s->guest_notifier = virtio_queue_get_guest_notifier(vq);
-
- /* Set up virtqueue notify */
- if (s->vdev->binding->set_host_notifier(s->vdev->binding_opaque,
- 0, true) != 0) {
- fprintf(stderr, "virtio-blk failed to set host notifier\n");
- exit(1);
- }
- s->host_notifier = *virtio_queue_get_host_notifier(vq);
- aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify, flush_true);
-
- /* Set up ioqueue */
- ioq_init(&s->ioqueue, s->fd, REQ_MAX);
- for (i = 0; i < ARRAY_SIZE(s->requests); i++) {
- ioq_put_iocb(&s->ioqueue, &s->requests[i].iocb);
- }
- s->io_notifier = *ioq_get_notifier(&s->ioqueue);
- aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io, flush_io);
-
- s->started = true;
- trace_virtio_blk_data_plane_start(s);
-
- /* Kick right away to begin processing requests already in vring */
- event_notifier_set(virtio_queue_get_host_notifier(vq));
-
- /* Spawn thread in BH so it inherits iothread cpusets */
- s->start_bh = qemu_bh_new(start_data_plane_bh, s);
- qemu_bh_schedule(s->start_bh);
-}
-
-void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
-{
- if (!s->started || s->stopping) {
- return;
- }
- s->stopping = true;
- trace_virtio_blk_data_plane_stop(s);
-
- /* Stop thread or cancel pending thread creation BH */
- if (s->start_bh) {
- qemu_bh_delete(s->start_bh);
- s->start_bh = NULL;
- } else {
- aio_notify(s->ctx);
- qemu_thread_join(&s->thread);
- }
-
- aio_set_event_notifier(s->ctx, &s->io_notifier, NULL, NULL);
- ioq_cleanup(&s->ioqueue);
-
- aio_set_event_notifier(s->ctx, &s->host_notifier, NULL, NULL);
- s->vdev->binding->set_host_notifier(s->vdev->binding_opaque, 0, false);
-
- aio_context_unref(s->ctx);
-
- /* Clean up guest notifier (irq) */
- s->vdev->binding->set_guest_notifiers(s->vdev->binding_opaque, 1, false);
-
- vring_teardown(&s->vring);
- s->started = false;
- s->stopping = false;
-}
+++ /dev/null
-/*
- * Dedicated thread for virtio-blk I/O processing
- *
- * Copyright 2012 IBM, Corp.
- * Copyright 2012 Red Hat, Inc. and/or its affiliates
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef HW_DATAPLANE_VIRTIO_BLK_H
-#define HW_DATAPLANE_VIRTIO_BLK_H
-
-#include "hw/virtio/virtio.h"
-
-typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane;
-
-bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
- VirtIOBlockDataPlane **dataplane);
-void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s);
-void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s);
-void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s);
-void virtio_blk_data_plane_drain(VirtIOBlockDataPlane *s);
-
-#endif /* HW_DATAPLANE_VIRTIO_BLK_H */
+++ /dev/null
-/* Copyright 2012 Red Hat, Inc.
- * Copyright IBM, Corp. 2012
- *
- * Based on Linux 2.6.39 vhost code:
- * Copyright (C) 2009 Red Hat, Inc.
- * Copyright (C) 2006 Rusty Russell IBM Corporation
- *
- * Author: Michael S. Tsirkin <mst@redhat.com>
- * Stefan Hajnoczi <stefanha@redhat.com>
- *
- * Inspiration, some code, and most witty comments come from
- * Documentation/virtual/lguest/lguest.c, by Rusty Russell
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- */
-
-#include "trace.h"
-#include "hw/virtio/dataplane/vring.h"
-#include "qemu/error-report.h"
-
-/* Map the guest's vring to host memory */
-bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
-{
- hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n);
- hwaddr vring_size = virtio_queue_get_ring_size(vdev, n);
- void *vring_ptr;
-
- vring->broken = false;
-
- hostmem_init(&vring->hostmem);
- vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
- if (!vring_ptr) {
- error_report("Failed to map vring "
- "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
- vring_addr, vring_size);
- vring->broken = true;
- return false;
- }
-
- vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
-
- vring->last_avail_idx = 0;
- vring->last_used_idx = 0;
- vring->signalled_used = 0;
- vring->signalled_used_valid = false;
-
- trace_vring_setup(virtio_queue_get_ring_addr(vdev, n),
- vring->vr.desc, vring->vr.avail, vring->vr.used);
- return true;
-}
-
-void vring_teardown(Vring *vring)
-{
- hostmem_finalize(&vring->hostmem);
-}
-
-/* Disable guest->host notifies */
-void vring_disable_notification(VirtIODevice *vdev, Vring *vring)
-{
- if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
- vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY;
- }
-}
-
-/* Enable guest->host notifies
- *
- * Return true if the vring is empty, false if there are more requests.
- */
-bool vring_enable_notification(VirtIODevice *vdev, Vring *vring)
-{
- if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
- vring_avail_event(&vring->vr) = vring->vr.avail->idx;
- } else {
- vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY;
- }
- smp_mb(); /* ensure update is seen before reading avail_idx */
- return !vring_more_avail(vring);
-}
-
-/* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */
-bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
-{
- uint16_t old, new;
- bool v;
- /* Flush out used index updates. This is paired
- * with the barrier that the Guest executes when enabling
- * interrupts. */
- smp_mb();
-
- if ((vdev->guest_features & VIRTIO_F_NOTIFY_ON_EMPTY) &&
- unlikely(vring->vr.avail->idx == vring->last_avail_idx)) {
- return true;
- }
-
- if (!(vdev->guest_features & VIRTIO_RING_F_EVENT_IDX)) {
- return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
- }
- old = vring->signalled_used;
- v = vring->signalled_used_valid;
- new = vring->signalled_used = vring->last_used_idx;
- vring->signalled_used_valid = true;
-
- if (unlikely(!v)) {
- return true;
- }
-
- return vring_need_event(vring_used_event(&vring->vr), new, old);
-}
-
-/* This is stolen from linux/drivers/vhost/vhost.c. */
-static int get_indirect(Vring *vring,
- struct iovec iov[], struct iovec *iov_end,
- unsigned int *out_num, unsigned int *in_num,
- struct vring_desc *indirect)
-{
- struct vring_desc desc;
- unsigned int i = 0, count, found = 0;
-
- /* Sanity check */
- if (unlikely(indirect->len % sizeof(desc))) {
- error_report("Invalid length in indirect descriptor: "
- "len %#x not multiple of %#zx",
- indirect->len, sizeof(desc));
- vring->broken = true;
- return -EFAULT;
- }
-
- count = indirect->len / sizeof(desc);
- /* Buffers are chained via a 16 bit next field, so
- * we can have at most 2^16 of these. */
- if (unlikely(count > USHRT_MAX + 1)) {
- error_report("Indirect buffer length too big: %d", indirect->len);
- vring->broken = true;
- return -EFAULT;
- }
-
- do {
- struct vring_desc *desc_ptr;
-
- /* Translate indirect descriptor */
- desc_ptr = hostmem_lookup(&vring->hostmem,
- indirect->addr + found * sizeof(desc),
- sizeof(desc), false);
- if (!desc_ptr) {
- error_report("Failed to map indirect descriptor "
- "addr %#" PRIx64 " len %zu",
- (uint64_t)indirect->addr + found * sizeof(desc),
- sizeof(desc));
- vring->broken = true;
- return -EFAULT;
- }
- desc = *desc_ptr;
-
- /* Ensure descriptor has been loaded before accessing fields */
- barrier(); /* read_barrier_depends(); */
-
- if (unlikely(++found > count)) {
- error_report("Loop detected: last one at %u "
- "indirect size %u", i, count);
- vring->broken = true;
- return -EFAULT;
- }
-
- if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
- error_report("Nested indirect descriptor");
- vring->broken = true;
- return -EFAULT;
- }
-
- /* Stop for now if there are not enough iovecs available. */
- if (iov >= iov_end) {
- return -ENOBUFS;
- }
-
- iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
- desc.flags & VRING_DESC_F_WRITE);
- if (!iov->iov_base) {
- error_report("Failed to map indirect descriptor"
- "addr %#" PRIx64 " len %u",
- (uint64_t)desc.addr, desc.len);
- vring->broken = true;
- return -EFAULT;
- }
- iov->iov_len = desc.len;
- iov++;
-
- /* If this is an input descriptor, increment that count. */
- if (desc.flags & VRING_DESC_F_WRITE) {
- *in_num += 1;
- } else {
- /* If it's an output descriptor, they're all supposed
- * to come before any input descriptors. */
- if (unlikely(*in_num)) {
- error_report("Indirect descriptor "
- "has out after in: idx %u", i);
- vring->broken = true;
- return -EFAULT;
- }
- *out_num += 1;
- }
- i = desc.next;
- } while (desc.flags & VRING_DESC_F_NEXT);
- return 0;
-}
-
-/* This looks in the virtqueue and for the first available buffer, and converts
- * it to an iovec for convenient access. Since descriptors consist of some
- * number of output then some number of input descriptors, it's actually two
- * iovecs, but we pack them into one and note how many of each there were.
- *
- * This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found. A negative code is
- * returned on error.
- *
- * Stolen from linux/drivers/vhost/vhost.c.
- */
-int vring_pop(VirtIODevice *vdev, Vring *vring,
- struct iovec iov[], struct iovec *iov_end,
- unsigned int *out_num, unsigned int *in_num)
-{
- struct vring_desc desc;
- unsigned int i, head, found = 0, num = vring->vr.num;
- uint16_t avail_idx, last_avail_idx;
-
- /* If there was a fatal error then refuse operation */
- if (vring->broken) {
- return -EFAULT;
- }
-
- /* Check it isn't doing very strange things with descriptor numbers. */
- last_avail_idx = vring->last_avail_idx;
- avail_idx = vring->vr.avail->idx;
- barrier(); /* load indices now and not again later */
-
- if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) {
- error_report("Guest moved used index from %u to %u",
- last_avail_idx, avail_idx);
- vring->broken = true;
- return -EFAULT;
- }
-
- /* If there's nothing new since last we looked. */
- if (avail_idx == last_avail_idx) {
- return -EAGAIN;
- }
-
- /* Only get avail ring entries after they have been exposed by guest. */
- smp_rmb();
-
- /* Grab the next descriptor number they're advertising, and increment
- * the index we've seen. */
- head = vring->vr.avail->ring[last_avail_idx % num];
-
- /* If their number is silly, that's an error. */
- if (unlikely(head >= num)) {
- error_report("Guest says index %u > %u is available", head, num);
- vring->broken = true;
- return -EFAULT;
- }
-
- if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
- vring_avail_event(&vring->vr) = vring->vr.avail->idx;
- }
-
- /* When we start there are none of either input nor output. */
- *out_num = *in_num = 0;
-
- i = head;
- do {
- if (unlikely(i >= num)) {
- error_report("Desc index is %u > %u, head = %u", i, num, head);
- vring->broken = true;
- return -EFAULT;
- }
- if (unlikely(++found > num)) {
- error_report("Loop detected: last one at %u vq size %u head %u",
- i, num, head);
- vring->broken = true;
- return -EFAULT;
- }
- desc = vring->vr.desc[i];
-
- /* Ensure descriptor is loaded before accessing fields */
- barrier();
-
- if (desc.flags & VRING_DESC_F_INDIRECT) {
- int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
- if (ret < 0) {
- return ret;
- }
- continue;
- }
-
- /* If there are not enough iovecs left, stop for now. The caller
- * should check if there are more descs available once they have dealt
- * with the current set.
- */
- if (iov >= iov_end) {
- return -ENOBUFS;
- }
-
- /* TODO handle non-contiguous memory across region boundaries */
- iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
- desc.flags & VRING_DESC_F_WRITE);
- if (!iov->iov_base) {
- error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
- (uint64_t)desc.addr, desc.len);
- vring->broken = true;
- return -EFAULT;
- }
- iov->iov_len = desc.len;
- iov++;
-
- if (desc.flags & VRING_DESC_F_WRITE) {
- /* If this is an input descriptor,
- * increment that count. */
- *in_num += 1;
- } else {
- /* If it's an output descriptor, they're all supposed
- * to come before any input descriptors. */
- if (unlikely(*in_num)) {
- error_report("Descriptor has out after in: idx %d", i);
- vring->broken = true;
- return -EFAULT;
- }
- *out_num += 1;
- }
- i = desc.next;
- } while (desc.flags & VRING_DESC_F_NEXT);
-
- /* On success, increment avail index. */
- vring->last_avail_idx++;
- return head;
-}
-
-/* After we've used one of their buffers, we tell them about it.
- *
- * Stolen from linux/drivers/vhost/vhost.c.
- */
-void vring_push(Vring *vring, unsigned int head, int len)
-{
- struct vring_used_elem *used;
- uint16_t new;
-
- /* Don't touch vring if a fatal error occurred */
- if (vring->broken) {
- return;
- }
-
- /* The virtqueue contains a ring of used buffers. Get a pointer to the
- * next entry in that used ring. */
- used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
- used->id = head;
- used->len = len;
-
- /* Make sure buffer is written before we update index. */
- smp_wmb();
-
- new = vring->vr.used->idx = ++vring->last_used_idx;
- if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) {
- vring->signalled_used_valid = false;
- }
-}
common-obj-$(CONFIG_XILINX_AXI) += xilinx_axienet.o
common-obj-$(CONFIG_CADENCE) += cadence_gem.o
+
+obj-$(CONFIG_VIRTIO) += virtio-net.o
+obj-y += vhost_net.o
--- /dev/null
+/*
+ * vhost-net support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "net/net.h"
+#include "net/tap.h"
+
+#include "hw/virtio/virtio-net.h"
+#include "net/vhost_net.h"
+#include "qemu/error-report.h"
+
+#include "config.h"
+
+#ifdef CONFIG_VHOST_NET
+#include <linux/vhost.h>
+#include <sys/socket.h>
+#include <linux/kvm.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/virtio_ring.h>
+#include <netpacket/packet.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <stdio.h>
+
+#include "hw/virtio/vhost.h"
+
+struct vhost_net {
+ struct vhost_dev dev;
+ struct vhost_virtqueue vqs[2];
+ int backend;
+ NetClientState *nc;
+};
+
+unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
+{
+ /* Clear features not supported by host kernel. */
+ if (!(net->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))) {
+ features &= ~(1 << VIRTIO_F_NOTIFY_ON_EMPTY);
+ }
+ if (!(net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) {
+ features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
+ }
+ if (!(net->dev.features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
+ features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
+ }
+ if (!(net->dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))) {
+ features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+ }
+ return features;
+}
+
+void vhost_net_ack_features(struct vhost_net *net, unsigned features)
+{
+ net->dev.acked_features = net->dev.backend_features;
+ if (features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) {
+ net->dev.acked_features |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
+ }
+ if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) {
+ net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
+ }
+ if (features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+ net->dev.acked_features |= (1 << VIRTIO_RING_F_EVENT_IDX);
+ }
+ if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
+ net->dev.acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
+ }
+}
+
+static int vhost_net_get_fd(NetClientState *backend)
+{
+ switch (backend->info->type) {
+ case NET_CLIENT_OPTIONS_KIND_TAP:
+ return tap_get_fd(backend);
+ default:
+ fprintf(stderr, "vhost-net requires tap backend\n");
+ return -EBADFD;
+ }
+}
+
+struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
+ bool force)
+{
+ int r;
+ struct vhost_net *net = g_malloc(sizeof *net);
+ if (!backend) {
+ fprintf(stderr, "vhost-net requires backend to be setup\n");
+ goto fail;
+ }
+ r = vhost_net_get_fd(backend);
+ if (r < 0) {
+ goto fail;
+ }
+ net->nc = backend;
+ net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
+ (1 << VHOST_NET_F_VIRTIO_NET_HDR);
+ net->backend = r;
+
+ net->dev.nvqs = 2;
+ net->dev.vqs = net->vqs;
+
+ r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
+ if (r < 0) {
+ goto fail;
+ }
+ if (!tap_has_vnet_hdr_len(backend,
+ sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
+ net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+ }
+ if (~net->dev.features & net->dev.backend_features) {
+ fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
+ (uint64_t)(~net->dev.features & net->dev.backend_features));
+ vhost_dev_cleanup(&net->dev);
+ goto fail;
+ }
+
+ /* Set sane init value. Override when guest acks. */
+ vhost_net_ack_features(net, 0);
+ return net;
+fail:
+ g_free(net);
+ return NULL;
+}
+
+bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
+{
+ return vhost_dev_query(&net->dev, dev);
+}
+
+static int vhost_net_start_one(struct vhost_net *net,
+ VirtIODevice *dev,
+ int vq_index)
+{
+ struct vhost_vring_file file = { };
+ int r;
+
+ if (net->dev.started) {
+ return 0;
+ }
+
+ net->dev.nvqs = 2;
+ net->dev.vqs = net->vqs;
+ net->dev.vq_index = vq_index;
+
+ r = vhost_dev_enable_notifiers(&net->dev, dev);
+ if (r < 0) {
+ goto fail_notifiers;
+ }
+
+ r = vhost_dev_start(&net->dev, dev);
+ if (r < 0) {
+ goto fail_start;
+ }
+
+ net->nc->info->poll(net->nc, false);
+ qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
+ file.fd = net->backend;
+ for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
+ r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+ }
+ return 0;
+fail:
+ file.fd = -1;
+ while (file.index-- > 0) {
+ int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+ assert(r >= 0);
+ }
+ net->nc->info->poll(net->nc, true);
+ vhost_dev_stop(&net->dev, dev);
+fail_start:
+ vhost_dev_disable_notifiers(&net->dev, dev);
+fail_notifiers:
+ return r;
+}
+
+static void vhost_net_stop_one(struct vhost_net *net,
+ VirtIODevice *dev)
+{
+ struct vhost_vring_file file = { .fd = -1 };
+
+ if (!net->dev.started) {
+ return;
+ }
+
+ for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
+ int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+ assert(r >= 0);
+ }
+ net->nc->info->poll(net->nc, true);
+ vhost_dev_stop(&net->dev, dev);
+ vhost_dev_disable_notifiers(&net->dev, dev);
+}
+
+int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
+ int total_queues)
+{
+ int r, i = 0;
+
+ if (!dev->binding->set_guest_notifiers) {
+ error_report("binding does not support guest notifiers");
+ r = -ENOSYS;
+ goto err;
+ }
+
+ for (i = 0; i < total_queues; i++) {
+ r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2);
+
+ if (r < 0) {
+ goto err;
+ }
+ }
+
+ r = dev->binding->set_guest_notifiers(dev->binding_opaque,
+ total_queues * 2,
+ true);
+ if (r < 0) {
+ error_report("Error binding guest notifier: %d", -r);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ while (--i >= 0) {
+ vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
+ }
+ return r;
+}
+
+void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
+ int total_queues)
+{
+ int i, r;
+
+ r = dev->binding->set_guest_notifiers(dev->binding_opaque,
+ total_queues * 2,
+ false);
+ if (r < 0) {
+ fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
+ fflush(stderr);
+ }
+ assert(r >= 0);
+
+ for (i = 0; i < total_queues; i++) {
+ vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
+ }
+}
+
+void vhost_net_cleanup(struct vhost_net *net)
+{
+ vhost_dev_cleanup(&net->dev);
+ g_free(net);
+}
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+ return vhost_virtqueue_pending(&net->dev, idx);
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+ int idx, bool mask)
+{
+ vhost_virtqueue_mask(&net->dev, dev, idx, mask);
+}
+#else
+struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
+ bool force)
+{
+ error_report("vhost-net support is not compiled in");
+ return NULL;
+}
+
+bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
+{
+ return false;
+}
+
+int vhost_net_start(VirtIODevice *dev,
+ NetClientState *ncs,
+ int total_queues)
+{
+ return -ENOSYS;
+}
+void vhost_net_stop(VirtIODevice *dev,
+ NetClientState *ncs,
+ int total_queues)
+{
+}
+
+void vhost_net_cleanup(struct vhost_net *net)
+{
+}
+
+unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
+{
+ return features;
+}
+void vhost_net_ack_features(struct vhost_net *net, unsigned features)
+{
+}
+
+bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
+{
+ return -ENOSYS;
+}
+
+void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+ int idx, bool mask)
+{
+}
+#endif
--- /dev/null
+/*
+ * Virtio Network Device
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/iov.h"
+#include "hw/virtio/virtio.h"
+#include "net/net.h"
+#include "net/checksum.h"
+#include "net/tap.h"
+#include "qemu/error-report.h"
+#include "qemu/timer.h"
+#include "hw/virtio/virtio-net.h"
+#include "net/vhost_net.h"
+
+#define VIRTIO_NET_VM_VERSION 11
+
+#define MAC_TABLE_ENTRIES 64
+#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
+
+/*
+ * Calculate the number of bytes up to and including the given 'field' of
+ * 'container'.
+ */
+#define endof(container, field) \
+ (offsetof(container, field) + sizeof(((container *)0)->field))
+
+typedef struct VirtIOFeature {
+ uint32_t flags;
+ size_t end;
+} VirtIOFeature;
+
+static VirtIOFeature feature_sizes[] = {
+ {.flags = 1 << VIRTIO_NET_F_MAC,
+ .end = endof(struct virtio_net_config, mac)},
+ {.flags = 1 << VIRTIO_NET_F_STATUS,
+ .end = endof(struct virtio_net_config, status)},
+ {.flags = 1 << VIRTIO_NET_F_MQ,
+ .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
+ {}
+};
+
+static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
+{
+ VirtIONet *n = qemu_get_nic_opaque(nc);
+
+ return &n->vqs[nc->queue_index];
+}
+
+static int vq2q(int queue_index)
+{
+ return queue_index / 2;
+}
+
+/* TODO
+ * - we could suppress RX interrupt if we were so inclined.
+ */
+
+static VirtIONet *to_virtio_net(VirtIODevice *vdev)
+{
+ return (VirtIONet *)vdev;
+}
+
+static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ struct virtio_net_config netcfg;
+
+ stw_p(&netcfg.status, n->status);
+ stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
+ memcpy(netcfg.mac, n->mac, ETH_ALEN);
+ memcpy(config, &netcfg, n->config_size);
+}
+
+static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ struct virtio_net_config netcfg = {};
+
+ memcpy(&netcfg, config, n->config_size);
+
+ if (!(n->vdev.guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR & 1) &&
+ memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
+ memcpy(n->mac, netcfg.mac, ETH_ALEN);
+ qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
+ }
+}
+
+static bool virtio_net_started(VirtIONet *n, uint8_t status)
+{
+ return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+ (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
+}
+
+static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
+{
+ NetClientState *nc = qemu_get_queue(n->nic);
+ int queues = n->multiqueue ? n->max_queues : 1;
+
+ if (!nc->peer) {
+ return;
+ }
+ if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+ return;
+ }
+
+ if (!tap_get_vhost_net(nc->peer)) {
+ return;
+ }
+
+ if (!!n->vhost_started == virtio_net_started(n, status) &&
+ !nc->peer->link_down) {
+ return;
+ }
+ if (!n->vhost_started) {
+ int r;
+ if (!vhost_net_query(tap_get_vhost_net(nc->peer), &n->vdev)) {
+ return;
+ }
+ n->vhost_started = 1;
+ r = vhost_net_start(&n->vdev, n->nic->ncs, queues);
+ if (r < 0) {
+ error_report("unable to start vhost net: %d: "
+ "falling back on userspace virtio", -r);
+ n->vhost_started = 0;
+ }
+ } else {
+ vhost_net_stop(&n->vdev, n->nic->ncs, queues);
+ n->vhost_started = 0;
+ }
+}
+
+static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ VirtIONetQueue *q;
+ int i;
+ uint8_t queue_status;
+
+ virtio_net_vhost_status(n, status);
+
+ for (i = 0; i < n->max_queues; i++) {
+ q = &n->vqs[i];
+
+ if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
+ queue_status = 0;
+ } else {
+ queue_status = status;
+ }
+
+ if (!q->tx_waiting) {
+ continue;
+ }
+
+ if (virtio_net_started(n, queue_status) && !n->vhost_started) {
+ if (q->tx_timer) {
+ qemu_mod_timer(q->tx_timer,
+ qemu_get_clock_ns(vm_clock) + n->tx_timeout);
+ } else {
+ qemu_bh_schedule(q->tx_bh);
+ }
+ } else {
+ if (q->tx_timer) {
+ qemu_del_timer(q->tx_timer);
+ } else {
+ qemu_bh_cancel(q->tx_bh);
+ }
+ }
+ }
+}
+
+static void virtio_net_set_link_status(NetClientState *nc)
+{
+ VirtIONet *n = qemu_get_nic_opaque(nc);
+ uint16_t old_status = n->status;
+
+ if (nc->link_down)
+ n->status &= ~VIRTIO_NET_S_LINK_UP;
+ else
+ n->status |= VIRTIO_NET_S_LINK_UP;
+
+ if (n->status != old_status)
+ virtio_notify_config(&n->vdev);
+
+ virtio_net_set_status(&n->vdev, n->vdev.status);
+}
+
+static void virtio_net_reset(VirtIODevice *vdev)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+
+ /* Reset back to compatibility mode */
+ n->promisc = 1;
+ n->allmulti = 0;
+ n->alluni = 0;
+ n->nomulti = 0;
+ n->nouni = 0;
+ n->nobcast = 0;
+ /* multiqueue is disabled by default */
+ n->curr_queues = 1;
+
+ /* Flush any MAC and VLAN filter table state */
+ n->mac_table.in_use = 0;
+ n->mac_table.first_multi = 0;
+ n->mac_table.multi_overflow = 0;
+ n->mac_table.uni_overflow = 0;
+ memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
+ memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
+ memset(n->vlans, 0, MAX_VLAN >> 3);
+}
+
+static void peer_test_vnet_hdr(VirtIONet *n)
+{
+ NetClientState *nc = qemu_get_queue(n->nic);
+ if (!nc->peer) {
+ return;
+ }
+
+ if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+ return;
+ }
+
+ n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
+}
+
+static int peer_has_vnet_hdr(VirtIONet *n)
+{
+ return n->has_vnet_hdr;
+}
+
+static int peer_has_ufo(VirtIONet *n)
+{
+ if (!peer_has_vnet_hdr(n))
+ return 0;
+
+ n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);
+
+ return n->has_ufo;
+}
+
+static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
+{
+ int i;
+ NetClientState *nc;
+
+ n->mergeable_rx_bufs = mergeable_rx_bufs;
+
+ n->guest_hdr_len = n->mergeable_rx_bufs ?
+ sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
+
+ for (i = 0; i < n->max_queues; i++) {
+ nc = qemu_get_subqueue(n->nic, i);
+
+ if (peer_has_vnet_hdr(n) &&
+ tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
+ tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
+ n->host_hdr_len = n->guest_hdr_len;
+ }
+ }
+}
+
+static int peer_attach(VirtIONet *n, int index)
+{
+ NetClientState *nc = qemu_get_subqueue(n->nic, index);
+
+ if (!nc->peer) {
+ return 0;
+ }
+
+ if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+ return 0;
+ }
+
+ return tap_enable(nc->peer);
+}
+
+static int peer_detach(VirtIONet *n, int index)
+{
+ NetClientState *nc = qemu_get_subqueue(n->nic, index);
+
+ if (!nc->peer) {
+ return 0;
+ }
+
+ if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+ return 0;
+ }
+
+ return tap_disable(nc->peer);
+}
+
+static void virtio_net_set_queues(VirtIONet *n)
+{
+ int i;
+
+ for (i = 0; i < n->max_queues; i++) {
+ if (i < n->curr_queues) {
+ assert(!peer_attach(n, i));
+ } else {
+ assert(!peer_detach(n, i));
+ }
+ }
+}
+
+static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
+
+static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ NetClientState *nc = qemu_get_queue(n->nic);
+
+ features |= (1 << VIRTIO_NET_F_MAC);
+
+ if (!peer_has_vnet_hdr(n)) {
+ features &= ~(0x1 << VIRTIO_NET_F_CSUM);
+ features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
+ features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
+ features &= ~(0x1 << VIRTIO_NET_F_HOST_ECN);
+
+ features &= ~(0x1 << VIRTIO_NET_F_GUEST_CSUM);
+ features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO4);
+ features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO6);
+ features &= ~(0x1 << VIRTIO_NET_F_GUEST_ECN);
+ }
+
+ if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
+ features &= ~(0x1 << VIRTIO_NET_F_GUEST_UFO);
+ features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
+ }
+
+ if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+ return features;
+ }
+ if (!tap_get_vhost_net(nc->peer)) {
+ return features;
+ }
+ return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
+}
+
+static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
+{
+ uint32_t features = 0;
+
+ /* Linux kernel 2.6.25. It understood MAC (as everyone must),
+ * but also these: */
+ features |= (1 << VIRTIO_NET_F_MAC);
+ features |= (1 << VIRTIO_NET_F_CSUM);
+ features |= (1 << VIRTIO_NET_F_HOST_TSO4);
+ features |= (1 << VIRTIO_NET_F_HOST_TSO6);
+ features |= (1 << VIRTIO_NET_F_HOST_ECN);
+
+ return features;
+}
+
+static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ int i;
+
+ virtio_net_set_multiqueue(n, !!(features & (1 << VIRTIO_NET_F_MQ)),
+ !!(features & (1 << VIRTIO_NET_F_CTRL_VQ)));
+
+ virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));
+
+ if (n->has_vnet_hdr) {
+ tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
+ (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
+ (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
+ (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
+ (features >> VIRTIO_NET_F_GUEST_ECN) & 1,
+ (features >> VIRTIO_NET_F_GUEST_UFO) & 1);
+ }
+
+ for (i = 0; i < n->max_queues; i++) {
+ NetClientState *nc = qemu_get_subqueue(n->nic, i);
+
+ if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+ continue;
+ }
+ if (!tap_get_vhost_net(nc->peer)) {
+ continue;
+ }
+ vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
+ }
+}
+
+static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
+ struct iovec *iov, unsigned int iov_cnt)
+{
+ uint8_t on;
+ size_t s;
+
+ s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
+ if (s != sizeof(on)) {
+ return VIRTIO_NET_ERR;
+ }
+
+ if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
+ n->promisc = on;
+ } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
+ n->allmulti = on;
+ } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
+ n->alluni = on;
+ } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
+ n->nomulti = on;
+ } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
+ n->nouni = on;
+ } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
+ n->nobcast = on;
+ } else {
+ return VIRTIO_NET_ERR;
+ }
+
+ return VIRTIO_NET_OK;
+}
+
+static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
+ struct iovec *iov, unsigned int iov_cnt)
+{
+ struct virtio_net_ctrl_mac mac_data;
+ size_t s;
+
+ if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
+ if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
+ return VIRTIO_NET_ERR;
+ }
+ s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
+ assert(s == sizeof(n->mac));
+ qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
+ return VIRTIO_NET_OK;
+ }
+
+ if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
+ return VIRTIO_NET_ERR;
+ }
+
+ n->mac_table.in_use = 0;
+ n->mac_table.first_multi = 0;
+ n->mac_table.uni_overflow = 0;
+ n->mac_table.multi_overflow = 0;
+ memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
+
+ s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
+ sizeof(mac_data.entries));
+ mac_data.entries = ldl_p(&mac_data.entries);
+ if (s != sizeof(mac_data.entries)) {
+ return VIRTIO_NET_ERR;
+ }
+ iov_discard_front(&iov, &iov_cnt, s);
+
+ if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
+ return VIRTIO_NET_ERR;
+ }
+
+ if (mac_data.entries <= MAC_TABLE_ENTRIES) {
+ s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
+ mac_data.entries * ETH_ALEN);
+ if (s != mac_data.entries * ETH_ALEN) {
+ return VIRTIO_NET_ERR;
+ }
+ n->mac_table.in_use += mac_data.entries;
+ } else {
+ n->mac_table.uni_overflow = 1;
+ }
+
+ iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
+
+ n->mac_table.first_multi = n->mac_table.in_use;
+
+ s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
+ sizeof(mac_data.entries));
+ mac_data.entries = ldl_p(&mac_data.entries);
+ if (s != sizeof(mac_data.entries)) {
+ return VIRTIO_NET_ERR;
+ }
+
+ iov_discard_front(&iov, &iov_cnt, s);
+
+ if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
+ return VIRTIO_NET_ERR;
+ }
+
+ if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
+ s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
+ mac_data.entries * ETH_ALEN);
+ if (s != mac_data.entries * ETH_ALEN) {
+ return VIRTIO_NET_ERR;
+ }
+ n->mac_table.in_use += mac_data.entries;
+ } else {
+ n->mac_table.multi_overflow = 1;
+ }
+
+ return VIRTIO_NET_OK;
+}
+
+static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
+ struct iovec *iov, unsigned int iov_cnt)
+{
+ uint16_t vid;
+ size_t s;
+
+ s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
+ vid = lduw_p(&vid);
+ if (s != sizeof(vid)) {
+ return VIRTIO_NET_ERR;
+ }
+
+ if (vid >= MAX_VLAN)
+ return VIRTIO_NET_ERR;
+
+ if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
+ n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
+ else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
+ n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
+ else
+ return VIRTIO_NET_ERR;
+
+ return VIRTIO_NET_OK;
+}
+
+static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
+ struct iovec *iov, unsigned int iov_cnt)
+{
+ struct virtio_net_ctrl_mq mq;
+ size_t s;
+ uint16_t queues;
+
+ s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
+ if (s != sizeof(mq)) {
+ return VIRTIO_NET_ERR;
+ }
+
+ if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
+ return VIRTIO_NET_ERR;
+ }
+
+ queues = lduw_p(&mq.virtqueue_pairs);
+
+ if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+ queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
+ queues > n->max_queues ||
+ !n->multiqueue) {
+ return VIRTIO_NET_ERR;
+ }
+
+ n->curr_queues = queues;
+ /* stop the backend before changing the number of queues to avoid handling a
+ * disabled queue */
+ virtio_net_set_status(&n->vdev, n->vdev.status);
+ virtio_net_set_queues(n);
+
+ return VIRTIO_NET_OK;
+}
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ struct virtio_net_ctrl_hdr ctrl;
+ virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+ VirtQueueElement elem;
+ size_t s;
+ struct iovec *iov;
+ unsigned int iov_cnt;
+
+ while (virtqueue_pop(vq, &elem)) {
+ if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
+ iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
+ error_report("virtio-net ctrl missing headers");
+ exit(1);
+ }
+
+ iov = elem.out_sg;
+ iov_cnt = elem.out_num;
+ s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
+ iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
+ if (s != sizeof(ctrl)) {
+ status = VIRTIO_NET_ERR;
+ } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
+ status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
+ } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
+ status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
+ } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
+ status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
+ } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
+ status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
+ }
+
+ s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
+ assert(s == sizeof(status));
+
+ virtqueue_push(vq, &elem, sizeof(status));
+ virtio_notify(vdev, vq);
+ }
+}
+
+/* RX */
+
+static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ int queue_index = vq2q(virtio_get_queue_index(vq));
+
+ qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
+}
+
+static int virtio_net_can_receive(NetClientState *nc)
+{
+ VirtIONet *n = qemu_get_nic_opaque(nc);
+ VirtIONetQueue *q = virtio_net_get_subqueue(nc);
+
+ if (!n->vdev.vm_running) {
+ return 0;
+ }
+
+ if (nc->queue_index >= n->curr_queues) {
+ return 0;
+ }
+
+ if (!virtio_queue_ready(q->rx_vq) ||
+ !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ return 0;
+ }
+
+ return 1;
+}
+
+static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
+{
+ VirtIONet *n = q->n;
+ if (virtio_queue_empty(q->rx_vq) ||
+ (n->mergeable_rx_bufs &&
+ !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
+ virtio_queue_set_notification(q->rx_vq, 1);
+
+ /* To avoid a race condition where the guest has made some buffers
+ * available after the above check but before notification was
+ * enabled, check for available buffers again.
+ */
+ if (virtio_queue_empty(q->rx_vq) ||
+ (n->mergeable_rx_bufs &&
+ !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
+ return 0;
+ }
+ }
+
+ virtio_queue_set_notification(q->rx_vq, 0);
+ return 1;
+}
+
+/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
+ * it never finds out that the packets don't have valid checksums. This
+ * causes dhclient to get upset. Fedora's carried a patch for ages to
+ * fix this with Xen but it hasn't appeared in an upstream release of
+ * dhclient yet.
+ *
+ * To avoid breaking existing guests, we catch udp packets and add
+ * checksums. This is terrible but it's better than hacking the guest
+ * kernels.
+ *
+ * N.B. if we introduce a zero-copy API, this operation is no longer free so
+ * we should provide a mechanism to disable it to avoid polluting the host
+ * cache.
+ */
+static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
+ uint8_t *buf, size_t size)
+{
+ if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
+ (size > 27 && size < 1500) && /* normal sized MTU */
+ (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
+ (buf[23] == 17) && /* ip.protocol == UDP */
+ (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
+ net_checksum_calculate(buf, size);
+ hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ }
+}
+
+static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
+ const void *buf, size_t size)
+{
+ if (n->has_vnet_hdr) {
+ /* FIXME this cast is evil */
+ void *wbuf = (void *)buf;
+ work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
+ size - n->host_hdr_len);
+ iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
+ } else {
+ struct virtio_net_hdr hdr = {
+ .flags = 0,
+ .gso_type = VIRTIO_NET_HDR_GSO_NONE
+ };
+ iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
+ }
+}
+
+static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
+{
+ static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ static const uint8_t vlan[] = {0x81, 0x00};
+ uint8_t *ptr = (uint8_t *)buf;
+ int i;
+
+ if (n->promisc)
+ return 1;
+
+ ptr += n->host_hdr_len;
+
+ if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
+ int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
+ if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
+ return 0;
+ }
+
+ if (ptr[0] & 1) { // multicast
+ if (!memcmp(ptr, bcast, sizeof(bcast))) {
+ return !n->nobcast;
+ } else if (n->nomulti) {
+ return 0;
+ } else if (n->allmulti || n->mac_table.multi_overflow) {
+ return 1;
+ }
+
+ for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
+ if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
+ return 1;
+ }
+ }
+ } else { // unicast
+ if (n->nouni) {
+ return 0;
+ } else if (n->alluni || n->mac_table.uni_overflow) {
+ return 1;
+ } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
+ return 1;
+ }
+
+ for (i = 0; i < n->mac_table.first_multi; i++) {
+ if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+{
+ VirtIONet *n = qemu_get_nic_opaque(nc);
+ VirtIONetQueue *q = virtio_net_get_subqueue(nc);
+ struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
+ struct virtio_net_hdr_mrg_rxbuf mhdr;
+ unsigned mhdr_cnt = 0;
+ size_t offset, i, guest_offset;
+
+ if (!virtio_net_can_receive(nc)) {
+ return -1;
+ }
+
+ /* hdr_len refers to the header we supply to the guest */
+ if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
+ return 0;
+ }
+
+ if (!receive_filter(n, buf, size))
+ return size;
+
+ offset = i = 0;
+
+ while (offset < size) {
+ VirtQueueElement elem;
+ int len, total;
+ const struct iovec *sg = elem.in_sg;
+
+ total = 0;
+
+ if (virtqueue_pop(q->rx_vq, &elem) == 0) {
+ if (i == 0)
+ return -1;
+ error_report("virtio-net unexpected empty queue: "
+ "i %zd mergeable %d offset %zd, size %zd, "
+ "guest hdr len %zd, host hdr len %zd guest features 0x%x",
+ i, n->mergeable_rx_bufs, offset, size,
+ n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
+ exit(1);
+ }
+
+ if (elem.in_num < 1) {
+ error_report("virtio-net receive queue contains no in buffers");
+ exit(1);
+ }
+
+ if (i == 0) {
+ assert(offset == 0);
+ if (n->mergeable_rx_bufs) {
+ mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
+ sg, elem.in_num,
+ offsetof(typeof(mhdr), num_buffers),
+ sizeof(mhdr.num_buffers));
+ }
+
+ receive_header(n, sg, elem.in_num, buf, size);
+ offset = n->host_hdr_len;
+ total += n->guest_hdr_len;
+ guest_offset = n->guest_hdr_len;
+ } else {
+ guest_offset = 0;
+ }
+
+ /* copy in packet. ugh */
+ len = iov_from_buf(sg, elem.in_num, guest_offset,
+ buf + offset, size - offset);
+ total += len;
+ offset += len;
+ /* If buffers can't be merged, at this point we
+ * must have consumed the complete packet.
+ * Otherwise, drop it. */
+ if (!n->mergeable_rx_bufs && offset < size) {
+#if 0
+ error_report("virtio-net truncated non-mergeable packet: "
+ "i %zd mergeable %d offset %zd, size %zd, "
+ "guest hdr len %zd, host hdr len %zd",
+ i, n->mergeable_rx_bufs,
+ offset, size, n->guest_hdr_len, n->host_hdr_len);
+#endif
+ return size;
+ }
+
+ /* signal other side */
+ virtqueue_fill(q->rx_vq, &elem, total, i++);
+ }
+
+ if (mhdr_cnt) {
+ stw_p(&mhdr.num_buffers, i);
+ iov_from_buf(mhdr_sg, mhdr_cnt,
+ 0,
+ &mhdr.num_buffers, sizeof mhdr.num_buffers);
+ }
+
+ virtqueue_flush(q->rx_vq, i);
+ virtio_notify(&n->vdev, q->rx_vq);
+
+ return size;
+}
+
+static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
+
+static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
+{
+ VirtIONet *n = qemu_get_nic_opaque(nc);
+ VirtIONetQueue *q = virtio_net_get_subqueue(nc);
+
+ virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
+ virtio_notify(&n->vdev, q->tx_vq);
+
+ q->async_tx.elem.out_num = q->async_tx.len = 0;
+
+ virtio_queue_set_notification(q->tx_vq, 1);
+ virtio_net_flush_tx(q);
+}
+
+/* TX */
+static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
+{
+ VirtIONet *n = q->n;
+ VirtQueueElement elem;
+ int32_t num_packets = 0;
+ int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
+ if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ return num_packets;
+ }
+
+ assert(n->vdev.vm_running);
+
+ if (q->async_tx.elem.out_num) {
+ virtio_queue_set_notification(q->tx_vq, 0);
+ return num_packets;
+ }
+
+ while (virtqueue_pop(q->tx_vq, &elem)) {
+ ssize_t ret, len;
+ unsigned int out_num = elem.out_num;
+ struct iovec *out_sg = &elem.out_sg[0];
+ struct iovec sg[VIRTQUEUE_MAX_SIZE];
+
+ if (out_num < 1) {
+ error_report("virtio-net header not in first element");
+ exit(1);
+ }
+
+ /*
+ * If host wants to see the guest header as is, we can
+ * pass it on unchanged. Otherwise, copy just the parts
+ * that host is interested in.
+ */
+ assert(n->host_hdr_len <= n->guest_hdr_len);
+ if (n->host_hdr_len != n->guest_hdr_len) {
+ unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+ out_sg, out_num,
+ 0, n->host_hdr_len);
+ sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
+ out_sg, out_num,
+ n->guest_hdr_len, -1);
+ out_num = sg_num;
+ out_sg = sg;
+ }
+
+ len = n->guest_hdr_len;
+
+ ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
+ out_sg, out_num, virtio_net_tx_complete);
+ if (ret == 0) {
+ virtio_queue_set_notification(q->tx_vq, 0);
+ q->async_tx.elem = elem;
+ q->async_tx.len = len;
+ return -EBUSY;
+ }
+
+ len += ret;
+
+ virtqueue_push(q->tx_vq, &elem, 0);
+ virtio_notify(&n->vdev, q->tx_vq);
+
+ if (++num_packets >= n->tx_burst) {
+ break;
+ }
+ }
+ return num_packets;
+}
+
+static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
+
+ /* This happens when device was stopped but VCPU wasn't. */
+ if (!n->vdev.vm_running) {
+ q->tx_waiting = 1;
+ return;
+ }
+
+ if (q->tx_waiting) {
+ virtio_queue_set_notification(vq, 1);
+ qemu_del_timer(q->tx_timer);
+ q->tx_waiting = 0;
+ virtio_net_flush_tx(q);
+ } else {
+ qemu_mod_timer(q->tx_timer,
+ qemu_get_clock_ns(vm_clock) + n->tx_timeout);
+ q->tx_waiting = 1;
+ virtio_queue_set_notification(vq, 0);
+ }
+}
+
+static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
+
+ if (unlikely(q->tx_waiting)) {
+ return;
+ }
+ q->tx_waiting = 1;
+ /* This happens when device was stopped but VCPU wasn't. */
+ if (!n->vdev.vm_running) {
+ return;
+ }
+ virtio_queue_set_notification(vq, 0);
+ qemu_bh_schedule(q->tx_bh);
+}
+
+static void virtio_net_tx_timer(void *opaque)
+{
+ VirtIONetQueue *q = opaque;
+ VirtIONet *n = q->n;
+ assert(n->vdev.vm_running);
+
+ q->tx_waiting = 0;
+
+ /* Just in case the driver is not ready on more */
+ if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
+ return;
+
+ virtio_queue_set_notification(q->tx_vq, 1);
+ virtio_net_flush_tx(q);
+}
+
+static void virtio_net_tx_bh(void *opaque)
+{
+ VirtIONetQueue *q = opaque;
+ VirtIONet *n = q->n;
+ int32_t ret;
+
+ assert(n->vdev.vm_running);
+
+ q->tx_waiting = 0;
+
+ /* Just in case the driver is not ready on more */
+ if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
+ return;
+
+ ret = virtio_net_flush_tx(q);
+ if (ret == -EBUSY) {
+ return; /* Notification re-enable handled by tx_complete */
+ }
+
+ /* If we flush a full burst of packets, assume there are
+ * more coming and immediately reschedule */
+ if (ret >= n->tx_burst) {
+ qemu_bh_schedule(q->tx_bh);
+ q->tx_waiting = 1;
+ return;
+ }
+
+ /* If less than a full burst, re-enable notification and flush
+ * anything that may have come in while we weren't looking. If
+ * we find something, assume the guest is still active and reschedule */
+ virtio_queue_set_notification(q->tx_vq, 1);
+ if (virtio_net_flush_tx(q) > 0) {
+ virtio_queue_set_notification(q->tx_vq, 0);
+ qemu_bh_schedule(q->tx_bh);
+ q->tx_waiting = 1;
+ }
+}
+
+static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
+{
+ VirtIODevice *vdev = &n->vdev;
+ int i, max = multiqueue ? n->max_queues : 1;
+
+ n->multiqueue = multiqueue;
+
+ for (i = 2; i <= n->max_queues * 2 + 1; i++) {
+ virtio_del_queue(vdev, i);
+ }
+
+ for (i = 1; i < max; i++) {
+ n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
+ if (n->vqs[i].tx_timer) {
+ n->vqs[i].tx_vq =
+ virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
+ n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
+ virtio_net_tx_timer,
+ &n->vqs[i]);
+ } else {
+ n->vqs[i].tx_vq =
+ virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
+ n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
+ }
+
+ n->vqs[i].tx_waiting = 0;
+ n->vqs[i].n = n;
+ }
+
+ if (ctrl) {
+ n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
+ }
+
+ virtio_net_set_queues(n);
+}
+
+static void virtio_net_save(QEMUFile *f, void *opaque)
+{
+ int i;
+ VirtIONet *n = opaque;
+
+ /* At this point, backend must be stopped, otherwise
+ * it might keep writing to memory. */
+ assert(!n->vhost_started);
+ virtio_save(&n->vdev, f);
+
+ qemu_put_buffer(f, n->mac, ETH_ALEN);
+ qemu_put_be32(f, n->vqs[0].tx_waiting);
+ qemu_put_be32(f, n->mergeable_rx_bufs);
+ qemu_put_be16(f, n->status);
+ qemu_put_byte(f, n->promisc);
+ qemu_put_byte(f, n->allmulti);
+ qemu_put_be32(f, n->mac_table.in_use);
+ qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
+ qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
+ qemu_put_be32(f, n->has_vnet_hdr);
+ qemu_put_byte(f, n->mac_table.multi_overflow);
+ qemu_put_byte(f, n->mac_table.uni_overflow);
+ qemu_put_byte(f, n->alluni);
+ qemu_put_byte(f, n->nomulti);
+ qemu_put_byte(f, n->nouni);
+ qemu_put_byte(f, n->nobcast);
+ qemu_put_byte(f, n->has_ufo);
+ if (n->max_queues > 1) {
+ qemu_put_be16(f, n->max_queues);
+ qemu_put_be16(f, n->curr_queues);
+ for (i = 1; i < n->curr_queues; i++) {
+ qemu_put_be32(f, n->vqs[i].tx_waiting);
+ }
+ }
+}
+
+static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
+{
+ VirtIONet *n = opaque;
+ int ret, i, link_down;
+
+ if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
+ return -EINVAL;
+
+ ret = virtio_load(&n->vdev, f);
+ if (ret) {
+ return ret;
+ }
+
+ qemu_get_buffer(f, n->mac, ETH_ALEN);
+ n->vqs[0].tx_waiting = qemu_get_be32(f);
+
+ virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));
+
+ if (version_id >= 3)
+ n->status = qemu_get_be16(f);
+
+ if (version_id >= 4) {
+ if (version_id < 8) {
+ n->promisc = qemu_get_be32(f);
+ n->allmulti = qemu_get_be32(f);
+ } else {
+ n->promisc = qemu_get_byte(f);
+ n->allmulti = qemu_get_byte(f);
+ }
+ }
+
+ if (version_id >= 5) {
+ n->mac_table.in_use = qemu_get_be32(f);
+ /* MAC_TABLE_ENTRIES may be different from the saved image */
+ if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
+ qemu_get_buffer(f, n->mac_table.macs,
+ n->mac_table.in_use * ETH_ALEN);
+ } else if (n->mac_table.in_use) {
+ uint8_t *buf = g_malloc0(n->mac_table.in_use);
+ qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);
+ g_free(buf);
+ n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
+ n->mac_table.in_use = 0;
+ }
+ }
+
+ if (version_id >= 6)
+ qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
+
+ if (version_id >= 7) {
+ if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
+ error_report("virtio-net: saved image requires vnet_hdr=on");
+ return -1;
+ }
+
+ if (n->has_vnet_hdr) {
+ tap_set_offload(qemu_get_queue(n->nic)->peer,
+ (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
+ (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
+ (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
+ (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN) & 1,
+ (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO) & 1);
+ }
+ }
+
+ if (version_id >= 9) {
+ n->mac_table.multi_overflow = qemu_get_byte(f);
+ n->mac_table.uni_overflow = qemu_get_byte(f);
+ }
+
+ if (version_id >= 10) {
+ n->alluni = qemu_get_byte(f);
+ n->nomulti = qemu_get_byte(f);
+ n->nouni = qemu_get_byte(f);
+ n->nobcast = qemu_get_byte(f);
+ }
+
+ if (version_id >= 11) {
+ if (qemu_get_byte(f) && !peer_has_ufo(n)) {
+ error_report("virtio-net: saved image requires TUN_F_UFO support");
+ return -1;
+ }
+ }
+
+ if (n->max_queues > 1) {
+ if (n->max_queues != qemu_get_be16(f)) {
+ error_report("virtio-net: different max_queues ");
+ return -1;
+ }
+
+ n->curr_queues = qemu_get_be16(f);
+ for (i = 1; i < n->curr_queues; i++) {
+ n->vqs[i].tx_waiting = qemu_get_be32(f);
+ }
+ }
+
+ virtio_net_set_queues(n);
+
+ /* Find the first multicast entry in the saved MAC filter */
+ for (i = 0; i < n->mac_table.in_use; i++) {
+ if (n->mac_table.macs[i * ETH_ALEN] & 1) {
+ break;
+ }
+ }
+ n->mac_table.first_multi = i;
+
+ /* nc.link_down can't be migrated, so infer link_down according
+ * to link status bit in n->status */
+ link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
+ for (i = 0; i < n->max_queues; i++) {
+ qemu_get_subqueue(n->nic, i)->link_down = link_down;
+ }
+
+ return 0;
+}
+
+static void virtio_net_cleanup(NetClientState *nc)
+{
+ VirtIONet *n = qemu_get_nic_opaque(nc);
+
+ n->nic = NULL;
+}
+
+static NetClientInfo net_virtio_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_NIC,
+ .size = sizeof(NICState),
+ .can_receive = virtio_net_can_receive,
+ .receive = virtio_net_receive,
+ .cleanup = virtio_net_cleanup,
+ .link_status_changed = virtio_net_set_link_status,
+};
+
+static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
+ assert(n->vhost_started);
+ return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx);
+}
+
+static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+ bool mask)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
+ assert(n->vhost_started);
+ vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer),
+ vdev, idx, mask);
+}
+
+VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
+ virtio_net_conf *net, uint32_t host_features)
+{
+ VirtIONet *n;
+ int i, config_size = 0;
+
+ for (i = 0; feature_sizes[i].flags != 0; i++) {
+ if (host_features & feature_sizes[i].flags) {
+ config_size = MAX(feature_sizes[i].end, config_size);
+ }
+ }
+
+ n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
+ config_size, sizeof(VirtIONet));
+
+ n->config_size = config_size;
+ n->vdev.get_config = virtio_net_get_config;
+ n->vdev.set_config = virtio_net_set_config;
+ n->vdev.get_features = virtio_net_get_features;
+ n->vdev.set_features = virtio_net_set_features;
+ n->vdev.bad_features = virtio_net_bad_features;
+ n->vdev.reset = virtio_net_reset;
+ n->vdev.set_status = virtio_net_set_status;
+ n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
+ n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
+ n->max_queues = MAX(conf->queues, 1);
+ n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
+ n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
+ n->curr_queues = 1;
+ n->vqs[0].n = n;
+ n->tx_timeout = net->txtimer;
+
+ if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
+ error_report("virtio-net: "
+ "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
+ net->tx);
+ error_report("Defaulting to \"bh\"");
+ }
+
+ if (net->tx && !strcmp(net->tx, "timer")) {
+ n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
+ virtio_net_handle_tx_timer);
+ n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
+ &n->vqs[0]);
+ } else {
+ n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
+ virtio_net_handle_tx_bh);
+ n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
+ }
+ n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
+ qemu_macaddr_default_if_unset(&conf->macaddr);
+ memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
+ n->status = VIRTIO_NET_S_LINK_UP;
+
+ n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);
+ peer_test_vnet_hdr(n);
+ if (peer_has_vnet_hdr(n)) {
+ for (i = 0; i < n->max_queues; i++) {
+ tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
+ }
+ n->host_hdr_len = sizeof(struct virtio_net_hdr);
+ } else {
+ n->host_hdr_len = 0;
+ }
+
+ qemu_format_nic_info_str(qemu_get_queue(n->nic), conf->macaddr.a);
+
+ n->vqs[0].tx_waiting = 0;
+ n->tx_burst = net->txburst;
+ virtio_net_set_mrg_rx_bufs(n, 0);
+ n->promisc = 1; /* for compatibility */
+
+ n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
+
+ n->vlans = g_malloc0(MAX_VLAN >> 3);
+
+ n->qdev = dev;
+ register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
+ virtio_net_save, virtio_net_load, n);
+
+ add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");
+
+ return &n->vdev;
+}
+
+void virtio_net_exit(VirtIODevice *vdev)
+{
+ VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
+ int i;
+
+ /* This will stop vhost backend if appropriate. */
+ virtio_net_set_status(vdev, 0);
+
+ unregister_savevm(n->qdev, "virtio-net", n);
+
+ g_free(n->mac_table.macs);
+ g_free(n->vlans);
+
+ for (i = 0; i < n->max_queues; i++) {
+ VirtIONetQueue *q = &n->vqs[i];
+ NetClientState *nc = qemu_get_subqueue(n->nic, i);
+
+ qemu_purge_queued_packets(nc);
+
+ if (q->tx_timer) {
+ qemu_del_timer(q->tx_timer);
+ qemu_free_timer(q->tx_timer);
+ } else {
+ qemu_bh_delete(q->tx_bh);
+ }
+ }
+
+ g_free(n->vqs);
+ qemu_del_nic(n->nic);
+ virtio_cleanup(&n->vdev);
+}
common-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o
common-obj-$(CONFIG_ESP) += esp.o
common-obj-$(CONFIG_ESP_PCI) += esp-pci.o
+obj-$(CONFIG_VIRTIO) += virtio-scsi.o
--- /dev/null
+/*
+ * Virtio SCSI HBA
+ *
+ * Copyright IBM, Corp. 2010
+ * Copyright Red Hat, Inc. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw/virtio/virtio-scsi.h"
+#include "qemu/error-report.h"
+#include <hw/scsi/scsi.h>
+#include <block/scsi.h>
+#include <hw/virtio/virtio-bus.h>
+
+#define VIRTIO_SCSI_VQ_SIZE 128
+#define VIRTIO_SCSI_CDB_SIZE 32
+#define VIRTIO_SCSI_SENSE_SIZE 96
+#define VIRTIO_SCSI_MAX_CHANNEL 0
+#define VIRTIO_SCSI_MAX_TARGET 255
+#define VIRTIO_SCSI_MAX_LUN 16383
+
+/* Response codes */
+#define VIRTIO_SCSI_S_OK 0
+#define VIRTIO_SCSI_S_OVERRUN 1
+#define VIRTIO_SCSI_S_ABORTED 2
+#define VIRTIO_SCSI_S_BAD_TARGET 3
+#define VIRTIO_SCSI_S_RESET 4
+#define VIRTIO_SCSI_S_BUSY 5
+#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6
+#define VIRTIO_SCSI_S_TARGET_FAILURE 7
+#define VIRTIO_SCSI_S_NEXUS_FAILURE 8
+#define VIRTIO_SCSI_S_FAILURE 9
+#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10
+#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11
+#define VIRTIO_SCSI_S_INCORRECT_LUN 12
+
+/* Controlq type codes. */
+#define VIRTIO_SCSI_T_TMF 0
+#define VIRTIO_SCSI_T_AN_QUERY 1
+#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2
+
+/* Valid TMF subtypes. */
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0
+#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1
+#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2
+#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3
+#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4
+#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6
+#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7
+
+/* Events. */
+#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000
+#define VIRTIO_SCSI_T_NO_EVENT 0
+#define VIRTIO_SCSI_T_TRANSPORT_RESET 1
+#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2
+#define VIRTIO_SCSI_T_PARAM_CHANGE 3
+
+/* Reasons for transport reset event */
+#define VIRTIO_SCSI_EVT_RESET_HARD 0
+#define VIRTIO_SCSI_EVT_RESET_RESCAN 1
+#define VIRTIO_SCSI_EVT_RESET_REMOVED 2
+
+/* SCSI command request, followed by data-out */
+typedef struct {
+ uint8_t lun[8]; /* Logical Unit Number */
+ uint64_t tag; /* Command identifier */
+ uint8_t task_attr; /* Task attribute */
+ uint8_t prio;
+ uint8_t crn;
+ uint8_t cdb[];
+} QEMU_PACKED VirtIOSCSICmdReq;
+
+/* Response, followed by sense data and data-in */
+typedef struct {
+ uint32_t sense_len; /* Sense data length */
+ uint32_t resid; /* Residual bytes in data buffer */
+ uint16_t status_qualifier; /* Status qualifier */
+ uint8_t status; /* Command completion status */
+ uint8_t response; /* Response values */
+ uint8_t sense[];
+} QEMU_PACKED VirtIOSCSICmdResp;
+
+/* Task Management Request */
+typedef struct {
+ uint32_t type;
+ uint32_t subtype;
+ uint8_t lun[8];
+ uint64_t tag;
+} QEMU_PACKED VirtIOSCSICtrlTMFReq;
+
+typedef struct {
+ uint8_t response;
+} QEMU_PACKED VirtIOSCSICtrlTMFResp;
+
+/* Asynchronous notification query/subscription */
+typedef struct {
+ uint32_t type;
+ uint8_t lun[8];
+ uint32_t event_requested;
+} QEMU_PACKED VirtIOSCSICtrlANReq;
+
+typedef struct {
+ uint32_t event_actual;
+ uint8_t response;
+} QEMU_PACKED VirtIOSCSICtrlANResp;
+
+typedef struct {
+ uint32_t event;
+ uint8_t lun[8];
+ uint32_t reason;
+} QEMU_PACKED VirtIOSCSIEvent;
+
+typedef struct {
+ uint32_t num_queues;
+ uint32_t seg_max;
+ uint32_t max_sectors;
+ uint32_t cmd_per_lun;
+ uint32_t event_info_size;
+ uint32_t sense_size;
+ uint32_t cdb_size;
+ uint16_t max_channel;
+ uint16_t max_target;
+ uint32_t max_lun;
+} QEMU_PACKED VirtIOSCSIConfig;
+
+typedef struct VirtIOSCSIReq {
+ VirtIOSCSI *dev;
+ VirtQueue *vq;
+ VirtQueueElement elem;
+ QEMUSGList qsgl;
+ SCSIRequest *sreq;
+ union {
+ char *buf;
+ VirtIOSCSICmdReq *cmd;
+ VirtIOSCSICtrlTMFReq *tmf;
+ VirtIOSCSICtrlANReq *an;
+ } req;
+ union {
+ char *buf;
+ VirtIOSCSICmdResp *cmd;
+ VirtIOSCSICtrlTMFResp *tmf;
+ VirtIOSCSICtrlANResp *an;
+ VirtIOSCSIEvent *event;
+ } resp;
+} VirtIOSCSIReq;
+
+static inline int virtio_scsi_get_lun(uint8_t *lun)
+{
+ return ((lun[2] << 8) | lun[3]) & 0x3FFF;
+}
+
+static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun)
+{
+ if (lun[0] != 1) {
+ return NULL;
+ }
+ if (lun[2] != 0 && !(lun[2] >= 0x40 && lun[2] < 0x80)) {
+ return NULL;
+ }
+ return scsi_device_find(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun));
+}
+
+static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
+{
+ VirtIOSCSI *s = req->dev;
+ VirtQueue *vq = req->vq;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ virtqueue_push(vq, &req->elem, req->qsgl.size + req->elem.in_sg[0].iov_len);
+ qemu_sglist_destroy(&req->qsgl);
+ if (req->sreq) {
+ req->sreq->hba_private = NULL;
+ scsi_req_unref(req->sreq);
+ }
+ g_free(req);
+ virtio_notify(vdev, vq);
+}
+
+static void virtio_scsi_bad_req(void)
+{
+ error_report("wrong size for virtio-scsi headers");
+ exit(1);
+}
+
+static void qemu_sgl_init_external(QEMUSGList *qsgl, struct iovec *sg,
+ hwaddr *addr, int num)
+{
+ qemu_sglist_init(qsgl, num, &dma_context_memory);
+ while (num--) {
+ qemu_sglist_add(qsgl, *(addr++), (sg++)->iov_len);
+ }
+}
+
+static void virtio_scsi_parse_req(VirtIOSCSI *s, VirtQueue *vq,
+ VirtIOSCSIReq *req)
+{
+ assert(req->elem.in_num);
+ req->vq = vq;
+ req->dev = s;
+ req->sreq = NULL;
+ if (req->elem.out_num) {
+ req->req.buf = req->elem.out_sg[0].iov_base;
+ }
+ req->resp.buf = req->elem.in_sg[0].iov_base;
+
+ if (req->elem.out_num > 1) {
+ qemu_sgl_init_external(&req->qsgl, &req->elem.out_sg[1],
+ &req->elem.out_addr[1],
+ req->elem.out_num - 1);
+ } else {
+ qemu_sgl_init_external(&req->qsgl, &req->elem.in_sg[1],
+ &req->elem.in_addr[1],
+ req->elem.in_num - 1);
+ }
+}
+
+static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq)
+{
+ VirtIOSCSIReq *req;
+ req = g_malloc(sizeof(*req));
+ if (!virtqueue_pop(vq, &req->elem)) {
+ g_free(req);
+ return NULL;
+ }
+
+ virtio_scsi_parse_req(s, vq, req);
+ return req;
+}
+
+static void virtio_scsi_save_request(QEMUFile *f, SCSIRequest *sreq)
+{
+ VirtIOSCSIReq *req = sreq->hba_private;
+ uint32_t n = virtio_queue_get_id(req->vq) - 2;
+
+ assert(n < req->dev->conf.num_queues);
+ qemu_put_be32s(f, &n);
+ qemu_put_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
+}
+
+static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
+{
+ SCSIBus *bus = sreq->bus;
+ VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+ VirtIOSCSIReq *req;
+ uint32_t n;
+
+ req = g_malloc(sizeof(*req));
+ qemu_get_be32s(f, &n);
+ assert(n < s->conf.num_queues);
+ qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
+ virtio_scsi_parse_req(s, s->cmd_vqs[n], req);
+
+ scsi_req_ref(sreq);
+ req->sreq = sreq;
+ if (req->sreq->cmd.mode != SCSI_XFER_NONE) {
+ int req_mode =
+ (req->elem.in_num > 1 ? SCSI_XFER_FROM_DEV : SCSI_XFER_TO_DEV);
+
+ assert(req->sreq->cmd.mode == req_mode);
+ }
+ return req;
+}
+
+static void virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
+{
+ SCSIDevice *d = virtio_scsi_device_find(s, req->req.tmf->lun);
+ SCSIRequest *r, *next;
+ BusChild *kid;
+ int target;
+
+ /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */
+ req->resp.tmf->response = VIRTIO_SCSI_S_OK;
+
+ switch (req->req.tmf->subtype) {
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+ case VIRTIO_SCSI_T_TMF_QUERY_TASK:
+ if (!d) {
+ goto fail;
+ }
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) {
+ goto incorrect_lun;
+ }
+ QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+ VirtIOSCSIReq *cmd_req = r->hba_private;
+ if (cmd_req && cmd_req->req.cmd->tag == req->req.tmf->tag) {
+ break;
+ }
+ }
+ if (r) {
+ /*
+ * Assert that the request has not been completed yet, we
+ * check for it in the loop above.
+ */
+ assert(r->hba_private);
+ if (req->req.tmf->subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) {
+ /* "If the specified command is present in the task set, then
+ * return a service response set to FUNCTION SUCCEEDED".
+ */
+ req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+ } else {
+ scsi_req_cancel(r);
+ }
+ }
+ break;
+
+ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
+ if (!d) {
+ goto fail;
+ }
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) {
+ goto incorrect_lun;
+ }
+ s->resetting++;
+ qdev_reset_all(&d->qdev);
+ s->resetting--;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+ case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+ case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
+ if (!d) {
+ goto fail;
+ }
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) {
+ goto incorrect_lun;
+ }
+ QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+ if (r->hba_private) {
+ if (req->req.tmf->subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) {
+ /* "If there is any command present in the task set, then
+ * return a service response set to FUNCTION SUCCEEDED".
+ */
+ req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+ break;
+ } else {
+ scsi_req_cancel(r);
+ }
+ }
+ }
+ break;
+
+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
+ target = req->req.tmf->lun[1];
+ s->resetting++;
+ QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
+ d = DO_UPCAST(SCSIDevice, qdev, kid->child);
+ if (d->channel == 0 && d->id == target) {
+ qdev_reset_all(&d->qdev);
+ }
+ }
+ s->resetting--;
+ break;
+
+ case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
+ default:
+ req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+ break;
+ }
+
+ return;
+
+incorrect_lun:
+ req->resp.tmf->response = VIRTIO_SCSI_S_INCORRECT_LUN;
+ return;
+
+fail:
+ req->resp.tmf->response = VIRTIO_SCSI_S_BAD_TARGET;
+}
+
+static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOSCSI *s = (VirtIOSCSI *)vdev;
+ VirtIOSCSIReq *req;
+
+ while ((req = virtio_scsi_pop_req(s, vq))) {
+ int out_size, in_size;
+ if (req->elem.out_num < 1 || req->elem.in_num < 1) {
+ virtio_scsi_bad_req();
+ continue;
+ }
+
+ out_size = req->elem.out_sg[0].iov_len;
+ in_size = req->elem.in_sg[0].iov_len;
+ if (req->req.tmf->type == VIRTIO_SCSI_T_TMF) {
+ if (out_size < sizeof(VirtIOSCSICtrlTMFReq) ||
+ in_size < sizeof(VirtIOSCSICtrlTMFResp)) {
+ virtio_scsi_bad_req();
+ }
+ virtio_scsi_do_tmf(s, req);
+
+ } else if (req->req.tmf->type == VIRTIO_SCSI_T_AN_QUERY ||
+ req->req.tmf->type == VIRTIO_SCSI_T_AN_SUBSCRIBE) {
+ if (out_size < sizeof(VirtIOSCSICtrlANReq) ||
+ in_size < sizeof(VirtIOSCSICtrlANResp)) {
+ virtio_scsi_bad_req();
+ }
+ req->resp.an->event_actual = 0;
+ req->resp.an->response = VIRTIO_SCSI_S_OK;
+ }
+ virtio_scsi_complete_req(req);
+ }
+}
+
+static void virtio_scsi_command_complete(SCSIRequest *r, uint32_t status,
+ size_t resid)
+{
+ VirtIOSCSIReq *req = r->hba_private;
+ uint32_t sense_len;
+
+ req->resp.cmd->response = VIRTIO_SCSI_S_OK;
+ req->resp.cmd->status = status;
+ if (req->resp.cmd->status == GOOD) {
+ req->resp.cmd->resid = tswap32(resid);
+ } else {
+ req->resp.cmd->resid = 0;
+ sense_len = scsi_req_get_sense(r, req->resp.cmd->sense,
+ VIRTIO_SCSI_SENSE_SIZE);
+ req->resp.cmd->sense_len = tswap32(sense_len);
+ }
+ virtio_scsi_complete_req(req);
+}
+
+static QEMUSGList *virtio_scsi_get_sg_list(SCSIRequest *r)
+{
+ VirtIOSCSIReq *req = r->hba_private;
+
+ return &req->qsgl;
+}
+
+static void virtio_scsi_request_cancelled(SCSIRequest *r)
+{
+ VirtIOSCSIReq *req = r->hba_private;
+
+ if (!req) {
+ return;
+ }
+ if (req->dev->resetting) {
+ req->resp.cmd->response = VIRTIO_SCSI_S_RESET;
+ } else {
+ req->resp.cmd->response = VIRTIO_SCSI_S_ABORTED;
+ }
+ virtio_scsi_complete_req(req);
+}
+
+static void virtio_scsi_fail_cmd_req(VirtIOSCSIReq *req)
+{
+ req->resp.cmd->response = VIRTIO_SCSI_S_FAILURE;
+ virtio_scsi_complete_req(req);
+}
+
+static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOSCSI *s = (VirtIOSCSI *)vdev;
+ VirtIOSCSIReq *req;
+ int n;
+
+ while ((req = virtio_scsi_pop_req(s, vq))) {
+ SCSIDevice *d;
+ int out_size, in_size;
+ if (req->elem.out_num < 1 || req->elem.in_num < 1) {
+ virtio_scsi_bad_req();
+ }
+
+ out_size = req->elem.out_sg[0].iov_len;
+ in_size = req->elem.in_sg[0].iov_len;
+ if (out_size < sizeof(VirtIOSCSICmdReq) + s->cdb_size ||
+ in_size < sizeof(VirtIOSCSICmdResp) + s->sense_size) {
+ virtio_scsi_bad_req();
+ }
+
+ if (req->elem.out_num > 1 && req->elem.in_num > 1) {
+ virtio_scsi_fail_cmd_req(req);
+ continue;
+ }
+
+ d = virtio_scsi_device_find(s, req->req.cmd->lun);
+ if (!d) {
+ req->resp.cmd->response = VIRTIO_SCSI_S_BAD_TARGET;
+ virtio_scsi_complete_req(req);
+ continue;
+ }
+ req->sreq = scsi_req_new(d, req->req.cmd->tag,
+ virtio_scsi_get_lun(req->req.cmd->lun),
+ req->req.cmd->cdb, req);
+
+ if (req->sreq->cmd.mode != SCSI_XFER_NONE) {
+ int req_mode =
+ (req->elem.in_num > 1 ? SCSI_XFER_FROM_DEV : SCSI_XFER_TO_DEV);
+
+ if (req->sreq->cmd.mode != req_mode ||
+ req->sreq->cmd.xfer > req->qsgl.size) {
+ req->resp.cmd->response = VIRTIO_SCSI_S_OVERRUN;
+ virtio_scsi_complete_req(req);
+ continue;
+ }
+ }
+
+ n = scsi_req_enqueue(req->sreq);
+ if (n) {
+ scsi_req_continue(req->sreq);
+ }
+ }
+}
+
+static void virtio_scsi_get_config(VirtIODevice *vdev,
+ uint8_t *config)
+{
+ VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config;
+ VirtIOSCSI *s = (VirtIOSCSI *)vdev;
+
+ stl_raw(&scsiconf->num_queues, s->conf.num_queues);
+ stl_raw(&scsiconf->seg_max, 128 - 2);
+ stl_raw(&scsiconf->max_sectors, s->conf.max_sectors);
+ stl_raw(&scsiconf->cmd_per_lun, s->conf.cmd_per_lun);
+ stl_raw(&scsiconf->event_info_size, sizeof(VirtIOSCSIEvent));
+ stl_raw(&scsiconf->sense_size, s->sense_size);
+ stl_raw(&scsiconf->cdb_size, s->cdb_size);
+ stw_raw(&scsiconf->max_channel, VIRTIO_SCSI_MAX_CHANNEL);
+ stw_raw(&scsiconf->max_target, VIRTIO_SCSI_MAX_TARGET);
+ stl_raw(&scsiconf->max_lun, VIRTIO_SCSI_MAX_LUN);
+}
+
+static void virtio_scsi_set_config(VirtIODevice *vdev,
+ const uint8_t *config)
+{
+ VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config;
+ VirtIOSCSI *s = (VirtIOSCSI *)vdev;
+
+ if ((uint32_t) ldl_raw(&scsiconf->sense_size) >= 65536 ||
+ (uint32_t) ldl_raw(&scsiconf->cdb_size) >= 256) {
+ error_report("bad data written to virtio-scsi configuration space");
+ exit(1);
+ }
+
+ s->sense_size = ldl_raw(&scsiconf->sense_size);
+ s->cdb_size = ldl_raw(&scsiconf->cdb_size);
+}
+
+static uint32_t virtio_scsi_get_features(VirtIODevice *vdev,
+ uint32_t requested_features)
+{
+ return requested_features;
+}
+
+static void virtio_scsi_reset(VirtIODevice *vdev)
+{
+ VirtIOSCSI *s = (VirtIOSCSI *)vdev;
+
+ s->resetting++;
+ qbus_reset_all(&s->bus.qbus);
+ s->resetting--;
+
+ s->sense_size = VIRTIO_SCSI_SENSE_SIZE;
+ s->cdb_size = VIRTIO_SCSI_CDB_SIZE;
+ s->events_dropped = false;
+}
+
+/* The device does not have anything to save beyond the virtio data.
+ * Request data is saved with callbacks from SCSI devices.
+ */
+static void virtio_scsi_save(QEMUFile *f, void *opaque)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+ virtio_save(vdev, f);
+}
+
+static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+ int ret;
+
+ ret = virtio_load(vdev, f);
+ if (ret) {
+ return ret;
+ }
+ return 0;
+}
+
+static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
+ uint32_t event, uint32_t reason)
+{
+ VirtIOSCSIReq *req = virtio_scsi_pop_req(s, s->event_vq);
+ VirtIOSCSIEvent *evt;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ int in_size;
+
+ if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ return;
+ }
+
+ if (!req) {
+ s->events_dropped = true;
+ return;
+ }
+
+ if (req->elem.out_num || req->elem.in_num != 1) {
+ virtio_scsi_bad_req();
+ }
+
+ if (s->events_dropped) {
+ event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+ s->events_dropped = false;
+ }
+
+ in_size = req->elem.in_sg[0].iov_len;
+ if (in_size < sizeof(VirtIOSCSIEvent)) {
+ virtio_scsi_bad_req();
+ }
+
+ evt = req->resp.event;
+ memset(evt, 0, sizeof(VirtIOSCSIEvent));
+ evt->event = event;
+ evt->reason = reason;
+ if (!dev) {
+ assert(event == VIRTIO_SCSI_T_NO_EVENT);
+ } else {
+ evt->lun[0] = 1;
+ evt->lun[1] = dev->id;
+
+ /* Linux wants us to keep the same encoding we use for REPORT LUNS. */
+ if (dev->lun >= 256) {
+ evt->lun[2] = (dev->lun >> 8) | 0x40;
+ }
+ evt->lun[3] = dev->lun & 0xFF;
+ }
+ virtio_scsi_complete_req(req);
+}
+
+static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOSCSI *s = VIRTIO_SCSI(vdev);
+
+ if (s->events_dropped) {
+ virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
+ }
+}
+
+static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
+{
+ VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+ if (((vdev->guest_features >> VIRTIO_SCSI_F_CHANGE) & 1) &&
+ dev->type != TYPE_ROM) {
+ virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
+ sense.asc | (sense.ascq << 8));
+ }
+}
+
+static void virtio_scsi_hotplug(SCSIBus *bus, SCSIDevice *dev)
+{
+ VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+ if ((vdev->guest_features >> VIRTIO_SCSI_F_HOTPLUG) & 1) {
+ virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_TRANSPORT_RESET,
+ VIRTIO_SCSI_EVT_RESET_RESCAN);
+ }
+}
+
+static void virtio_scsi_hot_unplug(SCSIBus *bus, SCSIDevice *dev)
+{
+ VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+ if ((vdev->guest_features >> VIRTIO_SCSI_F_HOTPLUG) & 1) {
+ virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_TRANSPORT_RESET,
+ VIRTIO_SCSI_EVT_RESET_REMOVED);
+ }
+}
+
+static struct SCSIBusInfo virtio_scsi_scsi_info = {
+ .tcq = true,
+ .max_channel = VIRTIO_SCSI_MAX_CHANNEL,
+ .max_target = VIRTIO_SCSI_MAX_TARGET,
+ .max_lun = VIRTIO_SCSI_MAX_LUN,
+
+ .complete = virtio_scsi_command_complete,
+ .cancel = virtio_scsi_request_cancelled,
+ .change = virtio_scsi_change,
+ .hotplug = virtio_scsi_hotplug,
+ .hot_unplug = virtio_scsi_hot_unplug,
+ .get_sg_list = virtio_scsi_get_sg_list,
+ .save_request = virtio_scsi_save_request,
+ .load_request = virtio_scsi_load_request,
+};
+
+static int virtio_scsi_device_init(VirtIODevice *vdev)
+{
+ DeviceState *qdev = DEVICE(vdev);
+ VirtIOSCSI *s = VIRTIO_SCSI(vdev);
+ static int virtio_scsi_id;
+ int i;
+
+ virtio_init(VIRTIO_DEVICE(s), "virtio-scsi", VIRTIO_ID_SCSI,
+ sizeof(VirtIOSCSIConfig));
+
+ s->cmd_vqs = g_malloc0(s->conf.num_queues * sizeof(VirtQueue *));
+
+ /* TODO set up vdev function pointers */
+ vdev->get_config = virtio_scsi_get_config;
+ vdev->set_config = virtio_scsi_set_config;
+ vdev->get_features = virtio_scsi_get_features;
+ vdev->reset = virtio_scsi_reset;
+
+ s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
+ virtio_scsi_handle_ctrl);
+ s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
+ virtio_scsi_handle_event);
+ for (i = 0; i < s->conf.num_queues; i++) {
+ s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
+ virtio_scsi_handle_cmd);
+ }
+
+ scsi_bus_new(&s->bus, qdev, &virtio_scsi_scsi_info);
+ if (!qdev->hotplugged) {
+ scsi_bus_legacy_handle_cmdline(&s->bus);
+ }
+
+ register_savevm(qdev, "virtio-scsi", virtio_scsi_id++, 1,
+ virtio_scsi_save, virtio_scsi_load, s);
+
+ return 0;
+}
+
+static int virtio_scsi_device_exit(DeviceState *qdev)
+{
+ VirtIOSCSI *s = VIRTIO_SCSI(qdev);
+ VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
+
+ unregister_savevm(qdev, "virtio-scsi", s);
+ g_free(s->cmd_vqs);
+ virtio_common_cleanup(vdev);
+ return 0;
+}
+
+static Property virtio_scsi_properties[] = {
+ DEFINE_VIRTIO_SCSI_PROPERTIES(VirtIOSCSI, conf),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_scsi_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+ dc->exit = virtio_scsi_device_exit;
+ dc->props = virtio_scsi_properties;
+ vdc->init = virtio_scsi_device_init;
+ vdc->get_config = virtio_scsi_get_config;
+ vdc->set_config = virtio_scsi_set_config;
+ vdc->get_features = virtio_scsi_get_features;
+ vdc->reset = virtio_scsi_reset;
+}
+
+static const TypeInfo virtio_scsi_info = {
+ .name = TYPE_VIRTIO_SCSI,
+ .parent = TYPE_VIRTIO_DEVICE,
+ .instance_size = sizeof(VirtIOSCSI),
+ .class_init = virtio_scsi_class_init,
+};
+
+static void virtio_register_types(void)
+{
+ type_register_static(&virtio_scsi_info);
+}
+
+type_init(virtio_register_types)
+++ /dev/null
-/*
- * vhost support
- *
- * Copyright Red Hat, Inc. 2010
- *
- * Authors:
- * Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include <sys/ioctl.h>
-#include "hw/virtio/vhost.h"
-#include "hw/hw.h"
-#include "qemu/range.h"
-#include <linux/vhost.h>
-#include "exec/address-spaces.h"
-
-static void vhost_dev_sync_region(struct vhost_dev *dev,
- MemoryRegionSection *section,
- uint64_t mfirst, uint64_t mlast,
- uint64_t rfirst, uint64_t rlast)
-{
- uint64_t start = MAX(mfirst, rfirst);
- uint64_t end = MIN(mlast, rlast);
- vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
- vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
- uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
-
- if (end < start) {
- return;
- }
- assert(end / VHOST_LOG_CHUNK < dev->log_size);
- assert(start / VHOST_LOG_CHUNK < dev->log_size);
-
- for (;from < to; ++from) {
- vhost_log_chunk_t log;
- int bit;
- /* We first check with non-atomic: much cheaper,
- * and we expect non-dirty to be the common case. */
- if (!*from) {
- addr += VHOST_LOG_CHUNK;
- continue;
- }
- /* Data must be read atomically. We don't really
- * need the barrier semantics of __sync
- * builtins, but it's easier to use them than
- * roll our own. */
- log = __sync_fetch_and_and(from, 0);
- while ((bit = sizeof(log) > sizeof(int) ?
- ffsll(log) : ffs(log))) {
- hwaddr page_addr;
- hwaddr section_offset;
- hwaddr mr_offset;
- bit -= 1;
- page_addr = addr + bit * VHOST_LOG_PAGE;
- section_offset = page_addr - section->offset_within_address_space;
- mr_offset = section_offset + section->offset_within_region;
- memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
- log &= ~(0x1ull << bit);
- }
- addr += VHOST_LOG_CHUNK;
- }
-}
-
-static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
- MemoryRegionSection *section,
- hwaddr first,
- hwaddr last)
-{
- int i;
- hwaddr start_addr;
- hwaddr end_addr;
-
- if (!dev->log_enabled || !dev->started) {
- return 0;
- }
- start_addr = section->offset_within_address_space;
- end_addr = range_get_last(start_addr, section->size);
- start_addr = MAX(first, start_addr);
- end_addr = MIN(last, end_addr);
-
- for (i = 0; i < dev->mem->nregions; ++i) {
- struct vhost_memory_region *reg = dev->mem->regions + i;
- vhost_dev_sync_region(dev, section, start_addr, end_addr,
- reg->guest_phys_addr,
- range_get_last(reg->guest_phys_addr,
- reg->memory_size));
- }
- for (i = 0; i < dev->nvqs; ++i) {
- struct vhost_virtqueue *vq = dev->vqs + i;
- vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
- range_get_last(vq->used_phys, vq->used_size));
- }
- return 0;
-}
-
-static void vhost_log_sync(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- struct vhost_dev *dev = container_of(listener, struct vhost_dev,
- memory_listener);
- vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
-}
-
-static void vhost_log_sync_range(struct vhost_dev *dev,
- hwaddr first, hwaddr last)
-{
- int i;
- /* FIXME: this is N^2 in number of sections */
- for (i = 0; i < dev->n_mem_sections; ++i) {
- MemoryRegionSection *section = &dev->mem_sections[i];
- vhost_sync_dirty_bitmap(dev, section, first, last);
- }
-}
-
-/* Assign/unassign. Keep an unsorted array of non-overlapping
- * memory regions in dev->mem. */
-static void vhost_dev_unassign_memory(struct vhost_dev *dev,
- uint64_t start_addr,
- uint64_t size)
-{
- int from, to, n = dev->mem->nregions;
- /* Track overlapping/split regions for sanity checking. */
- int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;
-
- for (from = 0, to = 0; from < n; ++from, ++to) {
- struct vhost_memory_region *reg = dev->mem->regions + to;
- uint64_t reglast;
- uint64_t memlast;
- uint64_t change;
-
- /* clone old region */
- if (to != from) {
- memcpy(reg, dev->mem->regions + from, sizeof *reg);
- }
-
- /* No overlap is simple */
- if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
- start_addr, size)) {
- continue;
- }
-
- /* Split only happens if supplied region
- * is in the middle of an existing one. Thus it can not
- * overlap with any other existing region. */
- assert(!split);
-
- reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
- memlast = range_get_last(start_addr, size);
-
- /* Remove whole region */
- if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
- --dev->mem->nregions;
- --to;
- ++overlap_middle;
- continue;
- }
-
- /* Shrink region */
- if (memlast >= reglast) {
- reg->memory_size = start_addr - reg->guest_phys_addr;
- assert(reg->memory_size);
- assert(!overlap_end);
- ++overlap_end;
- continue;
- }
-
- /* Shift region */
- if (start_addr <= reg->guest_phys_addr) {
- change = memlast + 1 - reg->guest_phys_addr;
- reg->memory_size -= change;
- reg->guest_phys_addr += change;
- reg->userspace_addr += change;
- assert(reg->memory_size);
- assert(!overlap_start);
- ++overlap_start;
- continue;
- }
-
- /* This only happens if supplied region
- * is in the middle of an existing one. Thus it can not
- * overlap with any other existing region. */
- assert(!overlap_start);
- assert(!overlap_end);
- assert(!overlap_middle);
- /* Split region: shrink first part, shift second part. */
- memcpy(dev->mem->regions + n, reg, sizeof *reg);
- reg->memory_size = start_addr - reg->guest_phys_addr;
- assert(reg->memory_size);
- change = memlast + 1 - reg->guest_phys_addr;
- reg = dev->mem->regions + n;
- reg->memory_size -= change;
- assert(reg->memory_size);
- reg->guest_phys_addr += change;
- reg->userspace_addr += change;
- /* Never add more than 1 region */
- assert(dev->mem->nregions == n);
- ++dev->mem->nregions;
- ++split;
- }
-}
-
-/* Called after unassign, so no regions overlap the given range. */
-static void vhost_dev_assign_memory(struct vhost_dev *dev,
- uint64_t start_addr,
- uint64_t size,
- uint64_t uaddr)
-{
- int from, to;
- struct vhost_memory_region *merged = NULL;
- for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
- struct vhost_memory_region *reg = dev->mem->regions + to;
- uint64_t prlast, urlast;
- uint64_t pmlast, umlast;
- uint64_t s, e, u;
-
- /* clone old region */
- if (to != from) {
- memcpy(reg, dev->mem->regions + from, sizeof *reg);
- }
- prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
- pmlast = range_get_last(start_addr, size);
- urlast = range_get_last(reg->userspace_addr, reg->memory_size);
- umlast = range_get_last(uaddr, size);
-
- /* check for overlapping regions: should never happen. */
- assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
- /* Not an adjacent or overlapping region - do not merge. */
- if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
- (pmlast + 1 != reg->guest_phys_addr ||
- umlast + 1 != reg->userspace_addr)) {
- continue;
- }
-
- if (merged) {
- --to;
- assert(to >= 0);
- } else {
- merged = reg;
- }
- u = MIN(uaddr, reg->userspace_addr);
- s = MIN(start_addr, reg->guest_phys_addr);
- e = MAX(pmlast, prlast);
- uaddr = merged->userspace_addr = u;
- start_addr = merged->guest_phys_addr = s;
- size = merged->memory_size = e - s + 1;
- assert(merged->memory_size);
- }
-
- if (!merged) {
- struct vhost_memory_region *reg = dev->mem->regions + to;
- memset(reg, 0, sizeof *reg);
- reg->memory_size = size;
- assert(reg->memory_size);
- reg->guest_phys_addr = start_addr;
- reg->userspace_addr = uaddr;
- ++to;
- }
- assert(to <= dev->mem->nregions + 1);
- dev->mem->nregions = to;
-}
-
-static uint64_t vhost_get_log_size(struct vhost_dev *dev)
-{
- uint64_t log_size = 0;
- int i;
- for (i = 0; i < dev->mem->nregions; ++i) {
- struct vhost_memory_region *reg = dev->mem->regions + i;
- uint64_t last = range_get_last(reg->guest_phys_addr,
- reg->memory_size);
- log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
- }
- for (i = 0; i < dev->nvqs; ++i) {
- struct vhost_virtqueue *vq = dev->vqs + i;
- uint64_t last = vq->used_phys + vq->used_size - 1;
- log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
- }
- return log_size;
-}
-
-static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
-{
- vhost_log_chunk_t *log;
- uint64_t log_base;
- int r;
-
- log = g_malloc0(size * sizeof *log);
- log_base = (uint64_t)(unsigned long)log;
- r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
- assert(r >= 0);
- /* Sync only the range covered by the old log */
- if (dev->log_size) {
- vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
- }
- if (dev->log) {
- g_free(dev->log);
- }
- dev->log = log;
- dev->log_size = size;
-}
-
-static int vhost_verify_ring_mappings(struct vhost_dev *dev,
- uint64_t start_addr,
- uint64_t size)
-{
- int i;
- for (i = 0; i < dev->nvqs; ++i) {
- struct vhost_virtqueue *vq = dev->vqs + i;
- hwaddr l;
- void *p;
-
- if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
- continue;
- }
- l = vq->ring_size;
- p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
- if (!p || l != vq->ring_size) {
- fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
- return -ENOMEM;
- }
- if (p != vq->ring) {
- fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
- return -EBUSY;
- }
- cpu_physical_memory_unmap(p, l, 0, 0);
- }
- return 0;
-}
-
-static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
- uint64_t start_addr,
- uint64_t size)
-{
- int i, n = dev->mem->nregions;
- for (i = 0; i < n; ++i) {
- struct vhost_memory_region *reg = dev->mem->regions + i;
- if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
- start_addr, size)) {
- return reg;
- }
- }
- return NULL;
-}
-
-static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
- uint64_t start_addr,
- uint64_t size,
- uint64_t uaddr)
-{
- struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
- uint64_t reglast;
- uint64_t memlast;
-
- if (!reg) {
- return true;
- }
-
- reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
- memlast = range_get_last(start_addr, size);
-
- /* Need to extend region? */
- if (start_addr < reg->guest_phys_addr || memlast > reglast) {
- return true;
- }
- /* userspace_addr changed? */
- return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
-}
-
-static void vhost_set_memory(MemoryListener *listener,
- MemoryRegionSection *section,
- bool add)
-{
- struct vhost_dev *dev = container_of(listener, struct vhost_dev,
- memory_listener);
- hwaddr start_addr = section->offset_within_address_space;
- ram_addr_t size = section->size;
- bool log_dirty = memory_region_is_logging(section->mr);
- int s = offsetof(struct vhost_memory, regions) +
- (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
- uint64_t log_size;
- int r;
- void *ram;
-
- dev->mem = g_realloc(dev->mem, s);
-
- if (log_dirty) {
- add = false;
- }
-
- assert(size);
-
- /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
- ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
- if (add) {
- if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
- /* Region exists with same address. Nothing to do. */
- return;
- }
- } else {
- if (!vhost_dev_find_reg(dev, start_addr, size)) {
- /* Removing region that we don't access. Nothing to do. */
- return;
- }
- }
-
- vhost_dev_unassign_memory(dev, start_addr, size);
- if (add) {
- /* Add given mapping, merging adjacent regions if any */
- vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
- } else {
- /* Remove old mapping for this memory, if any. */
- vhost_dev_unassign_memory(dev, start_addr, size);
- }
-
- if (!dev->started) {
- return;
- }
-
- if (dev->started) {
- r = vhost_verify_ring_mappings(dev, start_addr, size);
- assert(r >= 0);
- }
-
- if (!dev->log_enabled) {
- r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
- assert(r >= 0);
- return;
- }
- log_size = vhost_get_log_size(dev);
- /* We allocate an extra 4K bytes to log,
- * to reduce the * number of reallocations. */
-#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
- /* To log more, must increase log size before table update. */
- if (dev->log_size < log_size) {
- vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
- }
- r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
- assert(r >= 0);
- /* To log less, can only decrease log size after table update. */
- if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
- vhost_dev_log_resize(dev, log_size);
- }
-}
-
-static bool vhost_section(MemoryRegionSection *section)
-{
- return memory_region_is_ram(section->mr);
-}
-
-static void vhost_begin(MemoryListener *listener)
-{
-}
-
-static void vhost_commit(MemoryListener *listener)
-{
-}
-
-static void vhost_region_add(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- struct vhost_dev *dev = container_of(listener, struct vhost_dev,
- memory_listener);
-
- if (!vhost_section(section)) {
- return;
- }
-
- ++dev->n_mem_sections;
- dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
- dev->n_mem_sections);
- dev->mem_sections[dev->n_mem_sections - 1] = *section;
- vhost_set_memory(listener, section, true);
-}
-
-static void vhost_region_del(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- struct vhost_dev *dev = container_of(listener, struct vhost_dev,
- memory_listener);
- int i;
-
- if (!vhost_section(section)) {
- return;
- }
-
- vhost_set_memory(listener, section, false);
- for (i = 0; i < dev->n_mem_sections; ++i) {
- if (dev->mem_sections[i].offset_within_address_space
- == section->offset_within_address_space) {
- --dev->n_mem_sections;
- memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
- (dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
- break;
- }
- }
-}
-
-static void vhost_region_nop(MemoryListener *listener,
- MemoryRegionSection *section)
-{
-}
-
-static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
- struct vhost_virtqueue *vq,
- unsigned idx, bool enable_log)
-{
- struct vhost_vring_addr addr = {
- .index = idx,
- .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
- .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
- .used_user_addr = (uint64_t)(unsigned long)vq->used,
- .log_guest_addr = vq->used_phys,
- .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
- };
- int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
- if (r < 0) {
- return -errno;
- }
- return 0;
-}
-
-static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
-{
- uint64_t features = dev->acked_features;
- int r;
- if (enable_log) {
- features |= 0x1 << VHOST_F_LOG_ALL;
- }
- r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
- return r < 0 ? -errno : 0;
-}
-
-static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
-{
- int r, t, i;
- r = vhost_dev_set_features(dev, enable_log);
- if (r < 0) {
- goto err_features;
- }
- for (i = 0; i < dev->nvqs; ++i) {
- r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
- enable_log);
- if (r < 0) {
- goto err_vq;
- }
- }
- return 0;
-err_vq:
- for (; i >= 0; --i) {
- t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
- dev->log_enabled);
- assert(t >= 0);
- }
- t = vhost_dev_set_features(dev, dev->log_enabled);
- assert(t >= 0);
-err_features:
- return r;
-}
-
-static int vhost_migration_log(MemoryListener *listener, int enable)
-{
- struct vhost_dev *dev = container_of(listener, struct vhost_dev,
- memory_listener);
- int r;
- if (!!enable == dev->log_enabled) {
- return 0;
- }
- if (!dev->started) {
- dev->log_enabled = enable;
- return 0;
- }
- if (!enable) {
- r = vhost_dev_set_log(dev, false);
- if (r < 0) {
- return r;
- }
- if (dev->log) {
- g_free(dev->log);
- }
- dev->log = NULL;
- dev->log_size = 0;
- } else {
- vhost_dev_log_resize(dev, vhost_get_log_size(dev));
- r = vhost_dev_set_log(dev, true);
- if (r < 0) {
- return r;
- }
- }
- dev->log_enabled = enable;
- return 0;
-}
-
-static void vhost_log_global_start(MemoryListener *listener)
-{
- int r;
-
- r = vhost_migration_log(listener, true);
- if (r < 0) {
- abort();
- }
-}
-
-static void vhost_log_global_stop(MemoryListener *listener)
-{
- int r;
-
- r = vhost_migration_log(listener, false);
- if (r < 0) {
- abort();
- }
-}
-
-static void vhost_log_start(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- /* FIXME: implement */
-}
-
-static void vhost_log_stop(MemoryListener *listener,
- MemoryRegionSection *section)
-{
- /* FIXME: implement */
-}
-
-static int vhost_virtqueue_start(struct vhost_dev *dev,
- struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq,
- unsigned idx)
-{
- hwaddr s, l, a;
- int r;
- int vhost_vq_index = idx - dev->vq_index;
- struct vhost_vring_file file = {
- .index = vhost_vq_index
- };
- struct vhost_vring_state state = {
- .index = vhost_vq_index
- };
- struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
-
- assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
-
- vq->num = state.num = virtio_queue_get_num(vdev, idx);
- r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
- if (r) {
- return -errno;
- }
-
- state.num = virtio_queue_get_last_avail_idx(vdev, idx);
- r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
- if (r) {
- return -errno;
- }
-
- s = l = virtio_queue_get_desc_size(vdev, idx);
- a = virtio_queue_get_desc_addr(vdev, idx);
- vq->desc = cpu_physical_memory_map(a, &l, 0);
- if (!vq->desc || l != s) {
- r = -ENOMEM;
- goto fail_alloc_desc;
- }
- s = l = virtio_queue_get_avail_size(vdev, idx);
- a = virtio_queue_get_avail_addr(vdev, idx);
- vq->avail = cpu_physical_memory_map(a, &l, 0);
- if (!vq->avail || l != s) {
- r = -ENOMEM;
- goto fail_alloc_avail;
- }
- vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
- vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
- vq->used = cpu_physical_memory_map(a, &l, 1);
- if (!vq->used || l != s) {
- r = -ENOMEM;
- goto fail_alloc_used;
- }
-
- vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
- vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
- vq->ring = cpu_physical_memory_map(a, &l, 1);
- if (!vq->ring || l != s) {
- r = -ENOMEM;
- goto fail_alloc_ring;
- }
-
- r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
- if (r < 0) {
- r = -errno;
- goto fail_alloc;
- }
-
- file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
- r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
- if (r) {
- r = -errno;
- goto fail_kick;
- }
-
- /* Clear and discard previous events if any. */
- event_notifier_test_and_clear(&vq->masked_notifier);
-
- return 0;
-
-fail_kick:
-fail_alloc:
- cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
- 0, 0);
-fail_alloc_ring:
- cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
- 0, 0);
-fail_alloc_used:
- cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
- 0, 0);
-fail_alloc_avail:
- cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
- 0, 0);
-fail_alloc_desc:
- return r;
-}
-
-static void vhost_virtqueue_stop(struct vhost_dev *dev,
- struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq,
- unsigned idx)
-{
- struct vhost_vring_state state = {
- .index = idx - dev->vq_index
- };
- int r;
- assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
- r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
- fflush(stderr);
- }
- virtio_queue_set_last_avail_idx(vdev, idx, state.num);
- assert (r >= 0);
- cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
- 0, virtio_queue_get_ring_size(vdev, idx));
- cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
- 1, virtio_queue_get_used_size(vdev, idx));
- cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
- 0, virtio_queue_get_avail_size(vdev, idx));
- cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
- 0, virtio_queue_get_desc_size(vdev, idx));
-}
-
-static void vhost_eventfd_add(MemoryListener *listener,
- MemoryRegionSection *section,
- bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
-static void vhost_eventfd_del(MemoryListener *listener,
- MemoryRegionSection *section,
- bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
-static int vhost_virtqueue_init(struct vhost_dev *dev,
- struct vhost_virtqueue *vq, int n)
-{
- struct vhost_vring_file file = {
- .index = n,
- };
- int r = event_notifier_init(&vq->masked_notifier, 0);
- if (r < 0) {
- return r;
- }
-
- file.fd = event_notifier_get_fd(&vq->masked_notifier);
- r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
- if (r) {
- r = -errno;
- goto fail_call;
- }
- return 0;
-fail_call:
- event_notifier_cleanup(&vq->masked_notifier);
- return r;
-}
-
-static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
-{
- event_notifier_cleanup(&vq->masked_notifier);
-}
-
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
- bool force)
-{
- uint64_t features;
- int i, r;
- if (devfd >= 0) {
- hdev->control = devfd;
- } else {
- hdev->control = open(devpath, O_RDWR);
- if (hdev->control < 0) {
- return -errno;
- }
- }
- r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
- if (r < 0) {
- goto fail;
- }
-
- r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
- if (r < 0) {
- goto fail;
- }
-
- for (i = 0; i < hdev->nvqs; ++i) {
- r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
- if (r < 0) {
- goto fail_vq;
- }
- }
- hdev->features = features;
-
- hdev->memory_listener = (MemoryListener) {
- .begin = vhost_begin,
- .commit = vhost_commit,
- .region_add = vhost_region_add,
- .region_del = vhost_region_del,
- .region_nop = vhost_region_nop,
- .log_start = vhost_log_start,
- .log_stop = vhost_log_stop,
- .log_sync = vhost_log_sync,
- .log_global_start = vhost_log_global_start,
- .log_global_stop = vhost_log_global_stop,
- .eventfd_add = vhost_eventfd_add,
- .eventfd_del = vhost_eventfd_del,
- .priority = 10
- };
- hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
- hdev->n_mem_sections = 0;
- hdev->mem_sections = NULL;
- hdev->log = NULL;
- hdev->log_size = 0;
- hdev->log_enabled = false;
- hdev->started = false;
- memory_listener_register(&hdev->memory_listener, &address_space_memory);
- hdev->force = force;
- return 0;
-fail_vq:
- while (--i >= 0) {
- vhost_virtqueue_cleanup(hdev->vqs + i);
- }
-fail:
- r = -errno;
- close(hdev->control);
- return r;
-}
-
-void vhost_dev_cleanup(struct vhost_dev *hdev)
-{
- int i;
- for (i = 0; i < hdev->nvqs; ++i) {
- vhost_virtqueue_cleanup(hdev->vqs + i);
- }
- memory_listener_unregister(&hdev->memory_listener);
- g_free(hdev->mem);
- g_free(hdev->mem_sections);
- close(hdev->control);
-}
-
-bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
-{
- return !vdev->binding->query_guest_notifiers ||
- vdev->binding->query_guest_notifiers(vdev->binding_opaque) ||
- hdev->force;
-}
-
-/* Stop processing guest IO notifications in qemu.
- * Start processing them in vhost in kernel.
- */
-int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
-{
- int i, r;
- if (!vdev->binding->set_host_notifier) {
- fprintf(stderr, "binding does not support host notifiers\n");
- r = -ENOSYS;
- goto fail;
- }
-
- for (i = 0; i < hdev->nvqs; ++i) {
- r = vdev->binding->set_host_notifier(vdev->binding_opaque,
- hdev->vq_index + i,
- true);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
- goto fail_vq;
- }
- }
-
- return 0;
-fail_vq:
- while (--i >= 0) {
- r = vdev->binding->set_host_notifier(vdev->binding_opaque,
- hdev->vq_index + i,
- false);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
- fflush(stderr);
- }
- assert (r >= 0);
- }
-fail:
- return r;
-}
-
-/* Stop processing guest IO notifications in vhost.
- * Start processing them in qemu.
- * This might actually run the qemu handlers right away,
- * so virtio in qemu must be completely setup when this is called.
- */
-void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
-{
- int i, r;
-
- for (i = 0; i < hdev->nvqs; ++i) {
- r = vdev->binding->set_host_notifier(vdev->binding_opaque,
- hdev->vq_index + i,
- false);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
- fflush(stderr);
- }
- assert (r >= 0);
- }
-}
-
-/* Test and clear event pending status.
- * Should be called after unmask to avoid losing events.
- */
-bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
-{
- struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
- assert(hdev->started);
- assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
- return event_notifier_test_and_clear(&vq->masked_notifier);
-}
-
-/* Mask/unmask events from this vq. */
-void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
- bool mask)
-{
- struct VirtQueue *vvq = virtio_get_queue(vdev, n);
- int r, index = n - hdev->vq_index;
-
- assert(hdev->started);
- assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
-
- struct vhost_vring_file file = {
- .index = index
- };
- if (mask) {
- file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
- } else {
- file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
- }
- r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
- assert(r >= 0);
-}
-
-/* Host notifiers must be enabled at this point. */
-int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
-{
- int i, r;
-
- hdev->started = true;
-
- r = vhost_dev_set_features(hdev, hdev->log_enabled);
- if (r < 0) {
- goto fail_features;
- }
- r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
- if (r < 0) {
- r = -errno;
- goto fail_mem;
- }
- for (i = 0; i < hdev->nvqs; ++i) {
- r = vhost_virtqueue_start(hdev,
- vdev,
- hdev->vqs + i,
- hdev->vq_index + i);
- if (r < 0) {
- goto fail_vq;
- }
- }
-
- if (hdev->log_enabled) {
- hdev->log_size = vhost_get_log_size(hdev);
- hdev->log = hdev->log_size ?
- g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
- r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
- (uint64_t)(unsigned long)hdev->log);
- if (r < 0) {
- r = -errno;
- goto fail_log;
- }
- }
-
- return 0;
-fail_log:
-fail_vq:
- while (--i >= 0) {
- vhost_virtqueue_stop(hdev,
- vdev,
- hdev->vqs + i,
- hdev->vq_index + i);
- }
- i = hdev->nvqs;
-fail_mem:
-fail_features:
-
- hdev->started = false;
- return r;
-}
-
-/* Host notifiers must be enabled at this point. */
-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
-{
- int i;
-
- for (i = 0; i < hdev->nvqs; ++i) {
- vhost_virtqueue_stop(hdev,
- vdev,
- hdev->vqs + i,
- hdev->vq_index + i);
- }
- vhost_log_sync_range(hdev, 0, ~0x0ull);
-
- hdev->started = false;
- g_free(hdev->log);
- hdev->log = NULL;
- hdev->log_size = 0;
-}
-
+++ /dev/null
-/*
- * vhost-net support
- *
- * Copyright Red Hat, Inc. 2010
- *
- * Authors:
- * Michael S. Tsirkin <mst@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "net/net.h"
-#include "net/tap.h"
-
-#include "hw/virtio/virtio-net.h"
-#include "net/vhost_net.h"
-#include "qemu/error-report.h"
-
-#include "config.h"
-
-#ifdef CONFIG_VHOST_NET
-#include <linux/vhost.h>
-#include <sys/socket.h>
-#include <linux/kvm.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <linux/virtio_ring.h>
-#include <netpacket/packet.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <netinet/in.h>
-
-#include <stdio.h>
-
-#include "hw/virtio/vhost.h"
-
-struct vhost_net {
- struct vhost_dev dev;
- struct vhost_virtqueue vqs[2];
- int backend;
- NetClientState *nc;
-};
-
-unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
-{
- /* Clear features not supported by host kernel. */
- if (!(net->dev.features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY))) {
- features &= ~(1 << VIRTIO_F_NOTIFY_ON_EMPTY);
- }
- if (!(net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) {
- features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
- }
- if (!(net->dev.features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
- features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
- }
- if (!(net->dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))) {
- features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
- }
- return features;
-}
-
-void vhost_net_ack_features(struct vhost_net *net, unsigned features)
-{
- net->dev.acked_features = net->dev.backend_features;
- if (features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) {
- net->dev.acked_features |= (1 << VIRTIO_F_NOTIFY_ON_EMPTY);
- }
- if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) {
- net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
- }
- if (features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
- net->dev.acked_features |= (1 << VIRTIO_RING_F_EVENT_IDX);
- }
- if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
- net->dev.acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
- }
-}
-
-static int vhost_net_get_fd(NetClientState *backend)
-{
- switch (backend->info->type) {
- case NET_CLIENT_OPTIONS_KIND_TAP:
- return tap_get_fd(backend);
- default:
- fprintf(stderr, "vhost-net requires tap backend\n");
- return -EBADFD;
- }
-}
-
-struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
- bool force)
-{
- int r;
- struct vhost_net *net = g_malloc(sizeof *net);
- if (!backend) {
- fprintf(stderr, "vhost-net requires backend to be setup\n");
- goto fail;
- }
- r = vhost_net_get_fd(backend);
- if (r < 0) {
- goto fail;
- }
- net->nc = backend;
- net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
- (1 << VHOST_NET_F_VIRTIO_NET_HDR);
- net->backend = r;
-
- net->dev.nvqs = 2;
- net->dev.vqs = net->vqs;
-
- r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
- if (r < 0) {
- goto fail;
- }
- if (!tap_has_vnet_hdr_len(backend,
- sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
- net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
- }
- if (~net->dev.features & net->dev.backend_features) {
- fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
- (uint64_t)(~net->dev.features & net->dev.backend_features));
- vhost_dev_cleanup(&net->dev);
- goto fail;
- }
-
- /* Set sane init value. Override when guest acks. */
- vhost_net_ack_features(net, 0);
- return net;
-fail:
- g_free(net);
- return NULL;
-}
-
-bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
-{
- return vhost_dev_query(&net->dev, dev);
-}
-
-static int vhost_net_start_one(struct vhost_net *net,
- VirtIODevice *dev,
- int vq_index)
-{
- struct vhost_vring_file file = { };
- int r;
-
- if (net->dev.started) {
- return 0;
- }
-
- net->dev.nvqs = 2;
- net->dev.vqs = net->vqs;
- net->dev.vq_index = vq_index;
-
- r = vhost_dev_enable_notifiers(&net->dev, dev);
- if (r < 0) {
- goto fail_notifiers;
- }
-
- r = vhost_dev_start(&net->dev, dev);
- if (r < 0) {
- goto fail_start;
- }
-
- net->nc->info->poll(net->nc, false);
- qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
- file.fd = net->backend;
- for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
- r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
- if (r < 0) {
- r = -errno;
- goto fail;
- }
- }
- return 0;
-fail:
- file.fd = -1;
- while (file.index-- > 0) {
- int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
- assert(r >= 0);
- }
- net->nc->info->poll(net->nc, true);
- vhost_dev_stop(&net->dev, dev);
-fail_start:
- vhost_dev_disable_notifiers(&net->dev, dev);
-fail_notifiers:
- return r;
-}
-
-static void vhost_net_stop_one(struct vhost_net *net,
- VirtIODevice *dev)
-{
- struct vhost_vring_file file = { .fd = -1 };
-
- if (!net->dev.started) {
- return;
- }
-
- for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
- int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
- assert(r >= 0);
- }
- net->nc->info->poll(net->nc, true);
- vhost_dev_stop(&net->dev, dev);
- vhost_dev_disable_notifiers(&net->dev, dev);
-}
-
-int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
- int total_queues)
-{
- int r, i = 0;
-
- if (!dev->binding->set_guest_notifiers) {
- error_report("binding does not support guest notifiers");
- r = -ENOSYS;
- goto err;
- }
-
- for (i = 0; i < total_queues; i++) {
- r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2);
-
- if (r < 0) {
- goto err;
- }
- }
-
- r = dev->binding->set_guest_notifiers(dev->binding_opaque,
- total_queues * 2,
- true);
- if (r < 0) {
- error_report("Error binding guest notifier: %d", -r);
- goto err;
- }
-
- return 0;
-
-err:
- while (--i >= 0) {
- vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
- }
- return r;
-}
-
-void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
- int total_queues)
-{
- int i, r;
-
- r = dev->binding->set_guest_notifiers(dev->binding_opaque,
- total_queues * 2,
- false);
- if (r < 0) {
- fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
- fflush(stderr);
- }
- assert(r >= 0);
-
- for (i = 0; i < total_queues; i++) {
- vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
- }
-}
-
-void vhost_net_cleanup(struct vhost_net *net)
-{
- vhost_dev_cleanup(&net->dev);
- g_free(net);
-}
-
-bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
-{
- return vhost_virtqueue_pending(&net->dev, idx);
-}
-
-void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
- int idx, bool mask)
-{
- vhost_virtqueue_mask(&net->dev, dev, idx, mask);
-}
-#else
-struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
- bool force)
-{
- error_report("vhost-net support is not compiled in");
- return NULL;
-}
-
-bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
-{
- return false;
-}
-
-int vhost_net_start(VirtIODevice *dev,
- NetClientState *ncs,
- int total_queues)
-{
- return -ENOSYS;
-}
-void vhost_net_stop(VirtIODevice *dev,
- NetClientState *ncs,
- int total_queues)
-{
-}
-
-void vhost_net_cleanup(struct vhost_net *net)
-{
-}
-
-unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
-{
- return features;
-}
-void vhost_net_ack_features(struct vhost_net *net, unsigned features)
-{
-}
-
-bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
-{
- return -ENOSYS;
-}
-
-void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
- int idx, bool mask)
-{
-}
-#endif
+++ /dev/null
-/*
- * Virtio Balloon Device
- *
- * Copyright IBM, Corp. 2008
- * Copyright (C) 2011 Red Hat, Inc.
- * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/iov.h"
-#include "qemu/timer.h"
-#include "qemu-common.h"
-#include "hw/virtio/virtio.h"
-#include "hw/i386/pc.h"
-#include "cpu.h"
-#include "sysemu/balloon.h"
-#include "hw/virtio/virtio-balloon.h"
-#include "sysemu/kvm.h"
-#include "exec/address-spaces.h"
-#include "qapi/visitor.h"
-
-#if defined(__linux__)
-#include <sys/mman.h>
-#endif
-
-#include "hw/virtio/virtio-bus.h"
-
-static void balloon_page(void *addr, int deflate)
-{
-#if defined(__linux__)
- if (!kvm_enabled() || kvm_has_sync_mmu())
- qemu_madvise(addr, TARGET_PAGE_SIZE,
- deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
-#endif
-}
-
-static const char *balloon_stat_names[] = {
- [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
- [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
- [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
- [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
- [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
- [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
- [VIRTIO_BALLOON_S_NR] = NULL
-};
-
-/*
- * reset_stats - Mark all items in the stats array as unset
- *
- * This function needs to be called at device intialization and before
- * before updating to a set of newly-generated stats. This will ensure that no
- * stale values stick around in case the guest reports a subset of the supported
- * statistics.
- */
-static inline void reset_stats(VirtIOBalloon *dev)
-{
- int i;
- for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
-}
-
-static bool balloon_stats_supported(const VirtIOBalloon *s)
-{
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
- return vdev->guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ);
-}
-
-static bool balloon_stats_enabled(const VirtIOBalloon *s)
-{
- return s->stats_poll_interval > 0;
-}
-
-static void balloon_stats_destroy_timer(VirtIOBalloon *s)
-{
- if (balloon_stats_enabled(s)) {
- qemu_del_timer(s->stats_timer);
- qemu_free_timer(s->stats_timer);
- s->stats_timer = NULL;
- s->stats_poll_interval = 0;
- }
-}
-
-static void balloon_stats_change_timer(VirtIOBalloon *s, int secs)
-{
- qemu_mod_timer(s->stats_timer, qemu_get_clock_ms(vm_clock) + secs * 1000);
-}
-
-static void balloon_stats_poll_cb(void *opaque)
-{
- VirtIOBalloon *s = opaque;
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
- if (!balloon_stats_supported(s)) {
- /* re-schedule */
- balloon_stats_change_timer(s, s->stats_poll_interval);
- return;
- }
-
- virtqueue_push(s->svq, &s->stats_vq_elem, s->stats_vq_offset);
- virtio_notify(vdev, s->svq);
-}
-
-static void balloon_stats_get_all(Object *obj, struct Visitor *v,
- void *opaque, const char *name, Error **errp)
-{
- VirtIOBalloon *s = opaque;
- int i;
-
- if (!s->stats_last_update) {
- error_setg(errp, "guest hasn't updated any stats yet");
- return;
- }
-
- visit_start_struct(v, NULL, "guest-stats", name, 0, errp);
- visit_type_int(v, &s->stats_last_update, "last-update", errp);
-
- visit_start_struct(v, NULL, NULL, "stats", 0, errp);
- for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
- visit_type_int64(v, (int64_t *) &s->stats[i], balloon_stat_names[i],
- errp);
- }
- visit_end_struct(v, errp);
-
- visit_end_struct(v, errp);
-}
-
-static void balloon_stats_get_poll_interval(Object *obj, struct Visitor *v,
- void *opaque, const char *name,
- Error **errp)
-{
- VirtIOBalloon *s = opaque;
- visit_type_int(v, &s->stats_poll_interval, name, errp);
-}
-
-static void balloon_stats_set_poll_interval(Object *obj, struct Visitor *v,
- void *opaque, const char *name,
- Error **errp)
-{
- VirtIOBalloon *s = opaque;
- int64_t value;
-
- visit_type_int(v, &value, name, errp);
- if (error_is_set(errp)) {
- return;
- }
-
- if (value < 0) {
- error_setg(errp, "timer value must be greater than zero");
- return;
- }
-
- if (value == s->stats_poll_interval) {
- return;
- }
-
- if (value == 0) {
- /* timer=0 disables the timer */
- balloon_stats_destroy_timer(s);
- return;
- }
-
- if (balloon_stats_enabled(s)) {
- /* timer interval change */
- s->stats_poll_interval = value;
- balloon_stats_change_timer(s, value);
- return;
- }
-
- /* create a new timer */
- g_assert(s->stats_timer == NULL);
- s->stats_timer = qemu_new_timer_ms(vm_clock, balloon_stats_poll_cb, s);
- s->stats_poll_interval = value;
- balloon_stats_change_timer(s, 0);
-}
-
-static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
- VirtQueueElement elem;
- MemoryRegionSection section;
-
- while (virtqueue_pop(vq, &elem)) {
- size_t offset = 0;
- uint32_t pfn;
-
- while (iov_to_buf(elem.out_sg, elem.out_num, offset, &pfn, 4) == 4) {
- ram_addr_t pa;
- ram_addr_t addr;
-
- pa = (ram_addr_t)ldl_p(&pfn) << VIRTIO_BALLOON_PFN_SHIFT;
- offset += 4;
-
- /* FIXME: remove get_system_memory(), but how? */
- section = memory_region_find(get_system_memory(), pa, 1);
- if (!section.size || !memory_region_is_ram(section.mr))
- continue;
-
- /* Using memory_region_get_ram_ptr is bending the rules a bit, but
- should be OK because we only want a single page. */
- addr = section.offset_within_region;
- balloon_page(memory_region_get_ram_ptr(section.mr) + addr,
- !!(vq == s->dvq));
- }
-
- virtqueue_push(vq, &elem, offset);
- virtio_notify(vdev, vq);
- }
-}
-
-static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
- VirtQueueElement *elem = &s->stats_vq_elem;
- VirtIOBalloonStat stat;
- size_t offset = 0;
- qemu_timeval tv;
-
- if (!virtqueue_pop(vq, elem)) {
- goto out;
- }
-
- /* Initialize the stats to get rid of any stale values. This is only
- * needed to handle the case where a guest supports fewer stats than it
- * used to (ie. it has booted into an old kernel).
- */
- reset_stats(s);
-
- while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
- == sizeof(stat)) {
- uint16_t tag = tswap16(stat.tag);
- uint64_t val = tswap64(stat.val);
-
- offset += sizeof(stat);
- if (tag < VIRTIO_BALLOON_S_NR)
- s->stats[tag] = val;
- }
- s->stats_vq_offset = offset;
-
- if (qemu_gettimeofday(&tv) < 0) {
- fprintf(stderr, "warning: %s: failed to get time of day\n", __func__);
- goto out;
- }
-
- s->stats_last_update = tv.tv_sec;
-
-out:
- if (balloon_stats_enabled(s)) {
- balloon_stats_change_timer(s, s->stats_poll_interval);
- }
-}
-
-static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
-{
- VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
- struct virtio_balloon_config config;
-
- config.num_pages = cpu_to_le32(dev->num_pages);
- config.actual = cpu_to_le32(dev->actual);
-
- memcpy(config_data, &config, 8);
-}
-
-static void virtio_balloon_set_config(VirtIODevice *vdev,
- const uint8_t *config_data)
-{
- VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
- struct virtio_balloon_config config;
- uint32_t oldactual = dev->actual;
- memcpy(&config, config_data, 8);
- dev->actual = le32_to_cpu(config.actual);
- if (dev->actual != oldactual) {
- qemu_balloon_changed(ram_size -
- (dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
- }
-}
-
-static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
-{
- f |= (1 << VIRTIO_BALLOON_F_STATS_VQ);
- return f;
-}
-
-static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
-{
- VirtIOBalloon *dev = opaque;
- info->actual = ram_size - ((uint64_t) dev->actual <<
- VIRTIO_BALLOON_PFN_SHIFT);
-}
-
-static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
-{
- VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
- VirtIODevice *vdev = VIRTIO_DEVICE(dev);
-
- if (target > ram_size) {
- target = ram_size;
- }
- if (target) {
- dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
- virtio_notify_config(vdev);
- }
-}
-
-static void virtio_balloon_save(QEMUFile *f, void *opaque)
-{
- VirtIOBalloon *s = VIRTIO_BALLOON(opaque);
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
- virtio_save(vdev, f);
-
- qemu_put_be32(f, s->num_pages);
- qemu_put_be32(f, s->actual);
-}
-
-static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
-{
- VirtIOBalloon *s = VIRTIO_BALLOON(opaque);
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
- int ret;
-
- if (version_id != 1)
- return -EINVAL;
-
- ret = virtio_load(vdev, f);
- if (ret) {
- return ret;
- }
-
- s->num_pages = qemu_get_be32(f);
- s->actual = qemu_get_be32(f);
- return 0;
-}
-
-static int virtio_balloon_device_init(VirtIODevice *vdev)
-{
- DeviceState *qdev = DEVICE(vdev);
- VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
- int ret;
-
- virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, 8);
-
- vdev->get_config = virtio_balloon_get_config;
- vdev->set_config = virtio_balloon_set_config;
- vdev->get_features = virtio_balloon_get_features;
-
- ret = qemu_add_balloon_handler(virtio_balloon_to_target,
- virtio_balloon_stat, s);
-
- if (ret < 0) {
- virtio_common_cleanup(VIRTIO_DEVICE(s));
- return -1;
- }
-
- s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
- s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
- s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
-
- register_savevm(qdev, "virtio-balloon", -1, 1,
- virtio_balloon_save, virtio_balloon_load, s);
-
- object_property_add(OBJECT(qdev), "guest-stats", "guest statistics",
- balloon_stats_get_all, NULL, NULL, s, NULL);
-
- object_property_add(OBJECT(qdev), "guest-stats-polling-interval", "int",
- balloon_stats_get_poll_interval,
- balloon_stats_set_poll_interval,
- NULL, s, NULL);
- return 0;
-}
-
-static int virtio_balloon_device_exit(DeviceState *qdev)
-{
- VirtIOBalloon *s = VIRTIO_BALLOON(qdev);
- VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
-
- balloon_stats_destroy_timer(s);
- qemu_remove_balloon_handler(s);
- unregister_savevm(qdev, "virtio-balloon", s);
- virtio_common_cleanup(vdev);
- return 0;
-}
-
-static Property virtio_balloon_properties[] = {
- DEFINE_PROP_END_OF_LIST(),
-};
-
-static void virtio_balloon_class_init(ObjectClass *klass, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
- VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
- dc->exit = virtio_balloon_device_exit;
- dc->props = virtio_balloon_properties;
- vdc->init = virtio_balloon_device_init;
- vdc->get_config = virtio_balloon_get_config;
- vdc->set_config = virtio_balloon_set_config;
- vdc->get_features = virtio_balloon_get_features;
-}
-
-static const TypeInfo virtio_balloon_info = {
- .name = TYPE_VIRTIO_BALLOON,
- .parent = TYPE_VIRTIO_DEVICE,
- .instance_size = sizeof(VirtIOBalloon),
- .class_init = virtio_balloon_class_init,
-};
-
-static void virtio_register_types(void)
-{
- type_register_static(&virtio_balloon_info);
-}
-
-type_init(virtio_register_types)
+++ /dev/null
-/*
- * Virtio Block Device
- *
- * Copyright IBM, Corp. 2007
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu-common.h"
-#include "qemu/error-report.h"
-#include "trace.h"
-#include "hw/block/block.h"
-#include "sysemu/blockdev.h"
-#include "hw/virtio/virtio-blk.h"
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
-# include "dataplane/virtio-blk.h"
-#endif
-#include "block/scsi.h"
-#ifdef __linux__
-# include <scsi/sg.h>
-#endif
-#include "hw/virtio/virtio-bus.h"
-
-typedef struct VirtIOBlockReq
-{
- VirtIOBlock *dev;
- VirtQueueElement elem;
- struct virtio_blk_inhdr *in;
- struct virtio_blk_outhdr *out;
- struct virtio_scsi_inhdr *scsi;
- QEMUIOVector qiov;
- struct VirtIOBlockReq *next;
- BlockAcctCookie acct;
-} VirtIOBlockReq;
-
-static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
-{
- VirtIOBlock *s = req->dev;
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
- trace_virtio_blk_req_complete(req, status);
-
- stb_p(&req->in->status, status);
- virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
- virtio_notify(vdev, s->vq);
-}
-
-static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
- bool is_read)
-{
- BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error);
- VirtIOBlock *s = req->dev;
-
- if (action == BDRV_ACTION_STOP) {
- req->next = s->rq;
- s->rq = req;
- } else if (action == BDRV_ACTION_REPORT) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- bdrv_acct_done(s->bs, &req->acct);
- g_free(req);
- }
-
- bdrv_error_action(s->bs, action, is_read, error);
- return action != BDRV_ACTION_IGNORE;
-}
-
-static void virtio_blk_rw_complete(void *opaque, int ret)
-{
- VirtIOBlockReq *req = opaque;
-
- trace_virtio_blk_rw_complete(req, ret);
-
- if (ret) {
- bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
- if (virtio_blk_handle_rw_error(req, -ret, is_read))
- return;
- }
-
- virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
- bdrv_acct_done(req->dev->bs, &req->acct);
- g_free(req);
-}
-
-static void virtio_blk_flush_complete(void *opaque, int ret)
-{
- VirtIOBlockReq *req = opaque;
-
- if (ret) {
- if (virtio_blk_handle_rw_error(req, -ret, 0)) {
- return;
- }
- }
-
- virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
- bdrv_acct_done(req->dev->bs, &req->acct);
- g_free(req);
-}
-
-static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
-{
- VirtIOBlockReq *req = g_malloc(sizeof(*req));
- req->dev = s;
- req->qiov.size = 0;
- req->next = NULL;
- return req;
-}
-
-static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
-{
- VirtIOBlockReq *req = virtio_blk_alloc_request(s);
-
- if (req != NULL) {
- if (!virtqueue_pop(s->vq, &req->elem)) {
- g_free(req);
- return NULL;
- }
- }
-
- return req;
-}
-
-static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
-{
-#ifdef __linux__
- int ret;
- int i;
-#endif
- int status = VIRTIO_BLK_S_OK;
-
- /*
- * We require at least one output segment each for the virtio_blk_outhdr
- * and the SCSI command block.
- *
- * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
- * and the sense buffer pointer in the input segments.
- */
- if (req->elem.out_num < 2 || req->elem.in_num < 3) {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- g_free(req);
- return;
- }
-
- /*
- * The scsi inhdr is placed in the second-to-last input segment, just
- * before the regular inhdr.
- */
- req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
-
- if (!req->dev->blk.scsi) {
- status = VIRTIO_BLK_S_UNSUPP;
- goto fail;
- }
-
- /*
- * No support for bidirection commands yet.
- */
- if (req->elem.out_num > 2 && req->elem.in_num > 3) {
- status = VIRTIO_BLK_S_UNSUPP;
- goto fail;
- }
-
-#ifdef __linux__
- struct sg_io_hdr hdr;
- memset(&hdr, 0, sizeof(struct sg_io_hdr));
- hdr.interface_id = 'S';
- hdr.cmd_len = req->elem.out_sg[1].iov_len;
- hdr.cmdp = req->elem.out_sg[1].iov_base;
- hdr.dxfer_len = 0;
-
- if (req->elem.out_num > 2) {
- /*
- * If there are more than the minimally required 2 output segments
- * there is write payload starting from the third iovec.
- */
- hdr.dxfer_direction = SG_DXFER_TO_DEV;
- hdr.iovec_count = req->elem.out_num - 2;
-
- for (i = 0; i < hdr.iovec_count; i++)
- hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
-
- hdr.dxferp = req->elem.out_sg + 2;
-
- } else if (req->elem.in_num > 3) {
- /*
- * If we have more than 3 input segments the guest wants to actually
- * read data.
- */
- hdr.dxfer_direction = SG_DXFER_FROM_DEV;
- hdr.iovec_count = req->elem.in_num - 3;
- for (i = 0; i < hdr.iovec_count; i++)
- hdr.dxfer_len += req->elem.in_sg[i].iov_len;
-
- hdr.dxferp = req->elem.in_sg;
- } else {
- /*
- * Some SCSI commands don't actually transfer any data.
- */
- hdr.dxfer_direction = SG_DXFER_NONE;
- }
-
- hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
- hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
-
- ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
- if (ret) {
- status = VIRTIO_BLK_S_UNSUPP;
- goto fail;
- }
-
- /*
- * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi)
- * clear the masked_status field [hence status gets cleared too, see
- * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED
- * status has occurred. However they do set DRIVER_SENSE in driver_status
- * field. Also a (sb_len_wr > 0) indicates there is a sense buffer.
- */
- if (hdr.status == 0 && hdr.sb_len_wr > 0) {
- hdr.status = CHECK_CONDITION;
- }
-
- stl_p(&req->scsi->errors,
- hdr.status | (hdr.msg_status << 8) |
- (hdr.host_status << 16) | (hdr.driver_status << 24));
- stl_p(&req->scsi->residual, hdr.resid);
- stl_p(&req->scsi->sense_len, hdr.sb_len_wr);
- stl_p(&req->scsi->data_len, hdr.dxfer_len);
-
- virtio_blk_req_complete(req, status);
- g_free(req);
- return;
-#else
- abort();
-#endif
-
-fail:
- /* Just put anything nonzero so that the ioctl fails in the guest. */
- stl_p(&req->scsi->errors, 255);
- virtio_blk_req_complete(req, status);
- g_free(req);
-}
-
-typedef struct MultiReqBuffer {
- BlockRequest blkreq[32];
- unsigned int num_writes;
-} MultiReqBuffer;
-
-static void virtio_submit_multiwrite(BlockDriverState *bs, MultiReqBuffer *mrb)
-{
- int i, ret;
-
- if (!mrb->num_writes) {
- return;
- }
-
- ret = bdrv_aio_multiwrite(bs, mrb->blkreq, mrb->num_writes);
- if (ret != 0) {
- for (i = 0; i < mrb->num_writes; i++) {
- if (mrb->blkreq[i].error) {
- virtio_blk_rw_complete(mrb->blkreq[i].opaque, -EIO);
- }
- }
- }
-
- mrb->num_writes = 0;
-}
-
-static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
-{
- bdrv_acct_start(req->dev->bs, &req->acct, 0, BDRV_ACCT_FLUSH);
-
- /*
- * Make sure all outstanding writes are posted to the backing device.
- */
- virtio_submit_multiwrite(req->dev->bs, mrb);
- bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
-}
-
-static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
-{
- BlockRequest *blkreq;
- uint64_t sector;
-
- sector = ldq_p(&req->out->sector);
-
- bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
-
- trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
-
- if (sector & req->dev->sector_mask) {
- virtio_blk_rw_complete(req, -EIO);
- return;
- }
- if (req->qiov.size % req->dev->conf->logical_block_size) {
- virtio_blk_rw_complete(req, -EIO);
- return;
- }
-
- if (mrb->num_writes == 32) {
- virtio_submit_multiwrite(req->dev->bs, mrb);
- }
-
- blkreq = &mrb->blkreq[mrb->num_writes];
- blkreq->sector = sector;
- blkreq->nb_sectors = req->qiov.size / BDRV_SECTOR_SIZE;
- blkreq->qiov = &req->qiov;
- blkreq->cb = virtio_blk_rw_complete;
- blkreq->opaque = req;
- blkreq->error = 0;
-
- mrb->num_writes++;
-}
-
-static void virtio_blk_handle_read(VirtIOBlockReq *req)
-{
- uint64_t sector;
-
- sector = ldq_p(&req->out->sector);
-
- bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
-
- trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
-
- if (sector & req->dev->sector_mask) {
- virtio_blk_rw_complete(req, -EIO);
- return;
- }
- if (req->qiov.size % req->dev->conf->logical_block_size) {
- virtio_blk_rw_complete(req, -EIO);
- return;
- }
- bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
- req->qiov.size / BDRV_SECTOR_SIZE,
- virtio_blk_rw_complete, req);
-}
-
-static void virtio_blk_handle_request(VirtIOBlockReq *req,
- MultiReqBuffer *mrb)
-{
- uint32_t type;
-
- if (req->elem.out_num < 1 || req->elem.in_num < 1) {
- error_report("virtio-blk missing headers");
- exit(1);
- }
-
- if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
- req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
- error_report("virtio-blk header not in correct element");
- exit(1);
- }
-
- req->out = (void *)req->elem.out_sg[0].iov_base;
- req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
-
- type = ldl_p(&req->out->type);
-
- if (type & VIRTIO_BLK_T_FLUSH) {
- virtio_blk_handle_flush(req, mrb);
- } else if (type & VIRTIO_BLK_T_SCSI_CMD) {
- virtio_blk_handle_scsi(req);
- } else if (type & VIRTIO_BLK_T_GET_ID) {
- VirtIOBlock *s = req->dev;
-
- /*
- * NB: per existing s/n string convention the string is
- * terminated by '\0' only when shorter than buffer.
- */
- strncpy(req->elem.in_sg[0].iov_base,
- s->blk.serial ? s->blk.serial : "",
- MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
- virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
- g_free(req);
- } else if (type & VIRTIO_BLK_T_OUT) {
- qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
- req->elem.out_num - 1);
- virtio_blk_handle_write(req, mrb);
- } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
- /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
- qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
- req->elem.in_num - 1);
- virtio_blk_handle_read(req);
- } else {
- virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
- g_free(req);
- }
-}
-
-static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIOBlock *s = VIRTIO_BLK(vdev);
- VirtIOBlockReq *req;
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
-
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
- /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start
- * dataplane here instead of waiting for .set_status().
- */
- if (s->dataplane) {
- virtio_blk_data_plane_start(s->dataplane);
- return;
- }
-#endif
-
- while ((req = virtio_blk_get_request(s))) {
- virtio_blk_handle_request(req, &mrb);
- }
-
- virtio_submit_multiwrite(s->bs, &mrb);
-
- /*
- * FIXME: Want to check for completions before returning to guest mode,
- * so cached reads and writes are reported as quickly as possible. But
- * that should be done in the generic block layer.
- */
-}
-
-static void virtio_blk_dma_restart_bh(void *opaque)
-{
- VirtIOBlock *s = opaque;
- VirtIOBlockReq *req = s->rq;
- MultiReqBuffer mrb = {
- .num_writes = 0,
- };
-
- qemu_bh_delete(s->bh);
- s->bh = NULL;
-
- s->rq = NULL;
-
- while (req) {
- virtio_blk_handle_request(req, &mrb);
- req = req->next;
- }
-
- virtio_submit_multiwrite(s->bs, &mrb);
-}
-
-static void virtio_blk_dma_restart_cb(void *opaque, int running,
- RunState state)
-{
- VirtIOBlock *s = opaque;
-
- if (!running) {
- return;
- }
-
- if (!s->bh) {
- s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
- qemu_bh_schedule(s->bh);
- }
-}
-
-static void virtio_blk_reset(VirtIODevice *vdev)
-{
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
- VirtIOBlock *s = VIRTIO_BLK(vdev);
-
- if (s->dataplane) {
- virtio_blk_data_plane_stop(s->dataplane);
- }
-#endif
-
- /*
- * This should cancel pending requests, but can't do nicely until there
- * are per-device request lists.
- */
- bdrv_drain_all();
-}
-
-/* coalesce internal state, copy to pci i/o region 0
- */
-static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
-{
- VirtIOBlock *s = VIRTIO_BLK(vdev);
- struct virtio_blk_config blkcfg;
- uint64_t capacity;
- int blk_size = s->conf->logical_block_size;
-
- bdrv_get_geometry(s->bs, &capacity);
- memset(&blkcfg, 0, sizeof(blkcfg));
- stq_raw(&blkcfg.capacity, capacity);
- stl_raw(&blkcfg.seg_max, 128 - 2);
- stw_raw(&blkcfg.cylinders, s->conf->cyls);
- stl_raw(&blkcfg.blk_size, blk_size);
- stw_raw(&blkcfg.min_io_size, s->conf->min_io_size / blk_size);
- stw_raw(&blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
- blkcfg.heads = s->conf->heads;
- /*
- * We must ensure that the block device capacity is a multiple of
- * the logical block size. If that is not the case, lets use
- * sector_mask to adopt the geometry to have a correct picture.
- * For those devices where the capacity is ok for the given geometry
- * we dont touch the sector value of the geometry, since some devices
- * (like s390 dasd) need a specific value. Here the capacity is already
- * cyls*heads*secs*blk_size and the sector value is not block size
- * divided by 512 - instead it is the amount of blk_size blocks
- * per track (cylinder).
- */
- if (bdrv_getlength(s->bs) / s->conf->heads / s->conf->secs % blk_size) {
- blkcfg.sectors = s->conf->secs & ~s->sector_mask;
- } else {
- blkcfg.sectors = s->conf->secs;
- }
- blkcfg.size_max = 0;
- blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
- blkcfg.alignment_offset = 0;
- blkcfg.wce = bdrv_enable_write_cache(s->bs);
- memcpy(config, &blkcfg, sizeof(struct virtio_blk_config));
-}
-
-static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config)
-{
- VirtIOBlock *s = VIRTIO_BLK(vdev);
- struct virtio_blk_config blkcfg;
-
- memcpy(&blkcfg, config, sizeof(blkcfg));
- bdrv_set_enable_write_cache(s->bs, blkcfg.wce != 0);
-}
-
-static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
-{
- VirtIOBlock *s = VIRTIO_BLK(vdev);
-
- features |= (1 << VIRTIO_BLK_F_SEG_MAX);
- features |= (1 << VIRTIO_BLK_F_GEOMETRY);
- features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
- features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
- features |= (1 << VIRTIO_BLK_F_SCSI);
-
- if (s->blk.config_wce) {
- features |= (1 << VIRTIO_BLK_F_CONFIG_WCE);
- }
- if (bdrv_enable_write_cache(s->bs))
- features |= (1 << VIRTIO_BLK_F_WCE);
-
- if (bdrv_is_read_only(s->bs))
- features |= 1 << VIRTIO_BLK_F_RO;
-
- return features;
-}
-
-static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
-{
- VirtIOBlock *s = VIRTIO_BLK(vdev);
- uint32_t features;
-
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
- if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER |
- VIRTIO_CONFIG_S_DRIVER_OK))) {
- virtio_blk_data_plane_stop(s->dataplane);
- }
-#endif
-
- if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- return;
- }
-
- features = vdev->guest_features;
- bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
-}
-
-static void virtio_blk_save(QEMUFile *f, void *opaque)
-{
- VirtIOBlock *s = opaque;
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
- VirtIOBlockReq *req = s->rq;
-
- virtio_save(vdev, f);
-
- while (req) {
- qemu_put_sbyte(f, 1);
- qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
- req = req->next;
- }
- qemu_put_sbyte(f, 0);
-}
-
-static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
-{
- VirtIOBlock *s = opaque;
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
- int ret;
-
- if (version_id != 2)
- return -EINVAL;
-
- ret = virtio_load(vdev, f);
- if (ret) {
- return ret;
- }
-
- while (qemu_get_sbyte(f)) {
- VirtIOBlockReq *req = virtio_blk_alloc_request(s);
- qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
- req->next = s->rq;
- s->rq = req;
-
- virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
- req->elem.in_num, 1);
- virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
- req->elem.out_num, 0);
- }
-
- return 0;
-}
-
-static void virtio_blk_resize(void *opaque)
-{
- VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
-
- virtio_notify_config(vdev);
-}
-
-static const BlockDevOps virtio_block_ops = {
- .resize_cb = virtio_blk_resize,
-};
-
-void virtio_blk_set_conf(DeviceState *dev, VirtIOBlkConf *blk)
-{
- VirtIOBlock *s = VIRTIO_BLK(dev);
- memcpy(&(s->blk), blk, sizeof(struct VirtIOBlkConf));
-}
-
-static int virtio_blk_device_init(VirtIODevice *vdev)
-{
- DeviceState *qdev = DEVICE(vdev);
- VirtIOBlock *s = VIRTIO_BLK(vdev);
- VirtIOBlkConf *blk = &(s->blk);
- static int virtio_blk_id;
-
- if (!blk->conf.bs) {
- error_report("drive property not set");
- return -1;
- }
- if (!bdrv_is_inserted(blk->conf.bs)) {
- error_report("Device needs media, but drive is empty");
- return -1;
- }
-
- blkconf_serial(&blk->conf, &blk->serial);
- if (blkconf_geometry(&blk->conf, NULL, 65535, 255, 255) < 0) {
- return -1;
- }
-
- virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
- sizeof(struct virtio_blk_config));
-
- vdev->get_config = virtio_blk_update_config;
- vdev->set_config = virtio_blk_set_config;
- vdev->get_features = virtio_blk_get_features;
- vdev->set_status = virtio_blk_set_status;
- vdev->reset = virtio_blk_reset;
- s->bs = blk->conf.bs;
- s->conf = &blk->conf;
- memcpy(&(s->blk), blk, sizeof(struct VirtIOBlkConf));
- s->rq = NULL;
- s->sector_mask = (s->conf->logical_block_size / BDRV_SECTOR_SIZE) - 1;
-
- s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output);
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
- if (!virtio_blk_data_plane_create(vdev, blk, &s->dataplane)) {
- virtio_common_cleanup(vdev);
- return -1;
- }
-#endif
-
- s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
- register_savevm(qdev, "virtio-blk", virtio_blk_id++, 2,
- virtio_blk_save, virtio_blk_load, s);
- bdrv_set_dev_ops(s->bs, &virtio_block_ops, s);
- bdrv_set_buffer_alignment(s->bs, s->conf->logical_block_size);
-
- bdrv_iostatus_enable(s->bs);
-
- add_boot_device_path(s->conf->bootindex, qdev, "/disk@0,0");
- return 0;
-}
-
-static int virtio_blk_device_exit(DeviceState *dev)
-{
- VirtIODevice *vdev = VIRTIO_DEVICE(dev);
- VirtIOBlock *s = VIRTIO_BLK(dev);
-#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
- virtio_blk_data_plane_destroy(s->dataplane);
- s->dataplane = NULL;
-#endif
- qemu_del_vm_change_state_handler(s->change);
- unregister_savevm(dev, "virtio-blk", s);
- blockdev_mark_auto_del(s->bs);
- virtio_common_cleanup(vdev);
- return 0;
-}
-
-static Property virtio_blk_properties[] = {
- DEFINE_VIRTIO_BLK_PROPERTIES(VirtIOBlock, blk),
- DEFINE_PROP_END_OF_LIST(),
-};
-
-static void virtio_blk_class_init(ObjectClass *klass, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
- VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
- dc->exit = virtio_blk_device_exit;
- dc->props = virtio_blk_properties;
- vdc->init = virtio_blk_device_init;
- vdc->get_config = virtio_blk_update_config;
- vdc->set_config = virtio_blk_set_config;
- vdc->get_features = virtio_blk_get_features;
- vdc->set_status = virtio_blk_set_status;
- vdc->reset = virtio_blk_reset;
-}
-
-static const TypeInfo virtio_device_info = {
- .name = TYPE_VIRTIO_BLK,
- .parent = TYPE_VIRTIO_DEVICE,
- .instance_size = sizeof(VirtIOBlock),
- .class_init = virtio_blk_class_init,
-};
-
-static void virtio_register_types(void)
-{
- type_register_static(&virtio_device_info);
-}
-
-type_init(virtio_register_types)
+++ /dev/null
-/*
- * Virtio Network Device
- *
- * Copyright IBM, Corp. 2007
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/iov.h"
-#include "hw/virtio/virtio.h"
-#include "net/net.h"
-#include "net/checksum.h"
-#include "net/tap.h"
-#include "qemu/error-report.h"
-#include "qemu/timer.h"
-#include "hw/virtio/virtio-net.h"
-#include "net/vhost_net.h"
-
-#define VIRTIO_NET_VM_VERSION 11
-
-#define MAC_TABLE_ENTRIES 64
-#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
-
-/*
- * Calculate the number of bytes up to and including the given 'field' of
- * 'container'.
- */
-#define endof(container, field) \
- (offsetof(container, field) + sizeof(((container *)0)->field))
-
-typedef struct VirtIOFeature {
- uint32_t flags;
- size_t end;
-} VirtIOFeature;
-
-static VirtIOFeature feature_sizes[] = {
- {.flags = 1 << VIRTIO_NET_F_MAC,
- .end = endof(struct virtio_net_config, mac)},
- {.flags = 1 << VIRTIO_NET_F_STATUS,
- .end = endof(struct virtio_net_config, status)},
- {.flags = 1 << VIRTIO_NET_F_MQ,
- .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
- {}
-};
-
-static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
-{
- VirtIONet *n = qemu_get_nic_opaque(nc);
-
- return &n->vqs[nc->queue_index];
-}
-
-static int vq2q(int queue_index)
-{
- return queue_index / 2;
-}
-
-/* TODO
- * - we could suppress RX interrupt if we were so inclined.
- */
-
-static VirtIONet *to_virtio_net(VirtIODevice *vdev)
-{
- return (VirtIONet *)vdev;
-}
-
-static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
-{
- VirtIONet *n = to_virtio_net(vdev);
- struct virtio_net_config netcfg;
-
- stw_p(&netcfg.status, n->status);
- stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
- memcpy(netcfg.mac, n->mac, ETH_ALEN);
- memcpy(config, &netcfg, n->config_size);
-}
-
-static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
-{
- VirtIONet *n = to_virtio_net(vdev);
- struct virtio_net_config netcfg = {};
-
- memcpy(&netcfg, config, n->config_size);
-
- if (!(n->vdev.guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR & 1) &&
- memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
- memcpy(n->mac, netcfg.mac, ETH_ALEN);
- qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
- }
-}
-
-static bool virtio_net_started(VirtIONet *n, uint8_t status)
-{
- return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
- (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
-}
-
-static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
-{
- NetClientState *nc = qemu_get_queue(n->nic);
- int queues = n->multiqueue ? n->max_queues : 1;
-
- if (!nc->peer) {
- return;
- }
- if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- return;
- }
-
- if (!tap_get_vhost_net(nc->peer)) {
- return;
- }
-
- if (!!n->vhost_started == virtio_net_started(n, status) &&
- !nc->peer->link_down) {
- return;
- }
- if (!n->vhost_started) {
- int r;
- if (!vhost_net_query(tap_get_vhost_net(nc->peer), &n->vdev)) {
- return;
- }
- n->vhost_started = 1;
- r = vhost_net_start(&n->vdev, n->nic->ncs, queues);
- if (r < 0) {
- error_report("unable to start vhost net: %d: "
- "falling back on userspace virtio", -r);
- n->vhost_started = 0;
- }
- } else {
- vhost_net_stop(&n->vdev, n->nic->ncs, queues);
- n->vhost_started = 0;
- }
-}
-
-static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
-{
- VirtIONet *n = to_virtio_net(vdev);
- VirtIONetQueue *q;
- int i;
- uint8_t queue_status;
-
- virtio_net_vhost_status(n, status);
-
- for (i = 0; i < n->max_queues; i++) {
- q = &n->vqs[i];
-
- if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
- queue_status = 0;
- } else {
- queue_status = status;
- }
-
- if (!q->tx_waiting) {
- continue;
- }
-
- if (virtio_net_started(n, queue_status) && !n->vhost_started) {
- if (q->tx_timer) {
- qemu_mod_timer(q->tx_timer,
- qemu_get_clock_ns(vm_clock) + n->tx_timeout);
- } else {
- qemu_bh_schedule(q->tx_bh);
- }
- } else {
- if (q->tx_timer) {
- qemu_del_timer(q->tx_timer);
- } else {
- qemu_bh_cancel(q->tx_bh);
- }
- }
- }
-}
-
-static void virtio_net_set_link_status(NetClientState *nc)
-{
- VirtIONet *n = qemu_get_nic_opaque(nc);
- uint16_t old_status = n->status;
-
- if (nc->link_down)
- n->status &= ~VIRTIO_NET_S_LINK_UP;
- else
- n->status |= VIRTIO_NET_S_LINK_UP;
-
- if (n->status != old_status)
- virtio_notify_config(&n->vdev);
-
- virtio_net_set_status(&n->vdev, n->vdev.status);
-}
-
-static void virtio_net_reset(VirtIODevice *vdev)
-{
- VirtIONet *n = to_virtio_net(vdev);
-
- /* Reset back to compatibility mode */
- n->promisc = 1;
- n->allmulti = 0;
- n->alluni = 0;
- n->nomulti = 0;
- n->nouni = 0;
- n->nobcast = 0;
- /* multiqueue is disabled by default */
- n->curr_queues = 1;
-
- /* Flush any MAC and VLAN filter table state */
- n->mac_table.in_use = 0;
- n->mac_table.first_multi = 0;
- n->mac_table.multi_overflow = 0;
- n->mac_table.uni_overflow = 0;
- memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
- memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
- memset(n->vlans, 0, MAX_VLAN >> 3);
-}
-
-static void peer_test_vnet_hdr(VirtIONet *n)
-{
- NetClientState *nc = qemu_get_queue(n->nic);
- if (!nc->peer) {
- return;
- }
-
- if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- return;
- }
-
- n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
-}
-
-static int peer_has_vnet_hdr(VirtIONet *n)
-{
- return n->has_vnet_hdr;
-}
-
-static int peer_has_ufo(VirtIONet *n)
-{
- if (!peer_has_vnet_hdr(n))
- return 0;
-
- n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);
-
- return n->has_ufo;
-}
-
-static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
-{
- int i;
- NetClientState *nc;
-
- n->mergeable_rx_bufs = mergeable_rx_bufs;
-
- n->guest_hdr_len = n->mergeable_rx_bufs ?
- sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
-
- for (i = 0; i < n->max_queues; i++) {
- nc = qemu_get_subqueue(n->nic, i);
-
- if (peer_has_vnet_hdr(n) &&
- tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
- tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
- n->host_hdr_len = n->guest_hdr_len;
- }
- }
-}
-
-static int peer_attach(VirtIONet *n, int index)
-{
- NetClientState *nc = qemu_get_subqueue(n->nic, index);
-
- if (!nc->peer) {
- return 0;
- }
-
- if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- return 0;
- }
-
- return tap_enable(nc->peer);
-}
-
-static int peer_detach(VirtIONet *n, int index)
-{
- NetClientState *nc = qemu_get_subqueue(n->nic, index);
-
- if (!nc->peer) {
- return 0;
- }
-
- if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- return 0;
- }
-
- return tap_disable(nc->peer);
-}
-
-static void virtio_net_set_queues(VirtIONet *n)
-{
- int i;
-
- for (i = 0; i < n->max_queues; i++) {
- if (i < n->curr_queues) {
- assert(!peer_attach(n, i));
- } else {
- assert(!peer_detach(n, i));
- }
- }
-}
-
-static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);
-
-static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
-{
- VirtIONet *n = to_virtio_net(vdev);
- NetClientState *nc = qemu_get_queue(n->nic);
-
- features |= (1 << VIRTIO_NET_F_MAC);
-
- if (!peer_has_vnet_hdr(n)) {
- features &= ~(0x1 << VIRTIO_NET_F_CSUM);
- features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
- features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
- features &= ~(0x1 << VIRTIO_NET_F_HOST_ECN);
-
- features &= ~(0x1 << VIRTIO_NET_F_GUEST_CSUM);
- features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO4);
- features &= ~(0x1 << VIRTIO_NET_F_GUEST_TSO6);
- features &= ~(0x1 << VIRTIO_NET_F_GUEST_ECN);
- }
-
- if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
- features &= ~(0x1 << VIRTIO_NET_F_GUEST_UFO);
- features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
- }
-
- if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- return features;
- }
- if (!tap_get_vhost_net(nc->peer)) {
- return features;
- }
- return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
-}
-
-static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
-{
- uint32_t features = 0;
-
- /* Linux kernel 2.6.25. It understood MAC (as everyone must),
- * but also these: */
- features |= (1 << VIRTIO_NET_F_MAC);
- features |= (1 << VIRTIO_NET_F_CSUM);
- features |= (1 << VIRTIO_NET_F_HOST_TSO4);
- features |= (1 << VIRTIO_NET_F_HOST_TSO6);
- features |= (1 << VIRTIO_NET_F_HOST_ECN);
-
- return features;
-}
-
-static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
-{
- VirtIONet *n = to_virtio_net(vdev);
- int i;
-
- virtio_net_set_multiqueue(n, !!(features & (1 << VIRTIO_NET_F_MQ)),
- !!(features & (1 << VIRTIO_NET_F_CTRL_VQ)));
-
- virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));
-
- if (n->has_vnet_hdr) {
- tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
- (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
- (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
- (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
- (features >> VIRTIO_NET_F_GUEST_ECN) & 1,
- (features >> VIRTIO_NET_F_GUEST_UFO) & 1);
- }
-
- for (i = 0; i < n->max_queues; i++) {
- NetClientState *nc = qemu_get_subqueue(n->nic, i);
-
- if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- continue;
- }
- if (!tap_get_vhost_net(nc->peer)) {
- continue;
- }
- vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
- }
-}
-
-static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
- struct iovec *iov, unsigned int iov_cnt)
-{
- uint8_t on;
- size_t s;
-
- s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
- if (s != sizeof(on)) {
- return VIRTIO_NET_ERR;
- }
-
- if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
- n->promisc = on;
- } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
- n->allmulti = on;
- } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
- n->alluni = on;
- } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
- n->nomulti = on;
- } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
- n->nouni = on;
- } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
- n->nobcast = on;
- } else {
- return VIRTIO_NET_ERR;
- }
-
- return VIRTIO_NET_OK;
-}
-
-static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
- struct iovec *iov, unsigned int iov_cnt)
-{
- struct virtio_net_ctrl_mac mac_data;
- size_t s;
-
- if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
- if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
- return VIRTIO_NET_ERR;
- }
- s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
- assert(s == sizeof(n->mac));
- qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
- return VIRTIO_NET_OK;
- }
-
- if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
- return VIRTIO_NET_ERR;
- }
-
- n->mac_table.in_use = 0;
- n->mac_table.first_multi = 0;
- n->mac_table.uni_overflow = 0;
- n->mac_table.multi_overflow = 0;
- memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
-
- s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
- sizeof(mac_data.entries));
- mac_data.entries = ldl_p(&mac_data.entries);
- if (s != sizeof(mac_data.entries)) {
- return VIRTIO_NET_ERR;
- }
- iov_discard_front(&iov, &iov_cnt, s);
-
- if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
- return VIRTIO_NET_ERR;
- }
-
- if (mac_data.entries <= MAC_TABLE_ENTRIES) {
- s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
- mac_data.entries * ETH_ALEN);
- if (s != mac_data.entries * ETH_ALEN) {
- return VIRTIO_NET_ERR;
- }
- n->mac_table.in_use += mac_data.entries;
- } else {
- n->mac_table.uni_overflow = 1;
- }
-
- iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
-
- n->mac_table.first_multi = n->mac_table.in_use;
-
- s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
- sizeof(mac_data.entries));
- mac_data.entries = ldl_p(&mac_data.entries);
- if (s != sizeof(mac_data.entries)) {
- return VIRTIO_NET_ERR;
- }
-
- iov_discard_front(&iov, &iov_cnt, s);
-
- if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
- return VIRTIO_NET_ERR;
- }
-
- if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
- s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
- mac_data.entries * ETH_ALEN);
- if (s != mac_data.entries * ETH_ALEN) {
- return VIRTIO_NET_ERR;
- }
- n->mac_table.in_use += mac_data.entries;
- } else {
- n->mac_table.multi_overflow = 1;
- }
-
- return VIRTIO_NET_OK;
-}
-
-static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
- struct iovec *iov, unsigned int iov_cnt)
-{
- uint16_t vid;
- size_t s;
-
- s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
- vid = lduw_p(&vid);
- if (s != sizeof(vid)) {
- return VIRTIO_NET_ERR;
- }
-
- if (vid >= MAX_VLAN)
- return VIRTIO_NET_ERR;
-
- if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
- n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
- else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
- n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
- else
- return VIRTIO_NET_ERR;
-
- return VIRTIO_NET_OK;
-}
-
-static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
- struct iovec *iov, unsigned int iov_cnt)
-{
- struct virtio_net_ctrl_mq mq;
- size_t s;
- uint16_t queues;
-
- s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
- if (s != sizeof(mq)) {
- return VIRTIO_NET_ERR;
- }
-
- if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
- return VIRTIO_NET_ERR;
- }
-
- queues = lduw_p(&mq.virtqueue_pairs);
-
- if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
- queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
- queues > n->max_queues ||
- !n->multiqueue) {
- return VIRTIO_NET_ERR;
- }
-
- n->curr_queues = queues;
- /* stop the backend before changing the number of queues to avoid handling a
- * disabled queue */
- virtio_net_set_status(&n->vdev, n->vdev.status);
- virtio_net_set_queues(n);
-
- return VIRTIO_NET_OK;
-}
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIONet *n = to_virtio_net(vdev);
- struct virtio_net_ctrl_hdr ctrl;
- virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
- VirtQueueElement elem;
- size_t s;
- struct iovec *iov;
- unsigned int iov_cnt;
-
- while (virtqueue_pop(vq, &elem)) {
- if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
- iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
- error_report("virtio-net ctrl missing headers");
- exit(1);
- }
-
- iov = elem.out_sg;
- iov_cnt = elem.out_num;
- s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
- iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
- if (s != sizeof(ctrl)) {
- status = VIRTIO_NET_ERR;
- } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
- status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
- } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
- status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
- } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
- status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
- } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
- status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
- }
-
- s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
- assert(s == sizeof(status));
-
- virtqueue_push(vq, &elem, sizeof(status));
- virtio_notify(vdev, vq);
- }
-}
-
-/* RX */
-
-static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIONet *n = to_virtio_net(vdev);
- int queue_index = vq2q(virtio_get_queue_index(vq));
-
- qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
-}
-
-static int virtio_net_can_receive(NetClientState *nc)
-{
- VirtIONet *n = qemu_get_nic_opaque(nc);
- VirtIONetQueue *q = virtio_net_get_subqueue(nc);
-
- if (!n->vdev.vm_running) {
- return 0;
- }
-
- if (nc->queue_index >= n->curr_queues) {
- return 0;
- }
-
- if (!virtio_queue_ready(q->rx_vq) ||
- !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- return 0;
- }
-
- return 1;
-}
-
-static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
-{
- VirtIONet *n = q->n;
- if (virtio_queue_empty(q->rx_vq) ||
- (n->mergeable_rx_bufs &&
- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
- virtio_queue_set_notification(q->rx_vq, 1);
-
- /* To avoid a race condition where the guest has made some buffers
- * available after the above check but before notification was
- * enabled, check for available buffers again.
- */
- if (virtio_queue_empty(q->rx_vq) ||
- (n->mergeable_rx_bufs &&
- !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
- return 0;
- }
- }
-
- virtio_queue_set_notification(q->rx_vq, 0);
- return 1;
-}
-
-/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
- * it never finds out that the packets don't have valid checksums. This
- * causes dhclient to get upset. Fedora's carried a patch for ages to
- * fix this with Xen but it hasn't appeared in an upstream release of
- * dhclient yet.
- *
- * To avoid breaking existing guests, we catch udp packets and add
- * checksums. This is terrible but it's better than hacking the guest
- * kernels.
- *
- * N.B. if we introduce a zero-copy API, this operation is no longer free so
- * we should provide a mechanism to disable it to avoid polluting the host
- * cache.
- */
-static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
- uint8_t *buf, size_t size)
-{
- if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
- (size > 27 && size < 1500) && /* normal sized MTU */
- (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
- (buf[23] == 17) && /* ip.protocol == UDP */
- (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
- net_checksum_calculate(buf, size);
- hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
- }
-}
-
-static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
- const void *buf, size_t size)
-{
- if (n->has_vnet_hdr) {
- /* FIXME this cast is evil */
- void *wbuf = (void *)buf;
- work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
- size - n->host_hdr_len);
- iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
- } else {
- struct virtio_net_hdr hdr = {
- .flags = 0,
- .gso_type = VIRTIO_NET_HDR_GSO_NONE
- };
- iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
- }
-}
-
-static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
-{
- static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
- static const uint8_t vlan[] = {0x81, 0x00};
- uint8_t *ptr = (uint8_t *)buf;
- int i;
-
- if (n->promisc)
- return 1;
-
- ptr += n->host_hdr_len;
-
- if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
- int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
- if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
- return 0;
- }
-
- if (ptr[0] & 1) { // multicast
- if (!memcmp(ptr, bcast, sizeof(bcast))) {
- return !n->nobcast;
- } else if (n->nomulti) {
- return 0;
- } else if (n->allmulti || n->mac_table.multi_overflow) {
- return 1;
- }
-
- for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
- if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
- return 1;
- }
- }
- } else { // unicast
- if (n->nouni) {
- return 0;
- } else if (n->alluni || n->mac_table.uni_overflow) {
- return 1;
- } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
- return 1;
- }
-
- for (i = 0; i < n->mac_table.first_multi; i++) {
- if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
- return 1;
- }
- }
- }
-
- return 0;
-}
-
-static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
-{
- VirtIONet *n = qemu_get_nic_opaque(nc);
- VirtIONetQueue *q = virtio_net_get_subqueue(nc);
- struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
- struct virtio_net_hdr_mrg_rxbuf mhdr;
- unsigned mhdr_cnt = 0;
- size_t offset, i, guest_offset;
-
- if (!virtio_net_can_receive(nc)) {
- return -1;
- }
-
- /* hdr_len refers to the header we supply to the guest */
- if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
- return 0;
- }
-
- if (!receive_filter(n, buf, size))
- return size;
-
- offset = i = 0;
-
- while (offset < size) {
- VirtQueueElement elem;
- int len, total;
- const struct iovec *sg = elem.in_sg;
-
- total = 0;
-
- if (virtqueue_pop(q->rx_vq, &elem) == 0) {
- if (i == 0)
- return -1;
- error_report("virtio-net unexpected empty queue: "
- "i %zd mergeable %d offset %zd, size %zd, "
- "guest hdr len %zd, host hdr len %zd guest features 0x%x",
- i, n->mergeable_rx_bufs, offset, size,
- n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
- exit(1);
- }
-
- if (elem.in_num < 1) {
- error_report("virtio-net receive queue contains no in buffers");
- exit(1);
- }
-
- if (i == 0) {
- assert(offset == 0);
- if (n->mergeable_rx_bufs) {
- mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
- sg, elem.in_num,
- offsetof(typeof(mhdr), num_buffers),
- sizeof(mhdr.num_buffers));
- }
-
- receive_header(n, sg, elem.in_num, buf, size);
- offset = n->host_hdr_len;
- total += n->guest_hdr_len;
- guest_offset = n->guest_hdr_len;
- } else {
- guest_offset = 0;
- }
-
- /* copy in packet. ugh */
- len = iov_from_buf(sg, elem.in_num, guest_offset,
- buf + offset, size - offset);
- total += len;
- offset += len;
- /* If buffers can't be merged, at this point we
- * must have consumed the complete packet.
- * Otherwise, drop it. */
- if (!n->mergeable_rx_bufs && offset < size) {
-#if 0
- error_report("virtio-net truncated non-mergeable packet: "
- "i %zd mergeable %d offset %zd, size %zd, "
- "guest hdr len %zd, host hdr len %zd",
- i, n->mergeable_rx_bufs,
- offset, size, n->guest_hdr_len, n->host_hdr_len);
-#endif
- return size;
- }
-
- /* signal other side */
- virtqueue_fill(q->rx_vq, &elem, total, i++);
- }
-
- if (mhdr_cnt) {
- stw_p(&mhdr.num_buffers, i);
- iov_from_buf(mhdr_sg, mhdr_cnt,
- 0,
- &mhdr.num_buffers, sizeof mhdr.num_buffers);
- }
-
- virtqueue_flush(q->rx_vq, i);
- virtio_notify(&n->vdev, q->rx_vq);
-
- return size;
-}
-
-static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
-
-static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
-{
- VirtIONet *n = qemu_get_nic_opaque(nc);
- VirtIONetQueue *q = virtio_net_get_subqueue(nc);
-
- virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
- virtio_notify(&n->vdev, q->tx_vq);
-
- q->async_tx.elem.out_num = q->async_tx.len = 0;
-
- virtio_queue_set_notification(q->tx_vq, 1);
- virtio_net_flush_tx(q);
-}
-
-/* TX */
-static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
-{
- VirtIONet *n = q->n;
- VirtQueueElement elem;
- int32_t num_packets = 0;
- int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
- if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- return num_packets;
- }
-
- assert(n->vdev.vm_running);
-
- if (q->async_tx.elem.out_num) {
- virtio_queue_set_notification(q->tx_vq, 0);
- return num_packets;
- }
-
- while (virtqueue_pop(q->tx_vq, &elem)) {
- ssize_t ret, len;
- unsigned int out_num = elem.out_num;
- struct iovec *out_sg = &elem.out_sg[0];
- struct iovec sg[VIRTQUEUE_MAX_SIZE];
-
- if (out_num < 1) {
- error_report("virtio-net header not in first element");
- exit(1);
- }
-
- /*
- * If host wants to see the guest header as is, we can
- * pass it on unchanged. Otherwise, copy just the parts
- * that host is interested in.
- */
- assert(n->host_hdr_len <= n->guest_hdr_len);
- if (n->host_hdr_len != n->guest_hdr_len) {
- unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
- out_sg, out_num,
- 0, n->host_hdr_len);
- sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
- out_sg, out_num,
- n->guest_hdr_len, -1);
- out_num = sg_num;
- out_sg = sg;
- }
-
- len = n->guest_hdr_len;
-
- ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
- out_sg, out_num, virtio_net_tx_complete);
- if (ret == 0) {
- virtio_queue_set_notification(q->tx_vq, 0);
- q->async_tx.elem = elem;
- q->async_tx.len = len;
- return -EBUSY;
- }
-
- len += ret;
-
- virtqueue_push(q->tx_vq, &elem, 0);
- virtio_notify(&n->vdev, q->tx_vq);
-
- if (++num_packets >= n->tx_burst) {
- break;
- }
- }
- return num_packets;
-}
-
-static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIONet *n = to_virtio_net(vdev);
- VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
-
- /* This happens when device was stopped but VCPU wasn't. */
- if (!n->vdev.vm_running) {
- q->tx_waiting = 1;
- return;
- }
-
- if (q->tx_waiting) {
- virtio_queue_set_notification(vq, 1);
- qemu_del_timer(q->tx_timer);
- q->tx_waiting = 0;
- virtio_net_flush_tx(q);
- } else {
- qemu_mod_timer(q->tx_timer,
- qemu_get_clock_ns(vm_clock) + n->tx_timeout);
- q->tx_waiting = 1;
- virtio_queue_set_notification(vq, 0);
- }
-}
-
-static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIONet *n = to_virtio_net(vdev);
- VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
-
- if (unlikely(q->tx_waiting)) {
- return;
- }
- q->tx_waiting = 1;
- /* This happens when device was stopped but VCPU wasn't. */
- if (!n->vdev.vm_running) {
- return;
- }
- virtio_queue_set_notification(vq, 0);
- qemu_bh_schedule(q->tx_bh);
-}
-
-static void virtio_net_tx_timer(void *opaque)
-{
- VirtIONetQueue *q = opaque;
- VirtIONet *n = q->n;
- assert(n->vdev.vm_running);
-
- q->tx_waiting = 0;
-
- /* Just in case the driver is not ready on more */
- if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
- return;
-
- virtio_queue_set_notification(q->tx_vq, 1);
- virtio_net_flush_tx(q);
-}
-
-static void virtio_net_tx_bh(void *opaque)
-{
- VirtIONetQueue *q = opaque;
- VirtIONet *n = q->n;
- int32_t ret;
-
- assert(n->vdev.vm_running);
-
- q->tx_waiting = 0;
-
- /* Just in case the driver is not ready on more */
- if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
- return;
-
- ret = virtio_net_flush_tx(q);
- if (ret == -EBUSY) {
- return; /* Notification re-enable handled by tx_complete */
- }
-
- /* If we flush a full burst of packets, assume there are
- * more coming and immediately reschedule */
- if (ret >= n->tx_burst) {
- qemu_bh_schedule(q->tx_bh);
- q->tx_waiting = 1;
- return;
- }
-
- /* If less than a full burst, re-enable notification and flush
- * anything that may have come in while we weren't looking. If
- * we find something, assume the guest is still active and reschedule */
- virtio_queue_set_notification(q->tx_vq, 1);
- if (virtio_net_flush_tx(q) > 0) {
- virtio_queue_set_notification(q->tx_vq, 0);
- qemu_bh_schedule(q->tx_bh);
- q->tx_waiting = 1;
- }
-}
-
-static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
-{
- VirtIODevice *vdev = &n->vdev;
- int i, max = multiqueue ? n->max_queues : 1;
-
- n->multiqueue = multiqueue;
-
- for (i = 2; i <= n->max_queues * 2 + 1; i++) {
- virtio_del_queue(vdev, i);
- }
-
- for (i = 1; i < max; i++) {
- n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
- if (n->vqs[i].tx_timer) {
- n->vqs[i].tx_vq =
- virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
- n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
- virtio_net_tx_timer,
- &n->vqs[i]);
- } else {
- n->vqs[i].tx_vq =
- virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
- n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
- }
-
- n->vqs[i].tx_waiting = 0;
- n->vqs[i].n = n;
- }
-
- if (ctrl) {
- n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
- }
-
- virtio_net_set_queues(n);
-}
-
-static void virtio_net_save(QEMUFile *f, void *opaque)
-{
- int i;
- VirtIONet *n = opaque;
-
- /* At this point, backend must be stopped, otherwise
- * it might keep writing to memory. */
- assert(!n->vhost_started);
- virtio_save(&n->vdev, f);
-
- qemu_put_buffer(f, n->mac, ETH_ALEN);
- qemu_put_be32(f, n->vqs[0].tx_waiting);
- qemu_put_be32(f, n->mergeable_rx_bufs);
- qemu_put_be16(f, n->status);
- qemu_put_byte(f, n->promisc);
- qemu_put_byte(f, n->allmulti);
- qemu_put_be32(f, n->mac_table.in_use);
- qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
- qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
- qemu_put_be32(f, n->has_vnet_hdr);
- qemu_put_byte(f, n->mac_table.multi_overflow);
- qemu_put_byte(f, n->mac_table.uni_overflow);
- qemu_put_byte(f, n->alluni);
- qemu_put_byte(f, n->nomulti);
- qemu_put_byte(f, n->nouni);
- qemu_put_byte(f, n->nobcast);
- qemu_put_byte(f, n->has_ufo);
- if (n->max_queues > 1) {
- qemu_put_be16(f, n->max_queues);
- qemu_put_be16(f, n->curr_queues);
- for (i = 1; i < n->curr_queues; i++) {
- qemu_put_be32(f, n->vqs[i].tx_waiting);
- }
- }
-}
-
-static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
-{
- VirtIONet *n = opaque;
- int ret, i, link_down;
-
- if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
- return -EINVAL;
-
- ret = virtio_load(&n->vdev, f);
- if (ret) {
- return ret;
- }
-
- qemu_get_buffer(f, n->mac, ETH_ALEN);
- n->vqs[0].tx_waiting = qemu_get_be32(f);
-
- virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));
-
- if (version_id >= 3)
- n->status = qemu_get_be16(f);
-
- if (version_id >= 4) {
- if (version_id < 8) {
- n->promisc = qemu_get_be32(f);
- n->allmulti = qemu_get_be32(f);
- } else {
- n->promisc = qemu_get_byte(f);
- n->allmulti = qemu_get_byte(f);
- }
- }
-
- if (version_id >= 5) {
- n->mac_table.in_use = qemu_get_be32(f);
- /* MAC_TABLE_ENTRIES may be different from the saved image */
- if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
- qemu_get_buffer(f, n->mac_table.macs,
- n->mac_table.in_use * ETH_ALEN);
- } else if (n->mac_table.in_use) {
- uint8_t *buf = g_malloc0(n->mac_table.in_use);
- qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);
- g_free(buf);
- n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
- n->mac_table.in_use = 0;
- }
- }
-
- if (version_id >= 6)
- qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
-
- if (version_id >= 7) {
- if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
- error_report("virtio-net: saved image requires vnet_hdr=on");
- return -1;
- }
-
- if (n->has_vnet_hdr) {
- tap_set_offload(qemu_get_queue(n->nic)->peer,
- (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
- (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
- (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
- (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN) & 1,
- (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO) & 1);
- }
- }
-
- if (version_id >= 9) {
- n->mac_table.multi_overflow = qemu_get_byte(f);
- n->mac_table.uni_overflow = qemu_get_byte(f);
- }
-
- if (version_id >= 10) {
- n->alluni = qemu_get_byte(f);
- n->nomulti = qemu_get_byte(f);
- n->nouni = qemu_get_byte(f);
- n->nobcast = qemu_get_byte(f);
- }
-
- if (version_id >= 11) {
- if (qemu_get_byte(f) && !peer_has_ufo(n)) {
- error_report("virtio-net: saved image requires TUN_F_UFO support");
- return -1;
- }
- }
-
- if (n->max_queues > 1) {
- if (n->max_queues != qemu_get_be16(f)) {
- error_report("virtio-net: different max_queues ");
- return -1;
- }
-
- n->curr_queues = qemu_get_be16(f);
- for (i = 1; i < n->curr_queues; i++) {
- n->vqs[i].tx_waiting = qemu_get_be32(f);
- }
- }
-
- virtio_net_set_queues(n);
-
- /* Find the first multicast entry in the saved MAC filter */
- for (i = 0; i < n->mac_table.in_use; i++) {
- if (n->mac_table.macs[i * ETH_ALEN] & 1) {
- break;
- }
- }
- n->mac_table.first_multi = i;
-
- /* nc.link_down can't be migrated, so infer link_down according
- * to link status bit in n->status */
- link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
- for (i = 0; i < n->max_queues; i++) {
- qemu_get_subqueue(n->nic, i)->link_down = link_down;
- }
-
- return 0;
-}
-
-static void virtio_net_cleanup(NetClientState *nc)
-{
- VirtIONet *n = qemu_get_nic_opaque(nc);
-
- n->nic = NULL;
-}
-
-static NetClientInfo net_virtio_info = {
- .type = NET_CLIENT_OPTIONS_KIND_NIC,
- .size = sizeof(NICState),
- .can_receive = virtio_net_can_receive,
- .receive = virtio_net_receive,
- .cleanup = virtio_net_cleanup,
- .link_status_changed = virtio_net_set_link_status,
-};
-
-static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
-{
- VirtIONet *n = to_virtio_net(vdev);
- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
- assert(n->vhost_started);
- return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx);
-}
-
-static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
- bool mask)
-{
- VirtIONet *n = to_virtio_net(vdev);
- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
- assert(n->vhost_started);
- vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer),
- vdev, idx, mask);
-}
-
-VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
- virtio_net_conf *net, uint32_t host_features)
-{
- VirtIONet *n;
- int i, config_size = 0;
-
- for (i = 0; feature_sizes[i].flags != 0; i++) {
- if (host_features & feature_sizes[i].flags) {
- config_size = MAX(feature_sizes[i].end, config_size);
- }
- }
-
- n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
- config_size, sizeof(VirtIONet));
-
- n->config_size = config_size;
- n->vdev.get_config = virtio_net_get_config;
- n->vdev.set_config = virtio_net_set_config;
- n->vdev.get_features = virtio_net_get_features;
- n->vdev.set_features = virtio_net_set_features;
- n->vdev.bad_features = virtio_net_bad_features;
- n->vdev.reset = virtio_net_reset;
- n->vdev.set_status = virtio_net_set_status;
- n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
- n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
- n->max_queues = MAX(conf->queues, 1);
- n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
- n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
- n->curr_queues = 1;
- n->vqs[0].n = n;
- n->tx_timeout = net->txtimer;
-
- if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
- error_report("virtio-net: "
- "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
- net->tx);
- error_report("Defaulting to \"bh\"");
- }
-
- if (net->tx && !strcmp(net->tx, "timer")) {
- n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
- virtio_net_handle_tx_timer);
- n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
- &n->vqs[0]);
- } else {
- n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
- virtio_net_handle_tx_bh);
- n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
- }
- n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
- qemu_macaddr_default_if_unset(&conf->macaddr);
- memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
- n->status = VIRTIO_NET_S_LINK_UP;
-
- n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);
- peer_test_vnet_hdr(n);
- if (peer_has_vnet_hdr(n)) {
- for (i = 0; i < n->max_queues; i++) {
- tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
- }
- n->host_hdr_len = sizeof(struct virtio_net_hdr);
- } else {
- n->host_hdr_len = 0;
- }
-
- qemu_format_nic_info_str(qemu_get_queue(n->nic), conf->macaddr.a);
-
- n->vqs[0].tx_waiting = 0;
- n->tx_burst = net->txburst;
- virtio_net_set_mrg_rx_bufs(n, 0);
- n->promisc = 1; /* for compatibility */
-
- n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
-
- n->vlans = g_malloc0(MAX_VLAN >> 3);
-
- n->qdev = dev;
- register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
- virtio_net_save, virtio_net_load, n);
-
- add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");
-
- return &n->vdev;
-}
-
-void virtio_net_exit(VirtIODevice *vdev)
-{
- VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
- int i;
-
- /* This will stop vhost backend if appropriate. */
- virtio_net_set_status(vdev, 0);
-
- unregister_savevm(n->qdev, "virtio-net", n);
-
- g_free(n->mac_table.macs);
- g_free(n->vlans);
-
- for (i = 0; i < n->max_queues; i++) {
- VirtIONetQueue *q = &n->vqs[i];
- NetClientState *nc = qemu_get_subqueue(n->nic, i);
-
- qemu_purge_queued_packets(nc);
-
- if (q->tx_timer) {
- qemu_del_timer(q->tx_timer);
- qemu_free_timer(q->tx_timer);
- } else {
- qemu_bh_delete(q->tx_bh);
- }
- }
-
- g_free(n->vqs);
- qemu_del_nic(n->nic);
- virtio_cleanup(&n->vdev);
-}
+++ /dev/null
-/*
- * Virtio SCSI HBA
- *
- * Copyright IBM, Corp. 2010
- * Copyright Red Hat, Inc. 2011
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Paolo Bonzini <pbonzini@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "hw/virtio/virtio-scsi.h"
-#include "qemu/error-report.h"
-#include <hw/scsi/scsi.h>
-#include <block/scsi.h>
-#include <hw/virtio/virtio-bus.h>
-
-#define VIRTIO_SCSI_VQ_SIZE 128
-#define VIRTIO_SCSI_CDB_SIZE 32
-#define VIRTIO_SCSI_SENSE_SIZE 96
-#define VIRTIO_SCSI_MAX_CHANNEL 0
-#define VIRTIO_SCSI_MAX_TARGET 255
-#define VIRTIO_SCSI_MAX_LUN 16383
-
-/* Response codes */
-#define VIRTIO_SCSI_S_OK 0
-#define VIRTIO_SCSI_S_OVERRUN 1
-#define VIRTIO_SCSI_S_ABORTED 2
-#define VIRTIO_SCSI_S_BAD_TARGET 3
-#define VIRTIO_SCSI_S_RESET 4
-#define VIRTIO_SCSI_S_BUSY 5
-#define VIRTIO_SCSI_S_TRANSPORT_FAILURE 6
-#define VIRTIO_SCSI_S_TARGET_FAILURE 7
-#define VIRTIO_SCSI_S_NEXUS_FAILURE 8
-#define VIRTIO_SCSI_S_FAILURE 9
-#define VIRTIO_SCSI_S_FUNCTION_SUCCEEDED 10
-#define VIRTIO_SCSI_S_FUNCTION_REJECTED 11
-#define VIRTIO_SCSI_S_INCORRECT_LUN 12
-
-/* Controlq type codes. */
-#define VIRTIO_SCSI_T_TMF 0
-#define VIRTIO_SCSI_T_AN_QUERY 1
-#define VIRTIO_SCSI_T_AN_SUBSCRIBE 2
-
-/* Valid TMF subtypes. */
-#define VIRTIO_SCSI_T_TMF_ABORT_TASK 0
-#define VIRTIO_SCSI_T_TMF_ABORT_TASK_SET 1
-#define VIRTIO_SCSI_T_TMF_CLEAR_ACA 2
-#define VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET 3
-#define VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET 4
-#define VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET 5
-#define VIRTIO_SCSI_T_TMF_QUERY_TASK 6
-#define VIRTIO_SCSI_T_TMF_QUERY_TASK_SET 7
-
-/* Events. */
-#define VIRTIO_SCSI_T_EVENTS_MISSED 0x80000000
-#define VIRTIO_SCSI_T_NO_EVENT 0
-#define VIRTIO_SCSI_T_TRANSPORT_RESET 1
-#define VIRTIO_SCSI_T_ASYNC_NOTIFY 2
-#define VIRTIO_SCSI_T_PARAM_CHANGE 3
-
-/* Reasons for transport reset event */
-#define VIRTIO_SCSI_EVT_RESET_HARD 0
-#define VIRTIO_SCSI_EVT_RESET_RESCAN 1
-#define VIRTIO_SCSI_EVT_RESET_REMOVED 2
-
-/* SCSI command request, followed by data-out */
-typedef struct {
- uint8_t lun[8]; /* Logical Unit Number */
- uint64_t tag; /* Command identifier */
- uint8_t task_attr; /* Task attribute */
- uint8_t prio;
- uint8_t crn;
- uint8_t cdb[];
-} QEMU_PACKED VirtIOSCSICmdReq;
-
-/* Response, followed by sense data and data-in */
-typedef struct {
- uint32_t sense_len; /* Sense data length */
- uint32_t resid; /* Residual bytes in data buffer */
- uint16_t status_qualifier; /* Status qualifier */
- uint8_t status; /* Command completion status */
- uint8_t response; /* Response values */
- uint8_t sense[];
-} QEMU_PACKED VirtIOSCSICmdResp;
-
-/* Task Management Request */
-typedef struct {
- uint32_t type;
- uint32_t subtype;
- uint8_t lun[8];
- uint64_t tag;
-} QEMU_PACKED VirtIOSCSICtrlTMFReq;
-
-typedef struct {
- uint8_t response;
-} QEMU_PACKED VirtIOSCSICtrlTMFResp;
-
-/* Asynchronous notification query/subscription */
-typedef struct {
- uint32_t type;
- uint8_t lun[8];
- uint32_t event_requested;
-} QEMU_PACKED VirtIOSCSICtrlANReq;
-
-typedef struct {
- uint32_t event_actual;
- uint8_t response;
-} QEMU_PACKED VirtIOSCSICtrlANResp;
-
-typedef struct {
- uint32_t event;
- uint8_t lun[8];
- uint32_t reason;
-} QEMU_PACKED VirtIOSCSIEvent;
-
-typedef struct {
- uint32_t num_queues;
- uint32_t seg_max;
- uint32_t max_sectors;
- uint32_t cmd_per_lun;
- uint32_t event_info_size;
- uint32_t sense_size;
- uint32_t cdb_size;
- uint16_t max_channel;
- uint16_t max_target;
- uint32_t max_lun;
-} QEMU_PACKED VirtIOSCSIConfig;
-
-typedef struct VirtIOSCSIReq {
- VirtIOSCSI *dev;
- VirtQueue *vq;
- VirtQueueElement elem;
- QEMUSGList qsgl;
- SCSIRequest *sreq;
- union {
- char *buf;
- VirtIOSCSICmdReq *cmd;
- VirtIOSCSICtrlTMFReq *tmf;
- VirtIOSCSICtrlANReq *an;
- } req;
- union {
- char *buf;
- VirtIOSCSICmdResp *cmd;
- VirtIOSCSICtrlTMFResp *tmf;
- VirtIOSCSICtrlANResp *an;
- VirtIOSCSIEvent *event;
- } resp;
-} VirtIOSCSIReq;
-
-static inline int virtio_scsi_get_lun(uint8_t *lun)
-{
- return ((lun[2] << 8) | lun[3]) & 0x3FFF;
-}
-
-static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun)
-{
- if (lun[0] != 1) {
- return NULL;
- }
- if (lun[2] != 0 && !(lun[2] >= 0x40 && lun[2] < 0x80)) {
- return NULL;
- }
- return scsi_device_find(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun));
-}
-
-static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
-{
- VirtIOSCSI *s = req->dev;
- VirtQueue *vq = req->vq;
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
- virtqueue_push(vq, &req->elem, req->qsgl.size + req->elem.in_sg[0].iov_len);
- qemu_sglist_destroy(&req->qsgl);
- if (req->sreq) {
- req->sreq->hba_private = NULL;
- scsi_req_unref(req->sreq);
- }
- g_free(req);
- virtio_notify(vdev, vq);
-}
-
-static void virtio_scsi_bad_req(void)
-{
- error_report("wrong size for virtio-scsi headers");
- exit(1);
-}
-
-static void qemu_sgl_init_external(QEMUSGList *qsgl, struct iovec *sg,
- hwaddr *addr, int num)
-{
- qemu_sglist_init(qsgl, num, &dma_context_memory);
- while (num--) {
- qemu_sglist_add(qsgl, *(addr++), (sg++)->iov_len);
- }
-}
-
-static void virtio_scsi_parse_req(VirtIOSCSI *s, VirtQueue *vq,
- VirtIOSCSIReq *req)
-{
- assert(req->elem.in_num);
- req->vq = vq;
- req->dev = s;
- req->sreq = NULL;
- if (req->elem.out_num) {
- req->req.buf = req->elem.out_sg[0].iov_base;
- }
- req->resp.buf = req->elem.in_sg[0].iov_base;
-
- if (req->elem.out_num > 1) {
- qemu_sgl_init_external(&req->qsgl, &req->elem.out_sg[1],
- &req->elem.out_addr[1],
- req->elem.out_num - 1);
- } else {
- qemu_sgl_init_external(&req->qsgl, &req->elem.in_sg[1],
- &req->elem.in_addr[1],
- req->elem.in_num - 1);
- }
-}
-
-static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq)
-{
- VirtIOSCSIReq *req;
- req = g_malloc(sizeof(*req));
- if (!virtqueue_pop(vq, &req->elem)) {
- g_free(req);
- return NULL;
- }
-
- virtio_scsi_parse_req(s, vq, req);
- return req;
-}
-
-static void virtio_scsi_save_request(QEMUFile *f, SCSIRequest *sreq)
-{
- VirtIOSCSIReq *req = sreq->hba_private;
- uint32_t n = virtio_queue_get_id(req->vq) - 2;
-
- assert(n < req->dev->conf.num_queues);
- qemu_put_be32s(f, &n);
- qemu_put_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
-}
-
-static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
-{
- SCSIBus *bus = sreq->bus;
- VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
- VirtIOSCSIReq *req;
- uint32_t n;
-
- req = g_malloc(sizeof(*req));
- qemu_get_be32s(f, &n);
- assert(n < s->conf.num_queues);
- qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem));
- virtio_scsi_parse_req(s, s->cmd_vqs[n], req);
-
- scsi_req_ref(sreq);
- req->sreq = sreq;
- if (req->sreq->cmd.mode != SCSI_XFER_NONE) {
- int req_mode =
- (req->elem.in_num > 1 ? SCSI_XFER_FROM_DEV : SCSI_XFER_TO_DEV);
-
- assert(req->sreq->cmd.mode == req_mode);
- }
- return req;
-}
-
-static void virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
-{
- SCSIDevice *d = virtio_scsi_device_find(s, req->req.tmf->lun);
- SCSIRequest *r, *next;
- BusChild *kid;
- int target;
-
- /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */
- req->resp.tmf->response = VIRTIO_SCSI_S_OK;
-
- switch (req->req.tmf->subtype) {
- case VIRTIO_SCSI_T_TMF_ABORT_TASK:
- case VIRTIO_SCSI_T_TMF_QUERY_TASK:
- if (!d) {
- goto fail;
- }
- if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) {
- goto incorrect_lun;
- }
- QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
- VirtIOSCSIReq *cmd_req = r->hba_private;
- if (cmd_req && cmd_req->req.cmd->tag == req->req.tmf->tag) {
- break;
- }
- }
- if (r) {
- /*
- * Assert that the request has not been completed yet, we
- * check for it in the loop above.
- */
- assert(r->hba_private);
- if (req->req.tmf->subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) {
- /* "If the specified command is present in the task set, then
- * return a service response set to FUNCTION SUCCEEDED".
- */
- req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
- } else {
- scsi_req_cancel(r);
- }
- }
- break;
-
- case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
- if (!d) {
- goto fail;
- }
- if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) {
- goto incorrect_lun;
- }
- s->resetting++;
- qdev_reset_all(&d->qdev);
- s->resetting--;
- break;
-
- case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
- case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
- case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
- if (!d) {
- goto fail;
- }
- if (d->lun != virtio_scsi_get_lun(req->req.tmf->lun)) {
- goto incorrect_lun;
- }
- QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
- if (r->hba_private) {
- if (req->req.tmf->subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) {
- /* "If there is any command present in the task set, then
- * return a service response set to FUNCTION SUCCEEDED".
- */
- req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
- break;
- } else {
- scsi_req_cancel(r);
- }
- }
- }
- break;
-
- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
- target = req->req.tmf->lun[1];
- s->resetting++;
- QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) {
- d = DO_UPCAST(SCSIDevice, qdev, kid->child);
- if (d->channel == 0 && d->id == target) {
- qdev_reset_all(&d->qdev);
- }
- }
- s->resetting--;
- break;
-
- case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
- default:
- req->resp.tmf->response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
- break;
- }
-
- return;
-
-incorrect_lun:
- req->resp.tmf->response = VIRTIO_SCSI_S_INCORRECT_LUN;
- return;
-
-fail:
- req->resp.tmf->response = VIRTIO_SCSI_S_BAD_TARGET;
-}
-
-static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIOSCSI *s = (VirtIOSCSI *)vdev;
- VirtIOSCSIReq *req;
-
- while ((req = virtio_scsi_pop_req(s, vq))) {
- int out_size, in_size;
- if (req->elem.out_num < 1 || req->elem.in_num < 1) {
- virtio_scsi_bad_req();
- continue;
- }
-
- out_size = req->elem.out_sg[0].iov_len;
- in_size = req->elem.in_sg[0].iov_len;
- if (req->req.tmf->type == VIRTIO_SCSI_T_TMF) {
- if (out_size < sizeof(VirtIOSCSICtrlTMFReq) ||
- in_size < sizeof(VirtIOSCSICtrlTMFResp)) {
- virtio_scsi_bad_req();
- }
- virtio_scsi_do_tmf(s, req);
-
- } else if (req->req.tmf->type == VIRTIO_SCSI_T_AN_QUERY ||
- req->req.tmf->type == VIRTIO_SCSI_T_AN_SUBSCRIBE) {
- if (out_size < sizeof(VirtIOSCSICtrlANReq) ||
- in_size < sizeof(VirtIOSCSICtrlANResp)) {
- virtio_scsi_bad_req();
- }
- req->resp.an->event_actual = 0;
- req->resp.an->response = VIRTIO_SCSI_S_OK;
- }
- virtio_scsi_complete_req(req);
- }
-}
-
-static void virtio_scsi_command_complete(SCSIRequest *r, uint32_t status,
- size_t resid)
-{
- VirtIOSCSIReq *req = r->hba_private;
- uint32_t sense_len;
-
- req->resp.cmd->response = VIRTIO_SCSI_S_OK;
- req->resp.cmd->status = status;
- if (req->resp.cmd->status == GOOD) {
- req->resp.cmd->resid = tswap32(resid);
- } else {
- req->resp.cmd->resid = 0;
- sense_len = scsi_req_get_sense(r, req->resp.cmd->sense,
- VIRTIO_SCSI_SENSE_SIZE);
- req->resp.cmd->sense_len = tswap32(sense_len);
- }
- virtio_scsi_complete_req(req);
-}
-
-static QEMUSGList *virtio_scsi_get_sg_list(SCSIRequest *r)
-{
- VirtIOSCSIReq *req = r->hba_private;
-
- return &req->qsgl;
-}
-
-static void virtio_scsi_request_cancelled(SCSIRequest *r)
-{
- VirtIOSCSIReq *req = r->hba_private;
-
- if (!req) {
- return;
- }
- if (req->dev->resetting) {
- req->resp.cmd->response = VIRTIO_SCSI_S_RESET;
- } else {
- req->resp.cmd->response = VIRTIO_SCSI_S_ABORTED;
- }
- virtio_scsi_complete_req(req);
-}
-
-static void virtio_scsi_fail_cmd_req(VirtIOSCSIReq *req)
-{
- req->resp.cmd->response = VIRTIO_SCSI_S_FAILURE;
- virtio_scsi_complete_req(req);
-}
-
-static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIOSCSI *s = (VirtIOSCSI *)vdev;
- VirtIOSCSIReq *req;
- int n;
-
- while ((req = virtio_scsi_pop_req(s, vq))) {
- SCSIDevice *d;
- int out_size, in_size;
- if (req->elem.out_num < 1 || req->elem.in_num < 1) {
- virtio_scsi_bad_req();
- }
-
- out_size = req->elem.out_sg[0].iov_len;
- in_size = req->elem.in_sg[0].iov_len;
- if (out_size < sizeof(VirtIOSCSICmdReq) + s->cdb_size ||
- in_size < sizeof(VirtIOSCSICmdResp) + s->sense_size) {
- virtio_scsi_bad_req();
- }
-
- if (req->elem.out_num > 1 && req->elem.in_num > 1) {
- virtio_scsi_fail_cmd_req(req);
- continue;
- }
-
- d = virtio_scsi_device_find(s, req->req.cmd->lun);
- if (!d) {
- req->resp.cmd->response = VIRTIO_SCSI_S_BAD_TARGET;
- virtio_scsi_complete_req(req);
- continue;
- }
- req->sreq = scsi_req_new(d, req->req.cmd->tag,
- virtio_scsi_get_lun(req->req.cmd->lun),
- req->req.cmd->cdb, req);
-
- if (req->sreq->cmd.mode != SCSI_XFER_NONE) {
- int req_mode =
- (req->elem.in_num > 1 ? SCSI_XFER_FROM_DEV : SCSI_XFER_TO_DEV);
-
- if (req->sreq->cmd.mode != req_mode ||
- req->sreq->cmd.xfer > req->qsgl.size) {
- req->resp.cmd->response = VIRTIO_SCSI_S_OVERRUN;
- virtio_scsi_complete_req(req);
- continue;
- }
- }
-
- n = scsi_req_enqueue(req->sreq);
- if (n) {
- scsi_req_continue(req->sreq);
- }
- }
-}
-
-static void virtio_scsi_get_config(VirtIODevice *vdev,
- uint8_t *config)
-{
- VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config;
- VirtIOSCSI *s = (VirtIOSCSI *)vdev;
-
- stl_raw(&scsiconf->num_queues, s->conf.num_queues);
- stl_raw(&scsiconf->seg_max, 128 - 2);
- stl_raw(&scsiconf->max_sectors, s->conf.max_sectors);
- stl_raw(&scsiconf->cmd_per_lun, s->conf.cmd_per_lun);
- stl_raw(&scsiconf->event_info_size, sizeof(VirtIOSCSIEvent));
- stl_raw(&scsiconf->sense_size, s->sense_size);
- stl_raw(&scsiconf->cdb_size, s->cdb_size);
- stw_raw(&scsiconf->max_channel, VIRTIO_SCSI_MAX_CHANNEL);
- stw_raw(&scsiconf->max_target, VIRTIO_SCSI_MAX_TARGET);
- stl_raw(&scsiconf->max_lun, VIRTIO_SCSI_MAX_LUN);
-}
-
-static void virtio_scsi_set_config(VirtIODevice *vdev,
- const uint8_t *config)
-{
- VirtIOSCSIConfig *scsiconf = (VirtIOSCSIConfig *)config;
- VirtIOSCSI *s = (VirtIOSCSI *)vdev;
-
- if ((uint32_t) ldl_raw(&scsiconf->sense_size) >= 65536 ||
- (uint32_t) ldl_raw(&scsiconf->cdb_size) >= 256) {
- error_report("bad data written to virtio-scsi configuration space");
- exit(1);
- }
-
- s->sense_size = ldl_raw(&scsiconf->sense_size);
- s->cdb_size = ldl_raw(&scsiconf->cdb_size);
-}
-
-static uint32_t virtio_scsi_get_features(VirtIODevice *vdev,
- uint32_t requested_features)
-{
- return requested_features;
-}
-
-static void virtio_scsi_reset(VirtIODevice *vdev)
-{
- VirtIOSCSI *s = (VirtIOSCSI *)vdev;
-
- s->resetting++;
- qbus_reset_all(&s->bus.qbus);
- s->resetting--;
-
- s->sense_size = VIRTIO_SCSI_SENSE_SIZE;
- s->cdb_size = VIRTIO_SCSI_CDB_SIZE;
- s->events_dropped = false;
-}
-
-/* The device does not have anything to save beyond the virtio data.
- * Request data is saved with callbacks from SCSI devices.
- */
-static void virtio_scsi_save(QEMUFile *f, void *opaque)
-{
- VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
- virtio_save(vdev, f);
-}
-
-static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id)
-{
- VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
- int ret;
-
- ret = virtio_load(vdev, f);
- if (ret) {
- return ret;
- }
- return 0;
-}
-
-static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
- uint32_t event, uint32_t reason)
-{
- VirtIOSCSIReq *req = virtio_scsi_pop_req(s, s->event_vq);
- VirtIOSCSIEvent *evt;
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
- int in_size;
-
- if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- return;
- }
-
- if (!req) {
- s->events_dropped = true;
- return;
- }
-
- if (req->elem.out_num || req->elem.in_num != 1) {
- virtio_scsi_bad_req();
- }
-
- if (s->events_dropped) {
- event |= VIRTIO_SCSI_T_EVENTS_MISSED;
- s->events_dropped = false;
- }
-
- in_size = req->elem.in_sg[0].iov_len;
- if (in_size < sizeof(VirtIOSCSIEvent)) {
- virtio_scsi_bad_req();
- }
-
- evt = req->resp.event;
- memset(evt, 0, sizeof(VirtIOSCSIEvent));
- evt->event = event;
- evt->reason = reason;
- if (!dev) {
- assert(event == VIRTIO_SCSI_T_NO_EVENT);
- } else {
- evt->lun[0] = 1;
- evt->lun[1] = dev->id;
-
- /* Linux wants us to keep the same encoding we use for REPORT LUNS. */
- if (dev->lun >= 256) {
- evt->lun[2] = (dev->lun >> 8) | 0x40;
- }
- evt->lun[3] = dev->lun & 0xFF;
- }
- virtio_scsi_complete_req(req);
-}
-
-static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIOSCSI *s = VIRTIO_SCSI(vdev);
-
- if (s->events_dropped) {
- virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
- }
-}
-
-static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
-{
- VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
- if (((vdev->guest_features >> VIRTIO_SCSI_F_CHANGE) & 1) &&
- dev->type != TYPE_ROM) {
- virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
- sense.asc | (sense.ascq << 8));
- }
-}
-
-static void virtio_scsi_hotplug(SCSIBus *bus, SCSIDevice *dev)
-{
- VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
- if ((vdev->guest_features >> VIRTIO_SCSI_F_HOTPLUG) & 1) {
- virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_TRANSPORT_RESET,
- VIRTIO_SCSI_EVT_RESET_RESCAN);
- }
-}
-
-static void virtio_scsi_hot_unplug(SCSIBus *bus, SCSIDevice *dev)
-{
- VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
- VirtIODevice *vdev = VIRTIO_DEVICE(s);
-
- if ((vdev->guest_features >> VIRTIO_SCSI_F_HOTPLUG) & 1) {
- virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_TRANSPORT_RESET,
- VIRTIO_SCSI_EVT_RESET_REMOVED);
- }
-}
-
-static struct SCSIBusInfo virtio_scsi_scsi_info = {
- .tcq = true,
- .max_channel = VIRTIO_SCSI_MAX_CHANNEL,
- .max_target = VIRTIO_SCSI_MAX_TARGET,
- .max_lun = VIRTIO_SCSI_MAX_LUN,
-
- .complete = virtio_scsi_command_complete,
- .cancel = virtio_scsi_request_cancelled,
- .change = virtio_scsi_change,
- .hotplug = virtio_scsi_hotplug,
- .hot_unplug = virtio_scsi_hot_unplug,
- .get_sg_list = virtio_scsi_get_sg_list,
- .save_request = virtio_scsi_save_request,
- .load_request = virtio_scsi_load_request,
-};
-
-static int virtio_scsi_device_init(VirtIODevice *vdev)
-{
- DeviceState *qdev = DEVICE(vdev);
- VirtIOSCSI *s = VIRTIO_SCSI(vdev);
- static int virtio_scsi_id;
- int i;
-
- virtio_init(VIRTIO_DEVICE(s), "virtio-scsi", VIRTIO_ID_SCSI,
- sizeof(VirtIOSCSIConfig));
-
- s->cmd_vqs = g_malloc0(s->conf.num_queues * sizeof(VirtQueue *));
-
- /* TODO set up vdev function pointers */
- vdev->get_config = virtio_scsi_get_config;
- vdev->set_config = virtio_scsi_set_config;
- vdev->get_features = virtio_scsi_get_features;
- vdev->reset = virtio_scsi_reset;
-
- s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
- virtio_scsi_handle_ctrl);
- s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
- virtio_scsi_handle_event);
- for (i = 0; i < s->conf.num_queues; i++) {
- s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE,
- virtio_scsi_handle_cmd);
- }
-
- scsi_bus_new(&s->bus, qdev, &virtio_scsi_scsi_info);
- if (!qdev->hotplugged) {
- scsi_bus_legacy_handle_cmdline(&s->bus);
- }
-
- register_savevm(qdev, "virtio-scsi", virtio_scsi_id++, 1,
- virtio_scsi_save, virtio_scsi_load, s);
-
- return 0;
-}
-
-static int virtio_scsi_device_exit(DeviceState *qdev)
-{
- VirtIOSCSI *s = VIRTIO_SCSI(qdev);
- VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
-
- unregister_savevm(qdev, "virtio-scsi", s);
- g_free(s->cmd_vqs);
- virtio_common_cleanup(vdev);
- return 0;
-}
-
-static Property virtio_scsi_properties[] = {
- DEFINE_VIRTIO_SCSI_PROPERTIES(VirtIOSCSI, conf),
- DEFINE_PROP_END_OF_LIST(),
-};
-
-static void virtio_scsi_class_init(ObjectClass *klass, void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
- VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
- dc->exit = virtio_scsi_device_exit;
- dc->props = virtio_scsi_properties;
- vdc->init = virtio_scsi_device_init;
- vdc->get_config = virtio_scsi_get_config;
- vdc->set_config = virtio_scsi_set_config;
- vdc->get_features = virtio_scsi_get_features;
- vdc->reset = virtio_scsi_reset;
-}
-
-static const TypeInfo virtio_scsi_info = {
- .name = TYPE_VIRTIO_SCSI,
- .parent = TYPE_VIRTIO_DEVICE,
- .instance_size = sizeof(VirtIOSCSI),
- .class_init = virtio_scsi_class_init,
-};
-
-static void virtio_register_types(void)
-{
- type_register_static(&virtio_scsi_info);
-}
-
-type_init(virtio_register_types)
+++ /dev/null
-/*
- * A bus for connecting virtio serial and console ports
- *
- * Copyright (C) 2009, 2010 Red Hat, Inc.
- *
- * Author(s):
- * Amit Shah <amit.shah@redhat.com>
- *
- * Some earlier parts are:
- * Copyright IBM, Corp. 2008
- * authored by
- * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu/iov.h"
-#include "monitor/monitor.h"
-#include "qemu/queue.h"
-#include "hw/sysbus.h"
-#include "trace.h"
-#include "hw/virtio/virtio-serial.h"
-
-static VirtIOSerialPort *find_port_by_id(VirtIOSerial *vser, uint32_t id)
-{
- VirtIOSerialPort *port;
-
- if (id == VIRTIO_CONSOLE_BAD_ID) {
- return NULL;
- }
-
- QTAILQ_FOREACH(port, &vser->ports, next) {
- if (port->id == id)
- return port;
- }
- return NULL;
-}
-
-static VirtIOSerialPort *find_port_by_vq(VirtIOSerial *vser, VirtQueue *vq)
-{
- VirtIOSerialPort *port;
-
- QTAILQ_FOREACH(port, &vser->ports, next) {
- if (port->ivq == vq || port->ovq == vq)
- return port;
- }
- return NULL;
-}
-
-static bool use_multiport(VirtIOSerial *vser)
-{
- return vser->vdev.guest_features & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
-}
-
-static size_t write_to_port(VirtIOSerialPort *port,
- const uint8_t *buf, size_t size)
-{
- VirtQueueElement elem;
- VirtQueue *vq;
- size_t offset;
-
- vq = port->ivq;
- if (!virtio_queue_ready(vq)) {
- return 0;
- }
-
- offset = 0;
- while (offset < size) {
- size_t len;
-
- if (!virtqueue_pop(vq, &elem)) {
- break;
- }
-
- len = iov_from_buf(elem.in_sg, elem.in_num, 0,
- buf + offset, size - offset);
- offset += len;
-
- virtqueue_push(vq, &elem, len);
- }
-
- virtio_notify(&port->vser->vdev, vq);
- return offset;
-}
-
-static void discard_vq_data(VirtQueue *vq, VirtIODevice *vdev)
-{
- VirtQueueElement elem;
-
- if (!virtio_queue_ready(vq)) {
- return;
- }
- while (virtqueue_pop(vq, &elem)) {
- virtqueue_push(vq, &elem, 0);
- }
- virtio_notify(vdev, vq);
-}
-
-static void do_flush_queued_data(VirtIOSerialPort *port, VirtQueue *vq,
- VirtIODevice *vdev)
-{
- VirtIOSerialPortClass *vsc;
-
- assert(port);
- assert(virtio_queue_ready(vq));
-
- vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
-
- while (!port->throttled) {
- unsigned int i;
-
- /* Pop an elem only if we haven't left off a previous one mid-way */
- if (!port->elem.out_num) {
- if (!virtqueue_pop(vq, &port->elem)) {
- break;
- }
- port->iov_idx = 0;
- port->iov_offset = 0;
- }
-
- for (i = port->iov_idx; i < port->elem.out_num; i++) {
- size_t buf_size;
- ssize_t ret;
-
- buf_size = port->elem.out_sg[i].iov_len - port->iov_offset;
- ret = vsc->have_data(port,
- port->elem.out_sg[i].iov_base
- + port->iov_offset,
- buf_size);
- if (port->throttled) {
- port->iov_idx = i;
- if (ret > 0) {
- port->iov_offset += ret;
- }
- break;
- }
- port->iov_offset = 0;
- }
- if (port->throttled) {
- break;
- }
- virtqueue_push(vq, &port->elem, 0);
- port->elem.out_num = 0;
- }
- virtio_notify(vdev, vq);
-}
-
-static void flush_queued_data(VirtIOSerialPort *port)
-{
- assert(port);
-
- if (!virtio_queue_ready(port->ovq)) {
- return;
- }
- do_flush_queued_data(port, port->ovq, &port->vser->vdev);
-}
-
-static size_t send_control_msg(VirtIOSerial *vser, void *buf, size_t len)
-{
- VirtQueueElement elem;
- VirtQueue *vq;
-
- vq = vser->c_ivq;
- if (!virtio_queue_ready(vq)) {
- return 0;
- }
- if (!virtqueue_pop(vq, &elem)) {
- return 0;
- }
-
- memcpy(elem.in_sg[0].iov_base, buf, len);
-
- virtqueue_push(vq, &elem, len);
- virtio_notify(&vser->vdev, vq);
- return len;
-}
-
-static size_t send_control_event(VirtIOSerial *vser, uint32_t port_id,
- uint16_t event, uint16_t value)
-{
- struct virtio_console_control cpkt;
-
- stl_p(&cpkt.id, port_id);
- stw_p(&cpkt.event, event);
- stw_p(&cpkt.value, value);
-
- trace_virtio_serial_send_control_event(port_id, event, value);
- return send_control_msg(vser, &cpkt, sizeof(cpkt));
-}
-
-/* Functions for use inside qemu to open and read from/write to ports */
-int virtio_serial_open(VirtIOSerialPort *port)
-{
- /* Don't allow opening an already-open port */
- if (port->host_connected) {
- return 0;
- }
- /* Send port open notification to the guest */
- port->host_connected = true;
- send_control_event(port->vser, port->id, VIRTIO_CONSOLE_PORT_OPEN, 1);
-
- return 0;
-}
-
-int virtio_serial_close(VirtIOSerialPort *port)
-{
- port->host_connected = false;
- /*
- * If there's any data the guest sent which the app didn't
- * consume, reset the throttling flag and discard the data.
- */
- port->throttled = false;
- discard_vq_data(port->ovq, &port->vser->vdev);
-
- send_control_event(port->vser, port->id, VIRTIO_CONSOLE_PORT_OPEN, 0);
-
- return 0;
-}
-
-/* Individual ports/apps call this function to write to the guest. */
-ssize_t virtio_serial_write(VirtIOSerialPort *port, const uint8_t *buf,
- size_t size)
-{
- if (!port || !port->host_connected || !port->guest_connected) {
- return 0;
- }
- return write_to_port(port, buf, size);
-}
-
-/*
- * Readiness of the guest to accept data on a port.
- * Returns max. data the guest can receive
- */
-size_t virtio_serial_guest_ready(VirtIOSerialPort *port)
-{
- VirtQueue *vq = port->ivq;
- unsigned int bytes;
-
- if (!virtio_queue_ready(vq) ||
- !(port->vser->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) ||
- virtio_queue_empty(vq)) {
- return 0;
- }
- if (use_multiport(port->vser) && !port->guest_connected) {
- return 0;
- }
- virtqueue_get_avail_bytes(vq, &bytes, NULL, 4096, 0);
- return bytes;
-}
-
-static void flush_queued_data_bh(void *opaque)
-{
- VirtIOSerialPort *port = opaque;
-
- flush_queued_data(port);
-}
-
-void virtio_serial_throttle_port(VirtIOSerialPort *port, bool throttle)
-{
- if (!port) {
- return;
- }
-
- trace_virtio_serial_throttle_port(port->id, throttle);
- port->throttled = throttle;
- if (throttle) {
- return;
- }
- qemu_bh_schedule(port->bh);
-}
-
-/* Guest wants to notify us of some event */
-static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len)
-{
- struct VirtIOSerialPort *port;
- VirtIOSerialPortClass *vsc;
- struct virtio_console_control cpkt, *gcpkt;
- uint8_t *buffer;
- size_t buffer_len;
-
- gcpkt = buf;
-
- if (len < sizeof(cpkt)) {
- /* The guest sent an invalid control packet */
- return;
- }
-
- cpkt.event = lduw_p(&gcpkt->event);
- cpkt.value = lduw_p(&gcpkt->value);
-
- trace_virtio_serial_handle_control_message(cpkt.event, cpkt.value);
-
- if (cpkt.event == VIRTIO_CONSOLE_DEVICE_READY) {
- if (!cpkt.value) {
- error_report("virtio-serial-bus: Guest failure in adding device %s",
- vser->bus.qbus.name);
- return;
- }
- /*
- * The device is up, we can now tell the device about all the
- * ports we have here.
- */
- QTAILQ_FOREACH(port, &vser->ports, next) {
- send_control_event(vser, port->id, VIRTIO_CONSOLE_PORT_ADD, 1);
- }
- return;
- }
-
- port = find_port_by_id(vser, ldl_p(&gcpkt->id));
- if (!port) {
- error_report("virtio-serial-bus: Unexpected port id %u for device %s",
- ldl_p(&gcpkt->id), vser->bus.qbus.name);
- return;
- }
-
- trace_virtio_serial_handle_control_message_port(port->id);
-
- vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
-
- switch(cpkt.event) {
- case VIRTIO_CONSOLE_PORT_READY:
- if (!cpkt.value) {
- error_report("virtio-serial-bus: Guest failure in adding port %u for device %s",
- port->id, vser->bus.qbus.name);
- break;
- }
- /*
- * Now that we know the guest asked for the port name, we're
- * sure the guest has initialised whatever state is necessary
- * for this port. Now's a good time to let the guest know if
- * this port is a console port so that the guest can hook it
- * up to hvc.
- */
- if (vsc->is_console) {
- send_control_event(vser, port->id, VIRTIO_CONSOLE_CONSOLE_PORT, 1);
- }
-
- if (port->name) {
- stl_p(&cpkt.id, port->id);
- stw_p(&cpkt.event, VIRTIO_CONSOLE_PORT_NAME);
- stw_p(&cpkt.value, 1);
-
- buffer_len = sizeof(cpkt) + strlen(port->name) + 1;
- buffer = g_malloc(buffer_len);
-
- memcpy(buffer, &cpkt, sizeof(cpkt));
- memcpy(buffer + sizeof(cpkt), port->name, strlen(port->name));
- buffer[buffer_len - 1] = 0;
-
- send_control_msg(vser, buffer, buffer_len);
- g_free(buffer);
- }
-
- if (port->host_connected) {
- send_control_event(vser, port->id, VIRTIO_CONSOLE_PORT_OPEN, 1);
- }
-
- /*
- * When the guest has asked us for this information it means
- * the guest is all setup and has its virtqueues
- * initialised. If some app is interested in knowing about
- * this event, let it know.
- */
- if (vsc->guest_ready) {
- vsc->guest_ready(port);
- }
- break;
-
- case VIRTIO_CONSOLE_PORT_OPEN:
- port->guest_connected = cpkt.value;
- if (vsc->set_guest_connected) {
- /* Send the guest opened notification if an app is interested */
- vsc->set_guest_connected(port, cpkt.value);
- }
- break;
- }
-}
-
-static void control_in(VirtIODevice *vdev, VirtQueue *vq)
-{
-}
-
-static void control_out(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtQueueElement elem;
- VirtIOSerial *vser;
- uint8_t *buf;
- size_t len;
-
- vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
-
- len = 0;
- buf = NULL;
- while (virtqueue_pop(vq, &elem)) {
- size_t cur_len;
-
- cur_len = iov_size(elem.out_sg, elem.out_num);
- /*
- * Allocate a new buf only if we didn't have one previously or
- * if the size of the buf differs
- */
- if (cur_len > len) {
- g_free(buf);
-
- buf = g_malloc(cur_len);
- len = cur_len;
- }
- iov_to_buf(elem.out_sg, elem.out_num, 0, buf, cur_len);
-
- handle_control_message(vser, buf, cur_len);
- virtqueue_push(vq, &elem, 0);
- }
- g_free(buf);
- virtio_notify(vdev, vq);
-}
-
-/* Guest wrote something to some port. */
-static void handle_output(VirtIODevice *vdev, VirtQueue *vq)
-{
- VirtIOSerial *vser;
- VirtIOSerialPort *port;
-
- vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
- port = find_port_by_vq(vser, vq);
-
- if (!port || !port->host_connected) {
- discard_vq_data(vq, vdev);
- return;
- }
-
- if (!port->throttled) {
- do_flush_queued_data(port, vq, vdev);
- return;
- }
-}
-
-static void handle_input(VirtIODevice *vdev, VirtQueue *vq)
-{
-}
-
-static uint32_t get_features(VirtIODevice *vdev, uint32_t features)
-{
- VirtIOSerial *vser;
-
- vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
-
- if (vser->bus.max_nr_ports > 1) {
- features |= (1 << VIRTIO_CONSOLE_F_MULTIPORT);
- }
- return features;
-}
-
-/* Guest requested config info */
-static void get_config(VirtIODevice *vdev, uint8_t *config_data)
-{
- VirtIOSerial *vser;
-
- vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
- memcpy(config_data, &vser->config, sizeof(struct virtio_console_config));
-}
-
-static void set_config(VirtIODevice *vdev, const uint8_t *config_data)
-{
- struct virtio_console_config config;
-
- memcpy(&config, config_data, sizeof(config));
-}
-
-static void guest_reset(VirtIOSerial *vser)
-{
- VirtIOSerialPort *port;
- VirtIOSerialPortClass *vsc;
-
- QTAILQ_FOREACH(port, &vser->ports, next) {
- vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
- if (port->guest_connected) {
- port->guest_connected = false;
- if (vsc->set_guest_connected) {
- vsc->set_guest_connected(port, false);
- }
- }
- }
-}
-
-static void set_status(VirtIODevice *vdev, uint8_t status)
-{
- VirtIOSerial *vser;
- VirtIOSerialPort *port;
-
- vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
- port = find_port_by_id(vser, 0);
-
- if (port && !use_multiport(port->vser)
- && (status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- /*
- * Non-multiport guests won't be able to tell us guest
- * open/close status. Such guests can only have a port at id
- * 0, so set guest_connected for such ports as soon as guest
- * is up.
- */
- port->guest_connected = true;
- }
- if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- guest_reset(vser);
- }
-}
-
-static void vser_reset(VirtIODevice *vdev)
-{
- VirtIOSerial *vser;
-
- vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
- guest_reset(vser);
-}
-
-static void virtio_serial_save(QEMUFile *f, void *opaque)
-{
- VirtIOSerial *s = opaque;
- VirtIOSerialPort *port;
- uint32_t nr_active_ports;
- unsigned int i, max_nr_ports;
-
- /* The virtio device */
- virtio_save(&s->vdev, f);
-
- /* The config space */
- qemu_put_be16s(f, &s->config.cols);
- qemu_put_be16s(f, &s->config.rows);
-
- qemu_put_be32s(f, &s->config.max_nr_ports);
-
- /* The ports map */
- max_nr_ports = tswap32(s->config.max_nr_ports);
- for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
- qemu_put_be32s(f, &s->ports_map[i]);
- }
-
- /* Ports */
-
- nr_active_ports = 0;
- QTAILQ_FOREACH(port, &s->ports, next) {
- nr_active_ports++;
- }
-
- qemu_put_be32s(f, &nr_active_ports);
-
- /*
- * Items in struct VirtIOSerialPort.
- */
- QTAILQ_FOREACH(port, &s->ports, next) {
- uint32_t elem_popped;
-
- qemu_put_be32s(f, &port->id);
- qemu_put_byte(f, port->guest_connected);
- qemu_put_byte(f, port->host_connected);
-
- elem_popped = 0;
- if (port->elem.out_num) {
- elem_popped = 1;
- }
- qemu_put_be32s(f, &elem_popped);
- if (elem_popped) {
- qemu_put_be32s(f, &port->iov_idx);
- qemu_put_be64s(f, &port->iov_offset);
-
- qemu_put_buffer(f, (unsigned char *)&port->elem,
- sizeof(port->elem));
- }
- }
-}
-
-static void virtio_serial_post_load_timer_cb(void *opaque)
-{
- uint32_t i;
- VirtIOSerial *s = opaque;
- VirtIOSerialPort *port;
- uint8_t host_connected;
- VirtIOSerialPortClass *vsc;
-
- if (!s->post_load) {
- return;
- }
- for (i = 0 ; i < s->post_load->nr_active_ports; ++i) {
- port = s->post_load->connected[i].port;
- host_connected = s->post_load->connected[i].host_connected;
- if (host_connected != port->host_connected) {
- /*
- * We have to let the guest know of the host connection
- * status change
- */
- send_control_event(s, port->id, VIRTIO_CONSOLE_PORT_OPEN,
- port->host_connected);
- }
- vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
- if (vsc->set_guest_connected) {
- vsc->set_guest_connected(port, port->guest_connected);
- }
- }
- g_free(s->post_load->connected);
- qemu_free_timer(s->post_load->timer);
- g_free(s->post_load);
- s->post_load = NULL;
-}
-
-static int fetch_active_ports_list(QEMUFile *f, int version_id,
- VirtIOSerial *s, uint32_t nr_active_ports)
-{
- uint32_t i;
-
- s->post_load = g_malloc0(sizeof(*s->post_load));
- s->post_load->nr_active_ports = nr_active_ports;
- s->post_load->connected =
- g_malloc0(sizeof(*s->post_load->connected) * nr_active_ports);
-
- s->post_load->timer = qemu_new_timer_ns(vm_clock,
- virtio_serial_post_load_timer_cb,
- s);
-
- /* Items in struct VirtIOSerialPort */
- for (i = 0; i < nr_active_ports; i++) {
- VirtIOSerialPort *port;
- uint32_t id;
-
- id = qemu_get_be32(f);
- port = find_port_by_id(s, id);
- if (!port) {
- return -EINVAL;
- }
-
- port->guest_connected = qemu_get_byte(f);
- s->post_load->connected[i].port = port;
- s->post_load->connected[i].host_connected = qemu_get_byte(f);
-
- if (version_id > 2) {
- uint32_t elem_popped;
-
- qemu_get_be32s(f, &elem_popped);
- if (elem_popped) {
- qemu_get_be32s(f, &port->iov_idx);
- qemu_get_be64s(f, &port->iov_offset);
-
- qemu_get_buffer(f, (unsigned char *)&port->elem,
- sizeof(port->elem));
- virtqueue_map_sg(port->elem.in_sg, port->elem.in_addr,
- port->elem.in_num, 1);
- virtqueue_map_sg(port->elem.out_sg, port->elem.out_addr,
- port->elem.out_num, 1);
-
- /*
- * Port was throttled on source machine. Let's
- * unthrottle it here so data starts flowing again.
- */
- virtio_serial_throttle_port(port, false);
- }
- }
- }
- qemu_mod_timer(s->post_load->timer, 1);
- return 0;
-}
-
-static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id)
-{
- VirtIOSerial *s = opaque;
- uint32_t max_nr_ports, nr_active_ports, ports_map;
- unsigned int i;
- int ret;
-
- if (version_id > 3) {
- return -EINVAL;
- }
-
- /* The virtio device */
- ret = virtio_load(&s->vdev, f);
- if (ret) {
- return ret;
- }
-
- if (version_id < 2) {
- return 0;
- }
-
- /* The config space */
- qemu_get_be16s(f, &s->config.cols);
- qemu_get_be16s(f, &s->config.rows);
-
- qemu_get_be32s(f, &max_nr_ports);
- tswap32s(&max_nr_ports);
- if (max_nr_ports > tswap32(s->config.max_nr_ports)) {
- /* Source could have had more ports than us. Fail migration. */
- return -EINVAL;
- }
-
- for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
- qemu_get_be32s(f, &ports_map);
-
- if (ports_map != s->ports_map[i]) {
- /*
- * Ports active on source and destination don't
- * match. Fail migration.
- */
- return -EINVAL;
- }
- }
-
- qemu_get_be32s(f, &nr_active_ports);
-
- if (nr_active_ports) {
- ret = fetch_active_ports_list(f, version_id, s, nr_active_ports);
- if (ret) {
- return ret;
- }
- }
- return 0;
-}
-
-static void virtser_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent);
-
-static Property virtser_props[] = {
- DEFINE_PROP_UINT32("nr", VirtIOSerialPort, id, VIRTIO_CONSOLE_BAD_ID),
- DEFINE_PROP_STRING("name", VirtIOSerialPort, name),
- DEFINE_PROP_END_OF_LIST()
-};
-
-#define TYPE_VIRTIO_SERIAL_BUS "virtio-serial-bus"
-#define VIRTIO_SERIAL_BUS(obj) \
- OBJECT_CHECK(VirtIOSerialBus, (obj), TYPE_VIRTIO_SERIAL_BUS)
-
-static void virtser_bus_class_init(ObjectClass *klass, void *data)
-{
- BusClass *k = BUS_CLASS(klass);
- k->print_dev = virtser_bus_dev_print;
-}
-
-static const TypeInfo virtser_bus_info = {
- .name = TYPE_VIRTIO_SERIAL_BUS,
- .parent = TYPE_BUS,
- .instance_size = sizeof(VirtIOSerialBus),
- .class_init = virtser_bus_class_init,
-};
-
-static void virtser_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent)
-{
- VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, qdev);
-
- monitor_printf(mon, "%*sport %d, guest %s, host %s, throttle %s\n",
- indent, "", port->id,
- port->guest_connected ? "on" : "off",
- port->host_connected ? "on" : "off",
- port->throttled ? "on" : "off");
-}
-
-/* This function is only used if a port id is not provided by the user */
-static uint32_t find_free_port_id(VirtIOSerial *vser)
-{
- unsigned int i, max_nr_ports;
-
- max_nr_ports = tswap32(vser->config.max_nr_ports);
- for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
- uint32_t map, bit;
-
- map = vser->ports_map[i];
- bit = ffs(~map);
- if (bit) {
- return (bit - 1) + i * 32;
- }
- }
- return VIRTIO_CONSOLE_BAD_ID;
-}
-
-static void mark_port_added(VirtIOSerial *vser, uint32_t port_id)
-{
- unsigned int i;
-
- i = port_id / 32;
- vser->ports_map[i] |= 1U << (port_id % 32);
-}
-
-static void add_port(VirtIOSerial *vser, uint32_t port_id)
-{
- mark_port_added(vser, port_id);
- send_control_event(vser, port_id, VIRTIO_CONSOLE_PORT_ADD, 1);
-}
-
-static void remove_port(VirtIOSerial *vser, uint32_t port_id)
-{
- VirtIOSerialPort *port;
- unsigned int i;
-
- i = port_id / 32;
- vser->ports_map[i] &= ~(1U << (port_id % 32));
-
- port = find_port_by_id(vser, port_id);
- /*
- * This function is only called from qdev's unplug callback; if we
- * get a NULL port here, we're in trouble.
- */
- assert(port);
-
- /* Flush out any unconsumed buffers first */
- discard_vq_data(port->ovq, &port->vser->vdev);
-
- send_control_event(vser, port->id, VIRTIO_CONSOLE_PORT_REMOVE, 1);
-}
-
-static int virtser_port_qdev_init(DeviceState *qdev)
-{
- VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, qdev);
- VirtIOSerialPortClass *vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
- VirtIOSerialBus *bus = DO_UPCAST(VirtIOSerialBus, qbus, qdev->parent_bus);
- int ret, max_nr_ports;
- bool plugging_port0;
-
- port->vser = bus->vser;
- port->bh = qemu_bh_new(flush_queued_data_bh, port);
-
- assert(vsc->have_data);
-
- /*
- * Is the first console port we're seeing? If so, put it up at
- * location 0. This is done for backward compatibility (old
- * kernel, new qemu).
- */
- plugging_port0 = vsc->is_console && !find_port_by_id(port->vser, 0);
-
- if (find_port_by_id(port->vser, port->id)) {
- error_report("virtio-serial-bus: A port already exists at id %u",
- port->id);
- return -1;
- }
-
- if (port->id == VIRTIO_CONSOLE_BAD_ID) {
- if (plugging_port0) {
- port->id = 0;
- } else {
- port->id = find_free_port_id(port->vser);
- if (port->id == VIRTIO_CONSOLE_BAD_ID) {
- error_report("virtio-serial-bus: Maximum port limit for this device reached");
- return -1;
- }
- }
- }
-
- max_nr_ports = tswap32(port->vser->config.max_nr_ports);
- if (port->id >= max_nr_ports) {
- error_report("virtio-serial-bus: Out-of-range port id specified, max. allowed: %u",
- max_nr_ports - 1);
- return -1;
- }
-
- ret = vsc->init(port);
- if (ret) {
- return ret;
- }
-
- port->elem.out_num = 0;
-
- QTAILQ_INSERT_TAIL(&port->vser->ports, port, next);
- port->ivq = port->vser->ivqs[port->id];
- port->ovq = port->vser->ovqs[port->id];
-
- add_port(port->vser, port->id);
-
- /* Send an update to the guest about this new port added */
- virtio_notify_config(&port->vser->vdev);
-
- return ret;
-}
-
-static int virtser_port_qdev_exit(DeviceState *qdev)
-{
- VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, qdev);
- VirtIOSerialPortClass *vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port);
- VirtIOSerial *vser = port->vser;
-
- qemu_bh_delete(port->bh);
- remove_port(port->vser, port->id);
-
- QTAILQ_REMOVE(&vser->ports, port, next);
-
- if (vsc->exit) {
- vsc->exit(port);
- }
- return 0;
-}
-
-VirtIODevice *virtio_serial_init(DeviceState *dev, virtio_serial_conf *conf)
-{
- VirtIOSerial *vser;
- VirtIODevice *vdev;
- uint32_t i, max_supported_ports;
-
- if (!conf->max_virtserial_ports)
- return NULL;
-
- /* Each port takes 2 queues, and one pair is for the control queue */
- max_supported_ports = VIRTIO_PCI_QUEUE_MAX / 2 - 1;
-
- if (conf->max_virtserial_ports > max_supported_ports) {
- error_report("maximum ports supported: %u", max_supported_ports);
- return NULL;
- }
-
- vdev = virtio_common_init("virtio-serial", VIRTIO_ID_CONSOLE,
- sizeof(struct virtio_console_config),
- sizeof(VirtIOSerial));
-
- vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
-
- /* Spawn a new virtio-serial bus on which the ports will ride as devices */
- qbus_create_inplace(&vser->bus.qbus, TYPE_VIRTIO_SERIAL_BUS, dev, NULL);
- vser->bus.qbus.allow_hotplug = 1;
- vser->bus.vser = vser;
- QTAILQ_INIT(&vser->ports);
-
- vser->bus.max_nr_ports = conf->max_virtserial_ports;
- vser->ivqs = g_malloc(conf->max_virtserial_ports * sizeof(VirtQueue *));
- vser->ovqs = g_malloc(conf->max_virtserial_ports * sizeof(VirtQueue *));
-
- /* Add a queue for host to guest transfers for port 0 (backward compat) */
- vser->ivqs[0] = virtio_add_queue(vdev, 128, handle_input);
- /* Add a queue for guest to host transfers for port 0 (backward compat) */
- vser->ovqs[0] = virtio_add_queue(vdev, 128, handle_output);
-
- /* TODO: host to guest notifications can get dropped
- * if the queue fills up. Implement queueing in host,
- * this might also make it possible to reduce the control
- * queue size: as guest preposts buffers there,
- * this will save 4Kbyte of guest memory per entry. */
-
- /* control queue: host to guest */
- vser->c_ivq = virtio_add_queue(vdev, 32, control_in);
- /* control queue: guest to host */
- vser->c_ovq = virtio_add_queue(vdev, 32, control_out);
-
- for (i = 1; i < vser->bus.max_nr_ports; i++) {
- /* Add a per-port queue for host to guest transfers */
- vser->ivqs[i] = virtio_add_queue(vdev, 128, handle_input);
- /* Add a per-per queue for guest to host transfers */
- vser->ovqs[i] = virtio_add_queue(vdev, 128, handle_output);
- }
-
- vser->config.max_nr_ports = tswap32(conf->max_virtserial_ports);
- vser->ports_map = g_malloc0(((conf->max_virtserial_ports + 31) / 32)
- * sizeof(vser->ports_map[0]));
- /*
- * Reserve location 0 for a console port for backward compat
- * (old kernel, new qemu)
- */
- mark_port_added(vser, 0);
-
- vser->vdev.get_features = get_features;
- vser->vdev.get_config = get_config;
- vser->vdev.set_config = set_config;
- vser->vdev.set_status = set_status;
- vser->vdev.reset = vser_reset;
-
- vser->qdev = dev;
-
- vser->post_load = NULL;
-
- /*
- * Register for the savevm section with the virtio-console name
- * to preserve backward compat
- */
- register_savevm(dev, "virtio-console", -1, 3, virtio_serial_save,
- virtio_serial_load, vser);
-
- return vdev;
-}
-
-void virtio_serial_exit(VirtIODevice *vdev)
-{
- VirtIOSerial *vser = DO_UPCAST(VirtIOSerial, vdev, vdev);
-
- unregister_savevm(vser->qdev, "virtio-console", vser);
-
- g_free(vser->ivqs);
- g_free(vser->ovqs);
- g_free(vser->ports_map);
- if (vser->post_load) {
- g_free(vser->post_load->connected);
- qemu_del_timer(vser->post_load->timer);
- qemu_free_timer(vser->post_load->timer);
- g_free(vser->post_load);
- }
- virtio_cleanup(vdev);
-}
-
-static void virtio_serial_port_class_init(ObjectClass *klass, void *data)
-{
- DeviceClass *k = DEVICE_CLASS(klass);
- k->init = virtser_port_qdev_init;
- k->bus_type = TYPE_VIRTIO_SERIAL_BUS;
- k->exit = virtser_port_qdev_exit;
- k->unplug = qdev_simple_unplug_cb;
- k->props = virtser_props;
-}
-
-static const TypeInfo virtio_serial_port_type_info = {
- .name = TYPE_VIRTIO_SERIAL_PORT,
- .parent = TYPE_DEVICE,
- .instance_size = sizeof(VirtIOSerialPort),
- .abstract = true,
- .class_size = sizeof(VirtIOSerialPortClass),
- .class_init = virtio_serial_port_class_init,
-};
-
-static void virtio_serial_register_types(void)
-{
- type_register_static(&virtser_bus_info);
- type_register_static(&virtio_serial_port_type_info);
-}
-
-type_init(virtio_serial_register_types)
+++ /dev/null
-/*
- * Virtio Support
- *
- * Copyright IBM, Corp. 2007
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include <inttypes.h>
-
-#include "trace.h"
-#include "qemu/error-report.h"
-#include "hw/virtio/virtio.h"
-#include "qemu/atomic.h"
-#include "hw/virtio/virtio-bus.h"
-
-/* The alignment to use between consumer and producer parts of vring.
- * x86 pagesize again. */
-#define VIRTIO_PCI_VRING_ALIGN 4096
-
-typedef struct VRingDesc
-{
- uint64_t addr;
- uint32_t len;
- uint16_t flags;
- uint16_t next;
-} VRingDesc;
-
-typedef struct VRingAvail
-{
- uint16_t flags;
- uint16_t idx;
- uint16_t ring[0];
-} VRingAvail;
-
-typedef struct VRingUsedElem
-{
- uint32_t id;
- uint32_t len;
-} VRingUsedElem;
-
-typedef struct VRingUsed
-{
- uint16_t flags;
- uint16_t idx;
- VRingUsedElem ring[0];
-} VRingUsed;
-
-typedef struct VRing
-{
- unsigned int num;
- hwaddr desc;
- hwaddr avail;
- hwaddr used;
-} VRing;
-
-struct VirtQueue
-{
- VRing vring;
- hwaddr pa;
- uint16_t last_avail_idx;
- /* Last used index value we have signalled on */
- uint16_t signalled_used;
-
- /* Last used index value we have signalled on */
- bool signalled_used_valid;
-
- /* Notification enabled? */
- bool notification;
-
- uint16_t queue_index;
-
- int inuse;
-
- uint16_t vector;
- void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
- VirtIODevice *vdev;
- EventNotifier guest_notifier;
- EventNotifier host_notifier;
-};
-
-/* virt queue functions */
-static void virtqueue_init(VirtQueue *vq)
-{
- hwaddr pa = vq->pa;
-
- vq->vring.desc = pa;
- vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
- vq->vring.used = vring_align(vq->vring.avail +
- offsetof(VRingAvail, ring[vq->vring.num]),
- VIRTIO_PCI_VRING_ALIGN);
-}
-
-static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
-{
- hwaddr pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
- return ldq_phys(pa);
-}
-
-static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
-{
- hwaddr pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
- return ldl_phys(pa);
-}
-
-static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
-{
- hwaddr pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
- return lduw_phys(pa);
-}
-
-static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
-{
- hwaddr pa;
- pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
- return lduw_phys(pa);
-}
-
-static inline uint16_t vring_avail_flags(VirtQueue *vq)
-{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, flags);
- return lduw_phys(pa);
-}
-
-static inline uint16_t vring_avail_idx(VirtQueue *vq)
-{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, idx);
- return lduw_phys(pa);
-}
-
-static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
-{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
- return lduw_phys(pa);
-}
-
-static inline uint16_t vring_used_event(VirtQueue *vq)
-{
- return vring_avail_ring(vq, vq->vring.num);
-}
-
-static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
-{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
- stl_phys(pa, val);
-}
-
-static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
-{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
- stl_phys(pa, val);
-}
-
-static uint16_t vring_used_idx(VirtQueue *vq)
-{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, idx);
- return lduw_phys(pa);
-}
-
-static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
-{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, idx);
- stw_phys(pa, val);
-}
-
-static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
-{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, flags);
- stw_phys(pa, lduw_phys(pa) | mask);
-}
-
-static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
-{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, flags);
- stw_phys(pa, lduw_phys(pa) & ~mask);
-}
-
-static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
-{
- hwaddr pa;
- if (!vq->notification) {
- return;
- }
- pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
- stw_phys(pa, val);
-}
-
-void virtio_queue_set_notification(VirtQueue *vq, int enable)
-{
- vq->notification = enable;
- if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
- vring_avail_event(vq, vring_avail_idx(vq));
- } else if (enable) {
- vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
- } else {
- vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
- }
- if (enable) {
- /* Expose avail event/used flags before caller checks the avail idx. */
- smp_mb();
- }
-}
-
-int virtio_queue_ready(VirtQueue *vq)
-{
- return vq->vring.avail != 0;
-}
-
-int virtio_queue_empty(VirtQueue *vq)
-{
- return vring_avail_idx(vq) == vq->last_avail_idx;
-}
-
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
- unsigned int len, unsigned int idx)
-{
- unsigned int offset;
- int i;
-
- trace_virtqueue_fill(vq, elem, len, idx);
-
- offset = 0;
- for (i = 0; i < elem->in_num; i++) {
- size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
-
- cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
- elem->in_sg[i].iov_len,
- 1, size);
-
- offset += size;
- }
-
- for (i = 0; i < elem->out_num; i++)
- cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
- elem->out_sg[i].iov_len,
- 0, elem->out_sg[i].iov_len);
-
- idx = (idx + vring_used_idx(vq)) % vq->vring.num;
-
- /* Get a pointer to the next entry in the used ring. */
- vring_used_ring_id(vq, idx, elem->index);
- vring_used_ring_len(vq, idx, len);
-}
-
-void virtqueue_flush(VirtQueue *vq, unsigned int count)
-{
- uint16_t old, new;
- /* Make sure buffer is written before we update index. */
- smp_wmb();
- trace_virtqueue_flush(vq, count);
- old = vring_used_idx(vq);
- new = old + count;
- vring_used_idx_set(vq, new);
- vq->inuse -= count;
- if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
- vq->signalled_used_valid = false;
-}
-
-void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
- unsigned int len)
-{
- virtqueue_fill(vq, elem, len, 0);
- virtqueue_flush(vq, 1);
-}
-
-static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
-{
- uint16_t num_heads = vring_avail_idx(vq) - idx;
-
- /* Check it isn't doing very strange things with descriptor numbers. */
- if (num_heads > vq->vring.num) {
- error_report("Guest moved used index from %u to %u",
- idx, vring_avail_idx(vq));
- exit(1);
- }
- /* On success, callers read a descriptor at vq->last_avail_idx.
- * Make sure descriptor read does not bypass avail index read. */
- if (num_heads) {
- smp_rmb();
- }
-
- return num_heads;
-}
-
-static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
-{
- unsigned int head;
-
- /* Grab the next descriptor number they're advertising, and increment
- * the index we've seen. */
- head = vring_avail_ring(vq, idx % vq->vring.num);
-
- /* If their number is silly, that's a fatal mistake. */
- if (head >= vq->vring.num) {
- error_report("Guest says index %u is available", head);
- exit(1);
- }
-
- return head;
-}
-
-static unsigned virtqueue_next_desc(hwaddr desc_pa,
- unsigned int i, unsigned int max)
-{
- unsigned int next;
-
- /* If this descriptor says it doesn't chain, we're done. */
- if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
- return max;
-
- /* Check they're not leading us off end of descriptors. */
- next = vring_desc_next(desc_pa, i);
- /* Make sure compiler knows to grab that: we don't want it changing! */
- smp_wmb();
-
- if (next >= max) {
- error_report("Desc next is %u", next);
- exit(1);
- }
-
- return next;
-}
-
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
- unsigned int *out_bytes,
- unsigned max_in_bytes, unsigned max_out_bytes)
-{
- unsigned int idx;
- unsigned int total_bufs, in_total, out_total;
-
- idx = vq->last_avail_idx;
-
- total_bufs = in_total = out_total = 0;
- while (virtqueue_num_heads(vq, idx)) {
- unsigned int max, num_bufs, indirect = 0;
- hwaddr desc_pa;
- int i;
-
- max = vq->vring.num;
- num_bufs = total_bufs;
- i = virtqueue_get_head(vq, idx++);
- desc_pa = vq->vring.desc;
-
- if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
- if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
- error_report("Invalid size for indirect buffer table");
- exit(1);
- }
-
- /* If we've got too many, that implies a descriptor loop. */
- if (num_bufs >= max) {
- error_report("Looped descriptor");
- exit(1);
- }
-
- /* loop over the indirect descriptor table */
- indirect = 1;
- max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
- num_bufs = i = 0;
- desc_pa = vring_desc_addr(desc_pa, i);
- }
-
- do {
- /* If we've got too many, that implies a descriptor loop. */
- if (++num_bufs > max) {
- error_report("Looped descriptor");
- exit(1);
- }
-
- if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
- in_total += vring_desc_len(desc_pa, i);
- } else {
- out_total += vring_desc_len(desc_pa, i);
- }
- if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
- goto done;
- }
- } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
-
- if (!indirect)
- total_bufs = num_bufs;
- else
- total_bufs++;
- }
-done:
- if (in_bytes) {
- *in_bytes = in_total;
- }
- if (out_bytes) {
- *out_bytes = out_total;
- }
-}
-
-int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
- unsigned int out_bytes)
-{
- unsigned int in_total, out_total;
-
- virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
- return in_bytes <= in_total && out_bytes <= out_total;
-}
-
-void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
- size_t num_sg, int is_write)
-{
- unsigned int i;
- hwaddr len;
-
- for (i = 0; i < num_sg; i++) {
- len = sg[i].iov_len;
- sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
- if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
- error_report("virtio: trying to map MMIO memory");
- exit(1);
- }
- }
-}
-
-int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
-{
- unsigned int i, head, max;
- hwaddr desc_pa = vq->vring.desc;
-
- if (!virtqueue_num_heads(vq, vq->last_avail_idx))
- return 0;
-
- /* When we start there are none of either input nor output. */
- elem->out_num = elem->in_num = 0;
-
- max = vq->vring.num;
-
- i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
- if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
- vring_avail_event(vq, vring_avail_idx(vq));
- }
-
- if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
- if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
- error_report("Invalid size for indirect buffer table");
- exit(1);
- }
-
- /* loop over the indirect descriptor table */
- max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
- desc_pa = vring_desc_addr(desc_pa, i);
- i = 0;
- }
-
- /* Collect all the descriptors */
- do {
- struct iovec *sg;
-
- if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
- if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
- error_report("Too many write descriptors in indirect table");
- exit(1);
- }
- elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
- sg = &elem->in_sg[elem->in_num++];
- } else {
- if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
- error_report("Too many read descriptors in indirect table");
- exit(1);
- }
- elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
- sg = &elem->out_sg[elem->out_num++];
- }
-
- sg->iov_len = vring_desc_len(desc_pa, i);
-
- /* If we've got too many, that implies a descriptor loop. */
- if ((elem->in_num + elem->out_num) > max) {
- error_report("Looped descriptor");
- exit(1);
- }
- } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
-
- /* Now map what we have collected */
- virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
- virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
-
- elem->index = head;
-
- vq->inuse++;
-
- trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
- return elem->in_num + elem->out_num;
-}
-
-/* virtio device */
-static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
-{
- if (vdev->binding->notify) {
- vdev->binding->notify(vdev->binding_opaque, vector);
- }
-}
-
-void virtio_update_irq(VirtIODevice *vdev)
-{
- virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
-}
-
-void virtio_set_status(VirtIODevice *vdev, uint8_t val)
-{
- trace_virtio_set_status(vdev, val);
-
- if (vdev->set_status) {
- vdev->set_status(vdev, val);
- }
- vdev->status = val;
-}
-
-void virtio_reset(void *opaque)
-{
- VirtIODevice *vdev = opaque;
- int i;
-
- virtio_set_status(vdev, 0);
-
- if (vdev->reset)
- vdev->reset(vdev);
-
- vdev->guest_features = 0;
- vdev->queue_sel = 0;
- vdev->status = 0;
- vdev->isr = 0;
- vdev->config_vector = VIRTIO_NO_VECTOR;
- virtio_notify_vector(vdev, vdev->config_vector);
-
- for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
- vdev->vq[i].vring.desc = 0;
- vdev->vq[i].vring.avail = 0;
- vdev->vq[i].vring.used = 0;
- vdev->vq[i].last_avail_idx = 0;
- vdev->vq[i].pa = 0;
- vdev->vq[i].vector = VIRTIO_NO_VECTOR;
- vdev->vq[i].signalled_used = 0;
- vdev->vq[i].signalled_used_valid = false;
- vdev->vq[i].notification = true;
- }
-}
-
-uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
-{
- uint8_t val;
-
- vdev->get_config(vdev, vdev->config);
-
- if (addr > (vdev->config_len - sizeof(val)))
- return (uint32_t)-1;
-
- val = ldub_p(vdev->config + addr);
- return val;
-}
-
-uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
-{
- uint16_t val;
-
- vdev->get_config(vdev, vdev->config);
-
- if (addr > (vdev->config_len - sizeof(val)))
- return (uint32_t)-1;
-
- val = lduw_p(vdev->config + addr);
- return val;
-}
-
-uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
-{
- uint32_t val;
-
- vdev->get_config(vdev, vdev->config);
-
- if (addr > (vdev->config_len - sizeof(val)))
- return (uint32_t)-1;
-
- val = ldl_p(vdev->config + addr);
- return val;
-}
-
-void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
-{
- uint8_t val = data;
-
- if (addr > (vdev->config_len - sizeof(val)))
- return;
-
- stb_p(vdev->config + addr, val);
-
- if (vdev->set_config)
- vdev->set_config(vdev, vdev->config);
-}
-
-void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
-{
- uint16_t val = data;
-
- if (addr > (vdev->config_len - sizeof(val)))
- return;
-
- stw_p(vdev->config + addr, val);
-
- if (vdev->set_config)
- vdev->set_config(vdev, vdev->config);
-}
-
-void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
-{
- uint32_t val = data;
-
- if (addr > (vdev->config_len - sizeof(val)))
- return;
-
- stl_p(vdev->config + addr, val);
-
- if (vdev->set_config)
- vdev->set_config(vdev, vdev->config);
-}
-
-void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
-{
- vdev->vq[n].pa = addr;
- virtqueue_init(&vdev->vq[n]);
-}
-
-hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
-{
- return vdev->vq[n].pa;
-}
-
-int virtio_queue_get_num(VirtIODevice *vdev, int n)
-{
- return vdev->vq[n].vring.num;
-}
-
-int virtio_queue_get_id(VirtQueue *vq)
-{
- VirtIODevice *vdev = vq->vdev;
- assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
- return vq - &vdev->vq[0];
-}
-
-void virtio_queue_notify_vq(VirtQueue *vq)
-{
- if (vq->vring.desc) {
- VirtIODevice *vdev = vq->vdev;
- trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
- vq->handle_output(vdev, vq);
- }
-}
-
-void virtio_queue_notify(VirtIODevice *vdev, int n)
-{
- virtio_queue_notify_vq(&vdev->vq[n]);
-}
-
-uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
-{
- return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
- VIRTIO_NO_VECTOR;
-}
-
-void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
-{
- if (n < VIRTIO_PCI_QUEUE_MAX)
- vdev->vq[n].vector = vector;
-}
-
-VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
- void (*handle_output)(VirtIODevice *, VirtQueue *))
-{
- int i;
-
- for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
- if (vdev->vq[i].vring.num == 0)
- break;
- }
-
- if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
- abort();
-
- vdev->vq[i].vring.num = queue_size;
- vdev->vq[i].handle_output = handle_output;
-
- return &vdev->vq[i];
-}
-
-void virtio_del_queue(VirtIODevice *vdev, int n)
-{
- if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
- abort();
- }
-
- vdev->vq[n].vring.num = 0;
-}
-
-void virtio_irq(VirtQueue *vq)
-{
- trace_virtio_irq(vq);
- vq->vdev->isr |= 0x01;
- virtio_notify_vector(vq->vdev, vq->vector);
-}
-
-/* Assuming a given event_idx value from the other size, if
- * we have just incremented index from old to new_idx,
- * should we trigger an event? */
-static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
-{
- /* Note: Xen has similar logic for notification hold-off
- * in include/xen/interface/io/ring.h with req_event and req_prod
- * corresponding to event_idx + 1 and new respectively.
- * Note also that req_event and req_prod in Xen start at 1,
- * event indexes in virtio start at 0. */
- return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
-}
-
-static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
-{
- uint16_t old, new;
- bool v;
- /* We need to expose used array entries before checking used event. */
- smp_mb();
- /* Always notify when queue is empty (when feature acknowledge) */
- if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
- !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
- return true;
- }
-
- if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
- return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
- }
-
- v = vq->signalled_used_valid;
- vq->signalled_used_valid = true;
- old = vq->signalled_used;
- new = vq->signalled_used = vring_used_idx(vq);
- return !v || vring_need_event(vring_used_event(vq), new, old);
-}
-
-void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
-{
- if (!vring_notify(vdev, vq)) {
- return;
- }
-
- trace_virtio_notify(vdev, vq);
- vdev->isr |= 0x01;
- virtio_notify_vector(vdev, vq->vector);
-}
-
-void virtio_notify_config(VirtIODevice *vdev)
-{
- if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
- return;
-
- vdev->isr |= 0x03;
- virtio_notify_vector(vdev, vdev->config_vector);
-}
-
-void virtio_save(VirtIODevice *vdev, QEMUFile *f)
-{
- int i;
-
- if (vdev->binding->save_config)
- vdev->binding->save_config(vdev->binding_opaque, f);
-
- qemu_put_8s(f, &vdev->status);
- qemu_put_8s(f, &vdev->isr);
- qemu_put_be16s(f, &vdev->queue_sel);
- qemu_put_be32s(f, &vdev->guest_features);
- qemu_put_be32(f, vdev->config_len);
- qemu_put_buffer(f, vdev->config, vdev->config_len);
-
- for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
- if (vdev->vq[i].vring.num == 0)
- break;
- }
-
- qemu_put_be32(f, i);
-
- for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
- if (vdev->vq[i].vring.num == 0)
- break;
-
- qemu_put_be32(f, vdev->vq[i].vring.num);
- qemu_put_be64(f, vdev->vq[i].pa);
- qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
- if (vdev->binding->save_queue)
- vdev->binding->save_queue(vdev->binding_opaque, i, f);
- }
-}
-
-int virtio_set_features(VirtIODevice *vdev, uint32_t val)
-{
- uint32_t supported_features =
- vdev->binding->get_features(vdev->binding_opaque);
- bool bad = (val & ~supported_features) != 0;
-
- val &= supported_features;
- if (vdev->set_features) {
- vdev->set_features(vdev, val);
- }
- vdev->guest_features = val;
- return bad ? -1 : 0;
-}
-
-int virtio_load(VirtIODevice *vdev, QEMUFile *f)
-{
- int num, i, ret;
- uint32_t features;
- uint32_t supported_features;
-
- if (vdev->binding->load_config) {
- ret = vdev->binding->load_config(vdev->binding_opaque, f);
- if (ret)
- return ret;
- }
-
- qemu_get_8s(f, &vdev->status);
- qemu_get_8s(f, &vdev->isr);
- qemu_get_be16s(f, &vdev->queue_sel);
- qemu_get_be32s(f, &features);
-
- if (virtio_set_features(vdev, features) < 0) {
- supported_features = vdev->binding->get_features(vdev->binding_opaque);
- error_report("Features 0x%x unsupported. Allowed features: 0x%x",
- features, supported_features);
- return -1;
- }
- vdev->config_len = qemu_get_be32(f);
- qemu_get_buffer(f, vdev->config, vdev->config_len);
-
- num = qemu_get_be32(f);
-
- for (i = 0; i < num; i++) {
- vdev->vq[i].vring.num = qemu_get_be32(f);
- vdev->vq[i].pa = qemu_get_be64(f);
- qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
- vdev->vq[i].signalled_used_valid = false;
- vdev->vq[i].notification = true;
-
- if (vdev->vq[i].pa) {
- uint16_t nheads;
- virtqueue_init(&vdev->vq[i]);
- nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
- /* Check it isn't doing very strange things with descriptor numbers. */
- if (nheads > vdev->vq[i].vring.num) {
- error_report("VQ %d size 0x%x Guest index 0x%x "
- "inconsistent with Host index 0x%x: delta 0x%x",
- i, vdev->vq[i].vring.num,
- vring_avail_idx(&vdev->vq[i]),
- vdev->vq[i].last_avail_idx, nheads);
- return -1;
- }
- } else if (vdev->vq[i].last_avail_idx) {
- error_report("VQ %d address 0x0 "
- "inconsistent with Host index 0x%x",
- i, vdev->vq[i].last_avail_idx);
- return -1;
- }
- if (vdev->binding->load_queue) {
- ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
- if (ret)
- return ret;
- }
- }
-
- virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
- return 0;
-}
-
-void virtio_common_cleanup(VirtIODevice *vdev)
-{
- qemu_del_vm_change_state_handler(vdev->vmstate);
- g_free(vdev->config);
- g_free(vdev->vq);
-}
-
-void virtio_cleanup(VirtIODevice *vdev)
-{
- virtio_common_cleanup(vdev);
- g_free(vdev);
-}
-
-static void virtio_vmstate_change(void *opaque, int running, RunState state)
-{
- VirtIODevice *vdev = opaque;
- bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
- vdev->vm_running = running;
-
- if (backend_run) {
- virtio_set_status(vdev, vdev->status);
- }
-
- if (vdev->binding->vmstate_change) {
- vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
- }
-
- if (!backend_run) {
- virtio_set_status(vdev, vdev->status);
- }
-}
-
-void virtio_init(VirtIODevice *vdev, const char *name,
- uint16_t device_id, size_t config_size)
-{
- int i;
- vdev->device_id = device_id;
- vdev->status = 0;
- vdev->isr = 0;
- vdev->queue_sel = 0;
- vdev->config_vector = VIRTIO_NO_VECTOR;
- vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
- vdev->vm_running = runstate_is_running();
- for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
- vdev->vq[i].vector = VIRTIO_NO_VECTOR;
- vdev->vq[i].vdev = vdev;
- vdev->vq[i].queue_index = i;
- }
-
- vdev->name = name;
- vdev->config_len = config_size;
- if (vdev->config_len) {
- vdev->config = g_malloc0(config_size);
- } else {
- vdev->config = NULL;
- }
- vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
- vdev);
-}
-
-VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
- size_t config_size, size_t struct_size)
-{
- VirtIODevice *vdev;
- vdev = g_malloc0(struct_size);
- virtio_init(vdev, name, device_id, config_size);
- return vdev;
-}
-
-void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
- DeviceState *opaque)
-{
- vdev->binding = binding;
- vdev->binding_opaque = opaque;
-}
-
-hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
-{
- return vdev->vq[n].vring.desc;
-}
-
-hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
-{
- return vdev->vq[n].vring.avail;
-}
-
-hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
-{
- return vdev->vq[n].vring.used;
-}
-
-hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
-{
- return vdev->vq[n].vring.desc;
-}
-
-hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
-{
- return sizeof(VRingDesc) * vdev->vq[n].vring.num;
-}
-
-hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
-{
- return offsetof(VRingAvail, ring) +
- sizeof(uint64_t) * vdev->vq[n].vring.num;
-}
-
-hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
-{
- return offsetof(VRingUsed, ring) +
- sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
-}
-
-hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
-{
- return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
- virtio_queue_get_used_size(vdev, n);
-}
-
-uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
-{
- return vdev->vq[n].last_avail_idx;
-}
-
-void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
-{
- vdev->vq[n].last_avail_idx = idx;
-}
-
-VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
-{
- return vdev->vq + n;
-}
-
-uint16_t virtio_get_queue_index(VirtQueue *vq)
-{
- return vq->queue_index;
-}
-
-static void virtio_queue_guest_notifier_read(EventNotifier *n)
-{
- VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
- if (event_notifier_test_and_clear(n)) {
- virtio_irq(vq);
- }
-}
-
-void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
- bool with_irqfd)
-{
- if (assign && !with_irqfd) {
- event_notifier_set_handler(&vq->guest_notifier,
- virtio_queue_guest_notifier_read);
- } else {
- event_notifier_set_handler(&vq->guest_notifier, NULL);
- }
- if (!assign) {
- /* Test and clear notifier before closing it,
- * in case poll callback didn't have time to run. */
- virtio_queue_guest_notifier_read(&vq->guest_notifier);
- }
-}
-
-EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
-{
- return &vq->guest_notifier;
-}
-
-static void virtio_queue_host_notifier_read(EventNotifier *n)
-{
- VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
- if (event_notifier_test_and_clear(n)) {
- virtio_queue_notify_vq(vq);
- }
-}
-
-void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
- bool set_handler)
-{
- if (assign && set_handler) {
- event_notifier_set_handler(&vq->host_notifier,
- virtio_queue_host_notifier_read);
- } else {
- event_notifier_set_handler(&vq->host_notifier, NULL);
- }
- if (!assign) {
- /* Test and clear notifier before after disabling event,
- * in case poll callback didn't have time to run. */
- virtio_queue_host_notifier_read(&vq->host_notifier);
- }
-}
-
-EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
-{
- return &vq->host_notifier;
-}
-
-static int virtio_device_init(DeviceState *qdev)
-{
- VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
- VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
- assert(k->init != NULL);
- if (k->init(vdev) < 0) {
- return -1;
- }
- virtio_bus_plug_device(vdev);
- return 0;
-}
-
-static void virtio_device_class_init(ObjectClass *klass, void *data)
-{
- /* Set the default value here. */
- DeviceClass *dc = DEVICE_CLASS(klass);
- dc->init = virtio_device_init;
- dc->bus_type = TYPE_VIRTIO_BUS;
-}
-
-static const TypeInfo virtio_device_info = {
- .name = TYPE_VIRTIO_DEVICE,
- .parent = TYPE_DEVICE,
- .instance_size = sizeof(VirtIODevice),
- .class_init = virtio_device_class_init,
- .abstract = true,
- .class_size = sizeof(VirtioDeviceClass),
-};
-
-static void virtio_register_types(void)
-{
- type_register_static(&virtio_device_info);
-}
-
-type_init(virtio_register_types)
common-obj-$(CONFIG_VIRTIO) += virtio-rng.o
common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
common-obj-$(CONFIG_VIRTIO) += virtio-bus.o
+common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/
+obj-$(CONFIG_VIRTIO) += virtio.o virtio-balloon.o
+obj-$(CONFIG_VHOST_NET) += vhost.o
--- /dev/null
+common-obj-y += hostmem.o vring.o
--- /dev/null
+/*
+ * Thread-safe guest to host memory mapping
+ *
+ * Copyright 2012 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "exec/address-spaces.h"
+#include "hw/virtio/dataplane/hostmem.h"
+
+static int hostmem_lookup_cmp(const void *phys_, const void *region_)
+{
+ hwaddr phys = *(const hwaddr *)phys_;
+ const HostMemRegion *region = region_;
+
+ if (phys < region->guest_addr) {
+ return -1;
+ } else if (phys >= region->guest_addr + region->size) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * Map guest physical address to host pointer
+ */
+void *hostmem_lookup(HostMem *hostmem, hwaddr phys, hwaddr len, bool is_write)
+{
+ HostMemRegion *region;
+ void *host_addr = NULL;
+ hwaddr offset_within_region;
+
+ qemu_mutex_lock(&hostmem->current_regions_lock);
+ region = bsearch(&phys, hostmem->current_regions,
+ hostmem->num_current_regions,
+ sizeof(hostmem->current_regions[0]),
+ hostmem_lookup_cmp);
+ if (!region) {
+ goto out;
+ }
+ if (is_write && region->readonly) {
+ goto out;
+ }
+ offset_within_region = phys - region->guest_addr;
+ if (len <= region->size - offset_within_region) {
+ host_addr = region->host_addr + offset_within_region;
+ }
+out:
+ qemu_mutex_unlock(&hostmem->current_regions_lock);
+
+ return host_addr;
+}
+
+/**
+ * Install new regions list
+ */
+static void hostmem_listener_commit(MemoryListener *listener)
+{
+ HostMem *hostmem = container_of(listener, HostMem, listener);
+
+ qemu_mutex_lock(&hostmem->current_regions_lock);
+ g_free(hostmem->current_regions);
+ hostmem->current_regions = hostmem->new_regions;
+ hostmem->num_current_regions = hostmem->num_new_regions;
+ qemu_mutex_unlock(&hostmem->current_regions_lock);
+
+ /* Reset new regions list */
+ hostmem->new_regions = NULL;
+ hostmem->num_new_regions = 0;
+}
+
+/**
+ * Add a MemoryRegionSection to the new regions list
+ */
+static void hostmem_append_new_region(HostMem *hostmem,
+ MemoryRegionSection *section)
+{
+ void *ram_ptr = memory_region_get_ram_ptr(section->mr);
+ size_t num = hostmem->num_new_regions;
+ size_t new_size = (num + 1) * sizeof(hostmem->new_regions[0]);
+
+ hostmem->new_regions = g_realloc(hostmem->new_regions, new_size);
+ hostmem->new_regions[num] = (HostMemRegion){
+ .host_addr = ram_ptr + section->offset_within_region,
+ .guest_addr = section->offset_within_address_space,
+ .size = section->size,
+ .readonly = section->readonly,
+ };
+ hostmem->num_new_regions++;
+}
+
+static void hostmem_listener_append_region(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ HostMem *hostmem = container_of(listener, HostMem, listener);
+
+ /* Ignore non-RAM regions, we may not be able to map them */
+ if (!memory_region_is_ram(section->mr)) {
+ return;
+ }
+
+ /* Ignore regions with dirty logging, we cannot mark them dirty */
+ if (memory_region_is_logging(section->mr)) {
+ return;
+ }
+
+ hostmem_append_new_region(hostmem, section);
+}
+
+/* We don't implement most MemoryListener callbacks, use these nop stubs */
+static void hostmem_listener_dummy(MemoryListener *listener)
+{
+}
+
+static void hostmem_listener_section_dummy(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
+static void hostmem_listener_eventfd_dummy(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data,
+ EventNotifier *e)
+{
+}
+
+static void hostmem_listener_coalesced_mmio_dummy(MemoryListener *listener,
+ MemoryRegionSection *section,
+ hwaddr addr, hwaddr len)
+{
+}
+
+void hostmem_init(HostMem *hostmem)
+{
+ memset(hostmem, 0, sizeof(*hostmem));
+
+ qemu_mutex_init(&hostmem->current_regions_lock);
+
+ hostmem->listener = (MemoryListener){
+ .begin = hostmem_listener_dummy,
+ .commit = hostmem_listener_commit,
+ .region_add = hostmem_listener_append_region,
+ .region_del = hostmem_listener_section_dummy,
+ .region_nop = hostmem_listener_append_region,
+ .log_start = hostmem_listener_section_dummy,
+ .log_stop = hostmem_listener_section_dummy,
+ .log_sync = hostmem_listener_section_dummy,
+ .log_global_start = hostmem_listener_dummy,
+ .log_global_stop = hostmem_listener_dummy,
+ .eventfd_add = hostmem_listener_eventfd_dummy,
+ .eventfd_del = hostmem_listener_eventfd_dummy,
+ .coalesced_mmio_add = hostmem_listener_coalesced_mmio_dummy,
+ .coalesced_mmio_del = hostmem_listener_coalesced_mmio_dummy,
+ .priority = 10,
+ };
+
+ memory_listener_register(&hostmem->listener, &address_space_memory);
+ if (hostmem->num_new_regions > 0) {
+ hostmem_listener_commit(&hostmem->listener);
+ }
+}
+
+void hostmem_finalize(HostMem *hostmem)
+{
+ memory_listener_unregister(&hostmem->listener);
+ g_free(hostmem->new_regions);
+ g_free(hostmem->current_regions);
+ qemu_mutex_destroy(&hostmem->current_regions_lock);
+}
--- /dev/null
+/* Copyright 2012 Red Hat, Inc.
+ * Copyright IBM, Corp. 2012
+ *
+ * Based on Linux 2.6.39 vhost code:
+ * Copyright (C) 2009 Red Hat, Inc.
+ * Copyright (C) 2006 Rusty Russell IBM Corporation
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * Inspiration, some code, and most witty comments come from
+ * Documentation/virtual/lguest/lguest.c, by Rusty Russell
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "trace.h"
+#include "hw/virtio/dataplane/vring.h"
+#include "qemu/error-report.h"
+
+/* Map the guest's vring to host memory */
+bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
+{
+ hwaddr vring_addr = virtio_queue_get_ring_addr(vdev, n);
+ hwaddr vring_size = virtio_queue_get_ring_size(vdev, n);
+ void *vring_ptr;
+
+ vring->broken = false;
+
+ hostmem_init(&vring->hostmem);
+ vring_ptr = hostmem_lookup(&vring->hostmem, vring_addr, vring_size, true);
+ if (!vring_ptr) {
+ error_report("Failed to map vring "
+ "addr %#" HWADDR_PRIx " size %" HWADDR_PRIu,
+ vring_addr, vring_size);
+ vring->broken = true;
+ return false;
+ }
+
+ vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
+
+ vring->last_avail_idx = 0;
+ vring->last_used_idx = 0;
+ vring->signalled_used = 0;
+ vring->signalled_used_valid = false;
+
+ trace_vring_setup(virtio_queue_get_ring_addr(vdev, n),
+ vring->vr.desc, vring->vr.avail, vring->vr.used);
+ return true;
+}
+
+void vring_teardown(Vring *vring)
+{
+ hostmem_finalize(&vring->hostmem);
+}
+
+/* Disable guest->host notifies */
+void vring_disable_notification(VirtIODevice *vdev, Vring *vring)
+{
+ if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
+ vring->vr.used->flags |= VRING_USED_F_NO_NOTIFY;
+ }
+}
+
+/* Enable guest->host notifies
+ *
+ * Return true if the vring is empty, false if there are more requests.
+ */
+bool vring_enable_notification(VirtIODevice *vdev, Vring *vring)
+{
+ if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+ vring_avail_event(&vring->vr) = vring->vr.avail->idx;
+ } else {
+ vring->vr.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+ }
+ smp_mb(); /* ensure update is seen before reading avail_idx */
+ return !vring_more_avail(vring);
+}
+
+/* This is stolen from linux/drivers/vhost/vhost.c:vhost_notify() */
+bool vring_should_notify(VirtIODevice *vdev, Vring *vring)
+{
+ uint16_t old, new;
+ bool v;
+ /* Flush out used index updates. This is paired
+ * with the barrier that the Guest executes when enabling
+ * interrupts. */
+ smp_mb();
+
+ if ((vdev->guest_features & VIRTIO_F_NOTIFY_ON_EMPTY) &&
+ unlikely(vring->vr.avail->idx == vring->last_avail_idx)) {
+ return true;
+ }
+
+ if (!(vdev->guest_features & VIRTIO_RING_F_EVENT_IDX)) {
+ return !(vring->vr.avail->flags & VRING_AVAIL_F_NO_INTERRUPT);
+ }
+ old = vring->signalled_used;
+ v = vring->signalled_used_valid;
+ new = vring->signalled_used = vring->last_used_idx;
+ vring->signalled_used_valid = true;
+
+ if (unlikely(!v)) {
+ return true;
+ }
+
+ return vring_need_event(vring_used_event(&vring->vr), new, old);
+}
+
+/* This is stolen from linux/drivers/vhost/vhost.c. */
+static int get_indirect(Vring *vring,
+ struct iovec iov[], struct iovec *iov_end,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vring_desc *indirect)
+{
+ struct vring_desc desc;
+ unsigned int i = 0, count, found = 0;
+
+ /* Sanity check */
+ if (unlikely(indirect->len % sizeof(desc))) {
+ error_report("Invalid length in indirect descriptor: "
+ "len %#x not multiple of %#zx",
+ indirect->len, sizeof(desc));
+ vring->broken = true;
+ return -EFAULT;
+ }
+
+ count = indirect->len / sizeof(desc);
+ /* Buffers are chained via a 16 bit next field, so
+ * we can have at most 2^16 of these. */
+ if (unlikely(count > USHRT_MAX + 1)) {
+ error_report("Indirect buffer length too big: %d", indirect->len);
+ vring->broken = true;
+ return -EFAULT;
+ }
+
+ do {
+ struct vring_desc *desc_ptr;
+
+ /* Translate indirect descriptor */
+ desc_ptr = hostmem_lookup(&vring->hostmem,
+ indirect->addr + found * sizeof(desc),
+ sizeof(desc), false);
+ if (!desc_ptr) {
+ error_report("Failed to map indirect descriptor "
+ "addr %#" PRIx64 " len %zu",
+ (uint64_t)indirect->addr + found * sizeof(desc),
+ sizeof(desc));
+ vring->broken = true;
+ return -EFAULT;
+ }
+ desc = *desc_ptr;
+
+ /* Ensure descriptor has been loaded before accessing fields */
+ barrier(); /* read_barrier_depends(); */
+
+ if (unlikely(++found > count)) {
+ error_report("Loop detected: last one at %u "
+ "indirect size %u", i, count);
+ vring->broken = true;
+ return -EFAULT;
+ }
+
+ if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
+ error_report("Nested indirect descriptor");
+ vring->broken = true;
+ return -EFAULT;
+ }
+
+ /* Stop for now if there are not enough iovecs available. */
+ if (iov >= iov_end) {
+ return -ENOBUFS;
+ }
+
+ iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
+ desc.flags & VRING_DESC_F_WRITE);
+ if (!iov->iov_base) {
+ error_report("Failed to map indirect descriptor"
+ "addr %#" PRIx64 " len %u",
+ (uint64_t)desc.addr, desc.len);
+ vring->broken = true;
+ return -EFAULT;
+ }
+ iov->iov_len = desc.len;
+ iov++;
+
+ /* If this is an input descriptor, increment that count. */
+ if (desc.flags & VRING_DESC_F_WRITE) {
+ *in_num += 1;
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors. */
+ if (unlikely(*in_num)) {
+ error_report("Indirect descriptor "
+ "has out after in: idx %u", i);
+ vring->broken = true;
+ return -EFAULT;
+ }
+ *out_num += 1;
+ }
+ i = desc.next;
+ } while (desc.flags & VRING_DESC_F_NEXT);
+ return 0;
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found. A negative code is
+ * returned on error.
+ *
+ * Stolen from linux/drivers/vhost/vhost.c.
+ */
+int vring_pop(VirtIODevice *vdev, Vring *vring,
+ struct iovec iov[], struct iovec *iov_end,
+ unsigned int *out_num, unsigned int *in_num)
+{
+ struct vring_desc desc;
+ unsigned int i, head, found = 0, num = vring->vr.num;
+ uint16_t avail_idx, last_avail_idx;
+
+ /* If there was a fatal error then refuse operation */
+ if (vring->broken) {
+ return -EFAULT;
+ }
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ last_avail_idx = vring->last_avail_idx;
+ avail_idx = vring->vr.avail->idx;
+ barrier(); /* load indices now and not again later */
+
+ if (unlikely((uint16_t)(avail_idx - last_avail_idx) > num)) {
+ error_report("Guest moved used index from %u to %u",
+ last_avail_idx, avail_idx);
+ vring->broken = true;
+ return -EFAULT;
+ }
+
+ /* If there's nothing new since last we looked. */
+ if (avail_idx == last_avail_idx) {
+ return -EAGAIN;
+ }
+
+ /* Only get avail ring entries after they have been exposed by guest. */
+ smp_rmb();
+
+ /* Grab the next descriptor number they're advertising, and increment
+ * the index we've seen. */
+ head = vring->vr.avail->ring[last_avail_idx % num];
+
+ /* If their number is silly, that's an error. */
+ if (unlikely(head >= num)) {
+ error_report("Guest says index %u > %u is available", head, num);
+ vring->broken = true;
+ return -EFAULT;
+ }
+
+ if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+ vring_avail_event(&vring->vr) = vring->vr.avail->idx;
+ }
+
+ /* When we start there are none of either input nor output. */
+ *out_num = *in_num = 0;
+
+ i = head;
+ do {
+ if (unlikely(i >= num)) {
+ error_report("Desc index is %u > %u, head = %u", i, num, head);
+ vring->broken = true;
+ return -EFAULT;
+ }
+ if (unlikely(++found > num)) {
+ error_report("Loop detected: last one at %u vq size %u head %u",
+ i, num, head);
+ vring->broken = true;
+ return -EFAULT;
+ }
+ desc = vring->vr.desc[i];
+
+ /* Ensure descriptor is loaded before accessing fields */
+ barrier();
+
+ if (desc.flags & VRING_DESC_F_INDIRECT) {
+ int ret = get_indirect(vring, iov, iov_end, out_num, in_num, &desc);
+ if (ret < 0) {
+ return ret;
+ }
+ continue;
+ }
+
+ /* If there are not enough iovecs left, stop for now. The caller
+ * should check if there are more descs available once they have dealt
+ * with the current set.
+ */
+ if (iov >= iov_end) {
+ return -ENOBUFS;
+ }
+
+ /* TODO handle non-contiguous memory across region boundaries */
+ iov->iov_base = hostmem_lookup(&vring->hostmem, desc.addr, desc.len,
+ desc.flags & VRING_DESC_F_WRITE);
+ if (!iov->iov_base) {
+ error_report("Failed to map vring desc addr %#" PRIx64 " len %u",
+ (uint64_t)desc.addr, desc.len);
+ vring->broken = true;
+ return -EFAULT;
+ }
+ iov->iov_len = desc.len;
+ iov++;
+
+ if (desc.flags & VRING_DESC_F_WRITE) {
+ /* If this is an input descriptor,
+ * increment that count. */
+ *in_num += 1;
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors. */
+ if (unlikely(*in_num)) {
+ error_report("Descriptor has out after in: idx %d", i);
+ vring->broken = true;
+ return -EFAULT;
+ }
+ *out_num += 1;
+ }
+ i = desc.next;
+ } while (desc.flags & VRING_DESC_F_NEXT);
+
+ /* On success, increment avail index. */
+ vring->last_avail_idx++;
+ return head;
+}
+
+/* After we've used one of their buffers, we tell them about it.
+ *
+ * Stolen from linux/drivers/vhost/vhost.c.
+ */
+void vring_push(Vring *vring, unsigned int head, int len)
+{
+ struct vring_used_elem *used;
+ uint16_t new;
+
+ /* Don't touch vring if a fatal error occurred */
+ if (vring->broken) {
+ return;
+ }
+
+ /* The virtqueue contains a ring of used buffers. Get a pointer to the
+ * next entry in that used ring. */
+ used = &vring->vr.used->ring[vring->last_used_idx % vring->vr.num];
+ used->id = head;
+ used->len = len;
+
+ /* Make sure buffer is written before we update index. */
+ smp_wmb();
+
+ new = vring->vr.used->idx = ++vring->last_used_idx;
+ if (unlikely((int16_t)(new - vring->signalled_used) < (uint16_t)1)) {
+ vring->signalled_used_valid = false;
+ }
+}
--- /dev/null
+/*
+ * vhost support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include <sys/ioctl.h>
+#include "hw/virtio/vhost.h"
+#include "hw/hw.h"
+#include "qemu/range.h"
+#include <linux/vhost.h>
+#include "exec/address-spaces.h"
+
+static void vhost_dev_sync_region(struct vhost_dev *dev,
+ MemoryRegionSection *section,
+ uint64_t mfirst, uint64_t mlast,
+ uint64_t rfirst, uint64_t rlast)
+{
+ uint64_t start = MAX(mfirst, rfirst);
+ uint64_t end = MIN(mlast, rlast);
+ vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
+ vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
+ uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
+
+ if (end < start) {
+ return;
+ }
+ assert(end / VHOST_LOG_CHUNK < dev->log_size);
+ assert(start / VHOST_LOG_CHUNK < dev->log_size);
+
+ for (;from < to; ++from) {
+ vhost_log_chunk_t log;
+ int bit;
+ /* We first check with non-atomic: much cheaper,
+ * and we expect non-dirty to be the common case. */
+ if (!*from) {
+ addr += VHOST_LOG_CHUNK;
+ continue;
+ }
+ /* Data must be read atomically. We don't really
+ * need the barrier semantics of __sync
+ * builtins, but it's easier to use them than
+ * roll our own. */
+ log = __sync_fetch_and_and(from, 0);
+ while ((bit = sizeof(log) > sizeof(int) ?
+ ffsll(log) : ffs(log))) {
+ hwaddr page_addr;
+ hwaddr section_offset;
+ hwaddr mr_offset;
+ bit -= 1;
+ page_addr = addr + bit * VHOST_LOG_PAGE;
+ section_offset = page_addr - section->offset_within_address_space;
+ mr_offset = section_offset + section->offset_within_region;
+ memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
+ log &= ~(0x1ull << bit);
+ }
+ addr += VHOST_LOG_CHUNK;
+ }
+}
+
+static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
+ MemoryRegionSection *section,
+ hwaddr first,
+ hwaddr last)
+{
+ int i;
+ hwaddr start_addr;
+ hwaddr end_addr;
+
+ if (!dev->log_enabled || !dev->started) {
+ return 0;
+ }
+ start_addr = section->offset_within_address_space;
+ end_addr = range_get_last(start_addr, section->size);
+ start_addr = MAX(first, start_addr);
+ end_addr = MIN(last, end_addr);
+
+ for (i = 0; i < dev->mem->nregions; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ vhost_dev_sync_region(dev, section, start_addr, end_addr,
+ reg->guest_phys_addr,
+ range_get_last(reg->guest_phys_addr,
+ reg->memory_size));
+ }
+ for (i = 0; i < dev->nvqs; ++i) {
+ struct vhost_virtqueue *vq = dev->vqs + i;
+ vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
+ range_get_last(vq->used_phys, vq->used_size));
+ }
+ return 0;
+}
+
+static void vhost_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
+}
+
+static void vhost_log_sync_range(struct vhost_dev *dev,
+ hwaddr first, hwaddr last)
+{
+ int i;
+ /* FIXME: this is N^2 in number of sections */
+ for (i = 0; i < dev->n_mem_sections; ++i) {
+ MemoryRegionSection *section = &dev->mem_sections[i];
+ vhost_sync_dirty_bitmap(dev, section, first, last);
+ }
+}
+
+/* Assign/unassign. Keep an unsorted array of non-overlapping
+ * memory regions in dev->mem. */
+static void vhost_dev_unassign_memory(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size)
+{
+ int from, to, n = dev->mem->nregions;
+ /* Track overlapping/split regions for sanity checking. */
+ int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;
+
+ for (from = 0, to = 0; from < n; ++from, ++to) {
+ struct vhost_memory_region *reg = dev->mem->regions + to;
+ uint64_t reglast;
+ uint64_t memlast;
+ uint64_t change;
+
+ /* clone old region */
+ if (to != from) {
+ memcpy(reg, dev->mem->regions + from, sizeof *reg);
+ }
+
+ /* No overlap is simple */
+ if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
+ start_addr, size)) {
+ continue;
+ }
+
+ /* Split only happens if supplied region
+ * is in the middle of an existing one. Thus it can not
+ * overlap with any other existing region. */
+ assert(!split);
+
+ reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+ memlast = range_get_last(start_addr, size);
+
+ /* Remove whole region */
+ if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
+ --dev->mem->nregions;
+ --to;
+ ++overlap_middle;
+ continue;
+ }
+
+ /* Shrink region */
+ if (memlast >= reglast) {
+ reg->memory_size = start_addr - reg->guest_phys_addr;
+ assert(reg->memory_size);
+ assert(!overlap_end);
+ ++overlap_end;
+ continue;
+ }
+
+ /* Shift region */
+ if (start_addr <= reg->guest_phys_addr) {
+ change = memlast + 1 - reg->guest_phys_addr;
+ reg->memory_size -= change;
+ reg->guest_phys_addr += change;
+ reg->userspace_addr += change;
+ assert(reg->memory_size);
+ assert(!overlap_start);
+ ++overlap_start;
+ continue;
+ }
+
+ /* This only happens if supplied region
+ * is in the middle of an existing one. Thus it can not
+ * overlap with any other existing region. */
+ assert(!overlap_start);
+ assert(!overlap_end);
+ assert(!overlap_middle);
+ /* Split region: shrink first part, shift second part. */
+ memcpy(dev->mem->regions + n, reg, sizeof *reg);
+ reg->memory_size = start_addr - reg->guest_phys_addr;
+ assert(reg->memory_size);
+ change = memlast + 1 - reg->guest_phys_addr;
+ reg = dev->mem->regions + n;
+ reg->memory_size -= change;
+ assert(reg->memory_size);
+ reg->guest_phys_addr += change;
+ reg->userspace_addr += change;
+ /* Never add more than 1 region */
+ assert(dev->mem->nregions == n);
+ ++dev->mem->nregions;
+ ++split;
+ }
+}
+
+/* Called after unassign, so no regions overlap the given range. */
+static void vhost_dev_assign_memory(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size,
+ uint64_t uaddr)
+{
+ int from, to;
+ struct vhost_memory_region *merged = NULL;
+ for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
+ struct vhost_memory_region *reg = dev->mem->regions + to;
+ uint64_t prlast, urlast;
+ uint64_t pmlast, umlast;
+ uint64_t s, e, u;
+
+ /* clone old region */
+ if (to != from) {
+ memcpy(reg, dev->mem->regions + from, sizeof *reg);
+ }
+ prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+ pmlast = range_get_last(start_addr, size);
+ urlast = range_get_last(reg->userspace_addr, reg->memory_size);
+ umlast = range_get_last(uaddr, size);
+
+ /* check for overlapping regions: should never happen. */
+ assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
+ /* Not an adjacent or overlapping region - do not merge. */
+ if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
+ (pmlast + 1 != reg->guest_phys_addr ||
+ umlast + 1 != reg->userspace_addr)) {
+ continue;
+ }
+
+ if (merged) {
+ --to;
+ assert(to >= 0);
+ } else {
+ merged = reg;
+ }
+ u = MIN(uaddr, reg->userspace_addr);
+ s = MIN(start_addr, reg->guest_phys_addr);
+ e = MAX(pmlast, prlast);
+ uaddr = merged->userspace_addr = u;
+ start_addr = merged->guest_phys_addr = s;
+ size = merged->memory_size = e - s + 1;
+ assert(merged->memory_size);
+ }
+
+ if (!merged) {
+ struct vhost_memory_region *reg = dev->mem->regions + to;
+ memset(reg, 0, sizeof *reg);
+ reg->memory_size = size;
+ assert(reg->memory_size);
+ reg->guest_phys_addr = start_addr;
+ reg->userspace_addr = uaddr;
+ ++to;
+ }
+ assert(to <= dev->mem->nregions + 1);
+ dev->mem->nregions = to;
+}
+
+static uint64_t vhost_get_log_size(struct vhost_dev *dev)
+{
+ uint64_t log_size = 0;
+ int i;
+ for (i = 0; i < dev->mem->nregions; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ uint64_t last = range_get_last(reg->guest_phys_addr,
+ reg->memory_size);
+ log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
+ }
+ for (i = 0; i < dev->nvqs; ++i) {
+ struct vhost_virtqueue *vq = dev->vqs + i;
+ uint64_t last = vq->used_phys + vq->used_size - 1;
+ log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
+ }
+ return log_size;
+}
+
+static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
+{
+ vhost_log_chunk_t *log;
+ uint64_t log_base;
+ int r;
+
+ log = g_malloc0(size * sizeof *log);
+ log_base = (uint64_t)(unsigned long)log;
+ r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
+ assert(r >= 0);
+ /* Sync only the range covered by the old log */
+ if (dev->log_size) {
+ vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
+ }
+ if (dev->log) {
+ g_free(dev->log);
+ }
+ dev->log = log;
+ dev->log_size = size;
+}
+
+static int vhost_verify_ring_mappings(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size)
+{
+ int i;
+ for (i = 0; i < dev->nvqs; ++i) {
+ struct vhost_virtqueue *vq = dev->vqs + i;
+ hwaddr l;
+ void *p;
+
+ if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) {
+ continue;
+ }
+ l = vq->ring_size;
+ p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
+ if (!p || l != vq->ring_size) {
+ fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
+ return -ENOMEM;
+ }
+ if (p != vq->ring) {
+ fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
+ return -EBUSY;
+ }
+ cpu_physical_memory_unmap(p, l, 0, 0);
+ }
+ return 0;
+}
+
+static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size)
+{
+ int i, n = dev->mem->nregions;
+ for (i = 0; i < n; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
+ start_addr, size)) {
+ return reg;
+ }
+ }
+ return NULL;
+}
+
+static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size,
+ uint64_t uaddr)
+{
+ struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
+ uint64_t reglast;
+ uint64_t memlast;
+
+ if (!reg) {
+ return true;
+ }
+
+ reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+ memlast = range_get_last(start_addr, size);
+
+ /* Need to extend region? */
+ if (start_addr < reg->guest_phys_addr || memlast > reglast) {
+ return true;
+ }
+ /* userspace_addr changed? */
+ return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
+}
+
+static void vhost_set_memory(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool add)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ hwaddr start_addr = section->offset_within_address_space;
+ ram_addr_t size = section->size;
+ bool log_dirty = memory_region_is_logging(section->mr);
+ int s = offsetof(struct vhost_memory, regions) +
+ (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
+ uint64_t log_size;
+ int r;
+ void *ram;
+
+ dev->mem = g_realloc(dev->mem, s);
+
+ if (log_dirty) {
+ add = false;
+ }
+
+ assert(size);
+
+ /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
+ ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
+ if (add) {
+ if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
+ /* Region exists with same address. Nothing to do. */
+ return;
+ }
+ } else {
+ if (!vhost_dev_find_reg(dev, start_addr, size)) {
+ /* Removing region that we don't access. Nothing to do. */
+ return;
+ }
+ }
+
+ vhost_dev_unassign_memory(dev, start_addr, size);
+ if (add) {
+ /* Add given mapping, merging adjacent regions if any */
+ vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
+ } else {
+ /* Remove old mapping for this memory, if any. */
+ vhost_dev_unassign_memory(dev, start_addr, size);
+ }
+
+ if (!dev->started) {
+ return;
+ }
+
+ if (dev->started) {
+ r = vhost_verify_ring_mappings(dev, start_addr, size);
+ assert(r >= 0);
+ }
+
+ if (!dev->log_enabled) {
+ r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ assert(r >= 0);
+ return;
+ }
+ log_size = vhost_get_log_size(dev);
+ /* We allocate an extra 4K bytes to log,
+ * to reduce the * number of reallocations. */
+#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
+ /* To log more, must increase log size before table update. */
+ if (dev->log_size < log_size) {
+ vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
+ }
+ r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ assert(r >= 0);
+ /* To log less, can only decrease log size after table update. */
+ if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
+ vhost_dev_log_resize(dev, log_size);
+ }
+}
+
+static bool vhost_section(MemoryRegionSection *section)
+{
+ return memory_region_is_ram(section->mr);
+}
+
+static void vhost_begin(MemoryListener *listener)
+{
+}
+
+static void vhost_commit(MemoryListener *listener)
+{
+}
+
+static void vhost_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+
+ if (!vhost_section(section)) {
+ return;
+ }
+
+ ++dev->n_mem_sections;
+ dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
+ dev->n_mem_sections);
+ dev->mem_sections[dev->n_mem_sections - 1] = *section;
+ vhost_set_memory(listener, section, true);
+}
+
+static void vhost_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ int i;
+
+ if (!vhost_section(section)) {
+ return;
+ }
+
+ vhost_set_memory(listener, section, false);
+ for (i = 0; i < dev->n_mem_sections; ++i) {
+ if (dev->mem_sections[i].offset_within_address_space
+ == section->offset_within_address_space) {
+ --dev->n_mem_sections;
+ memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
+ (dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
+ break;
+ }
+ }
+}
+
+static void vhost_region_nop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
+static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq,
+ unsigned idx, bool enable_log)
+{
+ struct vhost_vring_addr addr = {
+ .index = idx,
+ .desc_user_addr = (uint64_t)(unsigned long)vq->desc,
+ .avail_user_addr = (uint64_t)(unsigned long)vq->avail,
+ .used_user_addr = (uint64_t)(unsigned long)vq->used,
+ .log_guest_addr = vq->used_phys,
+ .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
+ };
+ int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+ if (r < 0) {
+ return -errno;
+ }
+ return 0;
+}
+
+static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
+{
+ uint64_t features = dev->acked_features;
+ int r;
+ if (enable_log) {
+ features |= 0x1 << VHOST_F_LOG_ALL;
+ }
+ r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+ return r < 0 ? -errno : 0;
+}
+
+static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
+{
+ int r, t, i;
+ r = vhost_dev_set_features(dev, enable_log);
+ if (r < 0) {
+ goto err_features;
+ }
+ for (i = 0; i < dev->nvqs; ++i) {
+ r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
+ enable_log);
+ if (r < 0) {
+ goto err_vq;
+ }
+ }
+ return 0;
+err_vq:
+ for (; i >= 0; --i) {
+ t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i,
+ dev->log_enabled);
+ assert(t >= 0);
+ }
+ t = vhost_dev_set_features(dev, dev->log_enabled);
+ assert(t >= 0);
+err_features:
+ return r;
+}
+
+static int vhost_migration_log(MemoryListener *listener, int enable)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ int r;
+ if (!!enable == dev->log_enabled) {
+ return 0;
+ }
+ if (!dev->started) {
+ dev->log_enabled = enable;
+ return 0;
+ }
+ if (!enable) {
+ r = vhost_dev_set_log(dev, false);
+ if (r < 0) {
+ return r;
+ }
+ if (dev->log) {
+ g_free(dev->log);
+ }
+ dev->log = NULL;
+ dev->log_size = 0;
+ } else {
+ vhost_dev_log_resize(dev, vhost_get_log_size(dev));
+ r = vhost_dev_set_log(dev, true);
+ if (r < 0) {
+ return r;
+ }
+ }
+ dev->log_enabled = enable;
+ return 0;
+}
+
+static void vhost_log_global_start(MemoryListener *listener)
+{
+ int r;
+
+ r = vhost_migration_log(listener, true);
+ if (r < 0) {
+ abort();
+ }
+}
+
+static void vhost_log_global_stop(MemoryListener *listener)
+{
+ int r;
+
+ r = vhost_migration_log(listener, false);
+ if (r < 0) {
+ abort();
+ }
+}
+
+static void vhost_log_start(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ /* FIXME: implement */
+}
+
+static void vhost_log_stop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ /* FIXME: implement */
+}
+
+static int vhost_virtqueue_start(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx)
+{
+ hwaddr s, l, a;
+ int r;
+ int vhost_vq_index = idx - dev->vq_index;
+ struct vhost_vring_file file = {
+ .index = vhost_vq_index
+ };
+ struct vhost_vring_state state = {
+ .index = vhost_vq_index
+ };
+ struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
+
+ assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
+
+ vq->num = state.num = virtio_queue_get_num(vdev, idx);
+ r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+ if (r) {
+ return -errno;
+ }
+
+ state.num = virtio_queue_get_last_avail_idx(vdev, idx);
+ r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+ if (r) {
+ return -errno;
+ }
+
+ s = l = virtio_queue_get_desc_size(vdev, idx);
+ a = virtio_queue_get_desc_addr(vdev, idx);
+ vq->desc = cpu_physical_memory_map(a, &l, 0);
+ if (!vq->desc || l != s) {
+ r = -ENOMEM;
+ goto fail_alloc_desc;
+ }
+ s = l = virtio_queue_get_avail_size(vdev, idx);
+ a = virtio_queue_get_avail_addr(vdev, idx);
+ vq->avail = cpu_physical_memory_map(a, &l, 0);
+ if (!vq->avail || l != s) {
+ r = -ENOMEM;
+ goto fail_alloc_avail;
+ }
+ vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
+ vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
+ vq->used = cpu_physical_memory_map(a, &l, 1);
+ if (!vq->used || l != s) {
+ r = -ENOMEM;
+ goto fail_alloc_used;
+ }
+
+ vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
+ vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
+ vq->ring = cpu_physical_memory_map(a, &l, 1);
+ if (!vq->ring || l != s) {
+ r = -ENOMEM;
+ goto fail_alloc_ring;
+ }
+
+ r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
+ if (r < 0) {
+ r = -errno;
+ goto fail_alloc;
+ }
+
+ file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
+ r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+ if (r) {
+ r = -errno;
+ goto fail_kick;
+ }
+
+ /* Clear and discard previous events if any. */
+ event_notifier_test_and_clear(&vq->masked_notifier);
+
+ return 0;
+
+fail_kick:
+fail_alloc:
+ cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
+ 0, 0);
+fail_alloc_ring:
+ cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
+ 0, 0);
+fail_alloc_used:
+ cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
+ 0, 0);
+fail_alloc_avail:
+ cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
+ 0, 0);
+fail_alloc_desc:
+ return r;
+}
+
+static void vhost_virtqueue_stop(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx)
+{
+ struct vhost_vring_state state = {
+ .index = idx - dev->vq_index
+ };
+ int r;
+ assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
+ r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
+ fflush(stderr);
+ }
+ virtio_queue_set_last_avail_idx(vdev, idx, state.num);
+ assert (r >= 0);
+ cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
+ 0, virtio_queue_get_ring_size(vdev, idx));
+ cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
+ 1, virtio_queue_get_used_size(vdev, idx));
+ cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
+ 0, virtio_queue_get_avail_size(vdev, idx));
+ cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
+ 0, virtio_queue_get_desc_size(vdev, idx));
+}
+
+static void vhost_eventfd_add(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data, EventNotifier *e)
+{
+}
+
+static void vhost_eventfd_del(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data, EventNotifier *e)
+{
+}
+
+static int vhost_virtqueue_init(struct vhost_dev *dev,
+ struct vhost_virtqueue *vq, int n)
+{
+ struct vhost_vring_file file = {
+ .index = n,
+ };
+ int r = event_notifier_init(&vq->masked_notifier, 0);
+ if (r < 0) {
+ return r;
+ }
+
+ file.fd = event_notifier_get_fd(&vq->masked_notifier);
+ r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+ if (r) {
+ r = -errno;
+ goto fail_call;
+ }
+ return 0;
+fail_call:
+ event_notifier_cleanup(&vq->masked_notifier);
+ return r;
+}
+
+static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
+{
+ event_notifier_cleanup(&vq->masked_notifier);
+}
+
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
+ bool force)
+{
+ uint64_t features;
+ int i, r;
+ if (devfd >= 0) {
+ hdev->control = devfd;
+ } else {
+ hdev->control = open(devpath, O_RDWR);
+ if (hdev->control < 0) {
+ return -errno;
+ }
+ }
+ r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
+ if (r < 0) {
+ goto fail;
+ }
+
+ r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
+ if (r < 0) {
+ goto fail;
+ }
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
+ if (r < 0) {
+ goto fail_vq;
+ }
+ }
+ hdev->features = features;
+
+ hdev->memory_listener = (MemoryListener) {
+ .begin = vhost_begin,
+ .commit = vhost_commit,
+ .region_add = vhost_region_add,
+ .region_del = vhost_region_del,
+ .region_nop = vhost_region_nop,
+ .log_start = vhost_log_start,
+ .log_stop = vhost_log_stop,
+ .log_sync = vhost_log_sync,
+ .log_global_start = vhost_log_global_start,
+ .log_global_stop = vhost_log_global_stop,
+ .eventfd_add = vhost_eventfd_add,
+ .eventfd_del = vhost_eventfd_del,
+ .priority = 10
+ };
+ hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
+ hdev->n_mem_sections = 0;
+ hdev->mem_sections = NULL;
+ hdev->log = NULL;
+ hdev->log_size = 0;
+ hdev->log_enabled = false;
+ hdev->started = false;
+ memory_listener_register(&hdev->memory_listener, &address_space_memory);
+ hdev->force = force;
+ return 0;
+fail_vq:
+ while (--i >= 0) {
+ vhost_virtqueue_cleanup(hdev->vqs + i);
+ }
+fail:
+ r = -errno;
+ close(hdev->control);
+ return r;
+}
+
+void vhost_dev_cleanup(struct vhost_dev *hdev)
+{
+ int i;
+ for (i = 0; i < hdev->nvqs; ++i) {
+ vhost_virtqueue_cleanup(hdev->vqs + i);
+ }
+ memory_listener_unregister(&hdev->memory_listener);
+ g_free(hdev->mem);
+ g_free(hdev->mem_sections);
+ close(hdev->control);
+}
+
+bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ return !vdev->binding->query_guest_notifiers ||
+ vdev->binding->query_guest_notifiers(vdev->binding_opaque) ||
+ hdev->force;
+}
+
+/* Stop processing guest IO notifications in qemu.
+ * Start processing them in vhost in kernel.
+ */
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+ if (!vdev->binding->set_host_notifier) {
+ fprintf(stderr, "binding does not support host notifiers\n");
+ r = -ENOSYS;
+ goto fail;
+ }
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque,
+ hdev->vq_index + i,
+ true);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
+ goto fail_vq;
+ }
+ }
+
+ return 0;
+fail_vq:
+ while (--i >= 0) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque,
+ hdev->vq_index + i,
+ false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert (r >= 0);
+ }
+fail:
+ return r;
+}
+
+/* Stop processing guest IO notifications in vhost.
+ * Start processing them in qemu.
+ * This might actually run the qemu handlers right away,
+ * so virtio in qemu must be completely setup when this is called.
+ */
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque,
+ hdev->vq_index + i,
+ false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert (r >= 0);
+ }
+}
+
+/* Test and clear event pending status.
+ * Should be called after unmask to avoid losing events.
+ */
+bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
+{
+ struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
+ assert(hdev->started);
+ assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
+ return event_notifier_test_and_clear(&vq->masked_notifier);
+}
+
+/* Mask/unmask events from this vq. */
+void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+ bool mask)
+{
+ struct VirtQueue *vvq = virtio_get_queue(vdev, n);
+ int r, index = n - hdev->vq_index;
+
+ assert(hdev->started);
+ assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
+
+ struct vhost_vring_file file = {
+ .index = index
+ };
+ if (mask) {
+ file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier);
+ } else {
+ file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
+ }
+ r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
+ assert(r >= 0);
+}
+
+/* Host notifiers must be enabled at this point. */
+int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+
+ hdev->started = true;
+
+ r = vhost_dev_set_features(hdev, hdev->log_enabled);
+ if (r < 0) {
+ goto fail_features;
+ }
+ r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
+ if (r < 0) {
+ r = -errno;
+ goto fail_mem;
+ }
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vhost_virtqueue_start(hdev,
+ vdev,
+ hdev->vqs + i,
+ hdev->vq_index + i);
+ if (r < 0) {
+ goto fail_vq;
+ }
+ }
+
+ if (hdev->log_enabled) {
+ hdev->log_size = vhost_get_log_size(hdev);
+ hdev->log = hdev->log_size ?
+ g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
+ r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
+ (uint64_t)(unsigned long)hdev->log);
+ if (r < 0) {
+ r = -errno;
+ goto fail_log;
+ }
+ }
+
+ return 0;
+fail_log:
+fail_vq:
+ while (--i >= 0) {
+ vhost_virtqueue_stop(hdev,
+ vdev,
+ hdev->vqs + i,
+ hdev->vq_index + i);
+ }
+ i = hdev->nvqs;
+fail_mem:
+fail_features:
+
+ hdev->started = false;
+ return r;
+}
+
+/* Host notifiers must be enabled at this point. */
+void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i;
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ vhost_virtqueue_stop(hdev,
+ vdev,
+ hdev->vqs + i,
+ hdev->vq_index + i);
+ }
+ vhost_log_sync_range(hdev, 0, ~0x0ull);
+
+ hdev->started = false;
+ g_free(hdev->log);
+ hdev->log = NULL;
+ hdev->log_size = 0;
+}
+
--- /dev/null
+/*
+ * Virtio Balloon Device
+ *
+ * Copyright IBM, Corp. 2008
+ * Copyright (C) 2011 Red Hat, Inc.
+ * Copyright (C) 2011 Amit Shah <amit.shah@redhat.com>
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/iov.h"
+#include "qemu/timer.h"
+#include "qemu-common.h"
+#include "hw/virtio/virtio.h"
+#include "hw/i386/pc.h"
+#include "cpu.h"
+#include "sysemu/balloon.h"
+#include "hw/virtio/virtio-balloon.h"
+#include "sysemu/kvm.h"
+#include "exec/address-spaces.h"
+#include "qapi/visitor.h"
+
+#if defined(__linux__)
+#include <sys/mman.h>
+#endif
+
+#include "hw/virtio/virtio-bus.h"
+
+static void balloon_page(void *addr, int deflate)
+{
+#if defined(__linux__)
+ if (!kvm_enabled() || kvm_has_sync_mmu())
+ qemu_madvise(addr, TARGET_PAGE_SIZE,
+ deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
+#endif
+}
+
+static const char *balloon_stat_names[] = {
+ [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
+ [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
+ [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
+ [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
+ [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
+ [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
+ [VIRTIO_BALLOON_S_NR] = NULL
+};
+
+/*
+ * reset_stats - Mark all items in the stats array as unset
+ *
+ * This function needs to be called at device intialization and before
+ * before updating to a set of newly-generated stats. This will ensure that no
+ * stale values stick around in case the guest reports a subset of the supported
+ * statistics.
+ */
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+ int i;
+ for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
+}
+
+static bool balloon_stats_supported(const VirtIOBalloon *s)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ return vdev->guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ);
+}
+
+static bool balloon_stats_enabled(const VirtIOBalloon *s)
+{
+ return s->stats_poll_interval > 0;
+}
+
+static void balloon_stats_destroy_timer(VirtIOBalloon *s)
+{
+ if (balloon_stats_enabled(s)) {
+ qemu_del_timer(s->stats_timer);
+ qemu_free_timer(s->stats_timer);
+ s->stats_timer = NULL;
+ s->stats_poll_interval = 0;
+ }
+}
+
+static void balloon_stats_change_timer(VirtIOBalloon *s, int secs)
+{
+ qemu_mod_timer(s->stats_timer, qemu_get_clock_ms(vm_clock) + secs * 1000);
+}
+
+static void balloon_stats_poll_cb(void *opaque)
+{
+ VirtIOBalloon *s = opaque;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+ if (!balloon_stats_supported(s)) {
+ /* re-schedule */
+ balloon_stats_change_timer(s, s->stats_poll_interval);
+ return;
+ }
+
+ virtqueue_push(s->svq, &s->stats_vq_elem, s->stats_vq_offset);
+ virtio_notify(vdev, s->svq);
+}
+
+static void balloon_stats_get_all(Object *obj, struct Visitor *v,
+ void *opaque, const char *name, Error **errp)
+{
+ VirtIOBalloon *s = opaque;
+ int i;
+
+ if (!s->stats_last_update) {
+ error_setg(errp, "guest hasn't updated any stats yet");
+ return;
+ }
+
+ visit_start_struct(v, NULL, "guest-stats", name, 0, errp);
+ visit_type_int(v, &s->stats_last_update, "last-update", errp);
+
+ visit_start_struct(v, NULL, NULL, "stats", 0, errp);
+ for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
+ visit_type_int64(v, (int64_t *) &s->stats[i], balloon_stat_names[i],
+ errp);
+ }
+ visit_end_struct(v, errp);
+
+ visit_end_struct(v, errp);
+}
+
+static void balloon_stats_get_poll_interval(Object *obj, struct Visitor *v,
+ void *opaque, const char *name,
+ Error **errp)
+{
+ VirtIOBalloon *s = opaque;
+ visit_type_int(v, &s->stats_poll_interval, name, errp);
+}
+
+static void balloon_stats_set_poll_interval(Object *obj, struct Visitor *v,
+ void *opaque, const char *name,
+ Error **errp)
+{
+ VirtIOBalloon *s = opaque;
+ int64_t value;
+
+ visit_type_int(v, &value, name, errp);
+ if (error_is_set(errp)) {
+ return;
+ }
+
+ if (value < 0) {
+ error_setg(errp, "timer value must be greater than zero");
+ return;
+ }
+
+ if (value == s->stats_poll_interval) {
+ return;
+ }
+
+ if (value == 0) {
+ /* timer=0 disables the timer */
+ balloon_stats_destroy_timer(s);
+ return;
+ }
+
+ if (balloon_stats_enabled(s)) {
+ /* timer interval change */
+ s->stats_poll_interval = value;
+ balloon_stats_change_timer(s, value);
+ return;
+ }
+
+ /* create a new timer */
+ g_assert(s->stats_timer == NULL);
+ s->stats_timer = qemu_new_timer_ms(vm_clock, balloon_stats_poll_cb, s);
+ s->stats_poll_interval = value;
+ balloon_stats_change_timer(s, 0);
+}
+
+static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+ VirtQueueElement elem;
+ MemoryRegionSection section;
+
+ while (virtqueue_pop(vq, &elem)) {
+ size_t offset = 0;
+ uint32_t pfn;
+
+ while (iov_to_buf(elem.out_sg, elem.out_num, offset, &pfn, 4) == 4) {
+ ram_addr_t pa;
+ ram_addr_t addr;
+
+ pa = (ram_addr_t)ldl_p(&pfn) << VIRTIO_BALLOON_PFN_SHIFT;
+ offset += 4;
+
+ /* FIXME: remove get_system_memory(), but how? */
+ section = memory_region_find(get_system_memory(), pa, 1);
+ if (!section.size || !memory_region_is_ram(section.mr))
+ continue;
+
+ /* Using memory_region_get_ram_ptr is bending the rules a bit, but
+ should be OK because we only want a single page. */
+ addr = section.offset_within_region;
+ balloon_page(memory_region_get_ram_ptr(section.mr) + addr,
+ !!(vq == s->dvq));
+ }
+
+ virtqueue_push(vq, &elem, offset);
+ virtio_notify(vdev, vq);
+ }
+}
+
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+ VirtQueueElement *elem = &s->stats_vq_elem;
+ VirtIOBalloonStat stat;
+ size_t offset = 0;
+ qemu_timeval tv;
+
+ if (!virtqueue_pop(vq, elem)) {
+ goto out;
+ }
+
+ /* Initialize the stats to get rid of any stale values. This is only
+ * needed to handle the case where a guest supports fewer stats than it
+ * used to (ie. it has booted into an old kernel).
+ */
+ reset_stats(s);
+
+ while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat))
+ == sizeof(stat)) {
+ uint16_t tag = tswap16(stat.tag);
+ uint64_t val = tswap64(stat.val);
+
+ offset += sizeof(stat);
+ if (tag < VIRTIO_BALLOON_S_NR)
+ s->stats[tag] = val;
+ }
+ s->stats_vq_offset = offset;
+
+ if (qemu_gettimeofday(&tv) < 0) {
+ fprintf(stderr, "warning: %s: failed to get time of day\n", __func__);
+ goto out;
+ }
+
+ s->stats_last_update = tv.tv_sec;
+
+out:
+ if (balloon_stats_enabled(s)) {
+ balloon_stats_change_timer(s, s->stats_poll_interval);
+ }
+}
+
+static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
+{
+ VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+ struct virtio_balloon_config config;
+
+ config.num_pages = cpu_to_le32(dev->num_pages);
+ config.actual = cpu_to_le32(dev->actual);
+
+ memcpy(config_data, &config, 8);
+}
+
+static void virtio_balloon_set_config(VirtIODevice *vdev,
+ const uint8_t *config_data)
+{
+ VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
+ struct virtio_balloon_config config;
+ uint32_t oldactual = dev->actual;
+ memcpy(&config, config_data, 8);
+ dev->actual = le32_to_cpu(config.actual);
+ if (dev->actual != oldactual) {
+ qemu_balloon_changed(ram_size -
+ (dev->actual << VIRTIO_BALLOON_PFN_SHIFT));
+ }
+}
+
+static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
+{
+ f |= (1 << VIRTIO_BALLOON_F_STATS_VQ);
+ return f;
+}
+
+static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
+{
+ VirtIOBalloon *dev = opaque;
+ info->actual = ram_size - ((uint64_t) dev->actual <<
+ VIRTIO_BALLOON_PFN_SHIFT);
+}
+
+static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
+{
+ VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+ if (target > ram_size) {
+ target = ram_size;
+ }
+ if (target) {
+ dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
+ virtio_notify_config(vdev);
+ }
+}
+
+static void virtio_balloon_save(QEMUFile *f, void *opaque)
+{
+ VirtIOBalloon *s = VIRTIO_BALLOON(opaque);
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+ virtio_save(vdev, f);
+
+ qemu_put_be32(f, s->num_pages);
+ qemu_put_be32(f, s->actual);
+}
+
+static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
+{
+ VirtIOBalloon *s = VIRTIO_BALLOON(opaque);
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ int ret;
+
+ if (version_id != 1)
+ return -EINVAL;
+
+ ret = virtio_load(vdev, f);
+ if (ret) {
+ return ret;
+ }
+
+ s->num_pages = qemu_get_be32(f);
+ s->actual = qemu_get_be32(f);
+ return 0;
+}
+
+static int virtio_balloon_device_init(VirtIODevice *vdev)
+{
+ DeviceState *qdev = DEVICE(vdev);
+ VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+ int ret;
+
+ virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON, 8);
+
+ vdev->get_config = virtio_balloon_get_config;
+ vdev->set_config = virtio_balloon_set_config;
+ vdev->get_features = virtio_balloon_get_features;
+
+ ret = qemu_add_balloon_handler(virtio_balloon_to_target,
+ virtio_balloon_stat, s);
+
+ if (ret < 0) {
+ virtio_common_cleanup(VIRTIO_DEVICE(s));
+ return -1;
+ }
+
+ s->ivq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
+ s->dvq = virtio_add_queue(vdev, 128, virtio_balloon_handle_output);
+ s->svq = virtio_add_queue(vdev, 128, virtio_balloon_receive_stats);
+
+ register_savevm(qdev, "virtio-balloon", -1, 1,
+ virtio_balloon_save, virtio_balloon_load, s);
+
+ object_property_add(OBJECT(qdev), "guest-stats", "guest statistics",
+ balloon_stats_get_all, NULL, NULL, s, NULL);
+
+ object_property_add(OBJECT(qdev), "guest-stats-polling-interval", "int",
+ balloon_stats_get_poll_interval,
+ balloon_stats_set_poll_interval,
+ NULL, s, NULL);
+ return 0;
+}
+
+static int virtio_balloon_device_exit(DeviceState *qdev)
+{
+ VirtIOBalloon *s = VIRTIO_BALLOON(qdev);
+ VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
+
+ balloon_stats_destroy_timer(s);
+ qemu_remove_balloon_handler(s);
+ unregister_savevm(qdev, "virtio-balloon", s);
+ virtio_common_cleanup(vdev);
+ return 0;
+}
+
+static Property virtio_balloon_properties[] = {
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_balloon_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+ dc->exit = virtio_balloon_device_exit;
+ dc->props = virtio_balloon_properties;
+ vdc->init = virtio_balloon_device_init;
+ vdc->get_config = virtio_balloon_get_config;
+ vdc->set_config = virtio_balloon_set_config;
+ vdc->get_features = virtio_balloon_get_features;
+}
+
+static const TypeInfo virtio_balloon_info = {
+ .name = TYPE_VIRTIO_BALLOON,
+ .parent = TYPE_VIRTIO_DEVICE,
+ .instance_size = sizeof(VirtIOBalloon),
+ .class_init = virtio_balloon_class_init,
+};
+
+static void virtio_register_types(void)
+{
+ type_register_static(&virtio_balloon_info);
+}
+
+type_init(virtio_register_types)
--- /dev/null
+/*
+ * Virtio Support
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <inttypes.h>
+
+#include "trace.h"
+#include "qemu/error-report.h"
+#include "hw/virtio/virtio.h"
+#include "qemu/atomic.h"
+#include "hw/virtio/virtio-bus.h"
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize again. */
+#define VIRTIO_PCI_VRING_ALIGN 4096
+
+typedef struct VRingDesc
+{
+ uint64_t addr;
+ uint32_t len;
+ uint16_t flags;
+ uint16_t next;
+} VRingDesc;
+
+typedef struct VRingAvail
+{
+ uint16_t flags;
+ uint16_t idx;
+ uint16_t ring[0];
+} VRingAvail;
+
+typedef struct VRingUsedElem
+{
+ uint32_t id;
+ uint32_t len;
+} VRingUsedElem;
+
+typedef struct VRingUsed
+{
+ uint16_t flags;
+ uint16_t idx;
+ VRingUsedElem ring[0];
+} VRingUsed;
+
+typedef struct VRing
+{
+ unsigned int num;
+ hwaddr desc;
+ hwaddr avail;
+ hwaddr used;
+} VRing;
+
+struct VirtQueue
+{
+ VRing vring;
+ hwaddr pa;
+ uint16_t last_avail_idx;
+ /* Last used index value we have signalled on */
+ uint16_t signalled_used;
+
+ /* Last used index value we have signalled on */
+ bool signalled_used_valid;
+
+ /* Notification enabled? */
+ bool notification;
+
+ uint16_t queue_index;
+
+ int inuse;
+
+ uint16_t vector;
+ void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+ VirtIODevice *vdev;
+ EventNotifier guest_notifier;
+ EventNotifier host_notifier;
+};
+
+/* virt queue functions */
+static void virtqueue_init(VirtQueue *vq)
+{
+ hwaddr pa = vq->pa;
+
+ vq->vring.desc = pa;
+ vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
+ vq->vring.used = vring_align(vq->vring.avail +
+ offsetof(VRingAvail, ring[vq->vring.num]),
+ VIRTIO_PCI_VRING_ALIGN);
+}
+
+static inline uint64_t vring_desc_addr(hwaddr desc_pa, int i)
+{
+ hwaddr pa;
+ pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
+ return ldq_phys(pa);
+}
+
+static inline uint32_t vring_desc_len(hwaddr desc_pa, int i)
+{
+ hwaddr pa;
+ pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
+ return ldl_phys(pa);
+}
+
+static inline uint16_t vring_desc_flags(hwaddr desc_pa, int i)
+{
+ hwaddr pa;
+ pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
+ return lduw_phys(pa);
+}
+
+static inline uint16_t vring_desc_next(hwaddr desc_pa, int i)
+{
+ hwaddr pa;
+ pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
+ return lduw_phys(pa);
+}
+
+static inline uint16_t vring_avail_flags(VirtQueue *vq)
+{
+ hwaddr pa;
+ pa = vq->vring.avail + offsetof(VRingAvail, flags);
+ return lduw_phys(pa);
+}
+
+static inline uint16_t vring_avail_idx(VirtQueue *vq)
+{
+ hwaddr pa;
+ pa = vq->vring.avail + offsetof(VRingAvail, idx);
+ return lduw_phys(pa);
+}
+
+static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
+{
+ hwaddr pa;
+ pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
+ return lduw_phys(pa);
+}
+
+static inline uint16_t vring_used_event(VirtQueue *vq)
+{
+ return vring_avail_ring(vq, vq->vring.num);
+}
+
+static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
+{
+ hwaddr pa;
+ pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);
+ stl_phys(pa, val);
+}
+
+static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
+{
+ hwaddr pa;
+ pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);
+ stl_phys(pa, val);
+}
+
+static uint16_t vring_used_idx(VirtQueue *vq)
+{
+ hwaddr pa;
+ pa = vq->vring.used + offsetof(VRingUsed, idx);
+ return lduw_phys(pa);
+}
+
+static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
+{
+ hwaddr pa;
+ pa = vq->vring.used + offsetof(VRingUsed, idx);
+ stw_phys(pa, val);
+}
+
+static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
+{
+ hwaddr pa;
+ pa = vq->vring.used + offsetof(VRingUsed, flags);
+ stw_phys(pa, lduw_phys(pa) | mask);
+}
+
+static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
+{
+ hwaddr pa;
+ pa = vq->vring.used + offsetof(VRingUsed, flags);
+ stw_phys(pa, lduw_phys(pa) & ~mask);
+}
+
+static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
+{
+ hwaddr pa;
+ if (!vq->notification) {
+ return;
+ }
+ pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
+ stw_phys(pa, val);
+}
+
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+ vq->notification = enable;
+ if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+ vring_avail_event(vq, vring_avail_idx(vq));
+ } else if (enable) {
+ vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
+ } else {
+ vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
+ }
+ if (enable) {
+ /* Expose avail event/used flags before caller checks the avail idx. */
+ smp_mb();
+ }
+}
+
+int virtio_queue_ready(VirtQueue *vq)
+{
+ return vq->vring.avail != 0;
+}
+
+int virtio_queue_empty(VirtQueue *vq)
+{
+ return vring_avail_idx(vq) == vq->last_avail_idx;
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+ unsigned int len, unsigned int idx)
+{
+ unsigned int offset;
+ int i;
+
+ trace_virtqueue_fill(vq, elem, len, idx);
+
+ offset = 0;
+ for (i = 0; i < elem->in_num; i++) {
+ size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
+
+ cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
+ elem->in_sg[i].iov_len,
+ 1, size);
+
+ offset += size;
+ }
+
+ for (i = 0; i < elem->out_num; i++)
+ cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
+ elem->out_sg[i].iov_len,
+ 0, elem->out_sg[i].iov_len);
+
+ idx = (idx + vring_used_idx(vq)) % vq->vring.num;
+
+ /* Get a pointer to the next entry in the used ring. */
+ vring_used_ring_id(vq, idx, elem->index);
+ vring_used_ring_len(vq, idx, len);
+}
+
+void virtqueue_flush(VirtQueue *vq, unsigned int count)
+{
+ uint16_t old, new;
+ /* Make sure buffer is written before we update index. */
+ smp_wmb();
+ trace_virtqueue_flush(vq, count);
+ old = vring_used_idx(vq);
+ new = old + count;
+ vring_used_idx_set(vq, new);
+ vq->inuse -= count;
+ if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
+ vq->signalled_used_valid = false;
+}
+
+void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
+ unsigned int len)
+{
+ virtqueue_fill(vq, elem, len, 0);
+ virtqueue_flush(vq, 1);
+}
+
+static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
+{
+ uint16_t num_heads = vring_avail_idx(vq) - idx;
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ if (num_heads > vq->vring.num) {
+ error_report("Guest moved used index from %u to %u",
+ idx, vring_avail_idx(vq));
+ exit(1);
+ }
+ /* On success, callers read a descriptor at vq->last_avail_idx.
+ * Make sure descriptor read does not bypass avail index read. */
+ if (num_heads) {
+ smp_rmb();
+ }
+
+ return num_heads;
+}
+
+static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
+{
+ unsigned int head;
+
+ /* Grab the next descriptor number they're advertising, and increment
+ * the index we've seen. */
+ head = vring_avail_ring(vq, idx % vq->vring.num);
+
+ /* If their number is silly, that's a fatal mistake. */
+ if (head >= vq->vring.num) {
+ error_report("Guest says index %u is available", head);
+ exit(1);
+ }
+
+ return head;
+}
+
+static unsigned virtqueue_next_desc(hwaddr desc_pa,
+ unsigned int i, unsigned int max)
+{
+ unsigned int next;
+
+ /* If this descriptor says it doesn't chain, we're done. */
+ if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
+ return max;
+
+ /* Check they're not leading us off end of descriptors. */
+ next = vring_desc_next(desc_pa, i);
+ /* Make sure compiler knows to grab that: we don't want it changing! */
+ smp_wmb();
+
+ if (next >= max) {
+ error_report("Desc next is %u", next);
+ exit(1);
+ }
+
+ return next;
+}
+
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+ unsigned int *out_bytes,
+ unsigned max_in_bytes, unsigned max_out_bytes)
+{
+ unsigned int idx;
+ unsigned int total_bufs, in_total, out_total;
+
+ idx = vq->last_avail_idx;
+
+ total_bufs = in_total = out_total = 0;
+ while (virtqueue_num_heads(vq, idx)) {
+ unsigned int max, num_bufs, indirect = 0;
+ hwaddr desc_pa;
+ int i;
+
+ max = vq->vring.num;
+ num_bufs = total_bufs;
+ i = virtqueue_get_head(vq, idx++);
+ desc_pa = vq->vring.desc;
+
+ if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
+ if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
+ error_report("Invalid size for indirect buffer table");
+ exit(1);
+ }
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if (num_bufs >= max) {
+ error_report("Looped descriptor");
+ exit(1);
+ }
+
+ /* loop over the indirect descriptor table */
+ indirect = 1;
+ max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
+ num_bufs = i = 0;
+ desc_pa = vring_desc_addr(desc_pa, i);
+ }
+
+ do {
+ /* If we've got too many, that implies a descriptor loop. */
+ if (++num_bufs > max) {
+ error_report("Looped descriptor");
+ exit(1);
+ }
+
+ if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
+ in_total += vring_desc_len(desc_pa, i);
+ } else {
+ out_total += vring_desc_len(desc_pa, i);
+ }
+ if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+ goto done;
+ }
+ } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
+
+ if (!indirect)
+ total_bufs = num_bufs;
+ else
+ total_bufs++;
+ }
+done:
+ if (in_bytes) {
+ *in_bytes = in_total;
+ }
+ if (out_bytes) {
+ *out_bytes = out_total;
+ }
+}
+
+int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+ unsigned int out_bytes)
+{
+ unsigned int in_total, out_total;
+
+ virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
+ return in_bytes <= in_total && out_bytes <= out_total;
+}
+
+void virtqueue_map_sg(struct iovec *sg, hwaddr *addr,
+ size_t num_sg, int is_write)
+{
+ unsigned int i;
+ hwaddr len;
+
+ for (i = 0; i < num_sg; i++) {
+ len = sg[i].iov_len;
+ sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
+ if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
+ error_report("virtio: trying to map MMIO memory");
+ exit(1);
+ }
+ }
+}
+
+int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
+{
+ unsigned int i, head, max;
+ hwaddr desc_pa = vq->vring.desc;
+
+ if (!virtqueue_num_heads(vq, vq->last_avail_idx))
+ return 0;
+
+ /* When we start there are none of either input nor output. */
+ elem->out_num = elem->in_num = 0;
+
+ max = vq->vring.num;
+
+ i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
+ if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+ vring_avail_event(vq, vring_avail_idx(vq));
+ }
+
+ if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
+ if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
+ error_report("Invalid size for indirect buffer table");
+ exit(1);
+ }
+
+ /* loop over the indirect descriptor table */
+ max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
+ desc_pa = vring_desc_addr(desc_pa, i);
+ i = 0;
+ }
+
+ /* Collect all the descriptors */
+ do {
+ struct iovec *sg;
+
+ if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
+ if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
+ error_report("Too many write descriptors in indirect table");
+ exit(1);
+ }
+ elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
+ sg = &elem->in_sg[elem->in_num++];
+ } else {
+ if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
+ error_report("Too many read descriptors in indirect table");
+ exit(1);
+ }
+ elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
+ sg = &elem->out_sg[elem->out_num++];
+ }
+
+ sg->iov_len = vring_desc_len(desc_pa, i);
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if ((elem->in_num + elem->out_num) > max) {
+ error_report("Looped descriptor");
+ exit(1);
+ }
+ } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
+
+ /* Now map what we have collected */
+ virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
+ virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
+
+ elem->index = head;
+
+ vq->inuse++;
+
+ trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
+ return elem->in_num + elem->out_num;
+}
+
+/* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+ if (vdev->binding->notify) {
+ vdev->binding->notify(vdev->binding_opaque, vector);
+ }
+}
+
+void virtio_update_irq(VirtIODevice *vdev)
+{
+ virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+}
+
+void virtio_set_status(VirtIODevice *vdev, uint8_t val)
+{
+ trace_virtio_set_status(vdev, val);
+
+ if (vdev->set_status) {
+ vdev->set_status(vdev, val);
+ }
+ vdev->status = val;
+}
+
+void virtio_reset(void *opaque)
+{
+ VirtIODevice *vdev = opaque;
+ int i;
+
+ virtio_set_status(vdev, 0);
+
+ if (vdev->reset)
+ vdev->reset(vdev);
+
+ vdev->guest_features = 0;
+ vdev->queue_sel = 0;
+ vdev->status = 0;
+ vdev->isr = 0;
+ vdev->config_vector = VIRTIO_NO_VECTOR;
+ virtio_notify_vector(vdev, vdev->config_vector);
+
+ for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+ vdev->vq[i].vring.desc = 0;
+ vdev->vq[i].vring.avail = 0;
+ vdev->vq[i].vring.used = 0;
+ vdev->vq[i].last_avail_idx = 0;
+ vdev->vq[i].pa = 0;
+ vdev->vq[i].vector = VIRTIO_NO_VECTOR;
+ vdev->vq[i].signalled_used = 0;
+ vdev->vq[i].signalled_used_valid = false;
+ vdev->vq[i].notification = true;
+ }
+}
+
+uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
+{
+ uint8_t val;
+
+ vdev->get_config(vdev, vdev->config);
+
+ if (addr > (vdev->config_len - sizeof(val)))
+ return (uint32_t)-1;
+
+ val = ldub_p(vdev->config + addr);
+ return val;
+}
+
+uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
+{
+ uint16_t val;
+
+ vdev->get_config(vdev, vdev->config);
+
+ if (addr > (vdev->config_len - sizeof(val)))
+ return (uint32_t)-1;
+
+ val = lduw_p(vdev->config + addr);
+ return val;
+}
+
+uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
+{
+ uint32_t val;
+
+ vdev->get_config(vdev, vdev->config);
+
+ if (addr > (vdev->config_len - sizeof(val)))
+ return (uint32_t)-1;
+
+ val = ldl_p(vdev->config + addr);
+ return val;
+}
+
+void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
+{
+ uint8_t val = data;
+
+ if (addr > (vdev->config_len - sizeof(val)))
+ return;
+
+ stb_p(vdev->config + addr, val);
+
+ if (vdev->set_config)
+ vdev->set_config(vdev, vdev->config);
+}
+
+void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
+{
+ uint16_t val = data;
+
+ if (addr > (vdev->config_len - sizeof(val)))
+ return;
+
+ stw_p(vdev->config + addr, val);
+
+ if (vdev->set_config)
+ vdev->set_config(vdev, vdev->config);
+}
+
+void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
+{
+ uint32_t val = data;
+
+ if (addr > (vdev->config_len - sizeof(val)))
+ return;
+
+ stl_p(vdev->config + addr, val);
+
+ if (vdev->set_config)
+ vdev->set_config(vdev, vdev->config);
+}
+
+void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
+{
+ vdev->vq[n].pa = addr;
+ virtqueue_init(&vdev->vq[n]);
+}
+
+hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].pa;
+}
+
+int virtio_queue_get_num(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.num;
+}
+
+int virtio_queue_get_id(VirtQueue *vq)
+{
+ VirtIODevice *vdev = vq->vdev;
+ assert(vq >= &vdev->vq[0] && vq < &vdev->vq[VIRTIO_PCI_QUEUE_MAX]);
+ return vq - &vdev->vq[0];
+}
+
+void virtio_queue_notify_vq(VirtQueue *vq)
+{
+ if (vq->vring.desc) {
+ VirtIODevice *vdev = vq->vdev;
+ trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+ vq->handle_output(vdev, vq);
+ }
+}
+
+void virtio_queue_notify(VirtIODevice *vdev, int n)
+{
+ virtio_queue_notify_vq(&vdev->vq[n]);
+}
+
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+ return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+ VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+ if (n < VIRTIO_PCI_QUEUE_MAX)
+ vdev->vq[n].vector = vector;
+}
+
+VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
+ void (*handle_output)(VirtIODevice *, VirtQueue *))
+{
+ int i;
+
+ for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+ if (vdev->vq[i].vring.num == 0)
+ break;
+ }
+
+ if (i == VIRTIO_PCI_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
+ abort();
+
+ vdev->vq[i].vring.num = queue_size;
+ vdev->vq[i].handle_output = handle_output;
+
+ return &vdev->vq[i];
+}
+
+void virtio_del_queue(VirtIODevice *vdev, int n)
+{
+ if (n < 0 || n >= VIRTIO_PCI_QUEUE_MAX) {
+ abort();
+ }
+
+ vdev->vq[n].vring.num = 0;
+}
+
+void virtio_irq(VirtQueue *vq)
+{
+ trace_virtio_irq(vq);
+ vq->vdev->isr |= 0x01;
+ virtio_notify_vector(vq->vdev, vq->vector);
+}
+
+/* Assuming a given event_idx value from the other size, if
+ * we have just incremented index from old to new_idx,
+ * should we trigger an event? */
+static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
+{
+ /* Note: Xen has similar logic for notification hold-off
+ * in include/xen/interface/io/ring.h with req_event and req_prod
+ * corresponding to event_idx + 1 and new respectively.
+ * Note also that req_event and req_prod in Xen start at 1,
+ * event indexes in virtio start at 0. */
+ return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
+}
+
+static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+ uint16_t old, new;
+ bool v;
+ /* We need to expose used array entries before checking used event. */
+ smp_mb();
+ /* Always notify when queue is empty (when feature acknowledge) */
+ if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
+ !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
+ return true;
+ }
+
+ if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
+ return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
+ }
+
+ v = vq->signalled_used_valid;
+ vq->signalled_used_valid = true;
+ old = vq->signalled_used;
+ new = vq->signalled_used = vring_used_idx(vq);
+ return !v || vring_need_event(vring_used_event(vq), new, old);
+}
+
+void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+ if (!vring_notify(vdev, vq)) {
+ return;
+ }
+
+ trace_virtio_notify(vdev, vq);
+ vdev->isr |= 0x01;
+ virtio_notify_vector(vdev, vq->vector);
+}
+
+void virtio_notify_config(VirtIODevice *vdev)
+{
+ if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ return;
+
+ vdev->isr |= 0x03;
+ virtio_notify_vector(vdev, vdev->config_vector);
+}
+
+void virtio_save(VirtIODevice *vdev, QEMUFile *f)
+{
+ int i;
+
+ if (vdev->binding->save_config)
+ vdev->binding->save_config(vdev->binding_opaque, f);
+
+ qemu_put_8s(f, &vdev->status);
+ qemu_put_8s(f, &vdev->isr);
+ qemu_put_be16s(f, &vdev->queue_sel);
+ qemu_put_be32s(f, &vdev->guest_features);
+ qemu_put_be32(f, vdev->config_len);
+ qemu_put_buffer(f, vdev->config, vdev->config_len);
+
+ for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+ if (vdev->vq[i].vring.num == 0)
+ break;
+ }
+
+ qemu_put_be32(f, i);
+
+ for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+ if (vdev->vq[i].vring.num == 0)
+ break;
+
+ qemu_put_be32(f, vdev->vq[i].vring.num);
+ qemu_put_be64(f, vdev->vq[i].pa);
+ qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+ if (vdev->binding->save_queue)
+ vdev->binding->save_queue(vdev->binding_opaque, i, f);
+ }
+}
+
+int virtio_set_features(VirtIODevice *vdev, uint32_t val)
+{
+ uint32_t supported_features =
+ vdev->binding->get_features(vdev->binding_opaque);
+ bool bad = (val & ~supported_features) != 0;
+
+ val &= supported_features;
+ if (vdev->set_features) {
+ vdev->set_features(vdev, val);
+ }
+ vdev->guest_features = val;
+ return bad ? -1 : 0;
+}
+
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
+{
+ int num, i, ret;
+ uint32_t features;
+ uint32_t supported_features;
+
+ if (vdev->binding->load_config) {
+ ret = vdev->binding->load_config(vdev->binding_opaque, f);
+ if (ret)
+ return ret;
+ }
+
+ qemu_get_8s(f, &vdev->status);
+ qemu_get_8s(f, &vdev->isr);
+ qemu_get_be16s(f, &vdev->queue_sel);
+ qemu_get_be32s(f, &features);
+
+ if (virtio_set_features(vdev, features) < 0) {
+ supported_features = vdev->binding->get_features(vdev->binding_opaque);
+ error_report("Features 0x%x unsupported. Allowed features: 0x%x",
+ features, supported_features);
+ return -1;
+ }
+ vdev->config_len = qemu_get_be32(f);
+ qemu_get_buffer(f, vdev->config, vdev->config_len);
+
+ num = qemu_get_be32(f);
+
+ for (i = 0; i < num; i++) {
+ vdev->vq[i].vring.num = qemu_get_be32(f);
+ vdev->vq[i].pa = qemu_get_be64(f);
+ qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
+ vdev->vq[i].signalled_used_valid = false;
+ vdev->vq[i].notification = true;
+
+ if (vdev->vq[i].pa) {
+ uint16_t nheads;
+ virtqueue_init(&vdev->vq[i]);
+ nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ if (nheads > vdev->vq[i].vring.num) {
+ error_report("VQ %d size 0x%x Guest index 0x%x "
+ "inconsistent with Host index 0x%x: delta 0x%x",
+ i, vdev->vq[i].vring.num,
+ vring_avail_idx(&vdev->vq[i]),
+ vdev->vq[i].last_avail_idx, nheads);
+ return -1;
+ }
+ } else if (vdev->vq[i].last_avail_idx) {
+ error_report("VQ %d address 0x0 "
+ "inconsistent with Host index 0x%x",
+ i, vdev->vq[i].last_avail_idx);
+ return -1;
+ }
+ if (vdev->binding->load_queue) {
+ ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+ if (ret)
+ return ret;
+ }
+ }
+
+ virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+ return 0;
+}
+
+void virtio_common_cleanup(VirtIODevice *vdev)
+{
+ qemu_del_vm_change_state_handler(vdev->vmstate);
+ g_free(vdev->config);
+ g_free(vdev->vq);
+}
+
+void virtio_cleanup(VirtIODevice *vdev)
+{
+ virtio_common_cleanup(vdev);
+ g_free(vdev);
+}
+
+static void virtio_vmstate_change(void *opaque, int running, RunState state)
+{
+ VirtIODevice *vdev = opaque;
+ bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+ vdev->vm_running = running;
+
+ if (backend_run) {
+ virtio_set_status(vdev, vdev->status);
+ }
+
+ if (vdev->binding->vmstate_change) {
+ vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
+ }
+
+ if (!backend_run) {
+ virtio_set_status(vdev, vdev->status);
+ }
+}
+
+void virtio_init(VirtIODevice *vdev, const char *name,
+ uint16_t device_id, size_t config_size)
+{
+ int i;
+ vdev->device_id = device_id;
+ vdev->status = 0;
+ vdev->isr = 0;
+ vdev->queue_sel = 0;
+ vdev->config_vector = VIRTIO_NO_VECTOR;
+ vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
+ vdev->vm_running = runstate_is_running();
+ for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+ vdev->vq[i].vector = VIRTIO_NO_VECTOR;
+ vdev->vq[i].vdev = vdev;
+ vdev->vq[i].queue_index = i;
+ }
+
+ vdev->name = name;
+ vdev->config_len = config_size;
+ if (vdev->config_len) {
+ vdev->config = g_malloc0(config_size);
+ } else {
+ vdev->config = NULL;
+ }
+ vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
+ vdev);
+}
+
+VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
+ size_t config_size, size_t struct_size)
+{
+ VirtIODevice *vdev;
+ vdev = g_malloc0(struct_size);
+ virtio_init(vdev, name, device_id, config_size);
+ return vdev;
+}
+
+void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
+ DeviceState *opaque)
+{
+ vdev->binding = binding;
+ vdev->binding_opaque = opaque;
+}
+
+hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.desc;
+}
+
+hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.avail;
+}
+
+hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.used;
+}
+
+hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.desc;
+}
+
+hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
+{
+ return sizeof(VRingDesc) * vdev->vq[n].vring.num;
+}
+
+hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
+{
+ return offsetof(VRingAvail, ring) +
+ sizeof(uint64_t) * vdev->vq[n].vring.num;
+}
+
+hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
+{
+ return offsetof(VRingUsed, ring) +
+ sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+}
+
+hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
+ virtio_queue_get_used_size(vdev, n);
+}
+
+uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].last_avail_idx;
+}
+
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
+{
+ vdev->vq[n].last_avail_idx = idx;
+}
+
+VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
+{
+ return vdev->vq + n;
+}
+
+uint16_t virtio_get_queue_index(VirtQueue *vq)
+{
+ return vq->queue_index;
+}
+
+static void virtio_queue_guest_notifier_read(EventNotifier *n)
+{
+ VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
+ if (event_notifier_test_and_clear(n)) {
+ virtio_irq(vq);
+ }
+}
+
+void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+ bool with_irqfd)
+{
+ if (assign && !with_irqfd) {
+ event_notifier_set_handler(&vq->guest_notifier,
+ virtio_queue_guest_notifier_read);
+ } else {
+ event_notifier_set_handler(&vq->guest_notifier, NULL);
+ }
+ if (!assign) {
+ /* Test and clear notifier before closing it,
+ * in case poll callback didn't have time to run. */
+ virtio_queue_guest_notifier_read(&vq->guest_notifier);
+ }
+}
+
+EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
+{
+ return &vq->guest_notifier;
+}
+
+static void virtio_queue_host_notifier_read(EventNotifier *n)
+{
+ VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
+ if (event_notifier_test_and_clear(n)) {
+ virtio_queue_notify_vq(vq);
+ }
+}
+
+void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
+ bool set_handler)
+{
+ if (assign && set_handler) {
+ event_notifier_set_handler(&vq->host_notifier,
+ virtio_queue_host_notifier_read);
+ } else {
+ event_notifier_set_handler(&vq->host_notifier, NULL);
+ }
+ if (!assign) {
+ /* Test and clear notifier before after disabling event,
+ * in case poll callback didn't have time to run. */
+ virtio_queue_host_notifier_read(&vq->host_notifier);
+ }
+}
+
+EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
+{
+ return &vq->host_notifier;
+}
+
+static int virtio_device_init(DeviceState *qdev)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
+ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(qdev);
+ assert(k->init != NULL);
+ if (k->init(vdev) < 0) {
+ return -1;
+ }
+ virtio_bus_plug_device(vdev);
+ return 0;
+}
+
+static void virtio_device_class_init(ObjectClass *klass, void *data)
+{
+ /* Set the default value here. */
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ dc->init = virtio_device_init;
+ dc->bus_type = TYPE_VIRTIO_BUS;
+}
+
+static const TypeInfo virtio_device_info = {
+ .name = TYPE_VIRTIO_DEVICE,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(VirtIODevice),
+ .class_init = virtio_device_class_init,
+ .abstract = true,
+ .class_size = sizeof(VirtioDeviceClass),
+};
+
+static void virtio_register_types(void)
+{
+ type_register_static(&virtio_device_info);
+}
+
+type_init(virtio_register_types)